From eli at dev.mellanox.co.il Fri Feb 1 02:23:49 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 12:23:49 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages Message-ID: <1201861429.6955.31.camel@eli-laptop> The following patches, based on ofed 1.3, are intended to address bugs https://bugs.openfabrics.org/show_bug.cgi?id=760 and https://bugs.openfabrics.org/show_bug.cgi?id=761. They address both the send and receive flows in UD mode and improve performance for small-message UDP traffic. Our observation is that with small UDP messages the message rate is high, so throughput is limited by the CPU, i.e. the CPU is 100% busy. In the send flow I use a dedicated CQ, which is never armed. CQE consumption is done by polling after posting a send message. Also, the QP is configured for selective signaling and polling the CQ is done once every 16 messages. On the receive side the code is changed to post to the receive queue once every 16 completions. This is done for both UD and CM. 0001-IB-ipoib-Split-CQs-for-IPOIB-UD.patch 0002-IB-ipoib-Unsingnalled-UD-QP.patch 0003-IPOIB-post-to-SRQ-every-n-buffers.patch 0004-IB-ipoib-rx-WQE-draft-in-IPOIB-UD.patch 0005-IB-ipoib-IPOIB-rx-post-list.patch Tziporet, please approve for inclusion in ofed 1.3 From eli at dev.mellanox.co.il Fri Feb 1 02:24:33 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 12:24:33 +0200 Subject: [ofa-general] [PATCH 1/5] IB/ipoib: Split CQs for IPOIB UD Message-ID: <1201861473.6955.32.camel@eli-laptop> IB/ipoib: Split CQs for IPOIB UD This is preparation for using an unsignalled QP in UD mode. It uses a dedicated CQ for the UD send. The CQ is not armed and is polled for completions right after sending a packet. This patch and the following patches fix bugs 760 and 761. 
Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 11:42:31.776503000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 14:11:01.107304000 +0200 @@ -254,7 +254,7 @@ repost: "for buf %d\n", wr_id); } -static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id; @@ -279,13 +279,17 @@ static void ipoib_ib_handle_tx_wc(struct dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); + if (need_lock) + spin_lock_irqsave(&priv->tx_lock, flags); + ++priv->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + if (need_lock) + spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) @@ -294,6 +298,15 @@ static void ipoib_ib_handle_tx_wc(struct wc->status, wr_id, wc->vendor_err); } +static void poll_tx(struct ipoib_dev_priv *priv, int need_lock) +{ + int n, i; + + n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc); + for (i = 0; i < n; ++i) + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock); +} + int ipoib_poll(struct napi_struct *napi, int budget) { struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); @@ -309,7 +322,7 @@ poll_more: int max = (budget - done); t = min(IPOIB_NUM_WC, max); - n = ib_poll_cq(priv->cq, t, priv->ibwc); + n = ib_poll_cq(priv->rcq, t, priv->ibwc); for (i = 0; i < n; i++) { struct ib_wc *wc = priv->ibwc + i; @@ -320,12 +333,8 @@ poll_more: ipoib_cm_handle_rx_wc(dev, wc); else 
ipoib_ib_handle_rx_wc(dev, wc); - } else { - if (wc->wr_id & IPOIB_OP_CM) - ipoib_cm_handle_tx_wc(dev, wc); - else - ipoib_ib_handle_tx_wc(dev, wc); - } + } else + ipoib_cm_handle_tx_wc(priv->dev, wc); } if (n != t) @@ -334,7 +343,7 @@ poll_more: if (done < budget) { netif_rx_complete(dev, napi); - if (unlikely(ib_req_notify_cq(priv->cq, + if (unlikely(ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && netif_rx_reschedule(dev, napi)) @@ -344,7 +353,7 @@ poll_more: return done; } -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr) { struct net_device *dev = dev_ptr; struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -352,6 +361,13 @@ void ipoib_ib_completion(struct ib_cq *c netif_rx_schedule(dev, &priv->napi); } +void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + + poll_tx(priv, 1); +} + static inline int post_send(struct ipoib_dev_priv *priv, unsigned int wr_id, struct ib_ah *address, u32 qpn, @@ -471,6 +487,10 @@ void ipoib_send(struct net_device *dev, netif_stop_queue(dev); } } + + if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1)) + poll_tx(priv, 0); + return; drop: @@ -623,7 +643,7 @@ void ipoib_drain_cq(struct net_device *d struct ipoib_dev_priv *priv = netdev_priv(dev); int i, n; do { - n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc); + n = ib_poll_cq(priv->rcq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { /* * Convert any successful completions to flush @@ -642,7 +662,7 @@ void ipoib_drain_cq(struct net_device *d if (priv->ibwc[i].wr_id & IPOIB_OP_CM) ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); else - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); + ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1); } } } while (n == IPOIB_NUM_WC); @@ -737,7 +757,7 @@ timeout: msleep(1); } - ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP); return 0; } Index: 
ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 11:42:32.043502000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 14:16:34.690314000 +0200 @@ -94,6 +94,8 @@ enum { IPOIB_MCAST_FLAG_SENDONLY = 1, IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + + MAX_SEND_CQE = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -348,7 +350,8 @@ struct ipoib_dev_priv { u16 pkey_index; struct ib_pd *pd; struct ib_mr *mr; - struct ib_cq *cq; + struct ib_cq *rcq; + struct ib_cq *scq; struct ib_qp *qp; u32 qkey; @@ -368,7 +371,8 @@ struct ipoib_dev_priv { struct ib_send_wr tx_wr; unsigned tx_outstanding; - struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_wc send_wc[MAX_SEND_CQE]; struct list_head dead_ahs; @@ -449,7 +453,8 @@ extern struct workqueue_struct *ipoib_wo /* functions */ int ipoib_poll(struct napi_struct *napi, int budget); -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr); +void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct ib_ah_attr *attr); @@ -697,7 +702,6 @@ static inline int ipoib_register_debugfs static inline void ipoib_unregister_debugfs(void) { } #endif - #define ipoib_printk(level, priv, format, arg...) \ printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) #define ipoib_warn(priv, format, arg...) 
\ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 11:42:32.175502000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 12:39:19.616627000 +0200 @@ -173,37 +173,42 @@ int ipoib_transport_dev_init(struct net_ goto out_free_pd; } - size = ipoib_sendq_size + ipoib_recvq_size + 1; + size = ipoib_recvq_size; ret = ipoib_cm_dev_init(dev); if (!ret) size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */; - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); - if (IS_ERR(priv->cq)) { - printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); + priv->rcq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, dev, size, 0); + if (IS_ERR(priv->rcq)) { + printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_free_mr; } + priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0); + if (IS_ERR(priv->scq)) { + printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); + goto out_free_rcq; + } + + coal = kzalloc(sizeof *coal, GFP_KERNEL); if (coal) { coal->rx_coalesce_usecs = 10; - coal->tx_coalesce_usecs = 10; coal->rx_max_coalesced_frames = 16; - coal->tx_max_coalesced_frames = 16; dev->ethtool_ops->set_coalesce(dev, coal); kfree(coal); } - if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) - goto out_free_cq; + if (ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP)) + goto out_free_scq; - init_attr.send_cq = priv->cq; - init_attr.recv_cq = priv->cq; + init_attr.send_cq = priv->scq; + init_attr.recv_cq = priv->rcq; priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_cq; + goto out_free_rcq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -219,8 +224,11 @@ int ipoib_transport_dev_init(struct net_ return 0; 
-out_free_cq: - ib_destroy_cq(priv->cq); +out_free_scq: + ib_destroy_cq(priv->scq); + +out_free_rcq: + ib_destroy_cq(priv->rcq); out_free_mr: ib_dereg_mr(priv->mr); @@ -243,7 +251,10 @@ void ipoib_transport_dev_cleanup(struct clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_destroy_cq(priv->cq)) + if (ib_destroy_cq(priv->scq)) + ipoib_warn(priv, "ib_cq_destroy failed\n"); + + if (ib_destroy_cq(priv->rcq)) ipoib_warn(priv, "ib_cq_destroy failed\n"); ipoib_cm_dev_cleanup(dev); Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-31 11:42:31.770505000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-31 12:18:43.243122000 +0200 @@ -199,8 +199,8 @@ static struct ib_qp *ipoib_cm_create_rx_ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { .event_handler = ipoib_cm_rx_event_handler, - .send_cq = priv->cq, /* For drain WR */ - .recv_cq = priv->cq, + .send_cq = priv->rcq, /* For drain WR */ + .recv_cq = priv->rcq, .srq = priv->cm.srq, .cap.max_send_wr = 1, /* For drain WR */ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ @@ -791,8 +791,8 @@ static struct ib_qp *ipoib_cm_create_tx_ { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { - .send_cq = priv->cq, - .recv_cq = priv->cq, + .send_cq = priv->rcq, + .recv_cq = priv->rcq, .srq = priv->cm.srq, .cap.max_send_wr = ipoib_sendq_size, .cap.max_send_sge = 1, Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-01-31 11:42:32.052502000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-01-31 11:42:32.421466000 +0200 @@ -69,7 +69,7 @@ static int ipoib_set_coalesce(struct net coal->tx_max_coalesced_frames > 0xffff) return -EINVAL; - 
ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames, + ret = ib_modify_cq(priv->rcq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret) { ipoib_dbg(priv, "failed modifying CQ\n"); From eli at dev.mellanox.co.il Fri Feb 1 02:25:20 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 12:25:20 +0200 Subject: [ofa-general] [PATCH 4/5] IB/ipoib: rx WQE draft in IPOIB UD Message-ID: <1201861520.6955.35.camel@eli-laptop> IB/ipoib: rx WQE draft in IPOIB UD Put a prepared WQE in the private data to save time in the receive flow. Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-28 10:12:28.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-28 10:18:39.000000000 +0200 @@ -92,21 +92,13 @@ void ipoib_free_ah(struct kref *kref) static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; - - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; + priv->sglist_draft.addr = priv->rx_ring[id].mapping; + priv->rx_wr_draft.wr_id = id | IPOIB_OP_RECV; - ret = ib_post_recv(priv->qp, &param, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr_draft, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, @@ -202,7 +194,7 @@ static void ipoib_ib_handle_rx_wc(struct * Drop packets that this interface sent, ie multicast packets * that the HCA has replicated. 
*/ - if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) + if (unlikely(wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)) goto repost; /* Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 10:12:28.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 10:17:10.000000000 +0200 @@ -327,6 +327,8 @@ struct ipoib_dev_priv { spinlock_t lock; struct net_device *dev; + struct ib_recv_wr rx_wr_draft; + struct ib_sge sglist_draft; struct napi_struct napi; Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-28 10:12:28.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-28 10:17:10.000000000 +0200 @@ -217,6 +217,13 @@ int ipoib_transport_dev_init(struct net_ priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_wr_draft.next = NULL; + priv->rx_wr_draft.sg_list = &priv->sglist_draft; + priv->rx_wr_draft.num_sge = 1; + + priv->sglist_draft.length = IPOIB_BUF_SIZE; + priv->sglist_draft.lkey = priv->mr->lkey; + return 0; out_free_cq: From eli at dev.mellanox.co.il Fri Feb 1 02:24:40 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 12:24:40 +0200 Subject: [ofa-general] [PATCH 2/5] IB/ipoib: Unsingnalled UD QP Message-ID: <1201861480.6955.33.camel@eli-laptop> Unsingnalled UD QP This patch uses an unsignalled QP for UD. Doing this reduces the number of times a CQ has to be polled and, along with the fact that we poll the tx CQ, reduces the overhead on send and improves small-message BW. For example, on my Intel machines, send throughput of 128-byte UDP messages went up from 380 mbps to 508 mbps. 
Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h @@ -373,6 +373,7 @@ struct ipoib_dev_priv { struct ib_wc ibwc[IPOIB_NUM_WC]; struct ib_wc send_wc[MAX_SEND_CQE]; + unsigned int tx_poll; struct list_head dead_ahs; @@ -392,6 +393,8 @@ struct ipoib_dev_priv { struct dentry *path_dentry; #endif struct ipoib_ethtool_st etool; + struct timer_list poll_timer; + struct ib_ah *own_ah; }; struct ipoib_ah { @@ -454,7 +457,6 @@ extern struct workqueue_struct *ipoib_wo int ipoib_poll(struct napi_struct *napi, int budget); void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr); -void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct ib_ah_attr *attr); Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -254,12 +254,10 @@ repost: "for buf %d\n", wr_id); } -static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock) +static void _ipoib_ib_handle_tx_wc(struct net_device *dev, int wr_id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - unsigned int wr_id = wc->wr_id; struct ipoib_tx_buf *tx_req; - unsigned long flags; ipoib_dbg_data(priv, "send completion: id %d, status: %d\n", wr_id, wc->status); @@ -272,39 +270,52 @@ static void ipoib_ib_handle_tx_wc(struct tx_req = &priv->tx_ring[wr_id]; - ipoib_dma_unmap_tx(priv->ca, tx_req); - - ++dev->stats.tx_packets; - dev->stats.tx_bytes += tx_req->skb->len; - - dev_kfree_skb_any(tx_req->skb); - - if (need_lock) - spin_lock_irqsave(&priv->tx_lock, flags); - + if (tx_req->skb) { + ipoib_dma_unmap_tx(priv->ca, 
tx_req); + ++dev->stats.tx_packets; + dev->stats.tx_bytes += tx_req->skb->len; + dev_kfree_skb_any(tx_req->skb); + } ++priv->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); +} + +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id; + int i; + + i = priv->tx_poll; + do { + i &= (ipoib_sendq_size - 1); + _ipoib_ib_handle_tx_wc(dev, i); + } while (i++ != wr_id); + priv->tx_poll = i & (ipoib_sendq_size - 1); - if (need_lock) - spin_unlock_irqrestore(&priv->tx_lock, flags); + if (unlikely(wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR)) - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed send event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); } -static void poll_tx(struct ipoib_dev_priv *priv, int need_lock) +void poll_tx(struct ipoib_dev_priv *priv) { int n, i; - n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc); - for (i = 0; i < n; ++i) - ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock); + while (1) { + n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc); + for (i = 0; i < n; ++i) + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); + + if (n < MAX_SEND_CQE) + break; + } } int ipoib_poll(struct napi_struct *napi, int budget) @@ -361,11 +372,65 @@ void ipoib_ib_rx_completion(struct ib_cq netif_rx_schedule(dev, &priv->napi); } -void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr) +static inline int post_zlen_send_wr(struct ipoib_dev_priv *priv, unsigned wrid) +{ + struct ib_send_wr wr = { + .opcode = IB_WR_SEND, + .send_flags = IB_SEND_SIGNALED, + .wr_id = wrid, + }; + struct ib_send_wr *bad_wr; + + if (!priv->own_ah) + return -EBUSY; + + wr.wr.ud.ah = priv->own_ah; + wr.wr.ud.remote_qpn = priv->qp->qp_num; + return 
ib_post_send(priv->qp, &wr, &bad_wr); +} + +static void ipoib_ib_tx_timer_func(unsigned long dev_ptr) +{ + struct net_device *dev = (struct net_device *)dev_ptr; + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned long flags; + unsigned int wrid; + + spin_lock_irqsave(&priv->tx_lock, flags); + if (((int)priv->tx_tail - (int)priv->tx_head < 0) && + time_after(jiffies, dev->trans_start + 10)) { + wrid = priv->tx_head & (ipoib_sendq_size - 1); + ipoib_dbg(priv, "posting zlen send, wrid = %d: head = %d, tail = %d\n", wrid, + priv->tx_head, priv->tx_tail); + priv->tx_ring[wrid].skb = NULL; + if (post_zlen_send_wr(priv, wrid)) + ipoib_warn(priv, "failed to post zlen send\n"); + else { + ++priv->tx_head; + ++priv->tx_outstanding; + ipoib_dbg(priv, "%s-%d: head = %d\n", __func__, __LINE__, priv->tx_head); + } + } + poll_tx(priv); + spin_unlock_irqrestore(&priv->tx_lock, flags); + + mod_timer(&priv->poll_timer, jiffies + HZ / 2); +} + +static void flush_tx_queue(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + unsigned long flags; + unsigned int wrid; - poll_tx(priv, 1); + spin_lock_irqsave(&priv->tx_lock, flags); + wrid = priv->tx_head & (ipoib_sendq_size - 1); + priv->tx_ring[wrid].skb = NULL; + if (!post_zlen_send_wr(priv, wrid)) { + ++priv->tx_head; + ++priv->tx_outstanding; + } + poll_tx(priv); + spin_unlock_irqrestore(&priv->tx_lock, flags); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -405,6 +470,11 @@ static inline int post_send(struct ipoib } else priv->tx_wr.opcode = IB_WR_SEND; + if (unlikely((priv->tx_head & (MAX_SEND_CQE - 1)) == MAX_SEND_CQE - 1)) + priv->tx_wr.send_flags |= IB_SEND_SIGNALED; + else + priv->tx_wr.send_flags &= ~IB_SEND_SIGNALED; + return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); } @@ -489,7 +559,7 @@ void ipoib_send(struct net_device *dev, } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1)) - poll_tx(priv, 0); + poll_tx(priv); return; @@ -530,6 +600,32 @@ void 
ipoib_reap_ah(struct work_struct *w round_jiffies_relative(HZ)); } +static int create_own_ah(struct ipoib_dev_priv *priv) +{ + struct ib_ah_attr attr = { + .dlid = priv->local_lid, + .port_num = priv->port, + }; + + if (priv->own_ah) { + ipoib_dbg(priv, "own ah already exists\n"); + return -EINVAL; + } + priv->own_ah = ib_create_ah(priv->pd, &attr); + return IS_ERR(priv->own_ah); +} + +static void destroy_own_ah(struct ipoib_dev_priv *priv) +{ + if (!priv->own_ah) { + ipoib_dbg(priv, "destroying an already destroyed own ah\n"); + return; + } + + ib_destroy_ah(priv->own_ah); + priv->own_ah = NULL; +} + int ipoib_ib_dev_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -542,9 +638,17 @@ int ipoib_ib_dev_open(struct net_device } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ret = create_own_ah(priv); + if (ret) { + priv->own_ah = NULL; + ipoib_warn(priv, "failed to create own ah\n"); + return -1; + } + ret = ipoib_init_qp(dev); if (ret) { ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); + destroy_own_ah(priv); return -1; } @@ -566,6 +670,11 @@ int ipoib_ib_dev_open(struct net_device queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, round_jiffies_relative(HZ)); + init_timer(&priv->poll_timer); + priv->poll_timer.function = ipoib_ib_tx_timer_func; + priv->poll_timer.data = (unsigned long)dev; + mod_timer(&priv->poll_timer, jiffies + HZ / 2); + set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); return 0; @@ -662,7 +771,7 @@ void ipoib_drain_cq(struct net_device *d if (priv->ibwc[i].wr_id & IPOIB_OP_CM) ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); else - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1); + ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); } } } while (n == IPOIB_NUM_WC); @@ -673,12 +782,14 @@ int ipoib_ib_dev_stop(struct net_device struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr qp_attr; unsigned long begin; - struct ipoib_tx_buf *tx_req; int i; + unsigned long flags; + del_timer_sync(&priv->poll_timer); 
clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); ipoib_cm_dev_stop(dev); + flush_tx_queue(priv); /* * Move our QP to the error state and then reinitialize in @@ -700,32 +811,30 @@ int ipoib_ib_dev_stop(struct net_device * assume the HW is wedged and just free up * all our pending work requests. */ - while ((int) priv->tx_tail - (int) priv->tx_head < 0) { - tx_req = &priv->tx_ring[priv->tx_tail & - (ipoib_sendq_size - 1)]; - ipoib_dma_unmap_tx(priv->ca, tx_req); - dev_kfree_skb_any(tx_req->skb); - ++priv->tx_tail; - --priv->tx_outstanding; - } - for (i = 0; i < ipoib_recvq_size; ++i) { struct ipoib_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; - if (!rx_req->skb) - continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - dev_kfree_skb_any(rx_req->skb); - rx_req->skb = NULL; + + if (rx_req->skb) { + ib_dma_unmap_single(priv->ca, + rx_req->mapping, + IPOIB_BUF_SIZE, + DMA_FROM_DEVICE); + dev_kfree_skb_any(rx_req->skb); + rx_req->skb = NULL; + } } goto timeout; } + if ((int) priv->tx_tail - (int) priv->tx_head < 0) { + spin_lock_irqsave(&priv->tx_lock, flags); + poll_tx(priv); + spin_unlock_irqrestore(&priv->tx_lock, flags); + } + ipoib_drain_cq(dev); msleep(1); @@ -734,6 +843,7 @@ int ipoib_ib_dev_stop(struct net_device ipoib_dbg(priv, "All sends and receives done.\n"); timeout: + destroy_own_ah(priv); qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -153,7 +153,7 @@ int ipoib_transport_dev_init(struct net_ .max_send_sge = dev->features & NETIF_F_SG ? 
MAX_SKB_FRAGS + 1 : 1, .max_recv_sge = 1 }, - .sq_sig_type = IB_SIGNAL_ALL_WR, + .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_UD, .create_flags = QP_CREATE_LSO, }; @@ -184,7 +184,7 @@ int ipoib_transport_dev_init(struct net_ goto out_free_mr; } - priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0); + priv->scq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0); if (IS_ERR(priv->scq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_rcq; From eli at dev.mellanox.co.il Fri Feb 1 02:25:06 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 12:25:06 +0200 Subject: [ofa-general] [PATCH 3/5] IB/ipoib: post to SRQ every n buffers Message-ID: <1201861506.6955.34.camel@eli-laptop> IB/ipoib: post to SRQ every n buffers To reduce the overhead of posting receive buffers to the SRQ, we do it every 16 received buffers. Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 21:50:46.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 21:52:55.745918000 +0200 @@ -96,6 +96,7 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, MAX_SEND_CQE = 16, + CM_POST_SRQ_COUNT = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -283,6 +284,11 @@ struct ipoib_cm_rx_buf { u64 mapping[IPOIB_CM_RX_SG]; }; +struct ipoib_cm_rx_wr { + struct ib_recv_wr wr; + struct ib_sge rx_sge[IPOIB_CM_RX_SG]; +}; + struct ipoib_cm_dev_priv { struct ib_srq *srq; struct ipoib_cm_rx_buf *srq_ring; @@ -301,10 +307,10 @@ struct ipoib_cm_dev_priv { struct list_head start_list; struct list_head reap_list; struct ib_wc ibwc[IPOIB_NUM_WC]; - struct ib_sge rx_sge[IPOIB_CM_RX_SG]; - struct ib_recv_wr rx_wr; int max_cm_mtu; int num_frags; + struct ipoib_cm_rx_wr *head; + struct ipoib_cm_rx_wr *rx_wr_arr; }; struct ipoib_ethtool_st { Index: 
ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-28 21:50:45.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-28 21:51:45.846437000 +0200 @@ -81,24 +81,46 @@ static void ipoib_cm_dma_unmap_rx(struct ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); } -static int ipoib_cm_post_receive(struct net_device *dev, int id) +static int ipoib_cm_post_receive(struct net_device *dev, int id, int pi) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; - int i, ret; + int i, ret = 0; + struct ipoib_cm_rx_wr *cur; + struct ipoib_cm_rx_wr *prev; + int post; + + ipoib_dbg_data(priv, "posting to id=%d, pi=%d\n", id, pi); + cur = &priv->cm.rx_wr_arr[id]; + prev = &priv->cm.rx_wr_arr[(id - 1) & (ipoib_recvq_size - 1)]; + + prev->wr.next = &cur->wr; + cur->wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; + cur->wr.next = NULL; + - priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; + cur->rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; + + post = pi || (((unsigned long)(cur - priv->cm.head) & (ipoib_recvq_size - 1)) + >= CM_POST_SRQ_COUNT); + if (post) { + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.head->wr, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); + while (bad_wr) { + id = bad_wr->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); + ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, + priv->cm.srq_ring[id].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[id].skb); + priv->cm.srq_ring[id].skb = NULL; + bad_wr = bad_wr->next; + } + } else + priv->cm.head = &priv->cm.rx_wr_arr[(id + 1) & (ipoib_recvq_size - 1)]; + - ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); - if (unlikely(ret)) { - 
ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); - ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, - priv->cm.srq_ring[id].mapping); - dev_kfree_skb_any(priv->cm.srq_ring[id].skb); - priv->cm.srq_ring[id].skb = NULL; } return ret; @@ -483,7 +505,7 @@ void ipoib_cm_handle_rx_wc(struct net_de netif_receive_skb(skb); repost: - if (unlikely(ipoib_cm_post_receive(dev, wr_id))) + if (unlikely(ipoib_cm_post_receive(dev, wr_id, 0))) ipoib_warn(priv, "ipoib_cm_post_receive failed " "for buf %d\n", wr_id); } @@ -1277,7 +1299,7 @@ int ipoib_cm_dev_init(struct net_device .max_wr = ipoib_recvq_size, } }; - int ret, i; + int ret, i, j; struct ib_device_attr attr; INIT_LIST_HEAD(&priv->cm.passive_ids); @@ -1307,8 +1329,14 @@ int ipoib_cm_dev_init(struct net_device srq_init_attr.attr.max_sge = attr.max_srq_sge; + priv->cm.rx_wr_arr = kzalloc(ipoib_recvq_size * sizeof priv->cm.rx_wr_arr[0], + GFP_KERNEL); + if (!priv->cm.rx_wr_arr) + return -ENOMEM; + priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); if (IS_ERR(priv->cm.srq)) { + kfree(priv->cm.rx_wr_arr); ret = PTR_ERR(priv->cm.srq); priv->cm.srq = NULL; return ret; @@ -1328,15 +1356,19 @@ int ipoib_cm_dev_init(struct net_device return -ENOMEM; } - for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].lkey = priv->mr->lkey; + for (j = 0; j < ipoib_recvq_size; ++j) { + for (i = 0; i < priv->cm.num_frags; ++i) + priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey; + + priv->cm.rx_wr_arr[j].rx_sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < priv->cm.num_frags; ++i) + priv->cm.rx_wr_arr[j].rx_sge[i].length = PAGE_SIZE; + + priv->cm.rx_wr_arr[j].wr.sg_list = priv->cm.rx_wr_arr[j].rx_sge; + priv->cm.rx_wr_arr[j].wr.num_sge = priv->cm.num_frags; + } - priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].length = PAGE_SIZE; - priv->cm.rx_wr.next = NULL; - priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = 
priv->cm.num_frags; + priv->cm.head = &priv->cm.rx_wr_arr[0]; for (i = 0; i < ipoib_recvq_size; ++i) { if (!ipoib_cm_alloc_rx_skb(dev, i, priv->cm.num_frags - 1, @@ -1345,7 +1377,7 @@ int ipoib_cm_dev_init(struct net_device ipoib_cm_dev_cleanup(dev); return -ENOMEM; } - if (ipoib_cm_post_receive(dev, i)) { + if (ipoib_cm_post_receive(dev, i, 1)) { ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); ipoib_cm_dev_cleanup(dev); return -EIO; @@ -1375,11 +1407,12 @@ void ipoib_cm_dev_cleanup(struct net_dev return; for (i = 0; i < ipoib_recvq_size; ++i) if (priv->cm.srq_ring[i].skb) { - ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, + ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, priv->cm.srq_ring[i].mapping); dev_kfree_skb_any(priv->cm.srq_ring[i].skb); priv->cm.srq_ring[i].skb = NULL; } kfree(priv->cm.srq_ring); + kfree(priv->cm.rx_wr_arr); priv->cm.srq_ring = NULL; } From eli at dev.mellanox.co.il Fri Feb 1 02:25:25 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 12:25:25 +0200 Subject: [ofa-general] [PATCH 5/5] IB/ipoib: IPOIB rx post list Message-ID: <1201861525.6955.36.camel@eli-laptop> IB/ipoib: IPOIB rx post list Post a list of RX buffers every 16 received packets. This should reduce code cache thrashing by making fewer jumps between the hw driver and ipoib. In any case it improves UD receive flow. Signed-off-by: Eli Cohen --- IB/ipoib: IPOIB rx post list Post a list of RX buffers every 16 received packets. This should reduce code cache thrashing by making fewer jumps between the hw driver and ipoib. In any case it improves receive flow. 
Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 18:49:57.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 18:51:56.117198000 +0200 @@ -97,6 +97,7 @@ enum { MAX_SEND_CQE = 16, CM_POST_SRQ_COUNT = 16, + UD_POST_RCV_COUNT = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -327,9 +328,10 @@ struct ipoib_ethtool_st { struct ipoib_dev_priv { spinlock_t lock; - struct net_device *dev; - struct ib_recv_wr rx_wr_draft; - struct ib_sge sglist_draft; + struct net_device *dev; + struct ib_recv_wr rx_wr_draft[UD_POST_RCV_COUNT]; + struct ib_sge sglist_draft[UD_POST_RCV_COUNT]; + unsigned int rx_outst; struct napi_struct napi; Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 18:49:57.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 18:52:30.452975000 +0200 @@ -89,23 +89,45 @@ void ipoib_free_ah(struct kref *kref) spin_unlock_irqrestore(&priv->lock, flags); } -static int ipoib_ib_post_receive(struct net_device *dev, int id) +static void clean_pending_receives(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_recv_wr *bad_wr; - int ret; - - priv->sglist_draft.addr = priv->rx_ring[id].mapping; - priv->rx_wr_draft.wr_id = id | IPOIB_OP_RECV; + int i; + int id; - ret = ib_post_recv(priv->qp, &priv->rx_wr_draft, &bad_wr); - if (unlikely(ret)) { - ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + for (i = 0; i < priv->rx_outst; ++i) { + id = priv->rx_wr_draft[i].wr_id & ~IPOIB_OP_RECV; ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); 
dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } + priv->rx_outst = 0; +} + +static int ipoib_ib_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int ret = 0; + int i = priv->rx_outst; + + priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; + if (++priv->rx_outst == UD_POST_RCV_COUNT) { + ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); + + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + while (bad_wr) { + id = bad_wr->wr_id & ~IPOIB_OP_RECV; + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } + } + priv->rx_outst = 0; + } return ret; } @@ -791,6 +813,7 @@ int ipoib_ib_dev_stop(struct net_device if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); + clean_pending_receives(priv); /* Wait for all sends and receives to complete */ begin = jiffies; Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 18:49:57.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 18:51:13.310744000 +0200 @@ -222,12 +222,16 @@ int ipoib_transport_dev_init(struct net_ priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; - priv->rx_wr_draft.next = NULL; - priv->rx_wr_draft.sg_list = &priv->sglist_draft; - priv->rx_wr_draft.num_sge = 1; - - priv->sglist_draft.length = IPOIB_BUF_SIZE; - priv->sglist_draft.lkey = priv->mr->lkey; + for (i = 0; i < UD_POST_RCV_COUNT; ++i) { + priv->sglist_draft[i].length = IPOIB_BUF_SIZE; + priv->sglist_draft[i].lkey = priv->mr->lkey; + + priv->rx_wr_draft[i].sg_list = 
&priv->sglist_draft[i]; + priv->rx_wr_draft[i].num_sge = 1; + if (i < UD_POST_RCV_COUNT - 1) + priv->rx_wr_draft[i].next = &priv->rx_wr_draft[i + 1]; + } + priv->rx_wr_draft[i].next = NULL; return 0; From vlad at lists.openfabrics.org Fri Feb 1 03:12:39 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 1 Feb 2008 03:12:39 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080201-0200 daily build status Message-ID: <20080201111239.13E63E6002C@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.19 Passed on powerpc 
with linux-2.6.15 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.20 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.13 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.18-53.el5 Passed on ppc64 with linux-2.6.18-8.el5 Failed: From eli at dev.mellanox.co.il Fri Feb 1 05:40:18 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 15:40:18 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages Message-ID: <1201873218.6677.4.camel@eli-laptop> The following patches, based on ofed 1.3, are intended to address bugs https://bugs.openfabrics.org/show_bug.cgi?id=760 and https://bugs.openfabrics.org/show_bug.cgi?id=761. They address both the send and receive paths in UD mode and improve performance for small-message UDP traffic. The observation we had is that with small UDP messages the message rate is high, so what limits throughput is the CPU, i.e. the CPU is 100% busy. In the send flow I use a dedicated CQ, which is never armed. CQE consumption is done by polling the CQ after posting a send message. Also, the QP is configured for selective signaling and polling the CQ is done once every 16 messages. On the receive side the code is changed to post to the receive queue once every 16 completions. This is done for both UD and CM. 
0001-IB-ipoib-Split-CQs-for-IPOIB-UD.patch 0002-IB-ipoib-Unsingnalled-UD-QP.patch 0003-IPOIB-post-to-SRQ-every-n-buffers.patch 0004-IB-ipoib-rx-WQE-draft-in-IPOIB-UD.patch 0005-IB-ipoib-IPOIB-rx-post-list.patch Tziporet, please approve for inclusion in ofed 1.3 From eli at dev.mellanox.co.il Fri Feb 1 05:40:39 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 15:40:39 +0200 Subject: [ofa-general] [PATCH 1/5] IB/ipoib: Split CQs for IPOIB UD Message-ID: <1201873239.6677.5.camel@eli-laptop> IB/ipoib: Split CQs for IPOIB UD This comes as a preparation for using unsignalled QP in UD mode. It uses a dedicated CQ for the UD send. The CQ is not armed and is polled for completion right after sending a packet. This patch and the following patches fix bugs 760 and 761. Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 11:42:31.776503000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 14:11:01.107304000 +0200 @@ -254,7 +254,7 @@ repost: "for buf %d\n", wr_id); } -static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id; @@ -279,13 +279,17 @@ static void ipoib_ib_handle_tx_wc(struct dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); + if (need_lock) + spin_lock_irqsave(&priv->tx_lock, flags); + ++priv->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + if (need_lock) + spin_unlock_irqrestore(&priv->tx_lock, flags); if (wc->status != IB_WC_SUCCESS && wc->status != 
IB_WC_WR_FLUSH_ERR) @@ -294,6 +298,15 @@ static void ipoib_ib_handle_tx_wc(struct wc->status, wr_id, wc->vendor_err); } +static void poll_tx(struct ipoib_dev_priv *priv, int need_lock) +{ + int n, i; + + n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc); + for (i = 0; i < n; ++i) + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock); +} + int ipoib_poll(struct napi_struct *napi, int budget) { struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); @@ -309,7 +322,7 @@ poll_more: int max = (budget - done); t = min(IPOIB_NUM_WC, max); - n = ib_poll_cq(priv->cq, t, priv->ibwc); + n = ib_poll_cq(priv->rcq, t, priv->ibwc); for (i = 0; i < n; i++) { struct ib_wc *wc = priv->ibwc + i; @@ -320,12 +333,8 @@ poll_more: ipoib_cm_handle_rx_wc(dev, wc); else ipoib_ib_handle_rx_wc(dev, wc); - } else { - if (wc->wr_id & IPOIB_OP_CM) - ipoib_cm_handle_tx_wc(dev, wc); - else - ipoib_ib_handle_tx_wc(dev, wc); - } + } else + ipoib_cm_handle_tx_wc(priv->dev, wc); } if (n != t) @@ -334,7 +343,7 @@ poll_more: if (done < budget) { netif_rx_complete(dev, napi); - if (unlikely(ib_req_notify_cq(priv->cq, + if (unlikely(ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && netif_rx_reschedule(dev, napi)) @@ -344,7 +353,7 @@ poll_more: return done; } -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr) { struct net_device *dev = dev_ptr; struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -352,6 +361,13 @@ void ipoib_ib_completion(struct ib_cq *c netif_rx_schedule(dev, &priv->napi); } +void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + + poll_tx(priv, 1); +} + static inline int post_send(struct ipoib_dev_priv *priv, unsigned int wr_id, struct ib_ah *address, u32 qpn, @@ -471,6 +487,10 @@ void ipoib_send(struct net_device *dev, netif_stop_queue(dev); } } + + if (unlikely(priv->tx_outstanding > 
MAX_SEND_CQE + 1)) + poll_tx(priv, 0); + return; drop: @@ -623,7 +643,7 @@ void ipoib_drain_cq(struct net_device *d struct ipoib_dev_priv *priv = netdev_priv(dev); int i, n; do { - n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc); + n = ib_poll_cq(priv->rcq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { /* * Convert any successful completions to flush @@ -642,7 +662,7 @@ void ipoib_drain_cq(struct net_device *d if (priv->ibwc[i].wr_id & IPOIB_OP_CM) ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); else - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); + ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1); } } } while (n == IPOIB_NUM_WC); @@ -737,7 +757,7 @@ timeout: msleep(1); } - ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP); return 0; } Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 11:42:32.043502000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 14:16:34.690314000 +0200 @@ -94,6 +94,8 @@ enum { IPOIB_MCAST_FLAG_SENDONLY = 1, IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + + MAX_SEND_CQE = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -348,7 +350,8 @@ struct ipoib_dev_priv { u16 pkey_index; struct ib_pd *pd; struct ib_mr *mr; - struct ib_cq *cq; + struct ib_cq *rcq; + struct ib_cq *scq; struct ib_qp *qp; u32 qkey; @@ -368,7 +371,8 @@ struct ipoib_dev_priv { struct ib_send_wr tx_wr; unsigned tx_outstanding; - struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_wc ibwc[IPOIB_NUM_WC]; + struct ib_wc send_wc[MAX_SEND_CQE]; struct list_head dead_ahs; @@ -449,7 +453,8 @@ extern struct workqueue_struct *ipoib_wo /* functions */ int ipoib_poll(struct napi_struct *napi, int budget); -void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); +void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr); +void 
ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct ib_ah_attr *attr); @@ -697,7 +702,6 @@ static inline int ipoib_register_debugfs static inline void ipoib_unregister_debugfs(void) { } #endif - #define ipoib_printk(level, priv, format, arg...) \ printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) #define ipoib_warn(priv, format, arg...) \ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 11:42:32.175502000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 12:39:19.616627000 +0200 @@ -173,37 +173,42 @@ int ipoib_transport_dev_init(struct net_ goto out_free_pd; } - size = ipoib_sendq_size + ipoib_recvq_size + 1; + size = ipoib_recvq_size; ret = ipoib_cm_dev_init(dev); if (!ret) size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */; - priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); - if (IS_ERR(priv->cq)) { - printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); + priv->rcq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, dev, size, 0); + if (IS_ERR(priv->rcq)) { + printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); goto out_free_mr; } + priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0); + if (IS_ERR(priv->scq)) { + printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); + goto out_free_rcq; + } + + coal = kzalloc(sizeof *coal, GFP_KERNEL); if (coal) { coal->rx_coalesce_usecs = 10; - coal->tx_coalesce_usecs = 10; coal->rx_max_coalesced_frames = 16; - coal->tx_max_coalesced_frames = 16; dev->ethtool_ops->set_coalesce(dev, coal); kfree(coal); } - if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) - goto out_free_cq; + if (ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP)) + 
goto out_free_scq; - init_attr.send_cq = priv->cq; - init_attr.recv_cq = priv->cq; + init_attr.send_cq = priv->scq; + init_attr.recv_cq = priv->rcq; priv->qp = ib_create_qp(priv->pd, &init_attr); if (IS_ERR(priv->qp)) { printk(KERN_WARNING "%s: failed to create QP\n", ca->name); - goto out_free_cq; + goto out_free_rcq; } priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; @@ -219,8 +224,11 @@ int ipoib_transport_dev_init(struct net_ return 0; -out_free_cq: - ib_destroy_cq(priv->cq); +out_free_scq: + ib_destroy_cq(priv->scq); + +out_free_rcq: + ib_destroy_cq(priv->rcq); out_free_mr: ib_dereg_mr(priv->mr); @@ -243,7 +251,10 @@ void ipoib_transport_dev_cleanup(struct clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); } - if (ib_destroy_cq(priv->cq)) + if (ib_destroy_cq(priv->scq)) + ipoib_warn(priv, "ib_cq_destroy failed\n"); + + if (ib_destroy_cq(priv->rcq)) ipoib_warn(priv, "ib_cq_destroy failed\n"); ipoib_cm_dev_cleanup(dev); Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-31 11:42:31.770505000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-31 12:18:43.243122000 +0200 @@ -199,8 +199,8 @@ static struct ib_qp *ipoib_cm_create_rx_ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { .event_handler = ipoib_cm_rx_event_handler, - .send_cq = priv->cq, /* For drain WR */ - .recv_cq = priv->cq, + .send_cq = priv->rcq, /* For drain WR */ + .recv_cq = priv->rcq, .srq = priv->cm.srq, .cap.max_send_wr = 1, /* For drain WR */ .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ @@ -791,8 +791,8 @@ static struct ib_qp *ipoib_cm_create_tx_ { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_init_attr attr = { - .send_cq = priv->cq, - .recv_cq = priv->cq, + .send_cq = priv->rcq, + .recv_cq = priv->rcq, .srq = priv->cm.srq, .cap.max_send_wr = ipoib_sendq_size, 
.cap.max_send_sge = 1, Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-01-31 11:42:32.052502000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-01-31 11:42:32.421466000 +0200 @@ -69,7 +69,7 @@ static int ipoib_set_coalesce(struct net coal->tx_max_coalesced_frames > 0xffff) return -EINVAL; - ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames, + ret = ib_modify_cq(priv->rcq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); if (ret) { ipoib_dbg(priv, "failed modifying CQ\n"); From eli at dev.mellanox.co.il Fri Feb 1 05:40:49 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 15:40:49 +0200 Subject: [ofa-general] [PATCH 2/5] IB/ipoib: Unsingnalled UD QP Message-ID: <1201873249.6677.6.camel@eli-laptop> Unsignalled UD QP This patch uses an unsignalled QP for UD. Doing this reduces the number of times the CQ has to be polled and, along with the fact that we poll the tx CQ, reduces the overhead on send and improves small-message bandwidth. For example, on my Intel machines, the send throughput of 128-byte UDP messages went up from 380 Mbps to 508 Mbps. 
Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h @@ -373,6 +373,7 @@ struct ipoib_dev_priv { struct ib_wc ibwc[IPOIB_NUM_WC]; struct ib_wc send_wc[MAX_SEND_CQE]; + unsigned int tx_poll; struct list_head dead_ahs; @@ -392,6 +393,8 @@ struct ipoib_dev_priv { struct dentry *path_dentry; #endif struct ipoib_ethtool_st etool; + struct timer_list poll_timer; + struct ib_ah *own_ah; }; struct ipoib_ah { @@ -454,7 +457,6 @@ extern struct workqueue_struct *ipoib_wo int ipoib_poll(struct napi_struct *napi, int budget); void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr); -void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct ib_ah_attr *attr); Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -254,12 +254,10 @@ repost: "for buf %d\n", wr_id); } -static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock) +static void _ipoib_ib_handle_tx_wc(struct net_device *dev, int wr_id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - unsigned int wr_id = wc->wr_id; struct ipoib_tx_buf *tx_req; - unsigned long flags; ipoib_dbg_data(priv, "send completion: id %d, status: %d\n", wr_id, wc->status); @@ -272,39 +270,52 @@ static void ipoib_ib_handle_tx_wc(struct tx_req = &priv->tx_ring[wr_id]; - ipoib_dma_unmap_tx(priv->ca, tx_req); - - ++dev->stats.tx_packets; - dev->stats.tx_bytes += tx_req->skb->len; - - dev_kfree_skb_any(tx_req->skb); - - if (need_lock) - spin_lock_irqsave(&priv->tx_lock, flags); - + if (tx_req->skb) { + ipoib_dma_unmap_tx(priv->ca, 
tx_req); + ++dev->stats.tx_packets; + dev->stats.tx_bytes += tx_req->skb->len; + dev_kfree_skb_any(tx_req->skb); + } ++priv->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(dev); +} + +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned int wr_id = wc->wr_id; + int i; + + i = priv->tx_poll; + do { + i &= (ipoib_sendq_size - 1); + _ipoib_ib_handle_tx_wc(dev, i); + } while (i++ != wr_id); + priv->tx_poll = i & (ipoib_sendq_size - 1); - if (need_lock) - spin_unlock_irqrestore(&priv->tx_lock, flags); + if (unlikely(wc->status != IB_WC_SUCCESS && + wc->status != IB_WC_WR_FLUSH_ERR)) - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed send event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); } -static void poll_tx(struct ipoib_dev_priv *priv, int need_lock) +void poll_tx(struct ipoib_dev_priv *priv) { int n, i; - n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc); - for (i = 0; i < n; ++i) - ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock); + while (1) { + n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc); + for (i = 0; i < n; ++i) + ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i); + + if (n < MAX_SEND_CQE) + break; + } } int ipoib_poll(struct napi_struct *napi, int budget) @@ -361,11 +372,65 @@ void ipoib_ib_rx_completion(struct ib_cq netif_rx_schedule(dev, &priv->napi); } -void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr) +static inline int post_zlen_send_wr(struct ipoib_dev_priv *priv, unsigned wrid) +{ + struct ib_send_wr wr = { + .opcode = IB_WR_SEND, + .send_flags = IB_SEND_SIGNALED, + .wr_id = wrid, + }; + struct ib_send_wr *bad_wr; + + if (!priv->own_ah) + return -EBUSY; + + wr.wr.ud.ah = priv->own_ah; + wr.wr.ud.remote_qpn = priv->qp->qp_num; + return 
ib_post_send(priv->qp, &wr, &bad_wr); +} + +static void ipoib_ib_tx_timer_func(unsigned long dev_ptr) +{ + struct net_device *dev = (struct net_device *)dev_ptr; + struct ipoib_dev_priv *priv = netdev_priv(dev); + unsigned long flags; + unsigned int wrid; + + spin_lock_irqsave(&priv->tx_lock, flags); + if (((int)priv->tx_tail - (int)priv->tx_head < 0) && + time_after(jiffies, dev->trans_start + 10)) { + wrid = priv->tx_head & (ipoib_sendq_size - 1); + ipoib_dbg(priv, "posting zlen send, wrid = %d: head = %d, tail = %d\n", wrid, + priv->tx_head, priv->tx_tail); + priv->tx_ring[wrid].skb = NULL; + if (post_zlen_send_wr(priv, wrid)) + ipoib_warn(priv, "failed to post zlen send\n"); + else { + ++priv->tx_head; + ++priv->tx_outstanding; + ipoib_dbg(priv, "%s-%d: head = %d\n", __func__, __LINE__, priv->tx_head); + } + } + poll_tx(priv); + spin_unlock_irqrestore(&priv->tx_lock, flags); + + mod_timer(&priv->poll_timer, jiffies + HZ / 2); +} + +static void flush_tx_queue(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + unsigned long flags; + unsigned int wrid; - poll_tx(priv, 1); + spin_lock_irqsave(&priv->tx_lock, flags); + wrid = priv->tx_head & (ipoib_sendq_size - 1); + priv->tx_ring[wrid].skb = NULL; + if (!post_zlen_send_wr(priv, wrid)) { + ++priv->tx_head; + ++priv->tx_outstanding; + } + poll_tx(priv); + spin_unlock_irqrestore(&priv->tx_lock, flags); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -405,6 +470,11 @@ static inline int post_send(struct ipoib } else priv->tx_wr.opcode = IB_WR_SEND; + if (unlikely((priv->tx_head & (MAX_SEND_CQE - 1)) == MAX_SEND_CQE - 1)) + priv->tx_wr.send_flags |= IB_SEND_SIGNALED; + else + priv->tx_wr.send_flags &= ~IB_SEND_SIGNALED; + return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); } @@ -489,7 +559,7 @@ void ipoib_send(struct net_device *dev, } if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1)) - poll_tx(priv, 0); + poll_tx(priv); return; @@ -530,6 +600,32 @@ void 
ipoib_reap_ah(struct work_struct *w round_jiffies_relative(HZ)); } +static int create_own_ah(struct ipoib_dev_priv *priv) +{ + struct ib_ah_attr attr = { + .dlid = priv->local_lid, + .port_num = priv->port, + }; + + if (priv->own_ah) { + ipoib_dbg(priv, "own ah already exists\n"); + return -EINVAL; + } + priv->own_ah = ib_create_ah(priv->pd, &attr); + return IS_ERR(priv->own_ah); +} + +static void destroy_own_ah(struct ipoib_dev_priv *priv) +{ + if (!priv->own_ah) { + ipoib_dbg(priv, "destroying an already destroyed own ah\n"); + return; + } + + ib_destroy_ah(priv->own_ah); + priv->own_ah = NULL; +} + int ipoib_ib_dev_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -542,9 +638,17 @@ int ipoib_ib_dev_open(struct net_device } set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ret = create_own_ah(priv); + if (ret) { + priv->own_ah = NULL; + ipoib_warn(priv, "failed to create own ah\n"); + return -1; + } + ret = ipoib_init_qp(dev); if (ret) { ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); + destroy_own_ah(priv); return -1; } @@ -566,6 +670,11 @@ int ipoib_ib_dev_open(struct net_device queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, round_jiffies_relative(HZ)); + init_timer(&priv->poll_timer); + priv->poll_timer.function = ipoib_ib_tx_timer_func; + priv->poll_timer.data = (unsigned long)dev; + mod_timer(&priv->poll_timer, jiffies + HZ / 2); + set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); return 0; @@ -662,7 +771,7 @@ void ipoib_drain_cq(struct net_device *d if (priv->ibwc[i].wr_id & IPOIB_OP_CM) ipoib_cm_handle_tx_wc(dev, priv->ibwc + i); else - ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1); + ipoib_ib_handle_tx_wc(dev, priv->ibwc + i); } } } while (n == IPOIB_NUM_WC); @@ -673,12 +782,14 @@ int ipoib_ib_dev_stop(struct net_device struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr qp_attr; unsigned long begin; - struct ipoib_tx_buf *tx_req; int i; + unsigned long flags; + del_timer_sync(&priv->poll_timer); 
clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); ipoib_cm_dev_stop(dev); + flush_tx_queue(priv); /* * Move our QP to the error state and then reinitialize in @@ -700,32 +811,30 @@ int ipoib_ib_dev_stop(struct net_device * assume the HW is wedged and just free up * all our pending work requests. */ - while ((int) priv->tx_tail - (int) priv->tx_head < 0) { - tx_req = &priv->tx_ring[priv->tx_tail & - (ipoib_sendq_size - 1)]; - ipoib_dma_unmap_tx(priv->ca, tx_req); - dev_kfree_skb_any(tx_req->skb); - ++priv->tx_tail; - --priv->tx_outstanding; - } - for (i = 0; i < ipoib_recvq_size; ++i) { struct ipoib_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; - if (!rx_req->skb) - continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - dev_kfree_skb_any(rx_req->skb); - rx_req->skb = NULL; + + if (rx_req->skb) { + ib_dma_unmap_single(priv->ca, + rx_req->mapping, + IPOIB_BUF_SIZE, + DMA_FROM_DEVICE); + dev_kfree_skb_any(rx_req->skb); + rx_req->skb = NULL; + } } goto timeout; } + if ((int) priv->tx_tail - (int) priv->tx_head < 0) { + spin_lock_irqsave(&priv->tx_lock, flags); + poll_tx(priv); + spin_unlock_irqrestore(&priv->tx_lock, flags); + } + ipoib_drain_cq(dev); msleep(1); @@ -734,6 +843,7 @@ int ipoib_ib_dev_stop(struct net_device ipoib_dbg(priv, "All sends and receives done.\n"); timeout: + destroy_own_ah(priv); qp_attr.qp_state = IB_QPS_RESET; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to RESET state\n"); Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -153,7 +153,7 @@ int ipoib_transport_dev_init(struct net_ .max_send_sge = dev->features & NETIF_F_SG ? 
MAX_SKB_FRAGS + 1 : 1, .max_recv_sge = 1 }, - .sq_sig_type = IB_SIGNAL_ALL_WR, + .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_UD, .create_flags = QP_CREATE_LSO, }; @@ -184,7 +184,7 @@ int ipoib_transport_dev_init(struct net_ goto out_free_mr; } - priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0); + priv->scq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0); if (IS_ERR(priv->scq)) { printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); goto out_free_rcq; From eli at dev.mellanox.co.il Fri Feb 1 05:41:05 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 15:41:05 +0200 Subject: [ofa-general] [PATCH 3/5] IB/ipoib: post to SRQ every n buffers Message-ID: <1201873265.6677.7.camel@eli-laptop> IB/ipoib: post to SRQ every n buffers To reduce the overhead of posting receive buffers to the SRQ, we do it every 16 received buffers. Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 21:50:46.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 21:52:55.745918000 +0200 @@ -96,6 +96,7 @@ enum { IPOIB_MCAST_FLAG_ATTACHED = 3, MAX_SEND_CQE = 16, + CM_POST_SRQ_COUNT = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -283,6 +284,11 @@ struct ipoib_cm_rx_buf { u64 mapping[IPOIB_CM_RX_SG]; }; +struct ipoib_cm_rx_wr { + struct ib_recv_wr wr; + struct ib_sge rx_sge[IPOIB_CM_RX_SG]; +}; + struct ipoib_cm_dev_priv { struct ib_srq *srq; struct ipoib_cm_rx_buf *srq_ring; @@ -301,10 +307,10 @@ struct ipoib_cm_dev_priv { struct list_head start_list; struct list_head reap_list; struct ib_wc ibwc[IPOIB_NUM_WC]; - struct ib_sge rx_sge[IPOIB_CM_RX_SG]; - struct ib_recv_wr rx_wr; int max_cm_mtu; int num_frags; + struct ipoib_cm_rx_wr *head; + struct ipoib_cm_rx_wr *rx_wr_arr; }; struct ipoib_ethtool_st { Index: 
ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-28 21:50:45.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-28 21:51:45.846437000 +0200 @@ -81,24 +81,46 @@ static void ipoib_cm_dma_unmap_rx(struct ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); } -static int ipoib_cm_post_receive(struct net_device *dev, int id) +static int ipoib_cm_post_receive(struct net_device *dev, int id, int pi) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_recv_wr *bad_wr; - int i, ret; + int i, ret = 0; + struct ipoib_cm_rx_wr *cur; + struct ipoib_cm_rx_wr *prev; + int post; + + ipoib_dbg_data(priv, "posting to id=%d, pi=%d\n", id, pi); + cur = &priv->cm.rx_wr_arr[id]; + prev = &priv->cm.rx_wr_arr[(id - 1) & (ipoib_recvq_size - 1)]; + + prev->wr.next = &cur->wr; + cur->wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; + cur->wr.next = NULL; + - priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; + cur->rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; + + post = pi || (((unsigned long)(cur - priv->cm.head) & (ipoib_recvq_size - 1)) + >= CM_POST_SRQ_COUNT); + if (post) { + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.head->wr, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); + while (bad_wr) { + id = bad_wr->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); + ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, + priv->cm.srq_ring[id].mapping); + dev_kfree_skb_any(priv->cm.srq_ring[id].skb); + priv->cm.srq_ring[id].skb = NULL; + bad_wr = bad_wr->next; + } + } else + priv->cm.head = &priv->cm.rx_wr_arr[(id + 1) & (ipoib_recvq_size - 1)]; + - ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); - if (unlikely(ret)) { - 
ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); - ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, - priv->cm.srq_ring[id].mapping); - dev_kfree_skb_any(priv->cm.srq_ring[id].skb); - priv->cm.srq_ring[id].skb = NULL; } return ret; @@ -483,7 +505,7 @@ void ipoib_cm_handle_rx_wc(struct net_de netif_receive_skb(skb); repost: - if (unlikely(ipoib_cm_post_receive(dev, wr_id))) + if (unlikely(ipoib_cm_post_receive(dev, wr_id, 0))) ipoib_warn(priv, "ipoib_cm_post_receive failed " "for buf %d\n", wr_id); } @@ -1277,7 +1299,7 @@ int ipoib_cm_dev_init(struct net_device .max_wr = ipoib_recvq_size, } }; - int ret, i; + int ret, i, j; struct ib_device_attr attr; INIT_LIST_HEAD(&priv->cm.passive_ids); @@ -1307,8 +1329,14 @@ int ipoib_cm_dev_init(struct net_device srq_init_attr.attr.max_sge = attr.max_srq_sge; + priv->cm.rx_wr_arr = kzalloc(ipoib_recvq_size * sizeof priv->cm.rx_wr_arr[0], + GFP_KERNEL); + if (!priv->cm.rx_wr_arr) + return -ENOMEM; + priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); if (IS_ERR(priv->cm.srq)) { + kfree(priv->cm.rx_wr_arr); ret = PTR_ERR(priv->cm.srq); priv->cm.srq = NULL; return ret; @@ -1328,15 +1356,19 @@ int ipoib_cm_dev_init(struct net_device return -ENOMEM; } - for (i = 0; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].lkey = priv->mr->lkey; + for (j = 0; j < ipoib_recvq_size; ++j) { + for (i = 0; i < priv->cm.num_frags; ++i) + priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey; + + priv->cm.rx_wr_arr[j].rx_sge[0].length = IPOIB_CM_HEAD_SIZE; + for (i = 1; i < priv->cm.num_frags; ++i) + priv->cm.rx_wr_arr[j].rx_sge[i].length = PAGE_SIZE; + + priv->cm.rx_wr_arr[j].wr.sg_list = priv->cm.rx_wr_arr[j].rx_sge; + priv->cm.rx_wr_arr[j].wr.num_sge = priv->cm.num_frags; + } - priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < priv->cm.num_frags; ++i) - priv->cm.rx_sge[i].length = PAGE_SIZE; - priv->cm.rx_wr.next = NULL; - priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = 
priv->cm.num_frags; + priv->cm.head = &priv->cm.rx_wr_arr[0]; for (i = 0; i < ipoib_recvq_size; ++i) { if (!ipoib_cm_alloc_rx_skb(dev, i, priv->cm.num_frags - 1, @@ -1345,7 +1377,7 @@ int ipoib_cm_dev_init(struct net_device ipoib_cm_dev_cleanup(dev); return -ENOMEM; } - if (ipoib_cm_post_receive(dev, i)) { + if (ipoib_cm_post_receive(dev, i, 1)) { ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i); ipoib_cm_dev_cleanup(dev); return -EIO; @@ -1375,11 +1407,12 @@ void ipoib_cm_dev_cleanup(struct net_dev return; for (i = 0; i < ipoib_recvq_size; ++i) if (priv->cm.srq_ring[i].skb) { - ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, + ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, priv->cm.srq_ring[i].mapping); dev_kfree_skb_any(priv->cm.srq_ring[i].skb); priv->cm.srq_ring[i].skb = NULL; } kfree(priv->cm.srq_ring); + kfree(priv->cm.rx_wr_arr); priv->cm.srq_ring = NULL; } From eli at dev.mellanox.co.il Fri Feb 1 05:41:15 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 15:41:15 +0200 Subject: [ofa-general] [PATCH 4/5] IB/ipoib: rx WQE draft in IPOIB UD Message-ID: <1201873275.6677.8.camel@eli-laptop> IB/ipoib: rx WQE draft in IPOIB UD Put a prepared WQE in the private data to save time in the receive flow. 
Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-28 10:12:28.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-28 10:18:39.000000000 +0200 @@ -92,21 +92,13 @@ void ipoib_free_ah(struct kref *kref) static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; - - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; + priv->sglist_draft.addr = priv->rx_ring[id].mapping; + priv->rx_wr_draft.wr_id = id | IPOIB_OP_RECV; - ret = ib_post_recv(priv->qp, &param, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr_draft, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, @@ -202,7 +194,7 @@ static void ipoib_ib_handle_rx_wc(struct * Drop packets that this interface sent, ie multicast packets * that the HCA has replicated.
*/ - if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) + if (unlikely(wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)) goto repost; /* Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 10:12:28.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 10:17:10.000000000 +0200 @@ -327,6 +327,8 @@ struct ipoib_dev_priv { spinlock_t lock; struct net_device *dev; + struct ib_recv_wr rx_wr_draft; + struct ib_sge sglist_draft; struct napi_struct napi; Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-28 10:12:28.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-28 10:17:10.000000000 +0200 @@ -217,6 +217,13 @@ int ipoib_transport_dev_init(struct net_ priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_wr_draft.next = NULL; + priv->rx_wr_draft.sg_list = &priv->sglist_draft; + priv->rx_wr_draft.num_sge = 1; + + priv->sglist_draft.length = IPOIB_BUF_SIZE; + priv->sglist_draft.lkey = priv->mr->lkey; + return 0; out_free_cq: From eli at dev.mellanox.co.il Fri Feb 1 05:41:20 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 01 Feb 2008 15:41:20 +0200 Subject: [ofa-general] [PATCH 5/5] IB/ipoib: IPOIB rx post list Message-ID: <1201873280.6677.9.camel@eli-laptop> IB/ipoib: IPOIB rx post list Post a list of RX buffers every 16 received packets. This should reduce code cache thrashing by making fewer jumps between the hw driver and ipoib. In any case it improves UD receive flow. Signed-off-by: Eli Cohen --- IB/ipoib: IPOIB rx post list Post a list of RX buffers every 16 received packets.
This should reduce code cache thrashing by making fewer jumps between the hw driver and ipoib. In any case it improves receive flow. Signed-off-by: Eli Cohen --- Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 18:49:57.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 18:51:56.117198000 +0200 @@ -97,6 +97,7 @@ enum { MAX_SEND_CQE = 16, CM_POST_SRQ_COUNT = 16, + UD_POST_RCV_COUNT = 16, }; #define IPOIB_OP_RECV (1ul << 31) @@ -327,9 +328,10 @@ struct ipoib_ethtool_st { struct ipoib_dev_priv { spinlock_t lock; - struct net_device *dev; - struct ib_recv_wr rx_wr_draft; - struct ib_sge sglist_draft; + struct net_device *dev; + struct ib_recv_wr rx_wr_draft[UD_POST_RCV_COUNT]; + struct ib_sge sglist_draft[UD_POST_RCV_COUNT]; + unsigned int rx_outst; struct napi_struct napi; Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 18:49:57.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 18:52:30.452975000 +0200 @@ -89,23 +89,45 @@ void ipoib_free_ah(struct kref *kref) spin_unlock_irqrestore(&priv->lock, flags); } -static int ipoib_ib_post_receive(struct net_device *dev, int id) +static void clean_pending_receives(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_recv_wr *bad_wr; - int ret; - - priv->sglist_draft.addr = priv->rx_ring[id].mapping; - priv->rx_wr_draft.wr_id = id | IPOIB_OP_RECV; + int i; + int id; - ret = ib_post_recv(priv->qp, &priv->rx_wr_draft, &bad_wr); - if (unlikely(ret)) { - ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + for (i = 0; i < priv->rx_outst; ++i) { + id = priv->rx_wr_draft[i].wr_id & ~IPOIB_OP_RECV; ib_dma_unmap_single(priv->ca,
priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } + priv->rx_outst = 0; +} + +static int ipoib_ib_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int ret = 0; + int i = priv->rx_outst; + + priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; + if (++priv->rx_outst == UD_POST_RCV_COUNT) { + ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); + + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + while (bad_wr) { + id = bad_wr->wr_id & ~IPOIB_OP_RECV; + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } + } + priv->rx_outst = 0; + } return ret; } @@ -791,6 +813,7 @@ int ipoib_ib_dev_stop(struct net_device if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); + clean_pending_receives(priv); /* Wait for all sends and receives to complete */ begin = jiffies; Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 18:49:57.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 18:51:13.310744000 +0200 @@ -222,12 +222,16 @@ int ipoib_transport_dev_init(struct net_ priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; - priv->rx_wr_draft.next = NULL; - priv->rx_wr_draft.sg_list = &priv->sglist_draft; - priv->rx_wr_draft.num_sge = 1; - - priv->sglist_draft.length = IPOIB_BUF_SIZE; - priv->sglist_draft.lkey = priv->mr->lkey; + for (i = 0; i < UD_POST_RCV_COUNT; ++i) { + priv->sglist_draft[i].length = 
IPOIB_BUF_SIZE; + priv->sglist_draft[i].lkey = priv->mr->lkey; + + priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i]; + priv->rx_wr_draft[i].num_sge = 1; + if (i < UD_POST_RCV_COUNT - 1) + priv->rx_wr_draft[i].next = &priv->rx_wr_draft[i + 1]; + } + priv->rx_wr_draft[i].next = NULL; return 0; From pawel.dziekonski at pwr.wroc.pl Fri Feb 1 07:19:02 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Fri, 1 Feb 2008 16:19:02 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: References: <20080123194728.GA10437@cefeid.wcss.wroc.pl> <4797AD59.2000206@mellanox.co.il> <20080126193035.GA21209@cefeid.wcss.wroc.pl> <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> Message-ID: <20080201151902.GA16264@cefeid.wcss.wroc.pl> On Wed, 30 Jan 2008 at 02:04:17PM -0500, James Lentini wrote: > > # mount.rnfs -o rdma=10.2.2.1 10.2.2.1:/scratch /mnt > > Doing nfs/rdma mount to 10.2.2.1, mount protocol to 10.2.2.1 > > nfsmount: Invalid argument > > Are you using the mount.nfs command you built from nfs-utils-1.1.1? If > you installed nfs-utils, you should be doing something like this > (mount will redirect to /sbin/mount.nfs if it is present): > > /sbin/mount :/ /mnt -i -o rdma,port=2050 > > There is more info here: > > http://nfs-rdma.sourceforge.net/Documents/README hi, according to this page: # modprobe svc_rdma FATAL: Module svc_rdma not found. ??? # mount 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 Unsupported nfs mount option: rdma looks like I definitelly need a rdma-enabled mount, which comes in http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so: # mount.rnfs -h Usage: mount.rnfs [-o rdma[=host][,...] [-t nfs/nfs4]] # mount.rnfs -o rdma 10.2.2.1:/scratch /mnt Doing nfs/rdma mount to 10.2.2.1, mount protocol to 10.2.2.1 nfsmount: Invalid argument ??? -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. 
D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From hartlch14 at gmail.com Fri Feb 1 07:31:22 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 1 Feb 2008 10:31:22 -0500 Subject: [ofa-general] Question about exchanging DAT_RMR_TRIPLET Message-ID: Hello, We are doing RDMA transfers using UDAPL and just added a machine with a different endianness than the rest of the current machines. We register a memory region and get a DAT_RMR_TRIPLET that we then send to the remote machine. On the receiving end, are you supposed to use the triplet as is, or do you have to byte swap it to the native order or some specific endianness? I couldn't find anything in the UDAPL document addressing this. On a related issue, we register a single large memory region that we are doing transfers from. However, we are doing small transfers at varying offsets within that region. We are currently doing this by modifying the address field of the DAT_RMR_TRIPLET we received. Is this an ok thing to do, or is there some other way to do RDMA transfers of varying size/offset from within a single registered memory region? Thanks, Chuck -------------- next part -------------- An HTML attachment was scrubbed... URL: From jlentini at netapp.com Fri Feb 1 07:56:55 2008 From: jlentini at netapp.com (James Lentini) Date: Fri, 1 Feb 2008 10:56:55 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? 
In-Reply-To: <20080201151902.GA16264@cefeid.wcss.wroc.pl> References: <20080123194728.GA10437@cefeid.wcss.wroc.pl> <4797AD59.2000206@mellanox.co.il> <20080126193035.GA21209@cefeid.wcss.wroc.pl> <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> Message-ID: On Fri, 1 Feb 2008, Pawel Dziekonski wrote: > On Wed, 30 Jan 2008 at 02:04:17PM -0500, James Lentini wrote: > > > > # mount.rnfs -o rdma=10.2.2.1 10.2.2.1:/scratch /mnt > > > Doing nfs/rdma mount to 10.2.2.1, mount protocol to 10.2.2.1 > > > nfsmount: Invalid argument > > > > Are you using the mount.nfs command you built from nfs-utils-1.1.1? If > > you installed nfs-utils, you should be doing something like this > > (mount will redirect to /sbin/mount.nfs if it is present): > > > > /sbin/mount :/ /mnt -i -o rdma,port=2050 > > > > There is more info here: > > > > http://nfs-rdma.sourceforge.net/Documents/README > > hi, > > according to this page: > > # modprobe svc_rdma > FATAL: Module svc_rdma not found. > > ??? That is a typo, it should be # modprobe svcrdma Tom's tree has the patch I sent here: http://article.gmane.org/gmane.linux.nfs/18545/match= to simplify the NFS/RDMA build process. If CONFIG_SUNRPC_XPRT_RDMA is set to M (run "grep XPRT_RDMA /your/server/sources/.config" to check), you will need to load the module as shown above. If it is Y, you do not. It will be built in. Thanks for drawing this to my attention. I'll update the instructions. > # mount 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 Unsupported nfs > mount option: rdma > > looks like I definitelly need a rdma-enabled mount, which comes in > http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so: If you are using the NFS/RDMA client in 2.6.24, you need the version of the mount.nfs command in nfs-utils-1.1.1 or greater. The "Unsupported nfs mount option: rdma" error message makes me suspect you are not using the correct version of mount.nfs. 
What is the output of "mount.nfs -V" and "mount -V"? It may be that the version of mount you are using does not automatically invoke mount.nfs for nfs mounts. I'd suggest specifying mount.nfs in the command above: mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 > # mount.rnfs -h > Usage: mount.rnfs [-o rdma[=host][,...] [-t nfs/nfs4]] > > # mount.rnfs -o rdma 10.2.2.1:/scratch /mnt > Doing nfs/rdma mount to 10.2.2.1, mount protocol to 10.2.2.1 > nfsmount: Invalid argument > > ??? > > > > -- > Pawel Dziekonski > Wroclaw Centre for Networking & Supercomputing, HPC Department > Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND > phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl > From pradeeps at linux.vnet.ibm.com Fri Feb 1 08:20:24 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Fri, 01 Feb 2008 08:20:24 -0800 Subject: [ofa-general] [Fwd: Re: non SRQ patch for OFED 1.3] -need some help Message-ID: <47A346C8.7010705@linux.vnet.ibm.com> I tried running ofed_scripts/ofed_makedist.sh before and after copying my patch to kernel_patches/fixes. In both cases makedist.sh seems to complete without errors and creates the tar.gz files for the various kernels. In short I am unable to reproduce the problem that Tziporet mentions. Any tips or pointers to resolve this issue would be appreciated. Thanks! Pradeep -------------- next part -------------- An embedded message was scrubbed... From: Tziporet Koren Subject: Re: non SRQ patch for OFED 1.3 Date: Thu, 31 Jan 2008 16:00:38 +0200 Size: 4582 URL: From pwatkins at sicortex.com Fri Feb 1 08:33:44 2008 From: pwatkins at sicortex.com (Peter Watkins) Date: Fri, 01 Feb 2008 11:33:44 -0500 Subject: [ofa-general] [PATCH] OFED 1.2.5.5 compile fails "too many args to sk_eat_skb()" Message-ID: <47A349E8.6020105@sicortex.com> OFED 1.2.5.5 fails to compile sdp code due to "too many arguments to sk_eat_skb(). 
# cat /etc/SuSE-release SUSE LINUX 10.1 (X86-64) VERSION = 10.1 # uname -a Linux pcitestbed 2.6.16.13-4-default #1 Wed May 3 04:53:23 UTC 2006 x86_64 x86_64 x86_64 GNU/Linux The problem seems to be that kernel_addons/backport/2.6.16_sles10/include/net/sock.h is missing. Patch attached. Also recorded this in bug #885. -------------- next part -------------- A non-text attachment was scrubbed... Name: ofa-suse10.1-sock.patch Type: text/x-patch Size: 1086 bytes Desc: not available URL: From ruben at lfbs.RWTH-Aachen.DE Fri Feb 1 09:12:28 2008 From: ruben at lfbs.RWTH-Aachen.DE (Ruben Niederhagen) Date: Fri, 01 Feb 2008 18:12:28 +0100 Subject: [ofa-general] ENOMEM Message-ID: <47A352FC.8090604@lfbs.rwth-aachen.de> Hi! When I try to run ibv_srq_pingpong as non-root-user, I get the error "Couldn't create QP[5]". The tail of strace is: _______________________ write(3, "\0\0\0\t\0\f\0\3\0\0\0\0\377\317\211\30\0\0\0\0\367\320\0\0\0\0\0\0\0\0\0@"..., 48) = 48 write(3, "\0\0\0\30\0\30\0\10\0\0\0\0\377\317\211\210\0\0\0\0\20\0022\30\0\0\0\1\0\0\0\0"..., 96) = 96 mmap(NULL, 196608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xf7cc0000 write(3, "\0\0\0\t\0\f\0\3\0\0\0\0\377\317\211\30\0\0\0\0\367\315\0\0\0\0\0\0\0\0\0@"..., 48) = -1 ENOMEM (Cannot allocate memory) munmap(0xf7cc0000, 196608) = 0 write(2, "Couldn\'t create QP[5]\n", 22Couldn't create QP[5] ) = 22 exit_group(1) = ? _______________________ As root-user everything is working fine... Does somebody have an idea, what's going wrong? I assume, I did not set the rights accordingly for some files? Thank you! 
Ruben Niederhagen From sashak at voltaire.com Fri Feb 1 09:46:46 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 1 Feb 2008 17:46:46 +0000 Subject: [ofa-general] Re: [PATCH 2/2] opensm: OpenSM state machine rework In-Reply-To: <20080130185606.GV11277@sashak.voltaire.com> References: <20080130185441.GU11277@sashak.voltaire.com> <20080130185606.GV11277@sashak.voltaire.com> Message-ID: <20080201174646.GK29624@sashak.voltaire.com> On 18:56 Wed 30 Jan , Sasha Khapyorsky wrote: > > Instead of tricky state machine it implements plain flow do_sweep() > function which uses wait_for_pending_transaction() blocker. > > One of the goals of this patch is to preserve the original OpenSM > behavior. Small bug is here. OpenSM queries NodeInfo only during discovery and finding a new nodes is expected. If so we don't need to request new discovery in this case. It is addition: diff --git a/opensm/opensm/osm_node_info_rcv.c b/opensm/opensm/osm_node_info_rcv.c index 2106aa2..844dfcf 100644 --- a/opensm/opensm/osm_node_info_rcv.c +++ b/opensm/opensm/osm_node_info_rcv.c @@ -853,10 +853,9 @@ void osm_ni_rcv_process(IN void *context, IN void *data) osm_dump_node_info(sm->p_log, p_ni, OSM_LOG_DEBUG); - if (!p_node) { + if (!p_node) __osm_ni_rcv_process_new(sm, p_madw); - sm->p_subn->force_heavy_sweep = 1; - } else + else __osm_ni_rcv_process_existing(sm, p_node, p_madw); CL_PLOCK_RELEASE(sm->p_lock); Since CHANGE_DETECTED signal was ignored during subnet discovery phase by old OpenSM state machine it was useless event but worked. 
Sasha From ardavis at ichips.intel.com Fri Feb 1 09:58:28 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 01 Feb 2008 09:58:28 -0800 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: References: Message-ID: <47A35DC4.2000600@ichips.intel.com> Mahmoud Hanafi wrote: > > here is my mpirun command > > mpirun -np 128 -env I_MPI_DEVICE rdma:OpenIB-cma -env I_MPI_DEBUG 2 > /home/hanafim/HPL/xhpl > What IB adapter are you using and what F/W version (ibstat)? This might be related to following bug with large RDMA reads: https://bugs.openfabrics.org//show_bug.cgi?id=736 Try the following: Set "-env I_MPI_RDMA_MAX_MSG_SIZE 4194304" to workaround the issue. If it does not help, set I_MPI_RDMA_RNDV_WRITE to enable to use RDMA Write rendezvous protocol instead of the default RDMA Read. -arlin From dwnewmediatwinsm at newmediatwins.net Fri Feb 1 11:28:26 2008 From: dwnewmediatwinsm at newmediatwins.net (Sybil Spaulding) Date: Fri, 32 Jan 2008 20:28:26 +0100 Subject: [ofa-general] How easy it is to obtain a Un. Degree Message-ID: <06d0fd74$00000005$1ca358c8@dwnewmediatwinsm> University Degree OBTAIN A PROSPEROUS FUTURE, MONEY-EARNING POWER, AND THE PRESTIGE THAT COMES WITH HAVING THE CAREER POSITION YOU’VE ALWAYS DREAMED OF. DIPLOMA FROM PRESTIGIOUS NON-ACCREDITED UNVERSITIES BASED ON YOUR PRESENT KNOWLEDGE AND PROFESSIONAL EXPERIENCE. If you qualify, no required tests, classes, books or examinations. Confidentiality Assured 1-954-839-8054 24 hours a day, 7 days a week including Sundays and Holidays Fri, 32 Jan 2008 20:28:26 +0100. Thy sake and my poordoing. This is his majesty say your. I would swearThroca movousus. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From mashirle at us.ibm.com Fri Feb 1 01:45:34 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Fri, 01 Feb 2008 01:45:34 -0800 Subject: [ofa-general] Re: [PATCH 1/5] IB/ipoib: Split CQs for IPOIB UD In-Reply-To: <1201873239.6677.5.camel@eli-laptop> References: <1201873239.6677.5.camel@eli-laptop> Message-ID: <1201859134.19565.164.camel@localhost.localdomain> On Fri, 2008-02-01 at 15:40 +0200, Eli Cohen wrote: > IB/ipoib: Split CQs for IPOIB UD > > This comes as a preparation for using unsignalled QP in UD mode. It > uses a dedicated CQ for the UD send. The CQ is not armed and is polled > for completion right after sending a packet. > This patch and the following patches fix bugs 760 and 761. > > Signed-off-by: Eli Cohen I filed a patch back two years ago to split CQ. The feedback was it didn't benefit mthca since it had only one interrupt shared between send and recv. More context switches were generated when splitting CQ. Then I decided to wait for multiple interrupt vectors to be implemented in device driver layer to push this patch later with multiple interrupt vectors (This is the next item on my list). Then why did the performance improve without multiple interrupt support here? Thanks Shirley From rdreier at cisco.com Fri Feb 1 12:29:42 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 01 Feb 2008 12:29:42 -0800 Subject: [ofa-general] [RFC 2.6.24-rc] IB/srp: retry stale connections In-Reply-To: <1199830132.1988.18.camel@lap75545.ornl.gov> (David Dillow's message of "Tue, 08 Jan 2008 17:08:52 -0500") References: <1199830132.1988.18.camel@lap75545.ornl.gov> Message-ID: thanks, applied.
From rdreier at cisco.com Fri Feb 1 12:33:39 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 01 Feb 2008 12:33:39 -0800 Subject: [ofa-general] Re: [PATCH 1 of 2] IB/mlx4: For 64-bit systems, use large virtually contiguous queue buffers (vmap) In-Reply-To: <200801281040.52138.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Mon, 28 Jan 2008 10:40:51 +0200") References: <200801281040.52138.jackm@dev.mellanox.co.il> Message-ID: > --- infiniband.orig/drivers/infiniband/hw/mlx4/qp.c 2008-01-27 10:44:25.000000000 +0200 > +++ infiniband/drivers/infiniband/hw/mlx4/qp.c 2008-01-27 10:52:21.000000000 +0200 > @@ -96,7 +96,7 @@ static int is_qp0(struct mlx4_ib_dev *de > > static void *get_wqe(struct mlx4_ib_qp *qp, int offset) > { > - if (qp->buf.nbufs == 1) > + if (BITS_PER_LONG == 64 || qp->buf.nbufs == 1) > return qp->buf.u.direct.buf + offset; > else > return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf + I don't see any changes to cq.c -- it seems we would want to make the same optimization there, right? Or is it actually better to walk the two-level stuff ourselves when we can? - R. From rdreier at cisco.com Fri Feb 1 12:36:59 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 01 Feb 2008 12:36:59 -0800 Subject: [ofa-general] [PATCH] ib/fmr_pool: allocage page list only when caching enabled In-Reply-To: (Or Gerlitz's message of "Tue, 29 Jan 2008 12:56:18 +0200 (IST)") References: Message-ID: thanks, applied. From mhanafi at csc.com Fri Feb 1 13:17:08 2008 From: mhanafi at csc.com (Mahmoud Hanafi) Date: Fri, 1 Feb 2008 16:17:08 -0500 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: <47A35DC4.2000600@ichips.intel.com> Message-ID: Thanks for the tips. I got past the errors using the I_MPI_RDMA_RNDV_WRITE setting. But now I get the following error unexpected DAPL event 4008 from 111:n29 . . . Any ideas? I am using Cisco HCA's MT25208 Tavor Compat, DLGL revision A0 FW v4.7.6 build 3.2.0.118 Mahmoud Hanafi Sr. 
System Administrator CSC HPC COE Bld. 676 2435 Fifth Street WPAFB, Ohio 45433 (937) 255-1536 Computer Sciences Corporation Registered Office: 2100 East Grand Avenue, El Segundo California 90245, USA Registered in USA No: C-489-59 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- This is a PRIVATE message. If you are not the intended recipient, please delete without copying and kindly advise us by e-mail of the mistake in delivery. NOTE: Regardless of content, this e-mail shall not operate to bind CSC to any order or other contract unless pursuant to explicit written agreement or government initiative expressly permitting the use of e-mail for such purpose. ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Arlin Davis Sent by: general-bounces at lists.openfabrics.org 02/01/2008 12:58 PM To Mahmoud Hanafi/DEF/CSC at CSC cc general-bounces at lists.openfabrics.org, general at lists.openfabrics.org Subject Re: [ofa-general] ofed1.2.5rc2 and intel mpi error Mahmoud Hanafi wrote: > > here is my mpirun command > > mpirun -np 128 -env I_MPI_DEVICE rdma:OpenIB-cma -env I_MPI_DEBUG 2 > /home/hanafim/HPL/xhpl > What IB adapter are you using and what F/W version (ibstat)? This might be related to following bug with large RDMA reads: https://bugs.openfabrics.org//show_bug.cgi?id=736 Try the following: Set "-env I_MPI_RDMA_MAX_MSG_SIZE 4194304" to workaround the issue. If it does not help, set I_MPI_RDMA_RNDV_WRITE to enable to use RDMA Write rendezvous protocol instead of the default RDMA Read. 
-arlin _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: From robert.j.woodruff at intel.com Fri Feb 1 13:25:19 2008 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Fri, 1 Feb 2008 13:25:19 -0800 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: References: <47A35DC4.2000600@ichips.intel.com> Message-ID: Hanafi wrote, > Thanks for the tips. I got past the errors using the I_MPI_RDMA_RNDV_WRITE setting. But now I get the following error > unexpected DAPL event 4008 from 111:n29 . . . > Any ideas? This could be related to connection timeouts. We have seen this on larger clusters when the local sa cache is not enabled or if the SM node is down. I think that the local_sa_cache defaults to not enabled, but Arlin can confirm this. woody From YJia at tmriusa.com Fri Feb 1 14:14:01 2008 From: YJia at tmriusa.com (Yicheng Jia) Date: Fri, 1 Feb 2008 16:14:01 -0600 Subject: [ofa-general] SA code? In-Reply-To: Message-ID: Hi Roland, Is there SA code available in OFED? I have a IB switch to connect all the mellanox HCAs, the SA is running on the switch's firmware. My goal is to skip the switch and use ring topology to connect the HCA with each other. Is it doable in OFED? Thanks! Yicheng -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ardavis at ichips.intel.com Fri Feb 1 14:18:03 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 01 Feb 2008 14:18:03 -0800 Subject: [ofa-general] Question about exchanging DAT_RMR_TRIPLET In-Reply-To: References: Message-ID: <47A39A9B.7090907@ichips.intel.com> Chuck Hartley wrote: > Hello, > > We are doing RDMA transfers using UDAPL and just added a machine with a > different endianness than the rest of the current machines. We register > a memory region and get a DAT_RMR_TRIPLET that we then send to the > remote machine. On the receiving end, are you supposed to use the > triplet as is, or do you have to byte swap it to the native order or > some specific endianness? Unless interfaces are specifically defined as endian type then you need to assume host order. Convert your TRIPLET information to network order and exchange/swap accordingly. Look at v2 dtest.c or dtestx.c source as an example. I couldn't find anything in the UDAPL document > addressing this. > > On a related issue, we register a single large memory region that we are > doing transfers from. However, we are doing small transfers at varying > offsets within that region. We are currently doing this by modifying > the address field of the DAT_RMR_TRIPLET we received. Is this an ok > thing to do, or is there some other way to do RDMA transfers of varying > size/offset from within a single registered memory region? This will work fine, just make sure you are never accessing the same memory window via simultaneous operations. -arlin From rdreier at cisco.com Fri Feb 1 14:22:57 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 01 Feb 2008 14:22:57 -0800 Subject: [ofa-general] Re: SA code? In-Reply-To: (Yicheng Jia's message of "Fri, 1 Feb 2008 16:14:01 -0600") References: Message-ID: > Is there SA code available in OFED? I have a IB switch to connect all the > mellanox HCAs, the SA is running on the switch's firmware. 
My goal is to > skip the switch and use ring topology to connect the HCA with each other. > Is it doable in OFED? Are you looking for something other than opensm? From hrosenstock at xsigo.com Fri Feb 1 14:28:31 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 01 Feb 2008 14:28:31 -0800 Subject: [ofa-general] SA code? In-Reply-To: References: Message-ID: <1201904912.11210.107.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > Hi Roland, > > Is there SA code available in OFED? I have a IB switch to connect all > the mellanox HCAs, the SA is running on the switch's firmware. My goal > is to skip the switch and use ring topology to connect the HCA with > each other. Is it doable in OFED? I don't understand what you're trying to accomplish with this topology. In IB speak, each HCA <-> HCA link would be a separate subnet. Do you intend to forward across your ring ? If so, only some "router" mode would allow that with the topology you propose. -- Hal > Thanks! > Yicheng > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From YJia at tmriusa.com Fri Feb 1 14:38:51 2008 From: YJia at tmriusa.com (Yicheng Jia) Date: Fri, 1 Feb 2008 16:38:51 -0600 Subject: [ofa-general] SA code? In-Reply-To: <1201904912.11210.107.camel@hrosenstock-ws.xsigo.com> Message-ID: Hi Hal, The only reason is cost down. The cost of IB switch weighs too much in our product, it seems the ring topology is the only way to go. There's no message across the ring, all the communication occurs within the ring. Would it cause the performance substantially go down? Thanks! Yicheng Hal Rosenstock 02/01/2008 04:25 PM To Yicheng Jia cc Roland Dreier , general at lists.openfabrics.org Subject Re: [ofa-general] SA code? 
On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > Hi Roland, > > Is there SA code available in OFED? I have a IB switch to connect all > the mellanox HCAs, the SA is running on the switch's firmware. My goal > is to skip the switch and use ring topology to connect the HCA with > each other. Is it doable in OFED? I don't understand what you're trying to accomplish with this topology. In IB speak, each HCA <-> HCA link would be a separate subnet. Do you intend to forward across your ring ? If so, only some "router" mode would allow that with the topology you propose. -- Hal > Thanks! > Yicheng > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general _____________________________________________________________________________ Scanned by IBM Email Security Management Services powered by MessageLabs. For more information please visit http://www.ers.ibm.com _____________________________________________________________________________ -------------- next part -------------- An HTML attachment was scrubbed... URL: From pawel.dziekonski at pwr.wroc.pl Fri Feb 1 14:45:30 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Fri, 1 Feb 2008 23:45:30 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? 
In-Reply-To: References: <4797AD59.2000206@mellanox.co.il> <20080126193035.GA21209@cefeid.wcss.wroc.pl> <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> Message-ID: <20080201224530.GA16581@cefeid.wcss.wroc.pl> On Fri, 01 Feb 2008 at 10:56:55AM -0500, James Lentini wrote: > > # mount 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 Unsupported nfs > > mount option: rdma > > > > looks like I definitelly need a rdma-enabled mount, which comes in > > http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so: > > If you are using the NFS/RDMA client in 2.6.24, you need the version > of the mount.nfs command in nfs-utils-1.1.1 or greater. > > The "Unsupported nfs mount option: rdma" error message makes me > suspect you are not using the correct version of mount.nfs. What is > the output of "mount.nfs -V" and "mount -V"? It may be that the > version of mount you are using does not automatically invoke mount.nfs > for nfs mounts. I'd suggest specifying mount.nfs in the command above: # mount.nfs -V mount.nfs (linux nfs-utils 1.1.1) > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v mount.nfs: timeout set for Fri Feb 1 23:48:08 2008 mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' mount.nfs: internal error :( -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From ardavis at ichips.intel.com Fri Feb 1 14:45:16 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 01 Feb 2008 14:45:16 -0800 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: References: <47A35DC4.2000600@ichips.intel.com> Message-ID: <47A3A0FC.6020603@ichips.intel.com> > This could be related to connection timeouts. 
We have seen this > on larger clusters when the local sa cache is not enabled or if the SM > node is down. I think that the local_sa_cache defaults to not enabled, > but Arlin can confirm this. > > woody > That is true, OFED 1.2.5 disables SA caching by default. I would recommend enabling SA caching. When using rdma_cm to establish end-to-end connections we incur a 3 step process, each with various tunable knobs. There is ARP, Path Resolution, and CM req/reply. Any one of these could cause the 4008 timeout error. Here are tunable parameters that may help: 1. ARP: ARP cache entries for ib0 can be increased from default of 30: sysctl -w net.ipv4.neigh.ib0.base_reachable_time=14400 2. PATH RESOLUTION: ib_sa.ko provides path record caching, no timer controls, auto refresh with new device notification events from SM/SA, manual refresh control for administrators, default == SA caching is OFF. To enable: add following to /etc/modprobe.conf - options ib_sa paths_per_dest=0x7f or echo 0x7f > /sys/module/ib_sa/paths_per_dest To manually refresh: echo 1 > /sys/module/ib_sa/refresh To monitor: cat /sys/module/ib_sa/lookup_method * 0 round robin 1 round robin cat /sys/module/ib_sa/paths_per_dest You can also increase the uDAPL PR timeout with the following environment variable (if you don't have SA caching): export DAPL_CM_ROUTE_TIMEOUT_MS=20000 (default=4000) 3. CM PROTOCOL: OFED 1.2.5 provides the following module parameters to increase the IB cm response timeout from default of 21: To increase timeout: add following to /etc/modprobe.conf - options rdma_cm cma_response_timeout=23 options ib_cm max_timeout=23 -arlin From hrosenstock at xsigo.com Fri Feb 1 14:48:08 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 01 Feb 2008 14:48:08 -0800 Subject: [ofa-general] SA code?
In-Reply-To: References: Message-ID: <1201906088.11210.114.camel@hrosenstock-ws.xsigo.com> Hi Yicheng, On Fri, 2008-02-01 at 16:38 -0600, Yicheng Jia wrote: > > Hi Hal, > > The only reason is cost down. Understood. > The cost of IB switch weighs too much in our product, it seems the > ring topology is the only way to go. There's no message across the > ring, all the communication occurs within the ring. Are you saying all messages are between adjacent neighbors on the ring ? > Would it cause the performance substantially go down? If it is just A->B, B->C, ..., and never across the ring, then this can work but you need an SM instance per subnet. If you need some router mode, then this is still a work in progress for InfiniBand although you might be able to get to a proprietary solution in the short term. -- Hal > Thanks! > Yicheng > > > > Hal Rosenstock > > > 02/01/2008 04:25 PM > > > To > Yicheng Jia > > cc > Roland Dreier > , general at lists.openfabrics.org > Subject > Re: [ofa-general] > SA code? > > > > > > > > > On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > > > Hi Roland, > > > > Is there SA code available in OFED? I have a IB switch to connect > all > > the mellanox HCAs, the SA is running on the switch's firmware. My > goal > > is to skip the switch and use ring topology to connect the HCA with > > each other. Is it doable in OFED? > > I don't understand what you're trying to accomplish with this > topology. > In IB speak, each HCA <-> HCA link would be a separate subnet. Do you > intend to forward across your ring ? If so, only some "router" mode > would allow that with the topology you propose. > > -- Hal > > > Thanks! 
> > Yicheng > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > > _____________________________________________________________________________ > Scanned by IBM Email Security Management Services powered by > MessageLabs. For more information please visit http://www.ers.ibm.com > _____________________________________________________________________________ > From caitlin.bestler at neterion.com Fri Feb 1 14:52:49 2008 From: caitlin.bestler at neterion.com (Caitlin Bestler) Date: Fri, 1 Feb 2008 14:52:49 -0800 Subject: [ofa-general] Question about exchanging DAT_RMR_TRIPLET In-Reply-To: References: Message-ID: <469958e00802011452k42b7e42ap9aac09de682ceced@mail.gmail.com> DAT does not specify how a DAT_RMR_TRIPLET should be encoded in application messages. However, there are some heavy hints in the semantics of the fields. Both the virtual address and the length have integer semantics. They are not opaque. It is meaningful to divide a buffer in half by doing integer arithmetic on the address and lengths to form two new DAT_RMR_TRIPLETs referring to the first and second half of the original region. Therefore the virtual address should be passed in some way that will convey its 64-bit unsigned integer value correctly to the destination, and the length needs a method of conveying a 32-bit unsigned integer. It's up to the application to decide between standard wire ordering, "sender makes right", "receiver makes right", RPC, text encoding or even XML encoding. For a DAT_RMR_CONTEXT, any method that could encode an IB R-Key or RDMAC STag would be valid. On Feb 1, 2008 7:31 AM, Chuck Hartley wrote: > Hello, > > We are doing RDMA transfers using UDAPL and just added a machine with a > different endianness than the rest of the current machines.
We register a > memory region and get a DAT_RMR_TRIPLET that we then send to the remote > machine. On the receiving end, are you supposed to use the triplet as is, or > do you have to byte swap it to the native order or some specific endianness? > I couldn't find anything in the UDAPL document addressing this. > > On a related issue, we register a single large memory region that we are > doing transfers from. However, we are doing small transfers at varying > offsets within that region. We are currently doing this by modifying the > address field of the DAT_RMR_TRIPLET we received. Is this an ok thing to do, > or is there some other way to do RDMA transfers of varying size/offset from > within a single registered memory region? > > Thanks, > Chuck > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From YJia at tmriusa.com Fri Feb 1 15:07:37 2008 From: YJia at tmriusa.com (Yicheng Jia) Date: Fri, 1 Feb 2008 17:07:37 -0600 Subject: [ofa-general] SA code? In-Reply-To: <1201906088.11210.114.camel@hrosenstock-ws.xsigo.com> Message-ID: > Are you saying all messages are between adjacent neighbors on the ring ? Yes. Say I have 6 nodes: A, B, C, D, E, F, then the ring topo will be A<->B, B<->C, C<->D, D<->E, E<->F, F<->A, and there are totally 6 subnets. > If it is just A->B, B->C, ..., and never across the ring, then this can > work but you need an SM instance per subnet. Do I need a router per subnet as well? > If you need some router mode, then this is still a work in progress for > InfiniBand although you might be able to get to a proprietary solution > in the short term. What version of OFED will have router mode available? Thanks! 
Yicheng Hal Rosenstock 02/01/2008 04:45 PM To Yicheng Jia cc general at lists.openfabrics.org, Roland Dreier Subject Re: [ofa-general] SA code? Hi Yicheng, On Fri, 2008-02-01 at 16:38 -0600, Yicheng Jia wrote: > > Hi Hal, > > The only reason is cost down. Understood. > The cost of IB switch weighs too much in our product, it seems the > ring topology is the only way to go. There's no message across the > ring, all the communication occurs within the ring. Are you saying all messages are between adjacent neighbors on the ring ? > Would it cause the performance substantially go down? If it is just A->B, B->C, ..., and never across the ring, then this can work but you need an SM instance per subnet. If you need some router mode, then this is still a work in progress for InfiniBand although you might be able to get to a proprietary solution in the short term. -- Hal > Thanks! > Yicheng > > > > Hal Rosenstock > > > 02/01/2008 04:25 PM > > > To > Yicheng Jia > > cc > Roland Dreier > , general at lists.openfabrics.org > Subject > Re: [ofa-general] > SA code? > > > > > > > > > On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > > > Hi Roland, > > > > Is there SA code available in OFED? I have a IB switch to connect > all > > the mellanox HCAs, the SA is running on the switch's firmware. My > goal > > is to skip the switch and use ring topology to connect the HCA with > > each other. Is it doable in OFED? > > I don't understand what you're trying to accomplish with this > topology. > In IB speak, each HCA <-> HCA link would be a separate subnet. Do you > intend to forward across your ring ? If so, only some "router" mode > would allow that with the topology you propose. > > -- Hal > > > Thanks! 
> > Yicheng > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > > > _____________________________________________________________________________ > Scanned by IBM Email Security Management Services powered by > MessageLabs. For more information please visit http://www.ers.ibm.com > _____________________________________________________________________________ > _____________________________________________________________________________ Scanned by IBM Email Security Management Services powered by MessageLabs. For more information please visit http://www.ers.ibm.com _____________________________________________________________________________ -------------- next part -------------- An HTML attachment was scrubbed... URL: From YJia at tmriusa.com Fri Feb 1 15:09:39 2008 From: YJia at tmriusa.com (Yicheng Jia) Date: Fri, 1 Feb 2008 17:09:39 -0600 Subject: [ofa-general] Re: SA code? In-Reply-To: Message-ID: Thanks! I think OpenSM is enough for my case. -Yicheng Roland Dreier 02/01/2008 04:20 PM To Yicheng Jia cc general at lists.openfabrics.org Subject Re: SA code? > Is there SA code available in OFED? I have a IB switch to connect all the > mellanox HCAs, the SA is running on the switch's firmware. My goal is to > skip the switch and use ring topology to connect the HCA with each other. > Is it doable in OFED? Are you looking for something other than opensm? _____________________________________________________________________________ Scanned by IBM Email Security Management Services powered by MessageLabs. For more information please visit http://www.ers.ibm.com _____________________________________________________________________________ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From rdreier at cisco.com Fri Feb 1 15:35:23 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 01 Feb 2008 15:35:23 -0800 Subject: [ofa-general] Re: [PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: <1201718540.6850.41.camel@localhost.localdomain> (Shirley Ma's message of "Wed, 30 Jan 2008 10:42:20 -0800") References: <1201718540.6850.41.camel@localhost.localdomain> Message-ID: > The current IPoIB-UD implementation is limited IPoIB payload size to > 2048 through hard coding IPOIB_PACKET_SIZE. The implementation is > designed for kernel PAGE_SIZE equals or greater than 4K. If the kernel > PAGE_SIZE is equals to 2K, memory buffer allocation will be failure when > lack of large buffer of memory. However most of the Distros does support > PAGE_SIZE >= 4K. So this implementation has no problem for 2048 payload. > This implementation is simple but it prevents HCA device who does > support 4096 payload from performing, like IBM eHCA2. Not sure I understand this. Is there any possible configuration of any architecture where Linux runs where PAGE_SIZE < 4096? > This patch allows IPoIB-UD MTU up to 4092 (4K - IPOIB_ENCAP_LEN) when > HCA can support 4K MTU. In this patch, APIs for S/G buffer allocation in > IPoIB-CM mode has been made generic so IPoIB-UD and IPoIB-CM can share > the S/G code. This approach seems overly complex to me, since it ends up going through all the CM buffer fragment bookkeeping for the simple UD path. However, I now realize that my earlier idea of allocating a scratch buffer for the GRH and just allocating a 4096 byte skb doesn't work, because the skb_shinfo ends up being allocated along with the buffer, so trying to allocate a 4096-byte skb will bloat the data past a single page, which is what we're trying to avoid. So how about the following? When using a UD MTU of 4096 with a page size of 4096, allocate an skb of size 44 for the GRH and ethertype, and then allocate a single page for the fragment list. 
This means that the IP packet will start nicely 16-byte aligned for free, and all the bookkeeping is very simple. - R. From meier3 at llnl.gov Fri Feb 1 15:42:34 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Fri, 01 Feb 2008 15:42:34 -0800 Subject: [ofa-general] SA code? In-Reply-To: References: Message-ID: <47A3AE6A.4000909@llnl.gov> Yicheng Jia wrote: > > > Are you saying all messages are between adjacent neighbors on the ring ? > Yes. Say I have 6 nodes: A, B, C, D, E, F, then the ring topo will be > A<->B, B<->C, C<->D, D<->E, E<->F, F<->A, and there are totally 6 subnets. If you only plan on 6 nodes, you can get an 8 port switch for ~ $750 http://www.colfaxdirect.com/store/pc/viewCategories.asp?pageStyle=m&idCategory=7 > > > If it is just A->B, B->C, ..., and never across the ring, then this can > > work but you need an SM instance per subnet. > Do I need a router per subnet as well? > > > If you need some router mode, then this is still a work in progress for > > InfiniBand although you might be able to get to a proprietary solution > > in the short term. > What version of OFED will have router mode available? > > Thanks! > Yicheng > > > > *Hal Rosenstock * > > 02/01/2008 04:45 PM > > > To > Yicheng Jia > cc > general at lists.openfabrics.org, Roland Dreier > Subject > Re: [ofa-general] SA code? > > > > > > > > > Hi Yicheng, > > On Fri, 2008-02-01 at 16:38 -0600, Yicheng Jia wrote: > > > > Hi Hal, > > > > The only reason is cost down. > > Understood. > > > The cost of IB switch weighs too much in our product, it seems the > > ring topology is the only way to go. There's no message across the > > ring, all the communication occurs within the ring. > > Are you saying all messages are between adjacent neighbors on the ring ? > > > Would it cause the performance substantially go down? > > If it is just A->B, B->C, ..., and never across the ring, then this can > work but you need an SM instance per subnet. 
> > If you need some router mode, then this is still a work in progress for > InfiniBand although you might be able to get to a proprietary solution > in the short term. > > -- Hal > > > Thanks! > > Yicheng > > > > > > > > Hal Rosenstock > > > > > > 02/01/2008 04:25 PM > > > > > > To > > Yicheng Jia > > > > cc > > Roland Dreier > > , general at lists.openfabrics.org > > Subject > > Re: [ofa-general] > > SA code? > > > > > > > > > > > > > > > > > > On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > > > > > Hi Roland, > > > > > > Is there SA code available in OFED? I have a IB switch to connect > > all > > > the mellanox HCAs, the SA is running on the switch's firmware. My > > goal > > > is to skip the switch and use ring topology to connect the HCA with > > > each other. Is it doable in OFED? > > > > I don't understand what you're trying to accomplish with this > > topology. > > In IB speak, each HCA <-> HCA link would be a separate subnet. Do you > > intend to forward across your ring ? If so, only some "router" mode > > would allow that with the topology you propose. > > > > -- Hal > > > > > Thanks! > > > Yicheng > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > > > > _____________________________________________________________________________ > > Scanned by IBM Email Security Management Services powered by > > MessageLabs. For more information please visit http://www.ers.ibm.com > > > _____________________________________________________________________________ > > > > > _____________________________________________________________________________ > Scanned by IBM Email Security Management Services powered by > MessageLabs. 
For more information please visit http://www.ers.ibm.com > _____________________________________________________________________________ > > > ------------------------------------------------------------------------ > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov From hrosenstock at xsigo.com Fri Feb 1 15:46:15 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 01 Feb 2008 15:46:15 -0800 Subject: [ofa-general] SA code? In-Reply-To: References: Message-ID: <1201909575.11210.125.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-01 at 17:07 -0600, Yicheng Jia wrote: > > > Are you saying all messages are between adjacent neighbors on the > ring ? > Yes. Say I have 6 nodes: A, B, C, D, E, F, then the ring topo will be > A<->B, B<->C, C<->D, D<->E, E<->F, F<->A, and there are totally 6 > subnets. > > > If it is just A->B, B->C, ..., and never across the ring, then this > can > > work but you need an SM instance per subnet. > Do I need a router per subnet as well? Not if the communication is just within each subnet; only if it is across subnets. > > If you need some router mode, then this is still a work in progress > for > > InfiniBand although you might be able to get to a proprietary > solution > > in the short term. > What version of OFED will have router mode available? There's already the beginnings of this but I don't know when a complete solution would be available. -- Hal > Thanks! > Yicheng > > > > Hal Rosenstock > > > 02/01/2008 04:45 PM > > > To > Yicheng Jia > > cc > general at lists.openfabrics.org, Roland Dreier > Subject > Re: [ofa-general] > SA code? 
> > > > > > > > > Hi Yicheng, > > On Fri, 2008-02-01 at 16:38 -0600, Yicheng Jia wrote: > > > > Hi Hal, > > > > The only reason is cost down. > > Understood. > > > The cost of IB switch weighs too much in our product, it seems the > > ring topology is the only way to go. There's no message across the > > ring, all the communication occurs within the ring. > > Are you saying all messages are between adjacent neighbors on the > ring ? > > > Would it cause the performance substantially go down? > > If it is just A->B, B->C, ..., and never across the ring, then this > can > work but you need an SM instance per subnet. > > If you need some router mode, then this is still a work in progress > for > InfiniBand although you might be able to get to a proprietary solution > in the short term. > > -- Hal > > > Thanks! > > Yicheng > > > > > > > > Hal Rosenstock > > > > > > 02/01/2008 04:25 PM > > > > > > To > > Yicheng Jia > > > > cc > > Roland Dreier > > , general at lists.openfabrics.org > > Subject > > Re: [ofa-general] > > SA code? > > > > > > > > > > > > > > > > > > On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > > > > > Hi Roland, > > > > > > Is there SA code available in OFED? I have a IB switch to connect > > all > > > the mellanox HCAs, the SA is running on the switch's firmware. My > > goal > > > is to skip the switch and use ring topology to connect the HCA > with > > > each other. Is it doable in OFED? > > > > I don't understand what you're trying to accomplish with this > > topology. > > In IB speak, each HCA <-> HCA link would be a separate subnet. Do > you > > intend to forward across your ring ? If so, only some "router" mode > > would allow that with the topology you propose. > > > > -- Hal > > > > > Thanks! 
> > > Yicheng > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > > > > _____________________________________________________________________________ > > Scanned by IBM Email Security Management Services powered by > > MessageLabs. For more information please visit > http://www.ers.ibm.com > > > _____________________________________________________________________________ > > > > > _____________________________________________________________________________ > Scanned by IBM Email Security Management Services powered by > MessageLabs. For more information please visit http://www.ers.ibm.com > _____________________________________________________________________________ > From YJia at tmriusa.com Fri Feb 1 15:46:45 2008 From: YJia at tmriusa.com (Yicheng Jia) Date: Fri, 1 Feb 2008 17:46:45 -0600 Subject: [ofa-general] SA code? In-Reply-To: <47A3AE6A.4000909@llnl.gov> Message-ID: Unfortunately I have 9 nodes. Thanks! Yicheng "Timothy A. Meier" 02/01/2008 05:39 PM To Yicheng Jia cc general at lists.openfabrics.org Subject Re: [ofa-general] SA code? Yicheng Jia wrote: > > > Are you saying all messages are between adjacent neighbors on the ring ? > Yes. Say I have 6 nodes: A, B, C, D, E, F, then the ring topo will be > A<->B, B<->C, C<->D, D<->E, E<->F, F<->A, and there are totally 6 subnets. If you only plan on 6 nodes, you can get an 8 port switch for ~ $750 http://www.colfaxdirect.com/store/pc/viewCategories.asp?pageStyle=m&idCategory=7 > > > If it is just A->B, B->C, ..., and never across the ring, then this can > > work but you need an SM instance per subnet. > Do I need a router per subnet as well? 
> > > If you need some router mode, then this is still a work in progress for > > InfiniBand although you might be able to get to a proprietary solution > > in the short term. > What version of OFED will have router mode available? > > Thanks! > Yicheng > > > > *Hal Rosenstock * > > 02/01/2008 04:45 PM > > > To > Yicheng Jia > cc > general at lists.openfabrics.org, Roland Dreier > Subject > Re: [ofa-general] SA code? > > > > > > > > > Hi Yicheng, > > On Fri, 2008-02-01 at 16:38 -0600, Yicheng Jia wrote: > > > > Hi Hal, > > > > The only reason is cost down. > > Understood. > > > The cost of IB switch weighs too much in our product, it seems the > > ring topology is the only way to go. There's no message across the > > ring, all the communication occurs within the ring. > > Are you saying all messages are between adjacent neighbors on the ring ? > > > Would it cause the performance substantially go down? > > If it is just A->B, B->C, ..., and never across the ring, then this can > work but you need an SM instance per subnet. > > If you need some router mode, then this is still a work in progress for > InfiniBand although you might be able to get to a proprietary solution > in the short term. > > -- Hal > > > Thanks! > > Yicheng > > > > > > > > Hal Rosenstock > > > > > > 02/01/2008 04:25 PM > > > > > > To > > Yicheng Jia > > > > cc > > Roland Dreier > > , general at lists.openfabrics.org > > Subject > > Re: [ofa-general] > > SA code? > > > > > > > > > > > > > > > > > > On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > > > > > Hi Roland, > > > > > > Is there SA code available in OFED? I have a IB switch to connect > > all > > > the mellanox HCAs, the SA is running on the switch's firmware. My > > goal > > > is to skip the switch and use ring topology to connect the HCA with > > > each other. Is it doable in OFED? > > > > I don't understand what you're trying to accomplish with this > > topology. > > In IB speak, each HCA <-> HCA link would be a separate subnet. 
Do you > > intend to forward across your ring ? If so, only some "router" mode > > would allow that with the topology you propose. > > > > -- Hal > > > > > Thanks! > > > Yicheng > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > > > > _____________________________________________________________________________ > > Scanned by IBM Email Security Management Services powered by > > MessageLabs. For more information please visit http://www.ers.ibm.com > > > _____________________________________________________________________________ > > > > > _____________________________________________________________________________ > Scanned by IBM Email Security Management Services powered by > MessageLabs. For more information please visit http://www.ers.ibm.com > _____________________________________________________________________________ > > > ------------------------------------------------------------------------ > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov _____________________________________________________________________________ Scanned by IBM Email Security Management Services powered by MessageLabs. For more information please visit http://www.ers.ibm.com _____________________________________________________________________________ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ardavis at ichips.intel.com Fri Feb 1 15:51:36 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 01 Feb 2008 15:51:36 -0800 Subject: [ofa-general] OFED 1.3 rc2 kernel panic: trying to enable SA cache Message-ID: <47A3B088.30200@ichips.intel.com> - RHEL4u4, OFED 1.3 rc2, CA type: MT25208, FW 4.8.200, SDR, OpenSM echo 0x7f > /sys/module/ib_sa/paths_per_dest causes.... <5> Unable to handle kernel NULL pointer dereference at 0000000000000020 RIP: <5> {:ib_mthca:mthca_ah_grh_present+0} <5> PML4 1139ef067 PGD 10db04067 PMD 0 <5> Oops: 0000 [1] SMP <5> CPU 1 <5> Modules linked in: netconsole nfs lockd nfs_acl autofs4 i2c_dev i2c_core sunrpc rdma_ucm(U) qlgc_vnic(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_ipath(U) mlx4_ib(U) mlx4_core(U) ipt_REJECT ipt_state ip_conntrack iptable_filter ip_tables dm_mirror dm_mod button battery ac joydev uhci_hcd ehci_hcd hw_random ib_mthca(U) ib_ipoib(U) ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) md5 ipv6 e1000(U) ahci ext3 jbd ata_piix libata sd_mod scsi_mod <5> Pid: 1581, comm: ib_inform Not tainted 2.6.9-42.ELsmp <5> RIP: 0010:[] {:ib_mthca:mthca_ah_grh_present+0} <5> RSP: 0018:0000010118ff5b70 EFLAGS: 00010046 <5> RAX: 0000000000000005 RBX: 0000010037ebdd90 RCX: 0000010119186d88 <5> RDX: 0000000000000001 RSI: 0000010037ebdc00 RDI: 0000000000000000 <5> RBP: 0000010037ebdc00 R08: 000001011780d080 R09: 000001011780d090 <5> R10: 0000010119186c00 R11: 0000000000000100 R12: 000001011780d080 <5> R13: 0000010119186d88 R14: 00000101198fc000 R15: 000001011780d090 <5> FS: 0000000000000000(0000) GS:ffffffff804e5100(0000) knlGS:0000000000000000 <5> CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b <5> CR2: 0000000000000020 CR3: 0000000005d68000 CR4: 00000000000006e0 <5> Process ib_inform (pid: 1581, threadinfo 0000010118ff4000, task 0000010119965030) <5> Stack: ffffffffa016cba7 0000000000000001 000001011780d090 000001011780d000 <5> 00000101198fc000 0000010119186d88 0000010037ebdc00 
0000000000000001 <5> ffffffffa016d470 0000000000000000 <5> Call Trace:{:ib_mthca:build_mlx_header+45} {:ib_mthca:mthca_tavor_post_send+1481} <5> {:ib_mad:ib_send_mad+246} {:ib_mad:ib_post_send_mad+1216} <5> {alloc_layer+67} {kzalloc+9} <5> {:ib_sa:send_mad+152} {:ib_sa:ib_sa_informinfo_query+330} <5> {:ib_sa:inform_work_handler+0} {:ib_sa:inform_work_handler+758} <5> {:ib_sa:reg_handler+0} {worker_thread+419} <5> {default_wake_function+0} {__wake_up_common+67} <5> {default_wake_function+0} {keventd_create_kthread+0} <5> {worker_thread+0} {keventd_create_kthread+0} <5> {kthread+200} {child_rip+8} <5> {keventd_create_kthread+0} {kthread+0} <5> {child_rip+0} <5> <5> Code: 48 8b 47 20 0f be 40 05 c1 e8 1f c3 41 54 b8 ea ff ff ff 49 <5> RIP {:ib_mthca:mthca_ah_grh_present+0} RSP <0000010118ff5b70> <5> CR2: 0000000000000020 <5> <0>Kernel panic - not syncing: Oops From YJia at tmriusa.com Fri Feb 1 15:53:46 2008 From: YJia at tmriusa.com (Yicheng Jia) Date: Fri, 1 Feb 2008 17:53:46 -0600 Subject: [ofa-general] SA code? In-Reply-To: <1201909575.11210.125.camel@hrosenstock-ws.xsigo.com> Message-ID: > Not if the communication is just within each subnet; only if it is > across subnets. It will cross the subnets. For my case, each node has the need of sending message to other nodes than its neighbor, e.g. A -> D or E->B. Thanks! Yicheng Hal Rosenstock 02/01/2008 05:43 PM To Yicheng Jia cc general at lists.openfabrics.org, Roland Dreier Subject Re: [ofa-general] SA code? On Fri, 2008-02-01 at 17:07 -0600, Yicheng Jia wrote: > > > Are you saying all messages are between adjacent neighbors on the > ring ? > Yes. Say I have 6 nodes: A, B, C, D, E, F, then the ring topo will be > A<->B, B<->C, C<->D, D<->E, E<->F, F<->A, and there are totally 6 > subnets. > > > If it is just A->B, B->C, ..., and never across the ring, then this > can > > work but you need an SM instance per subnet. > Do I need a router per subnet as well? 
Not if the communication is just within each subnet; only if it is across subnets. > > If you need some router mode, then this is still a work in progress > for > > InfiniBand although you might be able to get to a proprietary > solution > > in the short term. > What version of OFED will have router mode available? There's already the beginnings of this but I don't know when a complete solution would be available. -- Hal > Thanks! > Yicheng > > > > Hal Rosenstock > > > 02/01/2008 04:45 PM > > > To > Yicheng Jia > > cc > general at lists.openfabrics.org, Roland Dreier > Subject > Re: [ofa-general] > SA code? > > > > > > > > > Hi Yicheng, > > On Fri, 2008-02-01 at 16:38 -0600, Yicheng Jia wrote: > > > > Hi Hal, > > > > The only reason is cost down. > > Understood. > > > The cost of IB switch weighs too much in our product, it seems the > > ring topology is the only way to go. There's no message across the > > ring, all the communication occurs within the ring. > > Are you saying all messages are between adjacent neighbors on the > ring ? > > > Would it cause the performance substantially go down? > > If it is just A->B, B->C, ..., and never across the ring, then this > can > work but you need an SM instance per subnet. > > If you need some router mode, then this is still a work in progress > for > InfiniBand although you might be able to get to a proprietary solution > in the short term. > > -- Hal > > > Thanks! > > Yicheng > > > > > > > > Hal Rosenstock > > > > > > 02/01/2008 04:25 PM > > > > > > To > > Yicheng Jia > > > > cc > > Roland Dreier > > , general at lists.openfabrics.org > > Subject > > Re: [ofa-general] > > SA code? > > > > > > > > > > > > > > > > > > On Fri, 2008-02-01 at 16:14 -0600, Yicheng Jia wrote: > > > > > > Hi Roland, > > > > > > Is there SA code available in OFED? I have a IB switch to connect > > all > > > the mellanox HCAs, the SA is running on the switch's firmware. 
My > > goal > > > is to skip the switch and use ring topology to connect the HCA > with > > > each other. Is it doable in OFED? > > > > I don't understand what you're trying to accomplish with this > > topology. > > In IB speak, each HCA <-> HCA link would be a separate subnet. Do > you > > intend to forward across your ring ? If so, only some "router" mode > > would allow that with the topology you propose. > > > > -- Hal > > > > > Thanks! > > > Yicheng > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > > > > _____________________________________________________________________________ > > Scanned by IBM Email Security Management Services powered by > > MessageLabs. For more information please visit > http://www.ers.ibm.com > > > _____________________________________________________________________________ > > > > > _____________________________________________________________________________ > Scanned by IBM Email Security Management Services powered by > MessageLabs. For more information please visit http://www.ers.ibm.com > _____________________________________________________________________________ > _____________________________________________________________________________ Scanned by IBM Email Security Management Services powered by MessageLabs. For more information please visit http://www.ers.ibm.com _____________________________________________________________________________ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ralph.campbell at qlogic.com Fri Feb 1 16:23:22 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Fri, 01 Feb 2008 16:23:22 -0800 Subject: [ofa-general] [PATCH] IB/libipathverbs - update license field in libipathverbs.spec Message-ID: <1201911802.27464.198.camel@brick.pathscale.com> Update the license field to match the exact format given in http://fedoraproject.org/wiki/Packaging/LicensingGuidelines for a package available under a choice of GPL or BSD license. Signed-off-by: Ralph Campbell diff --git a/libipathverbs.spec.in b/libipathverbs.spec.in index 515c177..9bc11eb 100644 --- a/libipathverbs.spec.in +++ b/libipathverbs.spec.in @@ -1,4 +1,4 @@ -# Copyright (c) 2007. QLogic Corp. All rights reserved. +# Copyright (c) 2007, 2008. QLogic Corp. All rights reserved. # Copyright (c) 2003, 2004, 2005. PathScale, Inc. All rights reserved. # # This software is available to you under a choice of one of two @@ -43,7 +43,7 @@ Release: %rel%{?dist} Summary: PathScale InfiniPath HCA Userspace Driver Group: System Environment/Libraries -License: GPL/BSD +License: GPLv2 or BSD Url: http://openib.org/ Source: http://openib.org/downloads/%{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-root From ralph.campbell at qlogic.com Fri Feb 1 16:25:17 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Fri, 01 Feb 2008 16:25:17 -0800 Subject: [ofa-general] [PATCH] IB/libipathverbs - Update %install section of libipathverbs.spec Message-ID: <1201911917.27464.201.camel@brick.pathscale.com> Change from using the %makeinstall macro to using "make install" directly. The page has this to say: "Fedora's RPM includes a %makeinstall macro but it must NOT be used when make install DESTDIR=%{buildroot} works. %makeinstall is a kludge.... It is error-prone and can have unexpected effects.... It can trigger unnecessary and wrong rebuilds.... ....it can cause broken *.la files to be installed.... 
Instead, Fedora packages should use: make DESTDIR=%{buildroot} install or make DESTDIR=$RPM_BUILD_ROOT install" Signed-off-by: Ralph Campbell diff --git a/libipathverbs.spec.in b/libipathverbs.spec.in index 9bc11eb..1283dd1 100644 --- a/libipathverbs.spec.in +++ b/libipathverbs.spec.in @@ -72,7 +72,7 @@ make %{?_smp_flags} %install rm -rf $RPM_BUILD_ROOT -%makeinstall +make DESTDIR=$RPM_BUILD_ROOT install # remove unpackaged files from the buildroot rm -f $RPM_BUILD_ROOT%{_libdir}/*.la From xma at us.ibm.com Fri Feb 1 16:36:50 2008 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 1 Feb 2008 16:36:50 -0800 Subject: [ofa-general] Re: [PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: Message-ID: Hello, Roland, Thanks for your quick review. > Not sure I understand this. Is there any possible configuration of > any architecture where Linux runs where PAGE_SIZE < 4096? Technically it's a problem, but practically it's not, since there is no architecture I can think of that has PAGE_SIZE < 4096. > > This patch allows IPoIB-UD MTU up to 4092 (4K - IPOIB_ENCAP_LEN) when > > HCA can support 4K MTU. In this patch, APIs for S/G buffer allocation in > > IPoIB-CM mode has been made generic so IPoIB-UD and IPoIB-CM can share > > the S/G code. > > This approach seems overly complex to me, since it ends up going > through all the CM buffer fragment bookkeeping for the simple UD path. No, it's not complex, only one buffer is allocated if the page_size is big enough and if it's 2K MTU. > So how about the following? When using a UD MTU of 4096 with a page > size of 4096, allocate an skb of size 44 for the GRH and ethertype, > and then allocate a single page for the fragment list. This means > that the IP packet will start nicely 16-byte aligned for free, and all > the bookkeeping is very simple. It has 44 bytes head with another 4K page size without if condition check of mtu size and page size. Please look at the patches for detail.
thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From xma at us.ibm.com Fri Feb 1 17:00:02 2008 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 1 Feb 2008 17:00:02 -0800 Subject: [ofa-general] Re: [PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: Message-ID: Hello Roland, > So how about the following? When using a UD MTU of 4096 with a page > size of 4096, allocate an skb of size 44 for the GRH and ethertype, > and then allocate a single page for the fragment list. This means > that the IP packet will start nicely 16-byte aligned for free, and all > the bookkeeping is very simple. This method would have condition check anywhere and the code will be more complicated than this implementation. Thanks Shirley Ma IBM Linux Technology Center 15300 SW Koll Parkway Beaverton, OR 97006-6063 Phone(Fax): (503) 578-7638 -------------- next part -------------- An HTML attachment was scrubbed... URL: From kilian at stanford.edu Fri Feb 1 17:05:50 2008 From: kilian at stanford.edu (Kilian CAVALOTTI) Date: Fri, 1 Feb 2008 17:05:50 -0800 Subject: [ofa-general] ENOMEM In-Reply-To: <47A352FC.8090604@lfbs.rwth-aachen.de> References: <47A352FC.8090604@lfbs.rwth-aachen.de> Message-ID: <200802011705.51202.kilian@stanford.edu> Hi Ruben, On Friday 01 February 2008 09:12:28 am Ruben Niederhagen wrote: > When I try to run ibv_srq_pingpong as non-root-user, I get the error > "Couldn't create QP[5]". >0\0\0@"..., 48) = -1 ENOMEM (Cannot allocate memory) > As root-user everything is working fine... You can try to check the non-root user's max-locked memory limits, with "ulimit -l" in a bash shell (those limits are usually set in /etc/security/limits.conf). 
Cheers -- Kilian From kliteyn at mellanox.co.il Fri Feb 1 17:08:36 2008 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 2 Feb 2008 03:08:36 +0200 Subject: [ofa-general] nightly osm_sim report 2008-02-02:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2008-02-01 OpenSM git rev = Thu_Jan_31_20:03:04_2008 [798e8d88937d60e1454f058db2f4c705b41ec9df] ibutils git rev = Mon_Dec_24_10:42:01_2007 [675bec82306d6920555dd0b5e2f664983e27e60f] Total=400 Pass=400 Fail=0 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree gnu-stallion-64.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree RhinoDDR.topo 10 FatTree FullGnu.topo 10 FatTree 4-ary-2-tree.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FTreeFail 4-ary-2-tree-missing-sw-link.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 10 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From dwsablelivrem at sablelivre.com Fri Feb 1 20:10:51 2008 From: dwsablelivrem at sablelivre.com (Casandra Wray) Date: Sat, 2 Feb 2008 12:10:51 +0800 Subject: [ofa-general] Work hard, play harder. Message-ID: <01c86594$a6ea5780$3498273a@dwsablelivrem> Feel like gambling? Golden Gate Casino is worth your attention. All popular casino games, great welcome bonus, fast to download, easy to use and completely free software! 
Great online casino Golden Gate is one of the leading casinos known for fair playing, excellent customer service available to contact 24 hour a day, 7 days a week and prompt payouts. http://geocities.com/kyleterry830/ Enjoy pure pleasure of gambling from home without stress! From xma at us.ibm.com Fri Feb 1 22:04:41 2008 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 1 Feb 2008 23:04:41 -0700 Subject: [ofa-general] [UPDATE][PATCH 2/3] ib/ipoib: set IPoIB-UD RX S/G parameters In-Reply-To: <1201808148.19565.121.camel@localhost.localdomain> Message-ID: My unix mail is down. Here is the new update one. I need to resend this one when my unix mail back. Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 13 +++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 ++++++++++++++----- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 3 +-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 14 ++++++++++++-- 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index d1d3ca2..004a80b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -61,6 +61,10 @@ enum { IPOIB_ENCAP_LEN = 4, + IPOIB_MAX_IB_MTU = 4096, + IPOIB_UD_MAX_RX_SG = ALIGN(IPOIB_MAX_IB_MTU + IB_GRH_BYTES + 4, + PAGE_SIZE) / PAGE_SIZE, /* padding to align IP header */ + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, @@ -319,6 +323,9 @@ struct ipoib_dev_priv { struct dentry *mcg_dentry; struct dentry *path_dentry; #endif + int max_ib_mtu; + struct ib_sge rx_sge[IPOIB_UD_MAX_RX_SG]; + struct ib_recv_wr rx_wr; }; struct ipoib_ah { @@ -359,6 +366,12 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +/* padding to align IP header */ +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + 4) 
+#define IPOIB_UD_HEAD_SIZE(ib_mtu) (IPOIB_UD_BUF_SIZE(ib_mtu)) % PAGE_SIZE +#define IPOIB_UD_RX_SG(ib_mtu) ALIGN(IPOIB_UD_BUF_SIZE(ib_mtu), PAGE_SIZE) / PAGE_SIZE + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. The ALIGN() expression here makes diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a082466..242591f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -194,7 +194,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -968,10 +968,6 @@ static void ipoib_setup(struct net_device *dev) dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1103,6 +1099,7 @@ static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1111,6 +1108,18 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey 
port %d failed (ret = %d)\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 2628339..630b429 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 433e99a..7e2d4d6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -150,13 +150,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG(priv->max_ib_mtu) }, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD }; - int ret, size; + int ret, size, i; priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { @@ -208,6 +208,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->tx_wr.num_sge = 1; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu); + priv->rx_sge[0].lkey = priv->mr->lkey; + for (i = 1; i < IPOIB_UD_RX_SG(priv->max_ib_mtu); ++i) { + priv->rx_sge[i].length = PAGE_SIZE; + priv->rx_sge[i].lkey = priv->mr->lkey; + } + priv->rx_wr.num_sge = IPOIB_UD_RX_SG(priv->max_ib_mtu); + priv->rx_wr.next = NULL; + priv->rx_wr.sg_list = priv->rx_sge; + return 0; out_free_cq: (See attached file: ipoib-4kmtu-ud-set.patch) Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib-4kmtu-ud-set.patch Type: application/octet-stream Size: 5360 bytes Desc: not available URL: From tehyqhxj at pentrel.com Sat Feb 2 00:29:28 2008 From: tehyqhxj at pentrel.com (Halle Givens) Date: Sat, 2 Feb 2008 16:29:28 +0800 Subject: [ofa-general] We have everything you need Message-ID: <541141433.43829430129778@pentrel.com> Dear valued member.Tell us straight - are you really that eager to buy artificially overpriced product? Stop wasting time and money - switch to Canadian products.We've got a huge variety of choice among all categories of high-quality product. The products of Canadian pharmaceutical industry are not less qualitative than the American products. They are just cheaper - that's the point.Get 12 free pills for over $300 order.Don't waste time - incredibly low prices are waiting for you. http://geocities.com/wadelancaster99/Thank You for Your time and for your attention Yours faithfully, Halle Givens -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From vlad at lists.openfabrics.org Sat Feb 2 03:10:45 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 2 Feb 2008 03:10:45 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080202-0200 daily build status Message-ID: <20080202111045.BC0E5E609BB@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on x86_64 with linux-2.6.21.1 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.16 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.20 Passed on ppc64 with linux-2.6.15 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.14 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ppc64 with linux-2.6.13 Passed 
on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.23 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.18-53.el5 Failed: From xma at us.ibm.com Sat Feb 2 08:16:51 2008 From: xma at us.ibm.com (Shirley Ma) Date: Sat, 2 Feb 2008 08:16:51 -0800 Subject: [ofa-general] Re: [PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: Message-ID: Hello Roland, > So how about the following? When using a UD MTU of 4096 with a page > size of 4096, allocate an skb of size 44 for the GRH and ethertype, > and then allocate a single page for the fragment list. This means > that the IP packet will start nicely 16-byte aligned for free, and all > the bookkeeping is very simple. My patch has passed the stress test for both PPC and Intel architecture against OFED-1.3-RC2 bit for a couple days. And I didn't see performance impact for 2K mtu. But I rethink about your suggestion here yesterday night. I can modify my patch to meet your thoughts here by keeping current implementation of 2K mtu and using if condition check for the new code. I will submit a new version of patchset today for review. Since I only have two days for my patch to be integrated into OFED-1.3-RC3 for Distros to pick up. I would like to see your ack here for this approach as soon as possible. I will compare two different implementations' performance. Thanks for your inputs. Appreciate your prompt response.
Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From irazqp7 at 10-28.com Sat Feb 2 10:23:51 2008 From: irazqp7 at 10-28.com (Vera Alvarado) Date: Sat, 2 Feb 2008 18:23:51 +0000 Subject: [ofa-general] CapaciousPhallusElva Message-ID: <01c865c8$c27aee40$3992624d@irazqp7> BodypartObviousElba http://www.carpotiues.com From boqshs at bohemiantechnologies.com Sat Feb 2 13:22:41 2008 From: boqshs at bohemiantechnologies.com (Noreen Spencer) Date: , 3 Feb 2008 05:22:41 +0800 Subject: [ofa-general] Reliable software only! Message-ID: <01c86624$cc61dac0$097783dc@boqshs> Need some software urgently? Purchase, download and install right now! Software in English, German, French, Italian, and Spanish for IBM PC and Macintosh! Cheap prices give you the possibility to save or buy more software than you can afford purchasing software on a CD! We are glad to help you to install your software. Feel free to ask questions and receive highly professional consultations. If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/willieochoa84/ Original software only! From mashirle at us.ibm.com Sat Feb 2 04:26:24 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 04:26:24 -0800 Subject: [ofa-general] [V2][PATCH 0/3]ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: <1201718540.6850.41.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> Message-ID: <1201955185.19565.182.camel@localhost.localdomain> Hello Roland, Please review below approach as early as you can. Thanks This patch is based on Eli and Roland's input. The idea is to keep IPoIB-UD 2K MTU current implementation and allows IPoIB-UD link MTU up to 4092 (4K - IPOB_ENCAP_LEN) when HCAs support 4K MTU. 
For IPoIB-UD 4K MTU, if the PAGE_SIZE is greater than IB MTU + GRH HEAD + 4, then no S/G is needed and the IPoIB-UD 2K MTU implementation is used; if PAGE_SIZE is smaller, then two buffers need to be used. One of the IPoIB-CM RX S/G APIs has been made more generic, so it can be reused. This patchset includes three patches: 1. Make one IPoIB-CM RX S/G API generic. 2. Set up IPoIB-UD RX S/G ready. 3. Enable IPoIB-UD RX S/G when needed. Shirley From mashirle at us.ibm.com Sat Feb 2 04:29:07 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 04:29:07 -0800 Subject: [ofa-general] [V2][PATCH 1/3] ib/ipoib: Make IPoIB-CM RX S/G API generic In-Reply-To: <1201718716.6850.46.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201718716.6850.46.camel@localhost.localdomain> Message-ID: <1201955347.19565.185.camel@localhost.localdomain> This patch makes two of the IPoIB-CM RX S/G APIs generic, so they can be reused. This patch is the same as V1 previously submitted.
Signed-of-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 26 +++++- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 135 ++++++------------------------- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 85 +++++++++++++++++++ 3 files changed, 132 insertions(+), 114 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index fe250c6..d1d3ca2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -141,6 +141,11 @@ struct ipoib_rx_buf { u64 mapping; }; +struct ipoib_cm_rx_buf { + struct sk_buff *skb; + u64 mapping[IPOIB_CM_RX_SG]; +}; + struct ipoib_tx_buf { struct sk_buff *skb; u64 mapping; @@ -212,11 +217,6 @@ struct ipoib_cm_tx { struct ib_wc ibwc[IPOIB_NUM_WC]; }; -struct ipoib_cm_rx_buf { - struct sk_buff *skb; - u64 mapping[IPOIB_CM_RX_SG]; -}; - struct ipoib_cm_dev_priv { struct ib_srq *srq; struct ipoib_cm_rx_buf *srq_ring; @@ -458,6 +458,22 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); void ipoib_drain_cq(struct net_device *dev); +void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, + unsigned int length, struct sk_buff *toskb); +struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, + int id, int frags, int head_size, + int pad, u64 *mapping); +static void inline ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, + int head_size, u64 *mapping) +{ + int i; + ib_dma_unmap_single(priv->ca, mapping[0], head_size, DMA_FROM_DEVICE); + for (i = 0; i < frags; i++) + ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, + DMA_FROM_DEVICE); + +} + #ifdef CONFIG_INFINIBAND_IPOIB_CM diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 1818f95..2c2c6b2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -77,17 +77,6 @@ static struct 
ib_send_wr ipoib_cm_rx_drain_wr = { static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); -static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, - u64 mapping[IPOIB_CM_RX_SG]) -{ - int i; - - ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); - - for (i = 0; i < frags; ++i) - ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); -} - static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -102,8 +91,9 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); - ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, - priv->cm.srq_ring[id].mapping); + ipoib_dma_unmap_rx(priv, priv->cm.num_frags - 1, + IPOIB_CM_HEAD_SIZE, + priv->cm.srq_ring[id].mapping); dev_kfree_skb_any(priv->cm.srq_ring[id].skb); priv->cm.srq_ring[id].skb = NULL; } @@ -126,8 +116,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); - ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, - rx->rx_ring[id].mapping); + ipoib_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, IPOIB_CM_HEAD_SIZE, + rx->rx_ring[id].mapping); dev_kfree_skb_any(rx->rx_ring[id].skb); rx->rx_ring[id].skb = NULL; } @@ -135,59 +125,6 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, return ret; } -static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, - struct ipoib_cm_rx_buf *rx_ring, - int id, int frags, - u64 mapping[IPOIB_CM_RX_SG]) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct sk_buff *skb; - int i; - - skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12); - if (unlikely(!skb)) - return NULL; - - /* - * IPoIB adds a 4 byte 
header. So we need 12 more bytes to align the - * IP header to a multiple of 16. - */ - skb_reserve(skb, 12); - - mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, - DMA_FROM_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { - dev_kfree_skb_any(skb); - return NULL; - } - - for (i = 0; i < frags; i++) { - struct page *page = alloc_page(GFP_ATOMIC); - - if (!page) - goto partial_error; - skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); - - mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page, - 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) - goto partial_error; - } - - rx_ring[id].skb = skb; - return skb; - -partial_error: - - ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); - - for (; i > 0; --i) - ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); - - dev_kfree_skb_any(skb); - return NULL; -} - static void ipoib_cm_free_rx_ring(struct net_device *dev, struct ipoib_cm_rx_buf *rx_ring) { @@ -196,8 +133,9 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev, for (i = 0; i < ipoib_recvq_size; ++i) if (rx_ring[i].skb) { - ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, - rx_ring[i].mapping); + ipoib_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, + IPOIB_CM_HEAD_SIZE, + rx_ring[i].mapping); dev_kfree_skb_any(rx_ring[i].skb); } @@ -345,8 +283,12 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i spin_unlock_irq(&priv->lock); for (i = 0; i < ipoib_recvq_size; ++i) { - if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, - rx->rx_ring[i].mapping)) { + rx->rx_ring[i].skb = ipoib_cm_alloc_rx_skb(dev, i, + IPOIB_CM_RX_SG - 1, + IPOIB_CM_HEAD_SIZE, + 12, + rx->rx_ring[i].mapping); + if (!rx->rx_ring[i].skb) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; goto err_count; @@ -480,38 +422,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id 
*cm_id, return 0; } } -/* Adjust length of skb with fragments to match received data */ -static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, - unsigned int length, struct sk_buff *toskb) -{ - int i, num_frags; - unsigned int size; - - /* put header into skb */ - size = min(length, hdr_space); - skb->tail += size; - skb->len += size; - length -= size; - - num_frags = skb_shinfo(skb)->nr_frags; - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (length == 0) { - /* don't need this page */ - skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE); - --skb_shinfo(skb)->nr_frags; - } else { - size = min(length, (unsigned) PAGE_SIZE); - - frag->size = size; - skb->data_len += size; - skb->truesize += size; - skb->len += size; - length -= size; - } - } -} void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { @@ -581,7 +491,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; - newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); + newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, IPOIB_CM_HEAD_SIZE, + 12, mapping); if (unlikely(!newskb)) { /* * If we can't allocate a new RX buffer, dump @@ -592,7 +503,10 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) goto repost; } - ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); + rx_ring[wr_id].skb = newskb; + + ipoib_dma_unmap_rx(priv, frags, IPOIB_CM_HEAD_SIZE, + rx_ring[wr_id].mapping); memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", @@ -1481,9 +1395,12 @@ int ipoib_cm_dev_init(struct net_device *dev) if (ipoib_cm_has_srq(dev)) { for (i = 0; i < ipoib_recvq_size; ++i) { - if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i, - priv->cm.num_frags - 1, - priv->cm.srq_ring[i].mapping)) { + priv->cm.srq_ring[i].skb 
= + ipoib_cm_alloc_rx_skb(dev, i, + priv->cm.num_frags - 1, + IPOIB_CM_HEAD_SIZE, 12, + priv->cm.srq_ring[i].mapping); + if (!priv->cm.srq_ring[i].skb) { ipoib_warn(priv, "failed to allocate " "receive buffer %d\n", i); ipoib_cm_dev_cleanup(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 52bc2bd..c40329f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -116,6 +116,91 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) return ret; } +struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, + int id, int frags, int head_size, + int pad, u64 *mapping) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + int i; + + skb = dev_alloc_skb(head_size + pad); + if (unlikely(!skb)) + return NULL; + + /* + * IPoIB adds a 4 byte header. So we need pad more bytes to align the + * IP header to a multiple of 16. For CM mode, you add pad 12, + * for UD mode, we add pad 4. 
+ */ + skb_reserve(skb, pad); + + mapping[0] = ib_dma_map_single(priv->ca, skb->data, head_size, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { + dev_kfree_skb_any(skb); + return NULL; + } + + for (i = 0; i < frags; i++) { + struct page *page = alloc_page(GFP_ATOMIC); + + if (!page) + goto partial_error; + skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); + + mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page, + 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) + goto partial_error; + } + + return skb; + +partial_error: + + ib_dma_unmap_single(priv->ca, mapping[0], head_size, DMA_FROM_DEVICE); + + for (; i > 0; --i) + ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); + + dev_kfree_skb_any(skb); + return NULL; +} + +/* Adjust length of skb with fragments to match received data */ +void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, + unsigned int length, struct sk_buff *toskb) +{ + int i, num_frags; + unsigned int size; + + /* put header into skb */ + size = min(length, hdr_space); + skb->tail += size; + skb->len += size; + length -= size; + + num_frags = skb_shinfo(skb)->nr_frags; + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (length == 0) { + /* don't need this page */ + skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE); + --skb_shinfo(skb)->nr_frags; + } else { + size = min(length, (unsigned) PAGE_SIZE); + + frag->size = size; + skb->data_len += size; + skb->truesize += size; + skb->len += size; + length -= size; + } + } +} + static int ipoib_alloc_rx_skb(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); From mashirle at us.ibm.com Sat Feb 2 04:30:46 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 04:30:46 -0800 Subject: [ofa-general] [V2][PATCH 2/3] ib/ipoib: set IPoIB-UD RX S/G parameters In-Reply-To: 
<1201721611.6850.48.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201721611.6850.48.camel@localhost.localdomain> Message-ID: <1201955447.19565.188.camel@localhost.localdomain> This patch is the same as previous submitted version (V1). This patch makes IPoIB-UD RX S/G to be ready. Signed-off-by: Shirley Ma --- diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index d1d3ca2..004a80b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -61,6 +61,10 @@ enum { IPOIB_ENCAP_LEN = 4, + IPOIB_MAX_IB_MTU = 4096, + IPOIB_UD_MAX_RX_SG = ALIGN(IPOIB_MAX_IB_MTU + IB_GRH_BYTES + 4, + PAGE_SIZE) / PAGE_SIZE, /* padding to align IP header */ + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, @@ -319,6 +323,9 @@ struct ipoib_dev_priv { struct dentry *mcg_dentry; struct dentry *path_dentry; #endif + int max_ib_mtu; + struct ib_sge rx_sge[IPOIB_UD_MAX_RX_SG]; + struct ib_recv_wr rx_wr; }; struct ipoib_ah { @@ -359,6 +366,12 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +/* padding to align IP header */ +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + 4) +#define IPOIB_UD_HEAD_SIZE(ib_mtu) (IPOIB_UD_BUF_SIZE(ib_mtu)) % PAGE_SIZE +#define IPOIB_UD_RX_SG(ib_mtu) ALIGN(IPOIB_UD_BUF_SIZE(ib_mtu), PAGE_SIZE) / PAGE_SIZE + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. 
The ALIGN() expression here makes diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a082466..242591f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -194,7 +194,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -968,10 +968,6 @@ static void ipoib_setup(struct net_device *dev) dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1103,6 +1099,7 @@ static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1111,6 +1108,18 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 2628339..630b429 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ 
b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 433e99a..0d800dd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -150,13 +150,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG(priv->max_ib_mtu) }, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD }; - int ret, size; + int ret, size, i; priv->pd = ib_alloc_pd(priv->ca); if (IS_ERR(priv->pd)) { @@ -208,6 +208,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->tx_wr.num_sge = 1; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu); + priv->rx_sge[0].lkey = priv->mr->lkey; + for (i = 1; i < IPOIB_UD_RX_SG(priv->max_ib_mtu); ++i) { + priv->rx_sge[i].length = PAGE_SIZE; + priv->rx_sge[i].lkey = priv->mr->lkey; + } + priv->rx_wr.num_sge = IPOIB_UD_RX_SG(priv->max_ib_mtu); + priv->rx_wr.next = NULL; + priv->rx_wr.sg_list = priv->rx_sge; + return 0; out_free_cq: From mashirle at us.ibm.com Sat Feb 2 04:39:17 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 04:39:17 -0800 Subject: [ofa-general] [V2][PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support In-Reply-To: <1201725009.6850.54.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> Message-ID: 
<1201955957.19565.196.camel@localhost.localdomain> This patch keeps existing 2K MTU IPoIB-UD implementation to be used by both 2K MTU and no S/G 4K MTU. 4K MTU RX S/G is needed when necessary. Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 28 ++++----- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 10 ++-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 108 ++++++++++++++++++++++--------- 3 files changed, 95 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 004a80b..6c33d7d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -56,9 +56,6 @@ /* constants */ enum { - IPOIB_PACKET_SIZE = 2048, - IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, - IPOIB_ENCAP_LEN = 4, IPOIB_MAX_IB_MTU = 4096, @@ -140,12 +137,7 @@ struct ipoib_mcast { struct net_device *dev; }; -struct ipoib_rx_buf { - struct sk_buff *skb; - u64 mapping; -}; - -struct ipoib_cm_rx_buf { +struct ipoib_sg_rx_buf { struct sk_buff *skb; u64 mapping[IPOIB_CM_RX_SG]; }; @@ -198,7 +190,7 @@ enum ipoib_cm_state { struct ipoib_cm_rx { struct ib_cm_id *id; struct ib_qp *qp; - struct ipoib_cm_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; struct list_head list; struct net_device *dev; unsigned long jiffies; @@ -223,7 +215,7 @@ struct ipoib_cm_tx { struct ipoib_cm_dev_priv { struct ib_srq *srq; - struct ipoib_cm_rx_buf *srq_ring; + struct ipoib_sg_rx_buf *srq_ring; struct ib_cm_id *id; struct list_head passive_ids; /* state: LIVE */ struct list_head rx_error_list; /* state: ERROR */ @@ -294,7 +286,7 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; @@ -367,10 +359,14 @@ struct ipoib_neigh { }; #define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES) /* padding to align IP header */
-#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + 4) -#define IPOIB_UD_HEAD_SIZE(ib_mtu) (IPOIB_UD_BUF_SIZE(ib_mtu)) % PAGE_SIZE -#define IPOIB_UD_RX_SG(ib_mtu) ALIGN(IPOIB_UD_BUF_SIZE(ib_mtu), PAGE_SIZE) / PAGE_SIZE +#define IPOIB_UD_HEAD_SIZE(ib_mtu) (IPOIB_UD_BUF_SIZE(ib_mtu) + 4) % PAGE_SIZE +#define IPOIB_UD_RX_SG(ib_mtu) ALIGN(IPOIB_UD_BUF_SIZE(ib_mtu) + 4, PAGE_SIZE) / PAGE_SIZE +static inline int ipoib_ud_need_sg(int ib_mtu) +{ + return (IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE) ? 1 : 0; +} /* * We stash a pointer to our private neighbour information after our @@ -473,7 +469,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev); void ipoib_drain_cq(struct net_device *dev); void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, unsigned int length, struct sk_buff *toskb); -struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, +struct sk_buff *ipoib_alloc_sg_rx_skb(struct net_device *dev, int id, int frags, int head_size, int pad, u64 *mapping); static void inline ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 2c2c6b2..4667f70 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -126,7 +126,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, } static void ipoib_cm_free_rx_ring(struct net_device *dev, - struct ipoib_cm_rx_buf *rx_ring) + struct ipoib_sg_rx_buf *rx_ring) { struct ipoib_dev_priv *priv = netdev_priv(dev); int i; @@ -283,7 +283,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i spin_unlock_irq(&priv->lock); for (i = 0; i < ipoib_recvq_size; ++i) { - rx->rx_ring[i].skb = ipoib_cm_alloc_rx_skb(dev, i, + rx->rx_ring[i].skb = ipoib_alloc_sg_rx_skb(dev, i, IPOIB_CM_RX_SG - 1, IPOIB_CM_HEAD_SIZE, 12, @@ -426,7 +426,7 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, void ipoib_cm_handle_rx_wc(struct 
net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_cm_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); struct sk_buff *skb, *newskb; struct ipoib_cm_rx *p; @@ -491,7 +491,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; - newskb = ipoib_cm_alloc_rx_skb(dev, wr_id, frags, IPOIB_CM_HEAD_SIZE, + newskb = ipoib_alloc_sg_rx_skb(dev, wr_id, frags, IPOIB_CM_HEAD_SIZE, 12, mapping); if (unlikely(!newskb)) { /* @@ -1396,7 +1396,7 @@ int ipoib_cm_dev_init(struct net_device *dev) if (ipoib_cm_has_srq(dev)) { for (i = 0; i < ipoib_recvq_size; ++i) { priv->cm.srq_ring[i].skb = - ipoib_cm_alloc_rx_skb(dev, i, + ipoib_alloc_sg_rx_skb(dev, i, priv->cm.num_frags - 1, IPOIB_CM_HEAD_SIZE, 12, priv->cm.srq_ring[i].mapping); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index c40329f..dcdb042 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -95,8 +95,8 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; + list.addr = priv->rx_ring[id].mapping[0]; + list.length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); list.lkey = priv->mr->lkey; param.next = NULL; @@ -104,19 +104,29 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) param.sg_list = &list; param.num_sge = 1; - ret = ib_post_recv(priv->qp, &param, &bad_wr); - if (unlikely(ret)) { - ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - dev_kfree_skb_any(priv->rx_ring[id].skb); - priv->rx_ring[id].skb = NULL; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { +
priv->rx_wr.wr_id = id | IPOIB_OP_RECV; + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); + ipoib_dma_unmap_rx(priv, IPOIB_UD_RX_SG(priv->max_ib_mtu) - 1, + IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu), + priv->rx_ring[id].mapping); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } else { + ret = ib_post_recv(priv->qp, &param, &bad_wr); + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } } return ret; } -struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, +struct sk_buff *ipoib_alloc_sg_rx_skb(struct net_device *dev, int id, int frags, int head_size, int pad, u64 *mapping) { @@ -207,7 +217,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) struct sk_buff *skb; u64 addr; - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); + skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4); if (!skb) return -ENOMEM; @@ -218,7 +228,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, + addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { dev_kfree_skb_any(skb); @@ -226,7 +236,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) } priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; + priv->rx_ring[id].mapping[0] = addr; return 0; } @@ -237,7 +247,17 @@ static int ipoib_ib_post_receives(struct net_device *dev) int i; for (i = 0; i < ipoib_recvq_size; ++i) { - if (ipoib_alloc_rx_skb(dev, i)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + priv->rx_ring[i].skb + = ipoib_alloc_sg_rx_skb(dev, i, + IPOIB_UD_RX_SG(priv->max_ib_mtu) - 1, + IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu), 4, +
priv->rx_ring[i].mapping); + if (!priv->rx_ring[i].skb) { + ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); + return -ENOMEM; + } + } else if (ipoib_alloc_rx_skb(dev, i)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); return -ENOMEM; } @@ -254,8 +274,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; - struct sk_buff *skb; - u64 addr; + struct sk_buff *skb, *newskb = NULL; + u64 mapping[IPOIB_UD_RX_SG(priv->max_ib_mtu)]; + int frags = 0; + u64 addr = 0; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -267,15 +289,21 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; + if (!ipoib_ud_need_sg(priv->max_ib_mtu)) + addr = priv->rx_ring[wr_id].mapping[0]; if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_dma_unmap_rx(priv, IPOIB_UD_RX_SG(priv->max_ib_mtu) - 1, + IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu), + priv->rx_ring[wr_id].mapping); + else + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -292,17 +320,32 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. 
*/ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, + (unsigned)(IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu)))) / PAGE_SIZE; + newskb = ipoib_alloc_sg_rx_skb(dev, wr_id, frags, + IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu), 4, mapping); + priv->rx_ring[wr_id].skb = newskb; + if (unlikely(newskb)) { + ++dev->stats.rx_dropped; + goto repost; + } + } else if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { ++dev->stats.rx_dropped; goto repost; } ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - - skb_put(skb, wc->byte_len); + + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ipoib_dma_unmap_rx(priv, frags, IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu), + priv->rx_ring[wr_id].mapping); + skb_put_frags(skb, IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu), wc->byte_len, newskb); + } else { + ib_dma_unmap_single(priv->ca, addr, IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); + } skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -710,15 +753,20 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) } for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; - + struct ipoib_sg_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; if (!rx_req->skb) continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_dma_unmap_rx(priv, + IPOIB_UD_RX_SG(priv->max_ib_mtu) - 1, + IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu), + priv->rx_ring[i].mapping); + else + ib_dma_unmap_single(priv->ca, + rx_req->mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(rx_req->skb); rx_req->skb = NULL; } From mashirle at us.ibm.com Sat Feb 2 04:52:36 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 04:52:36 -0800 
Subject: [RESENT][ofa-general] [PATCH 2/3] ib/ipoib: set IPoIB-UD RX S/G parameters In-Reply-To: <1201721611.6850.48.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201721611.6850.48.camel@localhost.localdomain> Message-ID: <1201956756.19565.197.camel@localhost.localdomain> Signed-off-by: Shirley Ma diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 138f1a3..65b1159 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -56,11 +56,11 @@ /* constants */ enum { - IPOIB_PACKET_SIZE = 2048, - IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, - IPOIB_ENCAP_LEN = 4, + IPOIB_MAX_IB_MTU = 4096, /* max ib device payload is 4096 */ + IPOIB_UD_MAX_RX_SG = ALIGN(IPOIB_MAX_IB_MTU + IB_GRH_BYTES + 4, PAGE_SIZE) / PAGE_SIZE, /* padding to align IP header */ + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, @@ -314,6 +314,9 @@ struct ipoib_dev_priv { struct dentry *mcg_dentry; struct dentry *path_dentry; #endif + int max_ib_mtu; + struct ib_sge rx_sge[IPOIB_UD_MAX_RX_SG]; + struct ib_recv_wr rx_wr; }; struct ipoib_ah { @@ -354,6 +357,11 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES + 4) /* padding to align IP header */ +#define IPOIB_UD_HEAD_SIZE(ib_mtu) (IPOIB_UD_BUF_SIZE(ib_mtu)) % PAGE_SIZE +#define IPOIB_UD_RX_SG(ib_mtu) ALIGN(IPOIB_UD_BUF_SIZE(ib_mtu), PAGE_SIZE) / PAGE_SIZE + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. 
The ALIGN() expression here makes diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a082466..646aeb2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -194,7 +194,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -968,10 +968,6 @@ static void ipoib_setup(struct net_device *dev) dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1103,6 +1099,7 @@ static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1111,6 +1108,18 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 2628339..630b429 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ 
b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 433e99a..eefdb6a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -150,7 +150,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG(priv->max_ib_mtu) }, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD @@ -208,6 +208,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->tx_wr.num_sge = 1; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE(priv->max_ib_mtu); + for (i = 0; i < IPOIB_UD_RX_SG(priv->max_ib_mtu) - 1; ++i) { + priv->rx_sge[i].lkey = priv->mr->lkey; + priv->rx_sge[i + 1].length = PAGE_SIZE; + } + priv->rx_sge[i + 1].lkey = priv->mr->lkey; + priv->rx_wr.num_sge = IPOIB_UD_RX_SG(priv->max_ib_mtu); + priv->rx_wr.next = NULL; + priv->rx_wr.sg_list = priv->rx_sge; + return 0; out_free_cq: From kliteyn at mellanox.co.il Sat Feb 2 17:28:42 2008 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 3 Feb 2008 03:28:42 +0200 Subject: [ofa-general] nightly osm_sim report 2008-02-03:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2008-02-02 OpenSM git rev = Thu_Jan_31_20:03:04_2008 [798e8d88937d60e1454f058db2f4c705b41ec9df] ibutils git rev = Mon_Dec_24_10:42:01_2007 
[675bec82306d6920555dd0b5e2f664983e27e60f] Total=400 Pass=399 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree gnu-stallion-64.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree RhinoDDR.topo 10 FatTree FullGnu.topo 10 FatTree 4-ary-2-tree.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FTreeFail 4-ary-2-tree-missing-sw-link.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 10 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 9 LidMgr IS3-128.topo Failures: 1 LidMgr IS3-128.topo From BROYHILL at lists.openfabrics.org Sat Feb 2 17:59:19 2008 From: BROYHILL at lists.openfabrics.org (BROYHILL at lists.openfabrics.org) Date: Sun, 03 Feb 2008 05:29:19 +0330 Subject: [ofa-general] ONLINE JOB OPPORTUNITY (PART-TIME) . Message-ID: If you're interested in exploring this employment opportunity, please mail your information to: Corporate Human Resources, Staffing and Recruiting Dept, BROYHILL FURNITURE INDUSTRIES,INC One Broyhill Park Lenoir, NC 28633 www.broyhillfurniture.com email: broyhillfurnitures.inc.employments at hotmail.com Dear Applicant, We have a job opening for the position of Accounts recievable position.Would you like to work form your home and get paid weekly? We are offering this position to all interested applicants. Please carefully read through. 
BROYHILL FURNITURE INDUSTRIES.INC is a well established Manufacturing Firm,Over the decades, a world-class manufacturing and consumer-focused marketing company has evolved because of our commitment to uncompromised quality, our promise of undeniable value, our resources to invest in the newest technology, our artistic and consumer-conscious attention to innovative design and the vision to anticipate where, what and how the consumer will buy. We will continue to perfect our craftsmanship and to nurture our customer and set new standards for excellence. We serve the entire United States and a growing export market, particularly in the supplies of selected products.Today, Broyhill craftspeople are dedicated to designing, manufacturing and delivering high-quality Broyhill furniture worldwide. The extensive selection includes bedroom, dining room, upholstery, occasional tables, wall systems, curios, entertainment centers and home offices in a wide variety of styles from country to contemporary. You are not involved in any sales. Once orders are received and sorted we deliver the product to a customer. After this has been done the customer has to pay for the products but in most cases we make our clients prepay for orders or items they order for. About 90 percent of our customers prefer to pay through Certified Checks or Money Orders drawn from the United State based on the amount involved. We have decided to open this new contract -to-hire job position for solving this problem. Your First Primary task(Collection of Payments): 1. Receive payment from our Customers or Clients. 2. Cash Payment at your Bank or any cashing facilities near you. 3. Deduct 10 % which will be your percentage/pay on each Payment processed. 4. 
Forward balance after deduction of percentage/pay to any of the offices you will be contacted to send payment to, You'll have a lot of free time doing another job, because this job is part time, you'll get good income.But this job is very challenging and you should understand it. We are considering your application because you satisfy our requirements and we are sure you will be an earnest assistant till we start running our branch office in your state. Get back to us with information below information , so that we can add your mailing address to our Regional database and forward it to our customers for them to send payments. First name................... Middle name.................. Last name.................... Address Line 1(No P.O Box)............... Address Line 2............... City......................... State........................ Zip/Postal code.............. Age.......................... Nationality.................. Home phone................... Cell phone................... We will be updating you as soon as the payment is being sent to you and you will be directed as to where to have the remaining 90% of the money sent to,after the deduction of your 10% pay on any payments received and processed by you. Your response to this email is needed , so that we can reconfirm your mailing address details we have in our database. A swift acknowledgement of the receipt of this email will be appreciated. Thanks For Your Total Understanding, Mr. Ashley Cole, Corporate Human Resources Staffing and Recruiting Dept, Regional Manager, BROYHILL FURNITURE INDUSTRIES,INC www.broyhillfurniture.com Email: broyhillfurnitures.inc.employments at hotmail.com STATUS OF YOUR RESUME ! 
N:B: Please note that Broyhill Furniture Industries,inc will appreciate your reply and will also be in good condition to work with you and if you can make good improvement on your position on this work you will be given a good promotions and we will urge you not to take this job offer as granted and will also like you to know that this job is legitimate and register. Copyright © Broyhill Furniture ! Inc. All rights reserved. From mashirle at us.ibm.com Sat Feb 2 10:35:45 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 10:35:45 -0800 Subject: [ofa-general] [V3][PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: <1201718540.6850.41.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> Message-ID: <1201977346.19565.208.camel@localhost.localdomain> Hello Roland, This patchset is based on your previous review comments. It uses the current IPoIB-UD 2K MTU implementation when 4K MTU + GRH head + 4 is less than PAGE_SIZE; if it's greater, it allocates two buffers: one for the GRH + IPoIB head, and one for the data. Please compare this approach with the V2 patchset and provide feedback as soon as you can, so I can concentrate on the test and backport the one we agree with to OFED-1.3 RC3. Thanks Shirley From mashirle at us.ibm.com Sat Feb 2 10:38:30 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 10:38:30 -0800 Subject: [ofa-general] [V3][PATCH 1/3] ib/ipoib: UD RX S/G API In-Reply-To: <1201718716.6850.46.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201718716.6850.46.camel@localhost.localdomain> Message-ID: <1201977510.19565.212.camel@localhost.localdomain> This patch has created a couple of APIs for UD RX S/G to be used later.
Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 9 ++++ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 65 +++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index fe250c6..415bf9a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -61,6 +61,10 @@ enum { IPOIB_ENCAP_LEN = 4, + IPOIB_MAX_IB_MTU = 4096, + IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN, + IPOIB_UD_RX_SG = 2, /* for 4K MTU */ + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, @@ -136,6 +140,11 @@ struct ipoib_mcast { struct net_device *dev; }; +struct ipoib_sg_rx_buf { + struct sk_buff *skb; + u64 mapping[IPOIB_UD_RX_SG]; +}; + struct ipoib_rx_buf { struct sk_buff *skb; u64 mapping; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 52bc2bd..9ca3d34 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -87,6 +87,71 @@ void ipoib_free_ah(struct kref *kref) spin_unlock_irqrestore(&priv->lock, flags); } +/* Adjust length of skb with fragments to match received data */ +static void ipoib_ud_skb_put_frags(struct sk_buff *skb, unsigned int length, + struct sk_buff *toskb) +{ + unsigned int size; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; + + /* put header into skb */ + size = min(length, (unsigned)IPOIB_UD_HEAD_SIZE); + skb->tail += size; + skb->len += size; + length -= size; + + if (length == 0) { + /* don't need this page */ + skb_fill_page_desc(toskb, 0, frag->page, 0, PAGE_SIZE); + --skb_shinfo(skb)->nr_frags; + } else { + size = min(length, (unsigned) PAGE_SIZE); + frag->size = size; + skb->data_len += size; + skb->truesize += size; + skb->len += size; + length -= size; + } +} + +static struct sk_buff 
*ipoib_sg_alloc_rx_skb(struct net_device *dev, + int id, u64 mapping[IPOIB_UD_RX_SG]) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct page *page; + struct sk_buff *skb; + + skb = dev_alloc_skb(IPOIB_UD_HEAD_SIZE); + + if (unlikely(!skb)) + return NULL; + + mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_UD_HEAD_SIZE, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { + dev_kfree_skb_any(skb); + return NULL; + } + + page = alloc_page(GFP_ATOMIC); + if (!page) + goto partial_error; + + skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE); + mapping[1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page, + 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1]))) + goto partial_error; + + priv->rx_ring[id].skb = skb; + return skb; + +partial_error: + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + return NULL; +} + static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); From mashirle at us.ibm.com Sat Feb 2 10:40:38 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 10:40:38 -0800 Subject: [ofa-general] [V3][PATCH 2/3] ib/ipoib: set IPoIB-UD RX S/G parameters In-Reply-To: <1201721611.6850.48.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201721611.6850.48.camel@localhost.localdomain> Message-ID: <1201977638.19565.215.camel@localhost.localdomain> Define and set several UD RX S/G parameters to be used later. 
Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 16 ++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 ++++++++++++++----- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 3 +-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 13 ++++++++++++- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 415bf9a..6b5e108 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -328,6 +328,9 @@ struct ipoib_dev_priv { struct dentry *mcg_dentry; struct dentry *path_dentry; #endif + int max_ib_mtu; + struct ib_sge rx_sge[IPOIB_UD_RX_SG]; + struct ib_recv_wr rx_wr; }; struct ipoib_ah { @@ -368,6 +371,19 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES) +static inline int ipoib_ud_need_sg(int ib_mtu) +{ + return (IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE) ? 1 : 0; +} +static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_UD_RX_SG]) +{ + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_single(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); +} + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. 
The ALIGN() expression here makes diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a082466..242591f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -194,7 +194,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -968,10 +968,6 @@ static void ipoib_setup(struct net_device *dev) dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1103,6 +1099,7 @@ static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1111,6 +1108,18 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 2628339..630b429 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ 
b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 433e99a..fab1ada 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -150,7 +150,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD @@ -208,6 +208,17 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->tx_wr.num_sge = 1; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE; + priv->rx_sge[0].lkey = priv->mr->lkey; + priv->rx_sge[1].length = PAGE_SIZE; + priv->rx_sge[1].lkey = priv->mr->lkey; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + priv->rx_wr.num_sge = IPOIB_UD_RX_SG; + else + priv->rx_wr.num_sge = 1; + priv->rx_wr.next = NULL; + priv->rx_wr.sg_list = priv->rx_sge; + return 0; out_free_cq: From mashirle at us.ibm.com Sat Feb 2 10:52:25 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 10:52:25 -0800 Subject: [ofa-general] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU In-Reply-To: <1201725009.6850.54.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> Message-ID: <1201978345.19565.222.camel@localhost.localdomain> This patch enables IPoIB-UD 4K MTU support. 
If 4K MTU + GRH head + IPoIB head > PAGE_SIZE, then two buffers are allocated, otherwise use one buffer. Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 +-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 90 +++++++++++++++++++------------ 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 6b5e108..faee740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -145,11 +145,6 @@ struct ipoib_sg_rx_buf { u64 mapping[IPOIB_UD_RX_SG]; }; -struct ipoib_rx_buf { - struct sk_buff *skb; - u64 mapping; -}; - struct ipoib_tx_buf { struct sk_buff *skb; u64 mapping; @@ -299,7 +294,7 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9ca3d34..93025d3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -155,29 +155,22 @@ partial_error: static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; - - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; - - ret = ib_post_recv(priv->qp, &param, &bad_wr); + priv->rx_wr.wr_id = id | IPOIB_OP_RECV; + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); if (unlikely(ret)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[id].mapping); + else + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); ipoib_warn(priv,
"receive failed for buf %d (%d)\n", id, ret); - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } - return ret; } @@ -187,7 +180,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) struct sk_buff *skb; u64 addr; - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); + skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4); if (!skb) return -ENOMEM; @@ -198,7 +191,8 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, + addr = ib_dma_map_single(priv->ca, skb->data, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { dev_kfree_skb_any(skb); @@ -206,7 +200,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) } priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; + priv->rx_ring[id].mapping[0] = addr; return 0; } @@ -214,10 +208,15 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) static int ipoib_ib_post_receives(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - int i; + int i, ret; for (i = 0; i < ipoib_recvq_size; ++i) { - if (ipoib_alloc_rx_skb(dev, i)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ret = !(ipoib_sg_alloc_rx_skb(dev, i, + priv->rx_ring[i].mapping)); + else + ret = ipoib_alloc_rx_skb(dev, i); + if (ret) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); return -ENOMEM; } @@ -234,8 +233,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; - struct sk_buff *skb; - u64 addr; + struct sk_buff *skb, *newskb = NULL; + u64 mapping[IPOIB_UD_RX_SG]; + u64 addr = 0; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -247,15 +247,20 @@ static 
void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; + if (!ipoib_ud_need_sg(priv->max_ib_mtu)) + addr = priv->rx_ring[wr_id].mapping[0]; if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); + else + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -272,17 +277,28 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. */ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + newskb = ipoib_sg_alloc_rx_skb(dev, wr_id, mapping); + if (unlikely(newskb)) { + ++dev->stats.rx_dropped; + goto repost; + } + } else if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { ++dev->stats.rx_dropped; goto repost; } ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - - skb_put(skb, wc->byte_len); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ipoib_sg_dma_unmap_rx(priv, mapping); + ipoib_ud_skb_put_frags(skb, wc->byte_len, newskb); + } else { + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); + } skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -690,15 +706,19 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) } for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; + struct 
ipoib_sg_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; if (!rx_req->skb) continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); + else + ib_dma_unmap_single(priv->ca, + rx_req->mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(rx_req->skb); rx_req->skb = NULL; } From jackm at dev.mellanox.co.il Sat Feb 2 22:52:45 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Sun, 3 Feb 2008 08:52:45 +0200 Subject: [ofa-general] Re: [PATCH 1 of 2] IB/mlx4: For 64-bit systems, use large virtually contiguous queue buffers (vmap) In-Reply-To: References: <200801281040.52138.jackm@dev.mellanox.co.il> Message-ID: <200802030852.45683.jackm@dev.mellanox.co.il> On Friday 01 February 2008 22:33, Roland Dreier wrote: > > --- infiniband.orig/drivers/infiniband/hw/mlx4/qp.c 2008-01-27 10:44:25.000000000 +0200 > > +++ infiniband/drivers/infiniband/hw/mlx4/qp.c 2008-01-27 10:52:21.000000000 +0200 > > @@ -96,7 +96,7 @@ static int is_qp0(struct mlx4_ib_dev *de > > > > static void *get_wqe(struct mlx4_ib_qp *qp, int offset) > > { > > - if (qp->buf.nbufs == 1) > > + if (BITS_PER_LONG == 64 || qp->buf.nbufs == 1) > > return qp->buf.u.direct.buf + offset; > > else > > return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf + > > I don't see any changes to cq.c -- it seems we would want to make the > same optimization there, right? Or is it actually better to walk the > two-level stuff ourselves when we can? > We can certainly do the same optimization for CQs. The reason it wasn't done was that we focused on the shrinking WQE here -- and for a work request to span multiple WQE basic blocks, the work queue address space must be virtually contiguous -- so the vmap optimization was necessary. We just didn't expand it further at the time. 
- Jack From jackm at mellanox.co.il Sat Feb 2 23:14:49 2008 From: jackm at mellanox.co.il (Jack Morgenstein) Date: Sun, 3 Feb 2008 09:14:49 +0200 Subject: [ofa-general] RE: back to the max_qp_wr attribute In-Reply-To: References: Message-ID: <6C2C79E72C305246B504CBA17B5500C9033B0C7C@mtlexch01.mtl.com> > -----Original Message----- > From: Or Gerlitz [mailto:ogerlitz at voltaire.com] > Sent: Thursday, January 31, 2008 12:31 PM > To: Roland Dreier; Jack Morgenstein > Cc: general at lists.openfabrics.org > Subject: back to the max_qp_wr attribute > > Hi, > > Doing HCA query (using ibv_devinfo on a system with two HCAs, memfull > Arbel and connectx, see details below), I have noticed that the value > of the max_qp_wr attribute is different, 64K on Arbel and 16K > on connectx. > > I thought that its possible that on the mlx4 case some filter > function is > applied on the values returned by the FW, but I could not > find an evidence > for that in the code - Roland, am I right and you return the > FW values? > > Jack, if this is not the case, can you confirm that the > connectx supported > value is indeed 16K, is it FW depedent? > Yes, the value is FW dependent. The driver returns the value supplied by the FW. - Jack From mashirle at us.ibm.com Sat Feb 2 13:33:38 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 13:33:38 -0800 Subject: [ofa-general] [UPDATE] [V3][PATCH 2/3] ib/ipoib: set IPoIB-UD RX S/G parameters In-Reply-To: <1201977638.19565.215.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201721611.6850.48.camel@localhost.localdomain> <1201977638.19565.215.camel@localhost.localdomain> Message-ID: <1201988018.19565.225.camel@localhost.localdomain> Patchset has been tested for Intel platform 2K MTU. 
Here is the update patch: Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 16 ++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 ++++++++++++++----- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 3 +-- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 16 +++++++++++++++- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 415bf9a..6b5e108 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -328,6 +328,9 @@ struct ipoib_dev_priv { struct dentry *mcg_dentry; struct dentry *path_dentry; #endif + int max_ib_mtu; + struct ib_sge rx_sge[IPOIB_UD_RX_SG]; + struct ib_recv_wr rx_wr; }; struct ipoib_ah { @@ -368,6 +371,19 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES) +static inline int ipoib_ud_need_sg(int ib_mtu) +{ + return (IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE) ? 1 : 0; +} +static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_UD_RX_SG]) +{ + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_single(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); +} + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. 
The ALIGN() expression here makes diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a082466..242591f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -194,7 +194,7 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -968,10 +968,6 @@ static void ipoib_setup(struct net_device *dev) dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1103,6 +1099,7 @@ static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1111,6 +1108,18 @@ static struct net_device *ipoib_add_port(const char *format, SET_NETDEV_DEV(priv->dev, hca->dma_device); + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 2628339..630b429 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ 
b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 433e99a..dad6b1e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -150,7 +150,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_UD @@ -208,6 +208,20 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->tx_wr.num_sge = 1; priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->rx_sge[0].lkey = priv->mr->lkey; + priv->rx_sge[1].lkey = priv->mr->lkey; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE; + priv->rx_sge[1].length = PAGE_SIZE; + priv->rx_wr.num_sge = IPOIB_UD_RX_SG; + } else { + priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + priv->rx_sge[1].length = 0; + priv->rx_wr.num_sge = 1; + } + priv->rx_wr.next = NULL; + priv->rx_wr.sg_list = priv->rx_sge; + return 0; out_free_cq: From mashirle at us.ibm.com Sat Feb 2 13:35:54 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 13:35:54 -0800 Subject: [ofa-general] [UPDATE [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU In-Reply-To: <1201978345.19565.222.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> 
<1201978345.19565.222.camel@localhost.localdomain> Message-ID: <1201988154.19565.229.camel@localhost.localdomain> This patchset has been tested for 2K MTU on Intel platform with mthca. Here is the updated one: Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 +-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 91 +++++++++++++++++++------------ 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 6b5e108..faee740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -145,11 +145,6 @@ struct ipoib_sg_rx_buf { u64 mapping[IPOIB_UD_RX_SG]; }; -struct ipoib_rx_buf { - struct sk_buff *skb; - u64 mapping; -}; - struct ipoib_tx_buf { struct sk_buff *skb; u64 mapping; @@ -299,7 +294,7 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9ca3d34..81a517b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -155,29 +155,25 @@ partial_error: static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; + priv->rx_wr.wr_id = id | IPOIB_OP_RECV; + priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0]; + priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; - - ret = ib_post_recv(priv->qp, &param, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); if (unlikely(ret)) { + if
(ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[id].mapping); + else + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } - return ret; } @@ -187,7 +183,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) struct sk_buff *skb; u64 addr; - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); + skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4); if (!skb) return -ENOMEM; @@ -198,7 +194,8 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, + addr = ib_dma_map_single(priv->ca, skb->data, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { dev_kfree_skb_any(skb); @@ -206,7 +203,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) } priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; + priv->rx_ring[id].mapping[0] = addr; return 0; } @@ -214,10 +211,15 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) static int ipoib_ib_post_receives(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - int i; + int i, ret; for (i = 0; i < ipoib_recvq_size; ++i) { - if (ipoib_alloc_rx_skb(dev, i)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ret = !(ipoib_sg_alloc_rx_skb(dev, i, + priv->rx_ring[i].mapping)); + else + ret = ipoib_alloc_rx_skb(dev, i); + if (ret) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); return -ENOMEM; } @@ -234,8 +236,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & 
~IPOIB_OP_RECV; - struct sk_buff *skb; - u64 addr; + struct sk_buff *skb, *newskb = NULL; + u64 mapping[IPOIB_UD_RX_SG]; + u64 addr = 0; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -247,15 +250,20 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; + if (!ipoib_ud_need_sg(priv->max_ib_mtu)) + addr = priv->rx_ring[wr_id].mapping[0]; if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); + else + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -272,17 +280,28 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. 
*/ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + newskb = ipoib_sg_alloc_rx_skb(dev, wr_id, mapping); + if (unlikely(newskb)) { + ++dev->stats.rx_dropped; + goto repost; + } + } else if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { ++dev->stats.rx_dropped; goto repost; } ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - - skb_put(skb, wc->byte_len); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ipoib_sg_dma_unmap_rx(priv, mapping); + ipoib_ud_skb_put_frags(skb, wc->byte_len, newskb); + } else { + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); + } skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -690,15 +709,19 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) } for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; + struct ipoib_sg_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; if (!rx_req->skb) continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); + else + ib_dma_unmap_single(priv->ca, + rx_req->mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(rx_req->skb); rx_req->skb = NULL; } From jackm at dev.mellanox.co.il Sat Feb 2 23:49:44 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Sun, 3 Feb 2008 09:49:44 +0200 Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC receive-only qp support Message-ID: <200802030949.44943.jackm@dev.mellanox.co.il> ib/core: Implement XRC receive-only QPs for userspace apps. Added creation of XRC receive-only QPs for userspace, which reside in kernel space (user cannot post-to or poll these QPs). 
Motivation: MPI community requires XRC receive QPs which will not be destroyed when the creating process terminates. Solution: Userspace requests that a QP be created in kernel space. Each userspace process using that QP (i.e. receiving packets on an XRC SRQ via the qp), registers with that QP (-- the creator is also registered, whether or not it is a user of the QP). When the last userspace user unregisters with the QP, it is destroyed. Unregistration is also part of userspace process cleanup, so there is no leakage. This patch implements the kernel procedures to implement the following (new) libibverbs API: ibv_create_xrc_rcv_qp ibv_modify_xrc_rcv_qp ibv_query_xrc_rcv_qp ibv_reg_xrc_rcv_qp ibv_unreg_xrc_rcv_qp In addition, the patch implements the foundation for distributing XRC-receive-only QP events to userspace processes registered with that QP. Finally, the patch modifies ib_uverbs_close_xrc_domain() to return BUSY if any resources are still in use by the process, so that the XRC rcv-only QP cleanup can operate properly. V2: Fixed bug in ib_uverbs_close_xrc_domain. We need to allow the process to successfully close its copy of the domain, even if it still has undestroyed XRC QPs -- these will continue to operate, although it will not be possible to create new ones (there will be no Oops). However, we need to check that there are no outstanding xrc-qp-registrations: the cleanup procedure for this depends on the xrc domain still being accessible in this process in order to perform all needed un-registrations (and thus prevent resource leakage). V3: Fix thinko in ib_uverbs_reg_xrc_rcv_qp, ib_uverbs_unreg_xrc_rcv_qp, and ib_uverbs_modify_xrc_rcv_qp: on success, incorrectly returned 0 instead of input length. 
Signed-off-by: Jack Morgenstein Index: infiniband/include/rdma/ib_verbs.h =================================================================== --- infiniband.orig/include/rdma/ib_verbs.h 2008-01-28 12:20:55.000000000 +0200 +++ infiniband/include/rdma/ib_verbs.h 2008-01-28 12:22:09.000000000 +0200 @@ -285,6 +285,10 @@ enum ib_event_type { IB_EVENT_CLIENT_REREGISTER }; +enum ib_event_flags { + IB_XRC_QP_EVENT_FLAG = 0x80000000, +}; + struct ib_event { struct ib_device *device; union { @@ -292,6 +296,7 @@ struct ib_event { struct ib_qp *qp; struct ib_srq *srq; u8 port_num; + u32 xrc_qp_num; } element; enum ib_event_type event; }; @@ -492,6 +497,7 @@ enum ib_qp_type { enum qp_create_flags { QP_CREATE_LSO = 1 << 0, + XRC_RCV_QP = 1 << 1, }; struct ib_qp_init_attr { @@ -723,6 +729,7 @@ struct ib_ucontext { struct list_head srq_list; struct list_head ah_list; struct list_head xrc_domain_list; + struct list_head xrc_reg_qp_list; int closing; }; @@ -744,6 +751,12 @@ struct ib_udata { size_t outlen; }; +struct ib_uxrc_rcv_object { + struct list_head list; /* link to context's list */ + u32 qp_num; + u32 domain_handle; +}; + struct ib_pd { struct ib_device *device; struct ib_uobject *uobject; @@ -1053,6 +1066,23 @@ struct ib_device { struct ib_ucontext *context, struct ib_udata *udata); int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr, + u32* qp_num); + int (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd, + u32 qp_num, + struct ib_qp_attr *attr, + int attr_mask); + int (*query_xrc_rcv_qp)(struct ib_xrcd *xrcd, + u32 qp_num, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_qp_init_attr *init_attr); + int (*reg_xrc_rcv_qp)(struct ib_xrcd *xrcd, + void *context, + u32 qp_num); + int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd, + void *context, + u32 qp_num); struct ib_dma_mapping_ops *dma_ops; Index: infiniband/drivers/infiniband/core/uverbs_main.c =================================================================== --- 
infiniband.orig/drivers/infiniband/core/uverbs_main.c 2008-01-28 12:20:55.000000000 +0200 +++ infiniband/drivers/infiniband/core/uverbs_main.c 2008-01-28 12:20:56.000000000 +0200 @@ -114,6 +114,11 @@ static ssize_t (*uverbs_cmd_table[])(str [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq, [IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = ib_uverbs_open_xrc_domain, [IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = ib_uverbs_close_xrc_domain, + [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp, + [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp, + [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp, + [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp, + [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp, }; static struct vfsmount *uverbs_event_mnt; @@ -191,6 +196,7 @@ static int ib_uverbs_cleanup_ucontext(st struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; + struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1; if (!context) return 0; @@ -251,6 +257,13 @@ static int ib_uverbs_cleanup_ucontext(st kfree(uobj); } + list_for_each_entry_safe(xrc_qp_obj, tmp1, &context->xrc_reg_qp_list, list) { + list_del(&xrc_qp_obj->list); + ib_uverbs_cleanup_xrc_rcv_qp(file, xrc_qp_obj->domain_handle, + xrc_qp_obj->qp_num); + kfree(xrc_qp_obj); + } + mutex_lock(&file->device->ib_dev->xrcd_table_mutex); list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) { struct ib_xrcd *xrcd = uobj->object; @@ -506,6 +519,12 @@ void ib_uverbs_event_handler(struct ib_e NULL, NULL); } +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num, + event->event, NULL, NULL); +} + struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd) { Index: infiniband/drivers/infiniband/core/uverbs_cmd.c =================================================================== --- 
infiniband.orig/drivers/infiniband/core/uverbs_cmd.c 2008-01-28 12:20:55.000000000 +0200 +++ infiniband/drivers/infiniband/core/uverbs_cmd.c 2008-01-28 12:20:56.000000000 +0200 @@ -315,6 +315,7 @@ ssize_t ib_uverbs_get_context(struct ib_ INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->xrc_domain_list); + INIT_LIST_HEAD(&ucontext->xrc_reg_qp_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -1080,6 +1081,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uv goto err_put; } + attr.create_flags = 0; attr.event_handler = ib_uverbs_qp_event_handler; attr.qp_context = file; attr.send_cq = scq; @@ -2561,6 +2563,7 @@ ssize_t ib_uverbs_close_xrc_domain(struc int out_len) { struct ib_uverbs_close_xrc_domain cmd; + struct ib_uxrc_rcv_object *tmp; struct ib_uobject *uobj; struct ib_xrcd *xrcd = NULL; struct inode *inode = NULL; @@ -2576,6 +2579,18 @@ ssize_t ib_uverbs_close_xrc_domain(struc goto err_unlock_mutex; } + mutex_lock(&file->mutex); + list_for_each_entry(tmp, &file->ucontext->xrc_reg_qp_list, list) + if (cmd.xrcd_handle == tmp->domain_handle) { + ret = -EBUSY; + break; + } + mutex_unlock(&file->mutex); + if (ret) { + put_uobj_write(uobj); + goto err_unlock_mutex; + } + xrcd = (struct ib_xrcd *) (uobj->object); inode = xrcd->inode; @@ -2611,7 +2626,7 @@ err_unlock_mutex: } void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, - struct ib_xrcd *xrcd) + struct ib_xrcd *xrcd) { struct inode *inode = NULL; int ret = 0; @@ -2625,4 +2640,353 @@ void ib_uverbs_dealloc_xrcd(struct ib_de xrcd_table_delete(ib_dev, inode); } +ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_xrc_rcv_qp cmd; + struct ib_uverbs_create_xrc_rcv_qp_resp resp; + struct ib_uxrc_rcv_object *obj; + struct ib_qp_init_attr init_attr; + struct ib_xrcd *xrcd; + struct ib_uobject *xrcd_uobj; + u32 qp_num; + int err; + + if (out_len < 
sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + err = -EINVAL; + goto err_out; + } + + memset(&init_attr, 0, sizeof init_attr); + init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler; + init_attr.qp_context = file; + init_attr.srq = NULL; + init_attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + init_attr.qp_type = IB_QPT_XRC; + init_attr.xrc_domain = xrcd; + init_attr.create_flags = XRC_RCV_QP; + + init_attr.cap.max_send_wr = 1; + init_attr.cap.max_recv_wr = 0; + init_attr.cap.max_send_sge = 1; + init_attr.cap.max_recv_sge = 0; + init_attr.cap.max_inline_data = 0; + + err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num); + if (err) + goto err_put; + + memset(&resp, 0, sizeof resp); + resp.qpn = qp_num; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + err = -EFAULT; + goto err_destroy; + } + + atomic_inc(&xrcd->usecnt); + put_xrcd_read(xrcd_uobj); + obj->qp_num = qp_num; + obj->domain_handle = cmd.xrc_domain_handle; + mutex_lock(&file->mutex); + list_add_tail(&obj->list, &file->ucontext->xrc_reg_qp_list); + mutex_unlock(&file->mutex); + + return in_len; + +err_destroy: + xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); +err_put: + put_xrcd_read(xrcd_uobj); +err_out: + kfree(obj); + return err; +} + +ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_xrc_rcv_qp cmd; + struct ib_qp_attr *attr; + struct ib_xrcd *xrcd; + struct ib_uobject *xrcd_uobj; + int err; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj); + 
if (!xrcd) { + kfree(attr); + return -EINVAL; + } + + memset(attr, 0, sizeof *attr); + attr->qp_state = cmd.qp_state; + attr->cur_qp_state = cmd.cur_qp_state; + attr->qp_access_flags = cmd.qp_access_flags; + attr->pkey_index = cmd.pkey_index; + attr->port_num = cmd.port_num; + attr->path_mtu = cmd.path_mtu; + attr->path_mig_state = cmd.path_mig_state; + attr->qkey = cmd.qkey; + attr->rq_psn = cmd.rq_psn; + attr->sq_psn = cmd.sq_psn; + attr->dest_qp_num = cmd.dest_qp_num; + attr->alt_pkey_index = cmd.alt_pkey_index; + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; + attr->max_rd_atomic = cmd.max_rd_atomic; + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; + attr->min_rnr_timer = cmd.min_rnr_timer; + attr->port_num = cmd.port_num; + attr->timeout = cmd.timeout; + attr->retry_cnt = cmd.retry_cnt; + attr->rnr_retry = cmd.rnr_retry; + attr->alt_port_num = cmd.alt_port_num; + attr->alt_timeout = cmd.alt_timeout; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; + attr->ah_attr.dlid = cmd.dest.dlid; + attr->ah_attr.sl = cmd.dest.sl; + attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; + attr->ah_attr.static_rate = cmd.dest.static_rate; + attr->ah_attr.ah_flags = cmd.dest.is_global ? 
IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + + err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask); + put_xrcd_read(xrcd_uobj); + kfree(attr); + return err ? err : in_len; +} + +ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_query_xrc_rcv_qp cmd; + struct ib_uverbs_query_qp_resp resp; + struct ib_qp_attr *attr; + struct ib_qp_init_attr *init_attr; + struct ib_xrcd *xrcd; + struct ib_uobject *xrcd_uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto out; + } + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto out; + } + + ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr, + cmd.attr_mask, init_attr); + + put_xrcd_read(xrcd_uobj); + + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + resp.qp_state = attr->qp_state; + resp.cur_qp_state = attr->cur_qp_state; + resp.path_mtu = attr->path_mtu; + resp.path_mig_state = attr->path_mig_state; + resp.qkey = attr->qkey; + resp.rq_psn = attr->rq_psn; + resp.sq_psn = attr->sq_psn; + 
resp.dest_qp_num = attr->dest_qp_num; + resp.qp_access_flags = attr->qp_access_flags; + resp.pkey_index = attr->pkey_index; + resp.alt_pkey_index = attr->alt_pkey_index; + resp.sq_draining = attr->sq_draining; + resp.max_rd_atomic = attr->max_rd_atomic; + resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; + resp.min_rnr_timer = attr->min_rnr_timer; + resp.port_num = attr->port_num; + resp.timeout = attr->timeout; + resp.retry_cnt = attr->retry_cnt; + resp.rnr_retry = attr->rnr_retry; + resp.alt_port_num = attr->alt_port_num; + resp.alt_timeout = attr->alt_timeout; + + memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); + resp.dest.flow_label = attr->ah_attr.grh.flow_label; + resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; + resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; + resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; + resp.dest.dlid = attr->ah_attr.dlid; + resp.dest.sl = attr->ah_attr.sl; + resp.dest.src_path_bits = attr->ah_attr.src_path_bits; + resp.dest.static_rate = attr->ah_attr.static_rate; + resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); + resp.dest.port_num = attr->ah_attr.port_num; + + memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); + resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; + resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; + resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; + resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; + resp.alt_dest.dlid = attr->alt_ah_attr.dlid; + resp.alt_dest.sl = attr->alt_ah_attr.sl; + resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; + resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; + resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); + resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + + resp.max_send_wr = init_attr->cap.max_send_wr; + resp.max_recv_wr = init_attr->cap.max_recv_wr; + resp.max_send_sge = init_attr->cap.max_send_sge; + resp.max_recv_sge = 
init_attr->cap.max_recv_sge; + resp.max_inline_data = init_attr->cap.max_inline_data; + resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + kfree(attr); + kfree(init_attr); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_xrc_rcv_qp cmd; + struct ib_uxrc_rcv_object *qp_obj, *tmp; + struct ib_xrcd *xrcd; + struct ib_uobject *xrcd_uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL); + if (!qp_obj) + return -ENOMEM; + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_out; + } + + ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num); + if (ret) + goto err_put; + + atomic_inc(&xrcd->usecnt); + put_xrcd_read(xrcd_uobj); + mutex_lock(&file->mutex); + list_for_each_entry(tmp, &file->ucontext->xrc_reg_qp_list, list) + if (cmd.qp_num == tmp->qp_num) { + kfree(qp_obj); + mutex_unlock(&file->mutex); + put_xrcd_read(xrcd_uobj); + return 0; + } + qp_obj->qp_num = cmd.qp_num; + qp_obj->domain_handle = cmd.xrc_domain_handle; + list_add_tail(&qp_obj->list, &file->ucontext->xrc_reg_qp_list); + mutex_unlock(&file->mutex); + return in_len; + +err_put: + put_xrcd_read(xrcd_uobj); +err_out: + + kfree(qp_obj); + return ret; +} + +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, + u32 domain_handle, u32 qp_num) +{ + struct ib_xrcd *xrcd; + struct ib_uobject *xrcd_uobj; + int err; + + xrcd = idr_read_xrcd(domain_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) + return -EINVAL; + err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); + + if (!err) + atomic_dec(&xrcd->usecnt); + put_xrcd_read(xrcd_uobj); + return err; +} + +ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct 
ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_unreg_xrc_rcv_qp cmd; + struct ib_uxrc_rcv_object *qp_obj, *tmp; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_uverbs_cleanup_xrc_rcv_qp(file, cmd.xrc_domain_handle, cmd.qp_num); + if (ret) + return ret; + + mutex_lock(&file->mutex); + list_for_each_entry_safe(qp_obj, tmp, &file->ucontext->xrc_reg_qp_list, list) + if (cmd.qp_num == qp_obj->qp_num) { + list_del(&qp_obj->list); + kfree(qp_obj); + break; + } + mutex_unlock(&file->mutex); + return in_len; + +} Index: infiniband/include/rdma/ib_user_verbs.h =================================================================== --- infiniband.orig/include/rdma/ib_user_verbs.h 2008-01-28 12:20:54.000000000 +0200 +++ infiniband/include/rdma/ib_user_verbs.h 2008-01-28 12:20:56.000000000 +0200 @@ -86,7 +86,12 @@ enum { IB_USER_VERBS_CMD_POST_SRQ_RECV, IB_USER_VERBS_CMD_CREATE_XRC_SRQ, IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN, - IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN + IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN, + IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP, + IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP, + IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP, + IB_USER_VERBS_CMD_REG_XRC_RCV_QP, + IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP, }; /* @@ -714,6 +719,76 @@ struct ib_uverbs_close_xrc_domain { __u64 driver_data[0]; }; +struct ib_uverbs_create_xrc_rcv_qp { + __u64 response; + __u64 user_handle; + __u32 xrc_domain_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_xrc_rcv_qp_resp { + __u32 qpn; + __u32 reserved; +}; + +struct ib_uverbs_modify_xrc_rcv_qp { + __u32 xrc_domain_handle; + __u32 qp_num; + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 
qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_xrc_rcv_qp { + __u64 response; + __u32 xrc_domain_handle; + __u32 qp_num; + __u32 attr_mask; + __u64 driver_data[0]; +}; + +struct ib_uverbs_reg_xrc_rcv_qp { + __u32 xrc_domain_handle; + __u32 qp_num; + __u64 driver_data[0]; +}; + +struct ib_uverbs_unreg_xrc_rcv_qp { + __u32 xrc_domain_handle; + __u32 qp_num; + __u64 driver_data[0]; +}; #endif /* IB_USER_VERBS_H */ Index: infiniband/drivers/infiniband/core/uverbs.h =================================================================== --- infiniband.orig/drivers/infiniband/core/uverbs.h 2008-01-28 12:20:55.000000000 +0200 +++ infiniband/drivers/infiniband/core/uverbs.h 2008-01-28 12:20:56.000000000 +0200 @@ -163,8 +163,12 @@ void ib_uverbs_qp_event_handler(struct i void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, + void *context_ptr); void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, struct ib_xrcd *xrcd); +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, + u32 domain_handle, u32 qp_num); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ @@ -202,6 +206,11 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xrc_srq); IB_UVERBS_DECLARE_CMD(open_xrc_domain); IB_UVERBS_DECLARE_CMD(close_xrc_domain); +IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp); 
+IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp); #endif /* UVERBS_H */ From eli at dev.mellanox.co.il Sat Feb 2 23:57:54 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Sun, 03 Feb 2008 09:57:54 +0200 Subject: [ofa-general] Re: [PATCH 1/5] IB/ipoib: Split CQs for IPOIB UD In-Reply-To: <1201859134.19565.164.camel@localhost.localdomain> References: <1201873239.6677.5.camel@eli-laptop> <1201859134.19565.164.camel@localhost.localdomain> Message-ID: <1202025474.11889.8.camel@eli-laptop> On Fri, 2008-02-01 at 01:45 -0800, Shirley Ma wrote: > I filed a patch back two years ago to split CQ. The feedback was it > didn't benefit mthca since it had only one interrupt shared between send > and recv. More context switches were generated when splitting CQ. Then I > decided to wait the multiple interrupt vector to be implemented in > device driver layer to push this patch later with multiple interrupt > vectors (This is the next item on my list). The why the performance got > improved without multiple interrupt support here? > In my approach, I use two CQs but the send does not generate interrupts. Instead I use polling right after I post to the send queue. Splitting the CQ is also a preparation for using an unsignaled send queue, thus significantly reducing the overhead of polling the CQ. As I mentioned, this approach significantly improves the send rate of small UDP messages. From eli at dev.mellanox.co.il Sun Feb 3 00:33:22 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Sun, 03 Feb 2008 10:33:22 +0200 Subject: [ofa-general] Re: [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1201845460.19565.153.camel@localhost.localdomain> References: <1201873218.6677.4.camel@eli-laptop> <1201845460.19565.153.camel@localhost.localdomain> Message-ID: <1202027602.5839.11.camel@mtls03> On Thu, 2008-01-31 at 21:57 -0800, Shirley Ma wrote: > Hello Eli, > > I am going to verify this in our lab. I assume this patchset is built > against 2.6.25?
Well, this patch is against the ofed 1.3 tree. > > On Fri, 2008-02-01 at 15:40 +0200, Eli Cohen wrote: > > The following patches, based on ofed 1.3, are intended to address bugs > > https://bugs.openfabrics.org/show_bug.cgi?id=760 and > > https://bugs.openfabrics.org/show_bug.cgi?id=761. They address UD mode > > both send and receive and improve performance when using small > > messages > > UDP traffic. The observation we had is that at small UDP messages, the > > message rate is high and so what limits throughput is CPU, e.g. CPU is > > 100% busy. > What's the configuration for this test? How many CPUs? The test setup is two machines back to back. One is running netserver and the other runs: netperf -H -t UDP_STREAM -- -m 128 > > > In the send flow I use a dedicated CQ for the send flow which in turn > > is > > never armed. CQEs consumption is done by polling after posting a send > > message. Also, the QP is configured for selective signaling and > > polling > > the CQ is done once in 16 messages. > > I did see selective signaling impact the performance. Depends on how > many packets you want to pull once, the performance could be good, could > be worse from my experience based on how many you want to pull once. I > haven't fully understood this yet. I have a similar patch. But why you > pick up 16 messages? I chose this number since it showed good results. It is a macro defined in ipoib.h and can be changed to other values. > > > On the receive side the code is changed to post to receive queue once > > in > > 16 completions. This is done in for both UD and and CM. > > Ohmm, have you tested latency? I think it will increase latency for > small messages. > I also see an improvement in latency. I used netperf -t UDP_RR and -t TCP_RR and in both cases I saw an improvement.
From mashirle at us.ibm.com Sat Feb 2 14:36:54 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 14:36:54 -0800 Subject: [ofa-general] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU In-Reply-To: <1201978345.19565.222.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> Message-ID: <1201991814.19565.231.camel@localhost.localdomain> This is an updated patch; the patchset has been tested with both 2K MTU and 4K MTU. It fixes a typo in the 4K MTU path. Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 +-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 91 +++++++++++++++++++------------ 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 6b5e108..faee740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -145,11 +145,6 @@ struct ipoib_sg_rx_buf { u64 mapping[IPOIB_UD_RX_SG]; }; -struct ipoib_rx_buf { - struct sk_buff *skb; - u64 mapping; -}; - struct ipoib_tx_buf { struct sk_buff *skb; u64 mapping; @@ -299,7 +294,7 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9ca3d34..81a517b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -155,29 +155,25 @@ partial_error: static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; +
priv->rx_wr.wr_id = id | IPOIB_OP_RECV; + priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0]; + priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; - - ret = ib_post_recv(priv->qp, &param, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); if (unlikely(ret)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[id].mapping); + else + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } - return ret; } @@ -187,7 +183,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) struct sk_buff *skb; u64 addr; - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); + skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4); if (!skb) return -ENOMEM; @@ -198,7 +194,8 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, + addr = ib_dma_map_single(priv->ca, skb->data, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { dev_kfree_skb_any(skb); @@ -206,7 +203,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) } priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; + priv->rx_ring[id].mapping[0] = addr; return 0; } @@ -214,10 +211,15 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) static int ipoib_ib_post_receives(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - int i; + int i, ret; for (i = 0; i < ipoib_recvq_size; ++i) { - if (ipoib_alloc_rx_skb(dev, i)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ret =
!(ipoib_sg_alloc_rx_skb(dev, i, + priv->rx_ring[i].mapping)); + else + ret = ipoib_alloc_rx_skb(dev, i); + if (ret) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); return -ENOMEM; } @@ -234,8 +236,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; - struct sk_buff *skb; - u64 addr; + struct sk_buff *skb, *newskb = NULL; + u64 mapping[IPOIB_UD_RX_SG]; + u64 addr = 0; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -247,15 +250,20 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; + if (!ipoib_ud_need_sg(priv->max_ib_mtu)) + addr = priv->rx_ring[wr_id].mapping[0]; if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); + else + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -272,17 +280,28 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. 
*/ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + newskb = ipoib_sg_alloc_rx_skb(dev, wr_id, mapping); + if (unlikely(!newskb)) { + ++dev->stats.rx_dropped; + goto repost; + } + } else if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { ++dev->stats.rx_dropped; goto repost; } ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - - skb_put(skb, wc->byte_len); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ipoib_sg_dma_unmap_rx(priv, mapping); + ipoib_ud_skb_put_frags(skb, wc->byte_len, newskb); + } else { + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); + } skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -690,15 +709,19 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) } for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; + struct ipoib_sg_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; if (!rx_req->skb) continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); + else + ib_dma_unmap_single(priv->ca, + rx_req->mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(rx_req->skb); rx_req->skb = NULL; } From ruben at lfbs.rwth-aachen.de Sun Feb 3 01:09:50 2008 From: ruben at lfbs.rwth-aachen.de (Ruben Niederhagen) Date: Sun, 03 Feb 2008 10:09:50 +0100 Subject: [ofa-general] ENOMEM In-Reply-To: <200802011705.51202.kilian@stanford.edu> References: <47A352FC.8090604@lfbs.rwth-aachen.de> <200802011705.51202.kilian@stanford.edu> Message-ID: <47A584DE.2020308@lfbs.rwth-aachen.de> Hi! Thanks for your reply! 
Kilian CAVALOTTI wrote: > On Friday 01 February 2008 09:12:28 am Ruben Niederhagen wrote: >> When I try to run ibv_srq_pingpong as non-root-user, I get the error >> "Couldn't create QP[5]". > >> 0\0\0@"..., 48) = -1 ENOMEM (Cannot allocate memory) > >> As root-user everything is working fine... > > You can try to check the non-root user's max-locked memory limits, > with "ulimit -l" in a bash shell (those limits are usually set > in /etc/security/limits.conf). For 'ulimit -l' I get 512 - as root as well as a regular user; shouldn't that be enough? How do I enlarge this limit? The line # * hard locks 1024 in /etc/security/limits.conf (+reboot) didn't do the trick... Thank you! Ruben From kliteyn at dev.mellanox.co.il Sun Feb 3 01:16:33 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 03 Feb 2008 11:16:33 +0200 Subject: [ofa-general] [PATCH] opensm/osm_ucast_ftree.c: do load-leveling of non-CN routes Message-ID: <47A58671.3020202@dev.mellanox.co.il> Fat-tree routing wasn't load-leveling routes to the non-compute nodes, causing I/O bottlenecks in the fabric. Please apply to ofed_1_3 and master. Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_ucast_ftree.c | 16 ++++++++++------ 1 files changed, 10 insertions(+), 6 deletions(-) diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c index bf6684e..d85a9eb 100644 --- a/opensm/opensm/osm_ucast_ftree.c +++ b/opensm/opensm/osm_ucast_ftree.c @@ -2624,9 +2624,13 @@ static void __osm_ftree_fabric_route_to_cns(IN ftree_fabric_t * p_ftree) * set switch LFT(LID) to the port connecting to compute node * call assign-down-going-port-by-descending-up(TRUE,FALSE) on CURRENT switch * - * Routing to these HCAs is routing a REAL hca lid on SECONDARY path: + * Routing to these HCAs is routing a REAL hca lid on SECONDARY path.
+ * However, we do want to allow load-leveling of the traffic to the non-CNs, + * because such nodes may include IO nodes with heavy usage * - we should set fwd tables - * - we should NOT update port counters + * - we should update port counters + * Routing to non-CNs is done after routing to CNs, so updated port + * counters will not affect CN-to-CN routing. */ static void __osm_ftree_fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree) @@ -2682,15 +2686,15 @@ static void __osm_ftree_fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree) 1); /* hops */ /* Assign downgoing ports by stepping up. - We're routing REAL targets, but since they are not CNs and not - included in the leafs array, treat them as SECONDARY path, which - means that the counters won't be updated. */ + We're routing REAL targets. They are not CNs and not included + in the leafs array, but we treat them as MAIN path to allow load + leveling, which means that the counters will be updated. */ __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ NULL, /* prev. 
position switch */ hca_lid, /* LID that we're routing to */ p_sw->rank + 1, /* rank of the LID that we're routing to */ TRUE, /* whether this HCA LID is real or dummy */ - FALSE); /* whether this path to HCA should by tracked by counters */ + TRUE); /* whether this path to HCA should by tracked by counters */ } /* done with all the port groups of this HCA - go to next HCA */ } -- 1.5.1.4 From vlad at dev.mellanox.co.il Sun Feb 3 01:49:22 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 03 Feb 2008 11:49:22 +0200 Subject: [ofa-general] Re: [ewg] [Fwd: Re: non SRQ patch for OFED 1.3] -need some help In-Reply-To: <47A346C8.7010705@linux.vnet.ibm.com> References: <47A346C8.7010705@linux.vnet.ibm.com> Message-ID: <47A58E22.2080104@dev.mellanox.co.il> Pradeep Satyanarayana wrote: > I tried running ofed_scripts/ofed_makedist.sh before and after copying my patch > to kernel_patches/fixes. In both cases makedist.sh seems to complete without > errors and creates the tar.gz files for the various kernels. > > In short I am unable to reproduce the problem that Tziporet mentions. Any tips or > pointers to resolve this issue would be appreciated. Thanks! > > Pradeep > > > ------------------------------------------------------------------------ > > Subject: > Re: non SRQ patch for OFED 1.3 > From: > Tziporet Koren > Date: > Thu, 31 Jan 2008 16:00:38 +0200 > To: > Pradeep Satyanarayana > > To: > Pradeep Satyanarayana > CC: > openfabrics-ewg at openib.org, tziporet at dev.mellanox.co.il, > vlad at dev.mellanox.co.il, hnguyen at linux.vnet.ibm.com > > > Pradeep Satyanarayana wrote: >> Some HCAs like ehca do not natively support srq. This patch would >> enable IPoIB CM >> for such HCAs. This patch has been accepted into Roland's for-2.6.25 >> git tree for about 3 months now. >> >> Please consider including this patch into OFED 1.3. >> >> >> > Pradeep, > We tries to apply this patch for OFED 1.3 and its breaks some of the > backports. 
> Please use the makedist script on the ofa server (there is an > explanation in the developers Wiki) and fix this so we can try to > apply it > Vlad will help you later today too > > Thanks, > Tziporet > > ------------------------------------------------------------------------ > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg Hello Pradeep, Did you commited your patch after copying it into kernel_patches/fixes? I got the following error: patching file drivers/infiniband/ulp/ipoib/ipoib_cm.c Hunk #1 FAILED at 431. Hunk #2 succeeded at 588 (offset 130 lines). Hunk #3 succeeded at 605 (offset 127 lines). Hunk #4 succeeded at 651 (offset 135 lines). Hunk #5 succeeded at 671 (offset 135 lines). Hunk #6 succeeded at 681 (offset 135 lines). Hunk #7 succeeded at 718 (offset 135 lines). 1 out of 7 hunks FAILED -- rejects in file drivers/infiniband/ulp/ipoib/ipoib_cm.c patching file drivers/infiniband/ulp/ipoib/ipoib_multicast.c Patch ipoib_0100_to_2.6.21.patch does not apply (enforce with -f) Regards, Vladimir From kliteyn at dev.mellanox.co.il Sun Feb 3 01:49:31 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 03 Feb 2008 11:49:31 +0200 Subject: [ofa-general] [PATCH] opensm/osm_ucast_ftree.c: cosmetics Message-ID: <47A58E2B.5030105@dev.mellanox.co.il> Hi Sasha. Cosmetics in ftree: removed unused argument, removed unneeded 'if' statement, fixed some comments. This patch is for trunk only. 
Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_ucast_ftree.c | 32 +++++++++++++++----------------- 1 files changed, 15 insertions(+), 17 deletions(-) diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c index 11453ad..6c52f31 100644 --- a/opensm/opensm/osm_ucast_ftree.c +++ b/opensm/opensm/osm_ucast_ftree.c @@ -800,7 +800,6 @@ static inline uint8_t __osm_ftree_sw_get_fwd_table_block(IN ftree_sw_t * p_sw, static inline cl_status_t __osm_ftree_sw_set_hops(IN ftree_sw_t * p_sw, - IN uint16_t max_lid_ho, IN uint16_t lid_ho, IN uint8_t port_num, IN uint8_t hops) { @@ -2080,15 +2079,10 @@ __osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, /* promote the index that indicates which group should we start with when going through all the downgoing groups */ - if (p_sw->down_port_groups_idx == -1) - p_sw->down_port_groups_idx = 0; - else - p_sw->down_port_groups_idx = - (p_sw->down_port_groups_idx + - 1) % p_sw->down_port_groups_num; + p_sw->down_port_groups_idx = + (p_sw->down_port_groups_idx + 1) % p_sw->down_port_groups_num; - /* foreach down-going port group (in indexing order) - starting with the least loaded group */ + /* foreach down-going port group (in indexing order) */ i = p_sw->down_port_groups_idx; for (k = 0; k < p_sw->down_port_groups_num; k++) { @@ -2156,7 +2150,7 @@ __osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, * * 2. 
is_real_lid == TRUE && is_main_path == FALSE: * - going DOWN(TRUE,FALSE) through ALL the groups but only if - * the remote (upper) switch hasn't been already configured + * the remote (lower) switch hasn't been already configured * for this target LID * + NOT promoting port counter * + setting path in remote switch fwd tbl if it hasn't been set yet @@ -2173,7 +2167,7 @@ __osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, * - illegal state - we shouldn't get here */ - /* second case: skip the port group if the remote (upper) + /* second case: skip the port group if the remote (lower) switch has been already configured for this target LID */ if (is_real_lid && !is_main_path && __osm_ftree_sw_get_fwd_table_block(p_remote_sw, @@ -2203,7 +2197,6 @@ __osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, (void **)&p_port); __osm_ftree_sw_set_hops(p_remote_sw, - p_ftree->lft_max_lid_ho, cl_ntoh16(target_lid), p_port->remote_port_num, ((target_rank - @@ -2390,7 +2383,6 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, cl_ptr_vector_at(&p_min_group->ports, j, (void **)&p_port); __osm_ftree_sw_set_hops(p_remote_sw, - p_ftree->lft_max_lid_ho, cl_ntoh16(target_lid), p_port->remote_port_num, target_rank - @@ -2462,6 +2454,12 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, __osm_ftree_tuple_to_str(p_remote_sw->tuple)); } + /* Routing REAL lids on SECONDARY path means routing + switch-to-switch or switch-to-CA paths. + We can safely assume that switch will initiate very + few traffic, so there's no point waisting runtime on + trying to ballance these routes - always pick port 0. 
*/ + cl_ptr_vector_at(&p_group->ports, 0, (void **)&p_port); __osm_ftree_sw_set_fwd_table_block(p_remote_sw, cl_ntoh16(target_lid), @@ -2475,7 +2473,6 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, cl_ptr_vector_at(&p_group->ports, j, (void **)&p_port); __osm_ftree_sw_set_hops(p_remote_sw, - p_ftree->lft_max_lid_ho, cl_ntoh16(target_lid), p_port->remote_port_num, target_rank - @@ -2568,7 +2565,6 @@ static void __osm_ftree_fabric_route_to_cns(IN ftree_fabric_t * p_ftree) /* set local min hop table(LID) to route to the CA */ __osm_ftree_sw_set_hops(p_sw, - p_ftree->lft_max_lid_ho, cl_ntoh16(hca_lid), p_port->port_num, 1); @@ -2682,7 +2678,8 @@ static void __osm_ftree_fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree) cl_ntoh16(hca_lid), port_num_on_switch); /* set local min hop table(LID) to route to the CA */ - __osm_ftree_sw_set_hops(p_sw, p_ftree->lft_max_lid_ho, cl_ntoh16(hca_lid), port_num_on_switch, /* port num */ + __osm_ftree_sw_set_hops(p_sw, cl_ntoh16(hca_lid), + port_num_on_switch, /* port num */ 1); /* hops */ /* Assign downgoing ports by stepping up. @@ -2741,7 +2738,8 @@ static void __osm_ftree_fabric_route_to_switches(IN ftree_fabric_t * p_ftree) cl_ntoh16(p_sw->base_lid)); /* set min hop table of the switch to itself */ - __osm_ftree_sw_set_hops(p_sw, p_ftree->lft_max_lid_ho, cl_ntoh16(p_sw->base_lid), 0, /* port_num */ + __osm_ftree_sw_set_hops(p_sw, cl_ntoh16(p_sw->base_lid), + 0, /* port_num */ 0); /* hops */ __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. 
start point */ -- 1.5.1.4 From mashirle at us.ibm.com Sat Feb 2 16:02:43 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 16:02:43 -0800 Subject: [ofa-general] [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU In-Reply-To: <1201988154.19565.229.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> Message-ID: <1201996963.19565.240.camel@localhost.localdomain> I have fixed a bug found in 4K MTU test. Here is the new patch. I am running stress tonight. Signed-off-by: Shirley Ma --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 +-- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 93 +++++++++++++++++++----------- 2 files changed, 60 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 6b5e108..faee740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -145,11 +145,6 @@ struct ipoib_sg_rx_buf { u64 mapping[IPOIB_UD_RX_SG]; }; -struct ipoib_rx_buf { - struct sk_buff *skb; - u64 mapping; -}; - struct ipoib_tx_buf { struct sk_buff *skb; u64 mapping; @@ -299,7 +294,7 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9ca3d34..dfb5cc2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -155,29 +155,25 @@ partial_error: static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ib_sge list; - struct ib_recv_wr param; struct ib_recv_wr *bad_wr; int ret; - list.addr = priv->rx_ring[id].mapping; - 
list.length = IPOIB_BUF_SIZE; - list.lkey = priv->mr->lkey; + priv->rx_wr.wr_id = id | IPOIB_OP_RECV; + priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0]; + priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; - param.next = NULL; - param.wr_id = id | IPOIB_OP_RECV; - param.sg_list = &list; - param.num_sge = 1; - - ret = ib_post_recv(priv->qp, &param, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); if (unlikely(ret)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[id].mapping); + else + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } - return ret; } @@ -187,7 +183,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) struct sk_buff *skb; u64 addr; - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); + skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4); if (!skb) return -ENOMEM; @@ -198,7 +194,8 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, + addr = ib_dma_map_single(priv->ca, skb->data, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { dev_kfree_skb_any(skb); @@ -206,7 +203,7 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) } priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; + priv->rx_ring[id].mapping[0] = addr; return 0; } @@ -214,10 +211,15 @@ static int ipoib_alloc_rx_skb(struct net_device *dev, int id) static int ipoib_ib_post_receives(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - int i; + int i, ret; for (i = 0; i < ipoib_recvq_size; ++i) { - if (ipoib_alloc_rx_skb(dev, 
i)) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ret = !(ipoib_sg_alloc_rx_skb(dev, i, + priv->rx_ring[i].mapping)); + else + ret = ipoib_alloc_rx_skb(dev, i); + if (ret) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); return -ENOMEM; } @@ -234,8 +236,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; - struct sk_buff *skb; - u64 addr; + struct sk_buff *skb, *newskb = NULL; + u64 mapping[IPOIB_UD_RX_SG]; + u64 addr = 0; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -247,15 +250,20 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; + if (!ipoib_ud_need_sg(priv->max_ib_mtu)) + addr = priv->rx_ring[wr_id].mapping[0]; if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); + else + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -272,17 +280,30 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. 
*/ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + newskb = ipoib_sg_alloc_rx_skb(dev, wr_id, mapping); + if (unlikely(!newskb)) { + ++dev->stats.rx_dropped; + goto repost; + } + } else if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { ++dev->stats.rx_dropped; goto repost; } ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - - skb_put(skb, wc->byte_len); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); + memcpy(priv->rx_ring[wr_id].mapping, mapping, + IPOIB_UD_RX_SG * sizeof *mapping); + ipoib_ud_skb_put_frags(skb, wc->byte_len, newskb); + } else { + ib_dma_unmap_single(priv->ca, addr, + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); + skb_put(skb, wc->byte_len); + } skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -690,15 +711,19 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush) } for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; + struct ipoib_sg_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; if (!rx_req->skb) continue; - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); + else + ib_dma_unmap_single(priv->ca, + rx_req->mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); dev_kfree_skb_any(rx_req->skb); rx_req->skb = NULL; } From eli at mellanox.co.il Sun Feb 3 02:07:29 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Sun, 03 Feb 2008 12:07:29 +0200 Subject: [ofa-general] Re: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU In-Reply-To: <1201996963.19565.240.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> 
<1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> <1201996963.19565.240.camel@localhost.localdomain> Message-ID: <1202033249.5839.17.camel@mtls03> Hi Shirley, you patches cannot be applied cleanly. It seems like your email client wraps around long lines. Can please check if this is the case? On Sat, 2008-02-02 at 16:02 -0800, Shirley Ma wrote: > I have fixed a bug found in 4K MTU test. Here is the new patch. I am > running stress tonight. > > Signed-off-by: Shirley Ma > --- > > drivers/infiniband/ulp/ipoib/ipoib.h | 7 +-- > drivers/infiniband/ulp/ipoib/ipoib_ib.c | 93 > +++++++++++++++++++----------- > 2 files changed, 60 insertions(+), 40 deletions(-) > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h > b/drivers/infiniband/ulp/ipoib/ipoib.h > index 6b5e108..faee740 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib.h > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > @@ -145,11 +145,6 @@ struct ipoib_sg_rx_buf { > u64 mapping[IPOIB_UD_RX_SG]; > }; > > -struct ipoib_rx_buf { > - struct sk_buff *skb; > - u64 mapping; > -}; > - > struct ipoib_tx_buf { > struct sk_buff *skb; > u64 mapping; > @@ -299,7 +294,7 @@ struct ipoib_dev_priv { > unsigned int admin_mtu; > unsigned int mcast_mtu; > > - struct ipoib_rx_buf *rx_ring; > + struct ipoib_sg_rx_buf *rx_ring; > > spinlock_t tx_lock; > struct ipoib_tx_buf *tx_ring; > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c > b/drivers/infiniband/ulp/ipoib/ipoib_ib.c > index 9ca3d34..dfb5cc2 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c > @@ -155,29 +155,25 @@ partial_error: > static int ipoib_ib_post_receive(struct net_device *dev, int id) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > - struct ib_sge list; > - struct ib_recv_wr param; > struct ib_recv_wr *bad_wr; > int ret; > > - list.addr = priv->rx_ring[id].mapping; > - list.length = IPOIB_BUF_SIZE; > - list.lkey = priv->mr->lkey; > + priv->rx_wr.wr_id = id 
| IPOIB_OP_RECV; > + priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0]; > + priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; > > - param.next = NULL; > - param.wr_id = id | IPOIB_OP_RECV; > - param.sg_list = &list; > - param.num_sge = 1; > - > - ret = ib_post_recv(priv->qp, &param, &bad_wr); > + ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); > if (unlikely(ret)) { > + if (ipoib_ud_need_sg(priv->max_ib_mtu)) > + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[id].mapping); > + else > + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0], > + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), > + DMA_FROM_DEVICE); > ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); > - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, > - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); > dev_kfree_skb_any(priv->rx_ring[id].skb); > priv->rx_ring[id].skb = NULL; > } > - > return ret; > } > > @@ -187,7 +183,7 @@ static int ipoib_alloc_rx_skb(struct net_device > *dev, int id) > struct sk_buff *skb; > u64 addr; > > - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); > + skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4); > if (!skb) > return -ENOMEM; > > @@ -198,7 +194,8 @@ static int ipoib_alloc_rx_skb(struct net_device > *dev, int id) > */ > skb_reserve(skb, 4); > > - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, > + addr = ib_dma_map_single(priv->ca, skb->data, > + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), > DMA_FROM_DEVICE); > if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { > dev_kfree_skb_any(skb); > @@ -206,7 +203,7 @@ static int ipoib_alloc_rx_skb(struct net_device > *dev, int id) > } > > priv->rx_ring[id].skb = skb; > - priv->rx_ring[id].mapping = addr; > + priv->rx_ring[id].mapping[0] = addr; > > return 0; > } > @@ -214,10 +211,15 @@ static int ipoib_alloc_rx_skb(struct net_device > *dev, int id) > static int ipoib_ib_post_receives(struct net_device *dev) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > - int i; > + int i, ret; > > for (i = 0; i < 
ipoib_recvq_size; ++i) { > - if (ipoib_alloc_rx_skb(dev, i)) { > + if (ipoib_ud_need_sg(priv->max_ib_mtu)) > + ret = !(ipoib_sg_alloc_rx_skb(dev, i, > + priv->rx_ring[i].mapping)); > + else > + ret = ipoib_alloc_rx_skb(dev, i); > + if (ret) { > ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); > return -ENOMEM; > } > @@ -234,8 +236,9 @@ static void ipoib_ib_handle_rx_wc(struct net_device > *dev, struct ib_wc *wc) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; > - struct sk_buff *skb; > - u64 addr; > + struct sk_buff *skb, *newskb = NULL; > + u64 mapping[IPOIB_UD_RX_SG]; > + u64 addr = 0; > > ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", > wr_id, wc->status); > @@ -247,15 +250,20 @@ static void ipoib_ib_handle_rx_wc(struct > net_device *dev, struct ib_wc *wc) > } > > skb = priv->rx_ring[wr_id].skb; > - addr = priv->rx_ring[wr_id].mapping; > + if (!ipoib_ud_need_sg(priv->max_ib_mtu)) > + addr = priv->rx_ring[wr_id].mapping[0]; > > if (unlikely(wc->status != IB_WC_SUCCESS)) { > if (wc->status != IB_WC_WR_FLUSH_ERR) > ipoib_warn(priv, "failed recv event " > "(status=%d, wrid=%d vend_err %x)\n", > wc->status, wr_id, wc->vendor_err); > - ib_dma_unmap_single(priv->ca, addr, > - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); > + if (ipoib_ud_need_sg(priv->max_ib_mtu)) > + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); > + else > + ib_dma_unmap_single(priv->ca, addr, > + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), > + DMA_FROM_DEVICE); > dev_kfree_skb_any(skb); > priv->rx_ring[wr_id].skb = NULL; > return; > @@ -272,17 +280,30 @@ static void ipoib_ib_handle_rx_wc(struct > net_device *dev, struct ib_wc *wc) > * If we can't allocate a new RX buffer, dump > * this packet and reuse the old buffer. 
> */ > - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { > + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { > + newskb = ipoib_sg_alloc_rx_skb(dev, wr_id, mapping); > + if (unlikely(!newskb)) { > + ++dev->stats.rx_dropped; > + goto repost; > + } > + } else if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { > ++dev->stats.rx_dropped; > goto repost; > } > > ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", > wc->byte_len, wc->slid); > - > - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); > - > - skb_put(skb, wc->byte_len); > + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { > + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); > + memcpy(priv->rx_ring[wr_id].mapping, mapping, > + IPOIB_UD_RX_SG * sizeof *mapping); > + ipoib_ud_skb_put_frags(skb, wc->byte_len, newskb); > + } else { > + ib_dma_unmap_single(priv->ca, addr, > + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), > + DMA_FROM_DEVICE); > + skb_put(skb, wc->byte_len); > + } > skb_pull(skb, IB_GRH_BYTES); > > skb->protocol = ((struct ipoib_header *) skb->data)->proto; > @@ -690,15 +711,19 @@ int ipoib_ib_dev_stop(struct net_device *dev, int > flush) > } > > for (i = 0; i < ipoib_recvq_size; ++i) { > - struct ipoib_rx_buf *rx_req; > + struct ipoib_sg_rx_buf *rx_req; > > rx_req = &priv->rx_ring[i]; > if (!rx_req->skb) > continue; > - ib_dma_unmap_single(priv->ca, > - rx_req->mapping, > - IPOIB_BUF_SIZE, > - DMA_FROM_DEVICE); > + if (ipoib_ud_need_sg(priv->max_ib_mtu)) > + ipoib_sg_dma_unmap_rx(priv, > + priv->rx_ring[i].mapping); > + else > + ib_dma_unmap_single(priv->ca, > + rx_req->mapping[0], > + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), > + DMA_FROM_DEVICE); > dev_kfree_skb_any(rx_req->skb); > rx_req->skb = NULL; > } > > From mashirle at us.ibm.com Sat Feb 2 16:21:59 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 16:21:59 -0800 Subject: [ofa-general] Re: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU In-Reply-To: <1202033249.5839.17.camel@mtls03> 
References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> <1201996963.19565.240.camel@localhost.localdomain> <1202033249.5839.17.camel@mtls03> Message-ID: <1201998120.19565.245.camel@localhost.localdomain> Thanks Eli. Too bad :(. I have struggled with my email for a while. Let me send you an attachment file for the whole patch built against OFED-1.3-RC3 kernel here first. I will work on my email client tomorrow. I am too tired today. Let me know right away if there is any problem. Shirley -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib-4kmtu-rc3-2.6.24.patch Type: text/x-patch Size: 13655 bytes Desc: not available URL: From eli at mellanox.co.il Sun Feb 3 02:25:47 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Sun, 3 Feb 2008 12:25:47 +0200 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1201998120.19565.245.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> <1201996963.19565.240.camel@localhost.localdomain> <1202033249.5839.17.camel@mtls03> <1201998120.19565.245.camel@localhost.localdomain> Message-ID: <6C2C79E72C305246B504CBA17B5500C9033B0F44@mtlexch01.mtl.com> Go to sleep :) I'll get along with the wrapped lines. I am reviewing now your patches against Roland's tree. After that I'll look at the attachements. -----Original Message----- From: Shirley Ma [mailto:mashirle at us.ibm.com] Sent: א 03 פברואר 2008 02:22 To: Eli Cohen Cc: Roland Dreier; general at lists.openfabrics.org Subject: Re: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU Thanks Eli. Too bad :(. 
I have struggled with my email for a while. Let me send you an attachment file for the whole patch built against OFED-1.3-RC3 kernel here first. I will work on my email client tomorrow. I am too tired today. Let me know right away if there is any problem. Shirley From mashirle at us.ibm.com Sat Feb 2 16:30:22 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 02 Feb 2008 16:30:22 -0800 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <6C2C79E72C305246B504CBA17B5500C9033B0F44@mtlexch01.mtl.com> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> <1201996963.19565.240.camel@localhost.localdomain> <1202033249.5839.17.camel@mtls03> <1201998120.19565.245.camel@localhost.localdomain> <6C2C79E72C305246B504CBA17B5500C9033B0F44@mtlexch01.mtl.com> Message-ID: <1201998622.19565.250.camel@localhost.localdomain> On Sun, 2008-02-03 at 12:25 +0200, Eli Cohen wrote: > Go to sleep :) I'll get along with the wrapped lines. I am reviewing now your patches against Roland's tree. After that I'll look at the attachements. Thank you so much, Eli! I have done 4 different implementations in the last few days to make it possible to be included in OFED-1.3 as well as Distros. I am totally exhausted. If there are any issues, let me know. It's quite possible for me to make mistakes when working like this. I will run stress test overnight on both intel (mthca 2K mtu) and ppc (ehca 4K mtu). 
Thanks Shirley From vlad at lists.openfabrics.org Sun Feb 3 03:12:35 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 3 Feb 2008 03:12:35 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080203-0200 daily build status Message-ID: <20080203111235.AE841E60087@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.20 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.17 Passed on x86_64 with 
linux-2.6.16.43-0.3-smp Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.22.5-31-default Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Failed: From tziporet at dev.mellanox.co.il Sun Feb 3 03:54:50 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Sun, 03 Feb 2008 13:54:50 +0200 Subject: [ofa-general] Bonding and hw_csum In-Reply-To: <47A1C83B.1040700@voltaire.com> References: <479DFA58.7050800@intec.ugent.be> <47A07CC9.8030005@voltaire.com> <47A0A90B.40506@mellanox.co.il> <1201784262.7095.17.camel@koenVRT> <47A1C83B.1040700@voltaire.com> Message-ID: <47A5AB8A.9030208@mellanox.co.il> Or Gerlitz wrote: > Koen Segers wrote: >> I just saw some patches on the mailing list concerning csum offloading. >> Are these applied in RC3? Or are they going to be introduced in the >> daily build of tomorrow? >> Is it correct to state that these patches replace the hw_csum parameter >> by offloading the csum computation to the mthca? This would mean that >> the results should be similar also. > > no and no, best if you take a look on the presentation @ > http://openfabrics.org/archives/nov2007sc/IPoIB-UD%20SO.pdf > > Basically the "checksum offloading" patches are for the datagram mode > and is the standard offload as in the Ethernet world, where the > "hw_csum" patch was for the connected mode. > >> Does the new offload patch depend on the type of hca being used? 
>> According to lspci, we have the "InfiniBand: Mellanox Technologies >> MT25208 InfiniHost III Ex (rev a0)" card. Do these patches work on a >> sles 10 sp1 installed on x3755 and x3655 machines of IBM that have this >> card inserted? > > checksum offloading is supported by the connectx and some of the other > Mellanox devices, I am quite sure that 25208 is one of them, but you > have to clarify this with Mellanox > > ConnectX and MT25208 InfiniHost III Ex supports IPoIB "checksum offloading" Tziporet From vlad at dev.mellanox.co.il Sun Feb 3 04:14:39 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 03 Feb 2008 14:14:39 +0200 Subject: [ofa-general] Re: [ewg] [Fwd: Re: non SRQ patch for OFED 1.3] -need some help In-Reply-To: <47A346C8.7010705@linux.vnet.ibm.com> References: <47A346C8.7010705@linux.vnet.ibm.com> Message-ID: <47A5B02F.6060901@dev.mellanox.co.il> Pradeep Satyanarayana wrote: > I tried running ofed_scripts/ofed_makedist.sh before and after copying my patch > to kernel_patches/fixes. In both cases makedist.sh seems to complete without > errors and creates the tar.gz files for the various kernels. > > In short I am unable to reproduce the problem that Tziporet mentions. Any tips or > pointers to resolve this issue would be appreciated. Thanks! > > Pradeep > > > ------------------------------------------------------------------------ > > Subject: > Re: non SRQ patch for OFED 1.3 > From: > Tziporet Koren > Date: > Thu, 31 Jan 2008 16:00:38 +0200 > To: > Pradeep Satyanarayana > > To: > Pradeep Satyanarayana > CC: > openfabrics-ewg at openib.org, tziporet at dev.mellanox.co.il, > vlad at dev.mellanox.co.il, hnguyen at linux.vnet.ibm.com > > > Pradeep Satyanarayana wrote: >> Some HCAs like ehca do not natively support srq. This patch would >> enable IPoIB CM >> for such HCAs. This patch has been accepted into Roland's for-2.6.25 >> git tree for about 3 months now. >> >> Please consider including this patch into OFED 1.3. 
>> >> >> > Pradeep, Shir > We tries to apply this patch for OFED 1.3 and its breaks some of the > backports. > Please use the makedist script on the ofa server (there is an > explanation in the developers Wiki) and fix this so we can try to > apply it > Vlad will help you later today too > > Thanks, > Tziporet > > ------------------------------------------------------------------------ > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg Pradeep, I added your patch (kernel_patches/fixes/ipoib_0200_non_srq.patch) and fixed the backport issue (ipoib_0100_to_2.6.21.patch). Please check if ofed_1_3/linux-2.6.git ofed_kernel is ok. Regards, Vladimir From tziporet at dev.mellanox.co.il Sun Feb 3 04:20:44 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Sun, 03 Feb 2008 14:20:44 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1201861429.6955.31.camel@eli-laptop> References: <1201861429.6955.31.camel@eli-laptop> Message-ID: <47A5B19C.1080202@mellanox.co.il> Eli Cohen wrote: > The following patches, based on ofed 1.3, are intended to address bugs > https://bugs.openfabrics.org/show_bug.cgi?id=760 and > https://bugs.openfabrics.org/show_bug.cgi?id=761. They address UD mode > both send and receive and improve performance when using small messages > UDP traffic. The observation we had is that at small UDP messages, the > message rate is high and so what limits throughput is CPU, e.g. CPU is > 100% busy. > In the send flow I use a dedicated CQ for the send flow which in turn is > never armed. CQEs consumption is done by polling after posting a send > message. Also, the QP is configured for selective signaling and polling > the CQ is done once in 16 messages. > > On the receive side the code is changed to post to receive queue once in > 16 completions. This is done in for both UD and and CM. 
> > > 0001-IB-ipoib-Split-CQs-for-IPOIB-UD.patch > 0002-IB-ipoib-Unsingnalled-UD-QP.patch > 0003-IPOIB-post-to-SRQ-every-n-buffers.patch > 0004-IB-ipoib-rx-WQE-draft-in-IPOIB-UD.patch > 0005-IB-ipoib-IPOIB-rx-post-list.patch > > Tziporet, please approve for inclusion in ofed 1.3 > > > _______________________________________________ > > Eli, Can you send the performance gain you got with these patches? Thanks, Tziporet From tziporet at dev.mellanox.co.il Sun Feb 3 04:22:53 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Sun, 03 Feb 2008 14:22:53 +0200 Subject: [ofa-general] [PATCH] OFED 1.2.5.5 compile fails "too many args to sk_eat_skb()" In-Reply-To: <47A349E8.6020105@sicortex.com> References: <47A349E8.6020105@sicortex.com> Message-ID: <47A5B21D.9010803@mellanox.co.il> Peter Watkins wrote: > OFED 1.2.5.5 fails to compile sdp code due to "too many arguments to > sk_eat_skb(). > > # cat /etc/SuSE-release > SUSE LINUX 10.1 (X86-64) > VERSION = 10.1 > > # uname -a > Linux pcitestbed 2.6.16.13-4-default #1 Wed May 3 04:53:23 UTC 2006 > x86_64 > x86_64 x86_64 GNU/Linux > > The problem seems to be that > kernel_addons/backport/2.6.16_sles10/include/net/sock.h > > is missing. > > Patch attached. Also recorded this in bug #885. > Thanks, Vlad - please add this patch to 1.2.5 tree We are not planning more 1.2.x releases but best if we have the patch checked in in case we will have one Tziporet From vlad at dev.mellanox.co.il Sun Feb 3 07:12:08 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 03 Feb 2008 17:12:08 +0200 Subject: [ofa-general] [PATCH] OFED 1.2.5.5 compile fails "too many args to sk_eat_skb()" In-Reply-To: <47A349E8.6020105@sicortex.com> References: <47A349E8.6020105@sicortex.com> Message-ID: <47A5D9C8.6050500@dev.mellanox.co.il> Peter Watkins wrote: > OFED 1.2.5.5 fails to compile sdp code due to "too many arguments to > sk_eat_skb(). 
> > # cat /etc/SuSE-release > SUSE LINUX 10.1 (X86-64) > VERSION = 10.1 > > # uname -a > Linux pcitestbed 2.6.16.13-4-default #1 Wed May 3 04:53:23 UTC 2006 > x86_64 > x86_64 x86_64 GNU/Linux > > The problem seems to be that > kernel_addons/backport/2.6.16_sles10/include/net/sock.h > > is missing. > > Patch attached. Also recorded this in bug #885. Peter, OFED 1.2.5.5 supports SLES10 (starting from 2.6.16.21-0.8-smp) and SLES10 Sp1 (starting from 2.6.16.43-0.3-smp). Please update your kernel version or use the patch that you sent locally. Regards, Vladimir From aexuscouhm at bobsbinding.com Sun Feb 3 07:52:17 2008 From: aexuscouhm at bobsbinding.com (Katy Pittman) Date: , 3 Feb 2008 07:52:17 -0800 Subject: [ofa-general] Software in many languages! Message-ID: <01c86639$b2438e80$298a505c@aexuscouhm> Don't waste time waiting for delivery of your software on a CD. Download and install it immediately. Choose the program you need from more than 270 programs in many languages. Free of charge professional installation consultations could be of great help. Prompt reply on all your requests. Money back guarantee ensures the quality of product. http://geocities.com/patrickoneill83/ Incredible selection of programs and applications! From dotanb at dev.mellanox.co.il Sun Feb 3 07:55:04 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Sun, 3 Feb 2008 17:55:04 +0200 Subject: [ofa-general] [PATCH] libibverbs : some man-pages fixes Message-ID: <200802031755.04681.dotanb@dev.mellanox.co.il> Some fixes and updates to several man pages. Signed-off-by: Dotan Barak --- diff --git a/man/ibv_create_cq.3 b/man/ibv_create_cq.3 index c39fe83..bb256d5 100644 --- a/man/ibv_create_cq.3 +++ b/man/ibv_create_cq.3 @@ -51,6 +51,7 @@ fails if any queue pair is still associated with this CQ. 
.SH "SEE ALSO" .BR ibv_resize_cq (3), .BR ibv_req_notify_cq (3), +.BR ibv_ack_cq_events (3), .BR ibv_create_qp (3) .SH "AUTHORS" .TP diff --git a/man/ibv_create_qp.3 b/man/ibv_create_qp.3 index 68a08b0..abd5449 100644 --- a/man/ibv_create_qp.3 +++ b/man/ibv_create_qp.3 @@ -8,9 +8,9 @@ ibv_create_qp, ibv_destroy_qp \- create or destroy a queue pair (QP) .B #include .sp .BI "struct ibv_qp *ibv_create_qp(struct ibv_pd " "*pd" , -.BI " struct ibv_qp_init_attr " "*qp_init_attr)" ; +.BI " struct ibv_qp_init_attr " "*qp_init_attr" ); .nl -.BI "int ibv_destroy_qp(struct ibv_qp " "*qp)" ; +.BI "int ibv_destroy_qp(struct ibv_qp " "*qp" ); .fi .SH "DESCRIPTION" .B ibv_create_qp() diff --git a/man/ibv_fork_init.3 b/man/ibv_fork_init.3 index d911c3f..6f11103 100644 --- a/man/ibv_fork_init.3 +++ b/man/ibv_fork_init.3 @@ -23,6 +23,15 @@ are always blocked until all child processes end or change address spaces via an .B exec() operation. +.PP +The +.B fork() +is supported as long as the parent process does not continue to run before the child +process dies or calls +.B exec()\fR. +The former can be achieved by calling +.B wait() +until the child process ends and the latter can be achieved by application specific means. .SH "RETURN VALUE" .B ibv_fork_init() returns 0 on success, or the value of errno on failure (which indicates the failure reason). @@ -49,7 +58,9 @@ regions. The precise performance impact depends on the workload and usually will not be significant. .SH "SEE ALSO" .BR fork (2), +.BR wait (2), .BR system (3), +.BR exec (3), .BR ibv_get_device_list (3) .SH "AUTHORS" .TP diff --git a/man/ibv_get_cq_event.3 b/man/ibv_get_cq_event.3 index 695734b..430ffd0 100644 --- a/man/ibv_get_cq_event.3 +++ b/man/ibv_get_cq_event.3 @@ -18,11 +18,11 @@ ibv_get_cq_event, ibv_ack_cq_events \- get and acknowledge completion queue (CQ) .B ibv_get_cq_event() waits for the next completion event in the completion event channel .I channel\fR. 
-The argument +Fills the arguments .I cq -is used to return the CQ that caused the event and +with the CQ that got the event and .I cq_context -is used to return the CQ's context. +with it's context\fR. .PP .B ibv_ack_cq_events() acknowledges @@ -102,7 +102,7 @@ if (ibv_get_cq_event(channel, &ev_cq, &ev_ctx)) { ibv_ack_cq_events(ev_cq, 1); .PP /* Request notification upon the next completion event */ -if (ibv_req_notify_cq(cq, 0)) { +if (ibv_req_notify_cq(ev_cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\en"); return 1; } @@ -114,6 +114,10 @@ do { fprintf(stderr, "Failed to poll completions from the CQ\en"); return 1; } + + /* there may be an extra event with no completion in the CQ */ + if (ne == 0) + continue; .PP if (wc.status != IBV_WC_SUCCESS) { fprintf(stderr, "Completion with status 0x%x was found\en", wc.status); diff --git a/man/ibv_modify_qp.3 b/man/ibv_modify_qp.3 index a870744..f045900 100644 --- a/man/ibv_modify_qp.3 +++ b/man/ibv_modify_qp.3 @@ -130,7 +130,7 @@ Next state Required attributes \-\-\-\-\-\-\-\-\-\- \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- Init \fB IBV_QP_STATE, IBV_QP_PKEY_INDEX, IBV_QP_PORT, \fR \fB IBV_QP_QKEY \fR -RTR \fB None \fR +RTR \fB IBV_QP_STATE \fR RTS \fB IBV_QP_STATE, IBV_QP_SQ_PSN \fR .fi .PP diff --git a/man/ibv_poll_cq.3 b/man/ibv_poll_cq.3 index b634fc9..75e4d7c 100644 --- a/man/ibv_poll_cq.3 +++ b/man/ibv_poll_cq.3 @@ -32,7 +32,7 @@ uint32_t vendor_err; /* Vendor error syndrome */ uint32_t byte_len; /* Number of bytes transferred */ uint32_t imm_data; /* Immediate data (in network byte order) */ uint32_t qp_num; /* Local QP number of completed WR */ -uint32_t src_qp; /* Source QP number (remote QP number) of completed WR */ +uint32_t src_qp; /* Source QP number (remote QP number) of completed WR (valid only for UD QPs) */ enum ibv_wc_flags wc_flags; /* Flags of the completed WR */ uint16_t pkey_index; /* P_Key index (valid only for GSI QPs) */ uint16_t slid; /* Source 
LID */ @@ -47,7 +47,7 @@ The attribute wc_flags describes the properties of the work completion. It is either 0 or the bitwise OR of one or more of the following flags: .PP .TP -.B IBV_WC_GRH \fR GRH is present +.B IBV_WC_GRH \fR GRH is present (valid only for UD QPs) .TP .B IBV_WC_WITH_IMM \fR Immediate data value is valid .PP diff --git a/man/ibv_post_recv.3 b/man/ibv_post_recv.3 index 478b67a..8efa9d3 100644 --- a/man/ibv_post_recv.3 +++ b/man/ibv_post_recv.3 @@ -44,7 +44,6 @@ uint32_t lkey; /* Key of the local Memory Region */ .in -8 }; .fi - .SH "RETURN VALUE" .B ibv_post_recv() returns 0 on success, or the value of errno on failure (which indicates the failure reason). @@ -60,6 +59,9 @@ is associated with a shared receive queue, you must use the function and not .B ibv_post_recv()\fR, since the QP's own receive queue will not be used. +.PP +If a WR is being posted to a UD QP, the Global Routing Header (GRH) will be placed +in the first 40 bytes of the buffer (whether or not GRH is actually being used by the QP). .SH "SEE ALSO" .BR ibv_create_qp (3), .BR ibv_post_send (3), diff --git a/man/ibv_post_srq_recv.3 b/man/ibv_post_srq_recv.3 index c7e2302..2663a6d 100644 --- a/man/ibv_post_srq_recv.3 +++ b/man/ibv_post_srq_recv.3 @@ -51,6 +51,9 @@ returns 0 on success, or the value of errno on failure (which indicates the fail The buffers used by a WR can only be safely reused after WR the request is fully executed and a work completion has been retrieved from the corresponding completion queue (CQ). +.PP +If a WR is being consumed by a UD QP, the Global Routing Header (GRH) will be placed +in the first 40 bytes of the buffer (whether or not GRH is actually being used by the QP). .SH "SEE ALSO" .BR ibv_create_qp (3), .BR ibv_post_send (3), diff --git a/man/ibv_query_device.3 b/man/ibv_query_device.3 index 344f5b3..f327769 100644 --- a/man/ibv_query_device.3 +++ b/man/ibv_query_device.3 @@ -22,8 +22,8 @@ is a pointer to an ibv_device_attr struct, as defined in . 
struct ibv_device_attr { .in +8 char fw_ver[64]; /* FW version */ -uint64_t node_guid; /* Node GUID */ -uint64_t sys_image_guid; /* System image GUID */ +uint64_t node_guid; /* Node GUID (in network byte order) */ +uint64_t sys_image_guid; /* System image GUID (in network byte order) */ uint64_t max_mr_size; /* Largest contiguous block that can be registered */ uint64_t page_size_cap; /* Supported memory shift sizes */ uint32_t vendor_id; /* Vendor ID, per IEEE */ diff --git a/man/ibv_query_pkey.3 b/man/ibv_query_pkey.3 index 37c408d..f6d37c9 100644 --- a/man/ibv_query_pkey.3 +++ b/man/ibv_query_pkey.3 @@ -12,7 +12,7 @@ ibv_query_pkey \- query an InfiniBand port's P_Key table .fi .SH "DESCRIPTION" .B ibv_query_pkey() -returns the P_Key value in entry +returns the P_Key value (in network byte order) in entry .I index of port .I port_num From dotanb at dev.mellanox.co.il Sun Feb 3 07:56:52 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Sun, 3 Feb 2008 17:56:52 +0200 Subject: [ofa-general] [PATCH] libibverbs: add verbs.7 Message-ID: <200802031756.52568.dotanb@dev.mellanox.co.il> From dotanb at dev.mellanox.co.il Sun Feb 3 07:58:53 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Sun, 3 Feb 2008 17:58:53 +0200 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 Message-ID: <200802031758.53692.dotanb@dev.mellanox.co.il> (sorry about the previous empty email) Added the man page verbs.7 which is an introduction to libibverbs man pages. 
Signed-off-by: Dotan Barak Signed-off-by: Or Gerlitz --- diff --git a/Makefile.am b/Makefile.am index 705b184..45914d3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,7 +52,7 @@ man_MANS = man/ibv_asyncwatch.1 man/ibv_devices.1 man/ibv_devinfo.1 \ man/ibv_post_srq_recv.3 man/ibv_query_device.3 man/ibv_query_gid.3 \ man/ibv_query_pkey.3 man/ibv_query_port.3 man/ibv_query_qp.3 \ man/ibv_query_srq.3 man/ibv_rate_to_mult.3 man/ibv_reg_mr.3 \ - man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 + man/ibv_req_notify_cq.3 man/ibv_resize_cq.3 man/verbs.7 DEBIAN = debian/changelog debian/compat debian/control debian/copyright \ debian/ibverbs-utils.install debian/libibverbs1.install \ diff --git a/libibverbs.spec.in b/libibverbs.spec.in index f61e451..ff3e5af 100644 --- a/libibverbs.spec.in +++ b/libibverbs.spec.in @@ -72,6 +72,7 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib*.so %{_includedir}/* %{_mandir}/man3/* +%{_mandir}/man7/* %files devel-static %defattr(-,root,root,-) diff --git a/man/verbs.7 b/man/verbs.7 new file mode 100644 index 0000000..565286f --- /dev/null +++ b/man/verbs.7 @@ -0,0 +1,201 @@ +.\" -*- nroff -*- +.\" +.TH VERBS 7 2008-01-17 libibverbs "Libibverbs Programmer's Manual" +.SH "NAME" +verbs \- Infiniband verbs library +.SH "SYNOPSIS" +.nf +.B #include +.fi +.SH "DESCRIPTION" +This library is an implementation of the verbs according to the Infiniband specification volume 1.2. It handles the control path of creating, modifying, querying and destroying resources such as Protection Domains (PD), Completion Queues (CQ), Queue-Pairs (QP), Shared Receive Queues (SRQ), Address Handles (AH), Memory Regions (MR). It also handles sending and receiving data posted to QPs and SRQs, getting completions from CQs using polling and completions events. + +The control path is implemented through system calls to the uverbs kernel module which further calls the low level HW driver. 
The data path is implemented through calls made to low level HW library which in most cases interacts directly with the HW providing kernel and network stack bypass (saving context/mode switches) along with zero copy and an asynchronous I/O model. + + +Typically, under network and RDMA programming, there are operations which involve interaction with remote peers (such as address resolution and connection establishment) and remote entities (such as route resolution and joining a multicast group under IB), where a resource managed through IB verbs such as QP or AH would be eventually created or effected from this interaction. In such cases, applications whose addressing semantics is based on IP can use librdmacm (see rdma_cm(7)) which works in conjunction with libibverbs. + +This library is thread safe library and verbs can be called from every thread in the process (the same resource can even be handled from different threads, for example: ibv_poll_cq can be called from more than one thread). + +However, it is up to the user to stop working with a resource after it was destroyed (by the same thread or by any other thread), this may result a segmentation fault. + +If fork (or any other system call that perform fork directly or indirectly) is being used, please see ibv_fork_init(3). + +.LP +The following shall be declared as functions and may also be defined +as macros. Function prototypes shall be provided. 
+.RS +.nf + +\fB +.B Library functions + +int ibv_fork_init(void); + +.B Device functions + +struct ibv_device **ibv_get_device_list(int *num_devices); +void ibv_free_device_list(struct ibv_device **list); +const char *ibv_get_device_name(struct ibv_device *device); +uint64_t ibv_get_device_guid(struct ibv_device *device); + +.B Context functions + +struct ibv_context *ibv_open_device(struct ibv_device *device); +int ibv_close_device(struct ibv_context *context); + +.B Queries + +int ibv_query_device(struct ibv_context *context, + struct ibv_device_attr *device_attr); +int ibv_query_port(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); +int ibv_query_pkey(struct ibv_context *context, uint8_t port_num, + int index, uint16_t *pkey); +int ibv_query_gid(struct ibv_context *context, uint8_t port_num, + int index, union ibv_gid *gid); + +.B Asynchronous events + +int ibv_get_async_event(struct ibv_context *context, + struct ibv_async_event *event); +void ibv_ack_async_event(struct ibv_async_event *event); + +.B Protection Domains + +struct ibv_pd *ibv_alloc_pd(struct ibv_context *context); +int ibv_dealloc_pd(struct ibv_pd *pd); + +.B Memory Regions + +struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr, + size_t length, enum ibv_access_flags access); +int ibv_dereg_mr(struct ibv_mr *mr); + +.B Address Handles + +struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr); +int ibv_init_ah_from_wc(struct ibv_context *context, uint8_t port_num, + struct ibv_wc *wc, struct ibv_grh *grh, + struct ibv_ah_attr *ah_attr); +struct ibv_ah *ibv_create_ah_from_wc(struct ibv_pd *pd, struct ibv_wc *wc, + struct ibv_grh *grh, uint8_t port_num); +int ibv_destroy_ah(struct ibv_ah *ah); + +.B Completion event channels + +struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context); +int ibv_destroy_comp_channel(struct ibv_comp_channel *channel); + +.B Completion Queues Control + +struct ibv_cq *ibv_create_cq(struct 
ibv_context *context, int cqe, + void *cq_context, + struct ibv_comp_channel *channel, + int comp_vector); +int ibv_destroy_cq(struct ibv_cq *cq); +int ibv_resize_cq(struct ibv_cq *cq, int cqe); + +.B Reading Completions from CQ + +int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc); + +.B Requesting / Managing CQ events + +int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only); +int ibv_get_cq_event(struct ibv_comp_channel *channel, + struct ibv_cq **cq, void **cq_context); +void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents); + +.B Shared Receive Queue control + +struct ibv_srq *ibv_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr); +int ibv_destroy_srq(struct ibv_srq *srq); +int ibv_modify_srq(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask); +int ibv_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr); + +.B Queue Pair control + +struct ibv_qp *ibv_create_qp(struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr); +int ibv_destroy_qp(struct ibv_qp *qp); +int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask); +int ibv_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask, + struct ibv_qp_init_attr *init_attr); + +.B posting Work Requests to QPs/SRQs +int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad_wr); +int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr); +int ibv_post_srq_recv(struct ibv_srq *srq, + struct ibv_recv_wr *recv_wr, + struct ibv_recv_wr **bad_recv_wr); + +.B Multicast group + +int ibv_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid); +int ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid); + +.B General functions + +int ibv_rate_to_mult(enum ibv_rate rate); +enum ibv_rate mult_to_ibv_rate(int mult); +\fP +.SH "SEE ALSO" +.LP 
+\fIibv_fork_init\fP(), +\fIibv_get_device_list\fP(), +\fIibv_free_device_list\fP(), +\fIibv_get_device_name\fP(), +\fIibv_get_device_guid\fP(), +\fIibv_open_device\fP(), +\fIibv_close_device\fP(), +\fIibv_query_device\fP(), +\fIibv_query_port\fP(), +\fIibv_query_pkey\fP(), +\fIibv_query_gid\fP(), +\fIibv_get_async_event\fP(), +\fIibv_ack_async_event\fP(), +\fIibv_alloc_pd\fP(), +\fIibv_dealloc_pd\fP(), +\fIibv_reg_mr\fP(), +\fIibv_dereg_mr\fP(), +\fIibv_create_ah\fP(), +\fIibv_init_ah_from_wc\fP(), +\fIibv_create_ah_from_wc\fP(), +\fIibv_destroy_ah\fP(), +\fIibv_create_comp_channel\fP(), +\fIibv_destroy_comp_channel\fP(), +\fIibv_create_cq\fP(), +\fIibv_destroy_cq\fP(), +\fIibv_resize_cq\fP(), +\fIibv_poll_cq\fP(), +\fIibv_req_notify_cq\fP(), +\fIibv_get_cq_event\fP(), +\fIibv_ack_cq_events\fP(), +\fIibv_create_srq\fP(), +\fIibv_destroy_srq\fP(), +\fIibv_modify_srq\fP(), +\fIibv_query_srq\fP(), +\fIibv_post_srq_recv\fP(), +\fIibv_create_qp\fP(), +\fIibv_destroy_qp\fP(), +\fIibv_modify_qp\fP(), +\fIibv_query_qp\fP(), +\fIibv_post_send\fP(), +\fIibv_post_recv\fP(), +\fIibv_attach_mcast\fP(), +\fIibv_detach_mcast\fP(), +\fIibv_rate_to_mult\fP(), +\fImult_to_ibv_rate\fP() +.SH "AUTHORS" +.TP +Dotan Barak +.TP +Or Gerlitz From eli at dev.mellanox.co.il Sun Feb 3 08:50:37 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Sun, 03 Feb 2008 18:50:37 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <47A5B19C.1080202@mellanox.co.il> References: <1201861429.6955.31.camel@eli-laptop> <47A5B19C.1080202@mellanox.co.il> Message-ID: <1202057437.18209.3.camel@mtls03> On Sun, 2008-02-03 at 14:20 +0200, Tziporet Koren wrote: > > > Eli, > Can you send the performance gain you got with these patches? When running on kernel 2.6.24 I got the sender improved from 380 mpbs to 508 mbps. 
I was using netperf: netperf -H -t UDP_STREAM -- -m 128 From xma at us.ibm.com Sun Feb 3 09:09:25 2008 From: xma at us.ibm.com (Shirley Ma) Date: Sun, 3 Feb 2008 10:09:25 -0700 Subject: [ofa-general] Re: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G support for 4K MTU In-Reply-To: <1201998120.19565.245.camel@localhost.localdomain> Message-ID: Hello Tziporet, I have tried 4 different approaches for the IPoIB-UD 4K mtu implementation. I have tested and validated three of them and I didn't see any performance difference among these implementations for both 2K mtu and 4K mtu. However I picked up V3 patch since this V3 patch is based on Eli and Roland's review comment: Keep existing 2K mtu implementation, don't merge IPoIB-UD RX S/G and IPoIB-CM RX S/G. Using 2 buffers for 4K MTU, one buffer is HEAD=GRH+IPoIB-head=44 bytes, one buffer is 4K for data when PAGE_SIZE is not big enough for 4K MTU+HEAD. I have tested and validated this patch on both mthca driver intel based platform and ehca driver ppc platform. Stress test has passed whole night without any problem on intel based platform for 2K MTU validation against 2.6.24 kernel for OFED-1.3-RC3 tree + Pradeep's noSRQ patch. The attachment is the patch built against OFED-1.3-RC3. One line is needed for backporting to other kernel: ++dev->stats vs. ++priv->stats. Please review it for OFED-1.3 inclusion. If there are any issues, please let me know. (See attached file: ipoib-4kmtu-rc3-2.6.24.patch) Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed...
Name: ipoib-4kmtu-rc3-2.6.24.patch Type: application/octet-stream Size: 13655 bytes Desc: not available URL: From eli at mellanox.co.il Sun Feb 3 09:07:29 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Sun, 03 Feb 2008 19:07:29 +0200 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1201998622.19565.250.camel@localhost.localdomain> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> <1201996963.19565.240.camel@localhost.localdomain> <1202033249.5839.17.camel@mtls03> <1201998120.19565.245.camel@localhost.localdomain> <6C2C79E72C305246B504CBA17B5500C9033B0F44@mtlexch01.mtl.com> <1201998622.19565.250.camel@localhost.localdomain> Message-ID: <1202058449.18209.17.camel@mtls03> Hi Shirley, I have reviewed the patches against Roland's tree and have the following comments: 1. I see that there are a few if statements added on the fast path and I am concerned they might hurt performance of slow UDP messages. Unfortunately I have not been able to test with an SM defining the broadcast group to 4K MTU (currently opensm uses 2K). 2. The usage of ipoib_ud_skb_put_frags() seems to be redundant and will only hurt performance since you would never reuse anything from the old SKB. This is because the headlen is 40 bytes for GRH and the rest of the data is in the first (and only) fragment. 3. I think it would be better to allocate room for real data in the head of the SKB since the tcp/ip stack seems to have less overhead if the headers are on the linear data. 4. I would consider using a pre-allocated buffer for the GRH of all received data (not as part of the SKB).
From xma at us.ibm.com Sun Feb 3 09:10:57 2008 From: xma at us.ibm.com (Shirley Ma) Date: Sun, 3 Feb 2008 09:10:57 -0800 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202057437.18209.3.camel@mtls03> Message-ID: Hello Eli, Can you send me a combined attachment patch for RC3-2.6.24 kernel so i can validate for both mthca and ehca here? Thanks Shirley Eli Cohen tziporet at dev.mellanox.co.il Sent by: cc general-b Roland Dreier , ounces at li openfabrics sts.openf abrics.or Subject g Re: [ofa-general] [PATCH 0/5]: Improve small UDP messages 02/03/08 08:50 AM On Sun, 2008-02-03 at 14:20 +0200, Tziporet Koren wrote: > > > Eli, > Can you send the performance gain you got with these patches? When running on kernel 2.6.24 I got the sender improved from 380 mpbs to 508 mbps. I was using netperf: netperf -H -t UDP_STREAM -- -m 128 _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: graycol.gif Type: image/gif Size: 105 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: pic16880.gif Type: image/gif Size: 1255 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: From dwslipstreamxm at slipstreamx.net Sun Feb 3 09:16:17 2008 From: dwslipstreamxm at slipstreamx.net (Irma Koenig) Date: , 3 Feb 2008 19:16:17 +0200 Subject: [ofa-general] Experience for yourself the excitement of winning real money online. 
Message-ID: <01c86699$40029680$9c4a6655@dwslipstreamxm> Visit Golden Gate casino and you won't be disappointed. Huge welcome bonus! Free to download software! Most popular games! Register free account today and take the advantage of playing when and whatever you like. Golden Gate Casino guarantees competent customer support for all players, quick response in case you have question or problem and instant payouts. Fair gaming only! http://geocities.com/montemoses700/ Accessible even for amateurs! From xma at us.ibm.com Sun Feb 3 09:24:27 2008 From: xma at us.ibm.com (Shirley Ma) Date: Sun, 3 Feb 2008 09:24:27 -0800 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1202058449.18209.17.camel@mtls03> Message-ID: general-bounces at lists.openfabrics.org wrote on 02/03/2008 09:07:29 AM: > Hi Shirley, > > I have reviewed the patches against Roland's tree and have the following > comments: Appreciate your quick review. > 1. I see that there are a few if statements added on the fast path and I > am concerned they might hurt performance of slow UDP messages. > Unfortunately I have not been able to test with an SM defining the > broadcast group to 4K MTU (currently opensm uses 2K). What kind of parameters do you prefer here for me to test this patch? I can test it right away when you send me your recommendations. > 2. The usage of ipoib_ud_skb_put_frags() seems to be redundant and will > only hurt performance since you would never reuse anything from the old > SKB. This is because the headlen is 40 bytes for GRH and the rest of the > data is in the first (and only) fragment. The header is 44 bytes, the IP payload data is in the first fragment. > 3. I think it would be better to allocate room for real data in the head > of the SKB since the tcp/ip stack seems to have less overhead if the > headers are on the linear data. > 4.
I would consider using a pre-allocated buffer for the GRH of all > received data (not as part of the SKB). Comments 2,3,4 can be combined into one if we use a pre-allocated buffer for GRH+IPoIB-head for all IP payload data, right? This is a performance enhancement if any. I think this could be done after this patch is checked in. And I will fix it before RC4 is out. Do you agree? Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From changquing.tang at hp.com Sun Feb 3 09:30:33 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Sun, 3 Feb 2008 17:30:33 +0000 Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC receive-only qp support In-Reply-To: <200802030949.44943.jackm@dev.mellanox.co.il> References: <200802030949.44943.jackm@dev.mellanox.co.il> Message-ID: > > In addition, the patch implements the foundation for > distributing XRC-receive-only QP events to userspace > processes registered with that QP. If r1 is on node1, r2 and r3 are on node2, r1 and r2 have the XRC connection established, r1 knows SRQ number from both r2 and r3, but r3 does not register the recv QP created by r2, can r3 still receive messages from r1? --CQ From xma at us.ibm.com Sun Feb 3 09:36:45 2008 From: xma at us.ibm.com (Shirley Ma) Date: Sun, 3 Feb 2008 09:36:45 -0800 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1202058449.18209.17.camel@mtls03> Message-ID: Is your recommendation the same as Roland's earlier one? I hope it's not; otherwise it doesn't work, since the first buffer is GRH + IPoIB HEAD = 44 bytes, not 40 bytes. If we put all skb data in the first frag, then the IP header is not aligned to 16 bytes.
I am copying Roland's comments regarding this approach: --------- However, I now realize that my earlier idea of allocating a scratch buffer for the GRH and just allocating a 4096 byte skb doesn't work, because the skb_shinfo ends up being allocated along with the buffer, so trying to allocate a 4096-byte skb will bloat the data past a single page, which is what we're trying to avoid. So how about the following? When using a UD MTU of 4096 with a page size of 4096, allocate an skb of size 44 for the GRH and ethertype, and then allocate a single page for the fragment list. This means that the IP packet will start nicely 16-byte aligned for free, and all the bookkeeping is very simple. ------- thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From chas at cmf.nrl.navy.mil Sun Feb 3 12:30:41 2008 From: chas at cmf.nrl.navy.mil (chas williams - CONTRACTOR) Date: Sun, 03 Feb 2008 15:30:41 -0500 Subject: [ofa-general] Re: [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202027602.5839.11.camel@mtls03> Message-ID: <200802032030.m13KUfqM012552@cmf.nrl.navy.mil> In message <1202027602.5839.11.camel at mtls03>,Eli Cohen writes: >> > On the receive side the code is changed to post to receive queue once in >> > 16 completions. This is done in for both UD and and CM. >> >> Ohmm, have you tested latency? I think it will increase latency for >> small messages. > >I see also improvement in latency. I used netperf -t UDP_RR and -t >TCP_RR and in both cases I improvement. this is just going to affect the resupply of rx buffers if i understand the code correctly. this wont delay the arrival of packets but perhaps it should be some fraction of the rq length instead of just 16. 
From pradeeps at linux.vnet.ibm.com Sun Feb 3 17:14:01 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Sun, 03 Feb 2008 17:14:01 -0800 Subject: [ofa-general] Re: [ewg] [Fwd: Re: non SRQ patch for OFED 1.3] -need some help In-Reply-To: <47A5B02F.6060901@dev.mellanox.co.il> References: <47A346C8.7010705@linux.vnet.ibm.com> <47A5B02F.6060901@dev.mellanox.co.il> Message-ID: <47A666D9.2080301@linux.vnet.ibm.com> >> Pradeep, Shir >> We tries to apply this patch for OFED 1.3 and its breaks some of the >> backports. >> Please use the makedist script on the ofa server (there is an >> explanation in the developers Wiki) and fix this so we can try to >> apply it >> Vlad will help you later today too >> >> Thanks, >> Tziporet >> >> ------------------------------------------------------------------------ >> >> _______________________________________________ >> ewg mailing list >> ewg at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg > Pradeep, > I added your patch (kernel_patches/fixes/ipoib_0200_non_srq.patch) and > fixed the backport issue (ipoib_0100_to_2.6.21.patch). > Please check if ofed_1_3/linux-2.6.git ofed_kernel is ok. Hello Vladimir, I downloaded it and tried it on a 2.6.24 kernel (Sles10Sp2b1) and it compiled fine. I touch tested it and looks okay too. Thanks for your help. However, when I tried on a 2.6.16.57-0.9-ppc64 (Sles10sp2b1) after running ofed_scripts/configure, the make failed as follows: drivers/infiniband/core/addr.c: In function 'addr_arp_recv': drivers/infiniband/core/addr.c:359: error: 'struct sk_buff' has no member named 'nh' This seems to be coming from the addr_1_netevents_revert_to_2_6_17.patch patch, which is completely unrelated to this patch. Is there a place where the steps in the build process are completely described? The Wiki at : https://wiki.openfabrics.org/tiki-index.php?page=HOWTO%20Build%20OFED-1.3 is probably missing a few steps.
It would help greatly if you could describe all the steps. Why is it that we see differing results? Pradeep From kliteyn at mellanox.co.il Sun Feb 3 17:28:57 2008 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 4 Feb 2008 03:28:57 +0200 Subject: [ofa-general] nightly osm_sim report 2008-02-04:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2008-02-03 OpenSM git rev = Thu_Jan_31_20:03:04_2008 [798e8d88937d60e1454f058db2f4c705b41ec9df] ibutils git rev = Mon_Dec_24_10:42:01_2007 [675bec82306d6920555dd0b5e2f664983e27e60f] Total=400 Pass=398 Fail=2 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree gnu-stallion-64.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree RhinoDDR.topo 10 FatTree FullGnu.topo 10 FatTree 4-ary-2-tree.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FTreeFail 4-ary-2-tree-missing-sw-link.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 10 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 9 Multicast IS3-128.topo 9 LidMgr IS3-128.topo Failures: 1 Multicast IS3-128.topo 1 LidMgr IS3-128.topo From mhanafi at csc.com Sun Feb 3 20:18:59 2008 From: mhanafi at csc.com (Mahmoud Hanafi) Date: Sun, 3 Feb 2008 23:18:59 -0500 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: <47A3A0FC.6020603@ichips.intel.com> Message-ID: How do I ensure that local_sa_cache is enables? I have tried all the other suggestions but I am still getting the error. Mahmoud Hanafi Sr. System Administrator CSC HPC COE Bld. 
676 2435 Fifth Street WPAFB, Ohio 45433 (937) 255-1536 Computer Sciences Corporation Registered Office: 2100 East Grand Avenue, El Segundo California 90245, USA Registered in USA No: C-489-59 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- This is a PRIVATE message. If you are not the intended recipient, please delete without copying and kindly advise us by e-mail of the mistake in delivery. NOTE: Regardless of content, this e-mail shall not operate to bind CSC to any order or other contract unless pursuant to explicit written agreement or government initiative expressly permitting the use of e-mail for such purpose. ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Arlin Davis 02/01/2008 05:45 PM To "Woodruff, Robert J" cc Mahmoud Hanafi/DEF/CSC at CSC, general-bounces at lists.openfabrics.org, general at lists.openfabrics.org Subject Re: [ofa-general] ofed1.2.5rc2 and intel mpi error > This could be related to connection timeouts. We have seen this > on larger clusters when the local sa cache is not enabled or if the SM > node is down. I think that the local_sa_cache defaults to not enabled, > but Arlin can confirm this. > > woody > That is true, OFED 1.2.5 disables SA caching by default. I would recommend enabling SA caching. When using rdma_cm to establish end-to-end connections we incur a 3 step process, each with various tunable knobs. There is ARP, Path Resolution, and CM req/reply. Any one of these could cause the 4008 timeout error. Here are tunable parameters that may help: 1. ARP: ARP cache entries for ib0 can be increased from default of 30: sysctl -w net.ipv4.neigh.ib0.base_reachable_time=14400 2.
PATH RESOLUTION: ib_sa.ko provides path record caching, no timer controls, auto refresh with new device notification events from SM/SA, manual refresh control for administrators, default == SA caching is OFF. To enable: add following to /etc/modprobe.conf - options ib_sa paths_per_dest=0x7f or echo 0x7f > /sys/module/ib_sa/paths_per_dest To manually refresh: echo 1 > /sys/module/ib_sa/refresh To monitor: cat /sys/module/ib_sa/lookup_method * 0 round robin 1 round robin cat /sys/module/ib_sa/paths_per_dest You can also increase the uDAPL PR timeout with the following environment variable (if you don't have SA caching): export DAPL_CM_ROUTE_TIMEOUT_MS=20000 (default=4000) 3. CM PROTOCOL: OFED 1.2.5 provides the following module parameters to increase the IB cm response timeout from default of 21: To increase timeout: add following to /etc/modprobe.conf - options rdma_cm cma_response_timeout=23 options ib_cm max_timeout=23 -arlin -------------- next part -------------- An HTML attachment was scrubbed... URL: From eli at dev.mellanox.co.il Sun Feb 3 23:05:02 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Mon, 04 Feb 2008 09:05:02 +0200 Subject: [ofa-general] Re: [PATCH 0/5]: Improve small UDP messages In-Reply-To: <200802032030.m13KUfqM012552@cmf.nrl.navy.mil> References: <200802032030.m13KUfqM012552@cmf.nrl.navy.mil> Message-ID: <1202108702.18209.22.camel@mtls03> On Sun, 2008-02-03 at 15:30 -0500, chas williams - CONTRACTOR wrote: > this is just going to affect the resupply of rx buffers if i understand > the code correctly. this wont delay the arrival of packets but perhaps > it should be some fraction of the rq length instead of just 16. I used 16 because this seems to be large enough to benefit from posting a list. Maybe I should require that ipoib_recvq_size is not less than 64, for example - so that a smaller value will cause the driver to fail to load?
From vlad at dev.mellanox.co.il Sun Feb 3 23:09:59 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Mon, 04 Feb 2008 09:09:59 +0200 Subject: [ofa-general] Re: [ewg] [Fwd: Re: non SRQ patch for OFED 1.3] -need some help In-Reply-To: <47A666D9.2080301@linux.vnet.ibm.com> References: <47A346C8.7010705@linux.vnet.ibm.com> <47A5B02F.6060901@dev.mellanox.co.il> <47A666D9.2080301@linux.vnet.ibm.com> Message-ID: <47A6BA47.9010206@dev.mellanox.co.il> Pradeep Satyanarayana wrote: >>> Pradeep, Shir >>> We tries to apply this patch for OFED 1.3 and its breaks some of the >>> backports. >>> Please use the makedist script on the ofa server (there is an >>> explanation in the developers Wiki) and fix this so we can try to >>> apply it >>> Vlad will help you later today too >>> >>> Thanks, >>> Tziporet >>> >>> ------------------------------------------------------------------------ >>> >>> _______________________________________________ >>> ewg mailing list >>> ewg at lists.openfabrics.org >>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg >>> >> Pradeep, >> I added your patch (kernel_patches/fixes/ipoib_0200_non_srq.patch) and >> fixed the backport issue (ipoib_0100_to_2.6.21.patch). >> Please check if ofed_1_3/linux-2.6.git ofed_kernel is ok. >> > > Hello Vladimir, > > I downloaded it and tried it on a 2.6.24 kernel (Sles10Sp2b1) and > it compiled fine. I touch tested it and looks okay too. Thanks for > your help. > > However, when I tried on a 2.6.16.57-0.9-ppc64 (Sles10sp2b1) after > running ofed_scripts/configure, the make failed as follows: > > drivers/infiniband/core/addr.c: In function addr_arp_recv: > drivers/infiniband/core/addr.c:359: error: 'struct sk_buff' has no member named 'nh' > > This seems to be coming from the addr_1_netevents_revert_to_2_6_17.patch > patch, which is completely unrelated to this patch. > > Is there a place where the steps in the build process is completely described.
> The Wiki at : > https://wiki.openfabrics.org/tiki-index.php?page=HOWTO%20Build%20OFED-1.3 > is probably missing a few steps. It would help greatly if you could describe > all the steps. Why is it that we see differing results? > > Pradeep > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg > Hello Pradeep, We probably should add support for SLES10 SP2 in a different backport directory: kernel_patches/backport/2.6.16_sles10_sp2 kernel_addons/backport/2.6.16_sles10_sp2 and then update ofed_scripts/ofed_patch.sh. You can copy 2.6.16_sles10_sp1 to 2.6.16_sles10_sp2 and then remove/update relevant patches/headers. Meanwhile, I will try to get this kernel. Regards, Vladimir From 997seqra at jpma.com Mon Feb 4 00:40:54 2008 From: 997seqra at jpma.com (Gretchen Delaney) Date: Mon, 4 Feb 2008 09:40:54 +0100 Subject: [ofa-general] Where have you been? Message-ID: <960705971.65098908481692@jpma.com> Hello! I am bored this afternoon. I am nice girl that would like to chat with you. Email me at Amanda at EHealThies.info only, because I am using my friend's email to write this. Hope you will like my pictures. From dwrealestateincm at realestateinc.com Mon Feb 4 00:15:36 2008 From: dwrealestateincm at realestateinc.com (Kennith Stubbs) Date: Mon, 4 Feb 2008 09:15:36 +0100 Subject: [ofa-general] Gamble in the best online casino! Message-ID: <01c8670e$80502c00$cae8755b@dwrealestateincm> Feel like gambling? Golden Gate Casino is worth your attention. All popular casino games, great welcome bonus, fast to download, easy to use and completely free software! Register with Golden Gate Casino and enjoy a great atmosphere of the real casino, friendly customer support, absolute security and safety. http://geocities.com/philipblankenship657/ Start downloading free software now! 
From recruitingdepot.net at pdagamez.com Mon Feb 4 00:32:33 2008 From: recruitingdepot.net at pdagamez.com (James Rogers) Date: Mon, 04 Feb 2008 09:32:33 +0100 Subject: [ofa-general] Adobe Creative Suite 3 MAC/XP/Vista for 269, Retails 1799 (You save 1529) Message-ID: <000a01c86708$c5715f80$0100007f@emufw> adobe encore dvd 2 - 49 creative suite premium 2 - 149 Use 'softnugood. com' in Internet Exp!orer (delete spaces and quotes) stuffit deluxe 11 for mac - 29 sony acid pro 6 - 59 coreldraw graphics suite 12 - 49 roxio toast titanium 8 - 39 sony acid pro 6 - 59 corel wordperfect office x3 standard - 49 cyberlink powerdvd ultra deluxe 7 - 29 adobe font folio 11 - 189 media tools professional 5 - 39 adobe fireworks cs3 - 59 Your profit is 76-90%! From kliteyn at dev.mellanox.co.il Mon Feb 4 01:01:22 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 04 Feb 2008 11:01:22 +0200 Subject: [ofa-general] [PATCH] opensm/man: partition cfg file location Message-ID: <47A6D462.7090904@dev.mellanox.co.il> Hi Sasha, Fixing a wrong partition cfg file location in opensm man page. Please apply to ofed_1_3 and master. Signed-off-by: Yevgeny Kliteynik --- opensm/man/opensm.8 | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 index ab7fb8e..460e948 100644 --- a/opensm/man/opensm.8 +++ b/opensm/man/opensm.8 @@ -200,7 +200,7 @@ is accumulative. .TP \fB\-P\fR, \fB\-\-Pconfig\fR This option defines the optional partition configuration file. -The default name is \'/etc/opensm/opensm-partitions.conf\'. +The default name is \'/etc/ofa/opensm-partitions.conf\'. 
.TP .BI --prefix_routes_file= path Prefix routes control how the SA responds to path record queries for -- 1.5.1.4 From haberci at haftasonuevi.com Sun Feb 3 23:56:18 2008 From: haberci at haftasonuevi.com (Haftasonu Evi) Date: Mon, 4 Feb 2008 09:56:18 +0200 Subject: [ofa-general] =?windows-1254?q?Ankara=27da_Sehrin_Kesmekesinden_U?= =?windows-1254?q?zakta_K=FCt=FCk_Ev_Projesi?= Message-ID: <3821-2200821475618284@user-v0gpperj84> Ankara Çubuk Karagöl'e yaklaşık 800 metre mesafede, amatör kayakçılık, Yedigöllerde olduğu gibi renk cümbüşü ormanda trekking, bisiklete binme, kısa yürüyüşler yapabileceğimiz , kuşburnu, alıç ve ahlat gibi yabani meyveler toplayıp, günün bitiminde ormanın yanıbaşında huzurlu kısa tatillerimizi geçireceğimiz mütevazi kütük evimizde konaklayacağımız, organik tarım çerçevesinde hem sebze , hem de ciddi gelir getirici ceviz ağacı yetiştirmek amacıyla 60 dönüm tapulu bir arazi satın aldık. Bizlerle aynı duyguları paylaşan, şehrin beton ve yapaylığından bıkıp ormanda ki doğal ortamda kendi evinde ve kocaman bahçesinde huzur bulmak isteyen çok az sayıda doğa severe, sizlere sesleniyoruz.Projeyi yapanlarında projenin içerisinde yer aldığı , bu on evlik proje hakkında daha ayrıntılı bilgi için : http://www.haftasonuevi.com adresini inceleyebilirsiniz. From tziporet at dev.mellanox.co.il Mon Feb 4 02:10:42 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 04 Feb 2008 12:10:42 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202057437.18209.3.camel@mtls03> References: <1201861429.6955.31.camel@eli-laptop> <47A5B19C.1080202@mellanox.co.il> <1202057437.18209.3.camel@mtls03> Message-ID: <47A6E4A2.2090300@mellanox.co.il> Eli Cohen wrote: > On Sun, 2008-02-03 at 14:20 +0200, Tziporet Koren wrote: > >>> >>> >> Eli, >> Can you send the performance gain you got with these patches? >> > > When running on kernel 2.6.24 I got the sender improved from 380 mpbs to > 508 mbps. 
I was using netperf: > netperf -H -t UDP_STREAM -- -m 128 > > > > OK - lets push these to OFED 1.3 Tziporet From sashak at voltaire.com Mon Feb 4 02:49:14 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 4 Feb 2008 10:49:14 +0000 Subject: [ofa-general] Re: [PATCH] opensm/osm_ucast_ftree.c: do load-leveling of non-CN routes In-Reply-To: <47A58671.3020202@dev.mellanox.co.il> References: <47A58671.3020202@dev.mellanox.co.il> Message-ID: <20080204104914.GN29624@sashak.voltaire.com> On 11:16 Sun 03 Feb , Yevgeny Kliteynik wrote: > Fat-tree routing wasn't load-leveling routes to the non-compute nodes, > causing IO bottle necks in fabric. > > Please apply to ofed_1_3 and master. > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. Sasha From sashak at voltaire.com Mon Feb 4 02:49:33 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 4 Feb 2008 10:49:33 +0000 Subject: [ofa-general] Re: [PATCH] opensm/osm_ucast_ftree.c: cosmetics In-Reply-To: <47A58E2B.5030105@dev.mellanox.co.il> References: <47A58E2B.5030105@dev.mellanox.co.il> Message-ID: <20080204104933.GO29624@sashak.voltaire.com> On 11:49 Sun 03 Feb , Yevgeny Kliteynik wrote: > Hi Sasha. > > Cosmetics in ftree: removed unused argument, removed > unneeded 'if' statement, fixed some comments. > > This patch is for trunk only. > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. 
Sasha From vlad at lists.openfabrics.org Mon Feb 4 03:00:15 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 4 Feb 2008 03:00:15 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080204-0200 daily build status Message-ID: <20080204110015.7B0F1E6090F@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with 
linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Failed: From dwsfarzom at sfarzo.us Mon Feb 4 03:14:35 2008 From: dwsfarzom at sfarzo.us (Fredric Stiles) Date: Mon, 4 Feb 2008 12:14:35 +0100 Subject: [ofa-general] Buy cheap Canadian drugs and start saving now with CanadianPharmacy. Message-ID: <01c86727$81819dc0$537dd14d@dwsfarzom> We are glad to offer you the possibility to save on your medications and to receive top quality pharmaceutical products. It becomes possible with ŤCanadianPharmacyť! ŤCanadianPharmacyť has an excellent level of service, helpful and cooperating customer care team. Purchase with ŤCanadianPharmacyť and your medications will come right on time well packed and in perfect condition. Privacy and confidentiality are guaranteed! Great selection of medications! http://geocities.com/wandawoodward293/ Enjoy new saving options with ŤCanadianPharmacyť! From dwraemm at raem.com Mon Feb 4 04:00:27 2008 From: dwraemm at raem.com (Jenna Hanna) Date: Mon, 4 Feb 2008 13:00:27 +0100 Subject: [ofa-general] Reliable software only! Message-ID: <01c8672d$e9c1ca80$057d1a53@dwraemm> The quickest and most convenient way to get software is to download it from our site. Low prices, fully functional and original programs only. Localized versions in all European languages! We are glad to help you to install your software. 
Feel free to ask questions and receive highly professional consultations. If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/parkercoffey375/ The best software products at the best prices. From fayechambers.m203 at googlemail.com Mon Feb 4 04:29:47 2008 From: fayechambers.m203 at googlemail.com (Faye Mohammed) Date: Mon, 4 Feb 2008 13:29:47 +0100 Subject: [ofa-general] INSEARCH OF NEXT OF KIN/BUSINESS PROPOSITION Message-ID: Please find attached. Thanking you in anticipation for your prompt response. Barrister Faye Mohammed -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: IN SEARCH OF NEXT OF KIN.doc Type: application/msword Size: 24576 bytes Desc: not available URL: From dwsilverboym at silverboy.com Mon Feb 4 05:33:00 2008 From: dwsilverboym at silverboy.com (Tabatha Reyes) Date: Mon, 4 Feb 2008 14:33:00 +0100 Subject: [ofa-general] Medications that you need. Message-ID: <01c8673a$d7e60800$7733b44d@dwsilverboym> Buy Must Have medications at Canada based pharmacy. No prescription at all! Same quality! Save your money, buy pills immediately! http://geocities.com/osvaldoatkinson234 We provide confidential and secure purchase! From eli at mellanox.co.il Mon Feb 4 05:54:05 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Mon, 04 Feb 2008 15:54:05 +0200 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: References: Message-ID: <1202133245.18209.46.camel@mtls03> On Sun, 2008-02-03 at 09:36 -0800, Shirley Ma wrote: > Does your recommendation is the same as Roland's before? I hope it's > not, otherwise, it doesn't work. Since the first buffer is GRH + IPoIB > HEAD = 44 bytes not 40 bytes. If we put all skb data in the first > frag, then the IP header is not aligned to 16 bytes. 
I am copying > Roland's comments regarding this approach: > --------- > However, I now realize that my earlier idea of allocating a scratch > buffer for the GRH and just allocating a 4096 byte skb doesn't work, > because the skb_shinfo ends up being allocated along with the buffer, > so trying to allocate a 4096-byte skb will bloat the data past a > single page, which is what we're trying to avoid. > > So how about the following? When using a UD MTU of 4096 with a page > size of 4096, allocate an skb of size 44 for the GRH and ethertype, > and then allocate a single page for the fragment list. This means > that the IP packet will start nicely 16-byte aligned for free, and all > the bookkeeping is very simple. > ------- > What I am actually suggesting is: let's allocate, for example, 128 bytes in the linear data and then a 4K page. The first 128 bytes will be used for GRH, for the encapsulation header, and for the IP and TCP/UDP headers. The following 4K fragment will have enough space to contain the rest of the packet. Another thing to consider is to use a 3-entry receive scatter list: 1. The first entry will point to a 40-byte generic buffer (allocated once per netdevice). All receive buffers will point to this buffer. As Roland suggested before, this will save us the skb_pull on the GRH. 2. A 128-byte buffer which comes from the linear part of the SKB - we can align this buffer to ensure IP is aligned at a 16-byte boundary. 3. A 4K page in the first fragment. We can then check, when the packet is received, whether the overall packet length is small enough that it did not touch the page. If it did not, we can reuse this page for the newly posted buffer. ** The above 128-byte value can be a macro, and we can determine what the correct value is.
From hartlch14 at gmail.com Mon Feb 4 05:59:36 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Mon, 4 Feb 2008 08:59:36 -0500 Subject: [ofa-general] Question about exchanging DAT_RMR_TRIPLET In-Reply-To: <469958e00802011452k42b7e42ap9aac09de682ceced@mail.gmail.com> References: <469958e00802011452k42b7e42ap9aac09de682ceced@mail.gmail.com> Message-ID: Arlin and Caitlin - Thanks for the insights about the triplets. I did some experimentation with a test program and got it straightened out. Turned out that we had two problems to consider. First was the endian issue, which I expected but couldn't figure out why the byte swapping wasn't fixing it. Then I realized that the new box was using DAPL 2.0 and the others are 1.2 version. Redefining the message we are sending to account for both DAT_RMR_TRIPLET types took care of everything. Chuck -------------- next part -------------- An HTML attachment was scrubbed... URL: From jlentini at netapp.com Mon Feb 4 06:59:50 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 4 Feb 2008 09:59:50 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: <20080201224530.GA16581@cefeid.wcss.wroc.pl> References: <4797AD59.2000206@mellanox.co.il> <20080126193035.GA21209@cefeid.wcss.wroc.pl> <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> Message-ID: On Fri, 1 Feb 2008, Pawel Dziekonski wrote: > On Fri, 01 Feb 2008 at 10:56:55AM -0500, James Lentini wrote: > > > # mount 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 Unsupported nfs > > > mount option: rdma > > > > > > looks like I definitelly need a rdma-enabled mount, which comes in > > > http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so: > > > > If you are using the NFS/RDMA client in 2.6.24, you need the version > > of the mount.nfs command in nfs-utils-1.1.1 or greater. 
> > > > The "Unsupported nfs mount option: rdma" error message makes me > > suspect you are not using the correct version of mount.nfs. What is > > the output of "mount.nfs -V" and "mount -V"? It may be that the > > version of mount you are using does not automatically invoke mount.nfs > > for nfs mounts. I'd suggest specifying mount.nfs in the command above: > > # mount.nfs -V > mount.nfs (linux nfs-utils 1.1.1) > > > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 > > # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > mount.nfs: timeout set for Fri Feb 1 23:48:08 2008 > mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' > mount.nfs: internal error > > :( I'm going to guess that the "internal error" message means that your kernel does not support the NFS string mount API. Are you sure your kernel you are using is an exact copy of Tom Tucker's git tree? It can NOT have the OFED 1.2 package installed. If the kernel is correct, can you send the output of: strace -e trace=all 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 From tziporet at mellanox.co.il Mon Feb 4 07:16:32 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 4 Feb 2008 17:16:32 +0200 Subject: [ofa-general] Please send all patches for OFED 1.3 rc4 by end of Monday (Feb 4) Message-ID: <6C2C79E72C305246B504CBA17B5500C90340E13C@mtlexch01.mtl.com> Thanks Tziporet -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From vlad at dev.mellanox.co.il Mon Feb 4 07:17:41 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Mon, 04 Feb 2008 17:17:41 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1201873218.6677.4.camel@eli-laptop> References: <1201873218.6677.4.camel@eli-laptop> Message-ID: <47A72C95.7000608@dev.mellanox.co.il> Eli Cohen wrote: > The following patches, based on ofed 1.3, are intended to address bugs > https://bugs.openfabrics.org/show_bug.cgi?id=760 and > https://bugs.openfabrics.org/show_bug.cgi?id=761. They address UD mode > both send and receive and improve performance when using small messages > UDP traffic. The observation we had is that at small UDP messages, the > message rate is high and so what limits throughput is CPU, e.g. CPU is > 100% busy. > In the send flow I use a dedicated CQ for the send flow which in turn is > never armed. CQEs consumption is done by polling after posting a send > message. Also, the QP is configured for selective signaling and polling > the CQ is done once in 16 messages. > > On the receive side the code is changed to post to receive queue once in > 16 completions. This is done in for both UD and and CM. > > > 0001-IB-ipoib-Split-CQs-for-IPOIB-UD.patch > 0002-IB-ipoib-Unsingnalled-UD-QP.patch > 0003-IPOIB-post-to-SRQ-every-n-buffers.patch > 0004-IB-ipoib-rx-WQE-draft-in-IPOIB-UD.patch > 0005-IB-ipoib-IPOIB-rx-post-list.patch > > Tziporet, please approve for inclusion in ofed 1.3 > > > Applied to ofed 1.3, Regards, Vladimir From pawel.dziekonski at pwr.wroc.pl Mon Feb 4 07:28:58 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Mon, 4 Feb 2008 16:28:58 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? 
In-Reply-To: References: <20080126193035.GA21209@cefeid.wcss.wroc.pl> <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> Message-ID: <20080204152858.GA25343@cefeid.wcss.wroc.pl> On Mon, 04 Feb 2008 at 09:59:50AM -0500, James Lentini wrote: > > > On Fri, 1 Feb 2008, Pawel Dziekonski wrote: > > > On Fri, 01 Feb 2008 at 10:56:55AM -0500, James Lentini wrote: > > > > # mount 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 Unsupported nfs > > > > mount option: rdma > > > > > > > > looks like I definitelly need a rdma-enabled mount, which comes in > > > > http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so: > > > > > > If you are using the NFS/RDMA client in 2.6.24, you need the version > > > of the mount.nfs command in nfs-utils-1.1.1 or greater. > > > > > > The "Unsupported nfs mount option: rdma" error message makes me > > > suspect you are not using the correct version of mount.nfs. What is > > > the output of "mount.nfs -V" and "mount -V"? It may be that the > > > version of mount you are using does not automatically invoke mount.nfs > > > for nfs mounts. I'd suggest specifying mount.nfs in the command above: > > > > # mount.nfs -V > > mount.nfs (linux nfs-utils 1.1.1) > > > > > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 > > > > # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > > mount.nfs: timeout set for Fri Feb 1 23:48:08 2008 > > mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' > > mount.nfs: internal error > > > > :( > > I'm going to guess that the "internal error" message means that your > kernel does not support the NFS string mount API. Are you sure your > kernel you are using is an exact copy of Tom Tucker's git tree? It > can NOT have the OFED 1.2 package installed. kernel was pulled from Tom's tree, Jan 25th. 
# cat .git/config [core] repositoryformatversion = 0 filemode = true bare = false logallrefupdates = true [remote "origin"] url = git://git.linux-nfs.org/projects/tomtucker/xprt-switch-2.6.git fetch = +refs/heads/*:refs/remotes/origin/* [branch "merged-fixes"] remote = origin merge = refs/heads/merged-fixes config: https://cefeid.wcss.wroc.pl/d/tmp/c-2.6.24-rc6 What do you mean 'It can NOT have the OFED 1.2 package installed.'? I can not install OFED 1.2 on this machine at all?! > If the kernel is correct, can you send the output of: > > strace -e trace=all 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 execve("/sbin/mount.nfs", ["mount.nfs", "10.2.2.1:/scratch", "/mnt", "-i", "-o", "rdma,port=2050"], [/* 28 vars */]) = 0 uname({sys="Linux", node="ib2", ...}) = 0 brk(0) = 0x613000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b68b8a78000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/opt/intel/mkl/10.0.1.014/lib/em64t/tls/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/mkl/10.0.1.014/lib/em64t/tls/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/mkl/10.0.1.014/lib/em64t/tls/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/mkl/10.0.1.014/lib/em64t/tls", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/mkl/10.0.1.014/lib/em64t/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/mkl/10.0.1.014/lib/em64t/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/mkl/10.0.1.014/lib/em64t/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/mkl/10.0.1.014/lib/em64t", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 open("/opt/intel/fce/10.1.011/lib/tls/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/fce/10.1.011/lib/tls/x86_64", 0x7ffff202db10) = -1 ENOENT (No such 
file or directory) open("/opt/intel/fce/10.1.011/lib/tls/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/fce/10.1.011/lib/tls", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/fce/10.1.011/lib/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/fce/10.1.011/lib/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/fce/10.1.011/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/fce/10.1.011/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 open("/opt/intel/cce/10.1.011/lib/tls/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/cce/10.1.011/lib/tls/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/cce/10.1.011/lib/tls/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/cce/10.1.011/lib/tls", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/cce/10.1.011/lib/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/cce/10.1.011/lib/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) open("/opt/intel/cce/10.1.011/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) stat("/opt/intel/cce/10.1.011/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=38220, ...}) = 0 mmap(NULL, 38220, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b68b8a79000 close(3) = 0 open("/lib64/tls/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240\304"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=1622600, ...}) = 0 mmap(0x33f2600000, 2314184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x33f2600000 mprotect(0x33f272c000, 1085384, PROT_NONE) = 0 mmap(0x33f282c000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12c000) = 0x33f282c000 
mmap(0x33f2831000, 16328, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x33f2831000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b68b8a83000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b68b8a84000 mprotect(0x33f282c000, 12288, PROT_READ) = 0 mprotect(0x33f2514000, 4096, PROT_READ) = 0 arch_prctl(ARCH_SET_FS, 0x2b68b8a83b00) = 0 munmap(0x2b68b8a79000, 38220) = 0 getuid() = 0 uname({sys="Linux", node="ib2", ...}) = 0 uname({sys="Linux", node="ib2", ...}) = 0 brk(0) = 0x613000 brk(0x634000) = 0x634000 lstat("/mnt", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 stat("/mnt", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 access("/mnt", X_OK) = 0 mount("10.2.2.1:/scratch", "/mnt", "nfs", 0, "rdma,port=2050,addr=10.2.2.1") = -1 EIO (Input/output error) write(2, "mount.nfs: internal error\n", 26mount.nfs: internal error ) = 26 exit_group(32) = ? Process 32607 detached -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From mashirle at us.ibm.com Sun Feb 3 21:31:13 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sun, 03 Feb 2008 21:31:13 -0800 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1202133245.18209.46.camel@mtls03> References: <1202133245.18209.46.camel@mtls03> Message-ID: <1202103073.4502.2.camel@localhost.localdomain> Hello Eli, On Mon, 2008-02-04 at 15:54 +0200, Eli Cohen wrote > Another thing to consider is use a 3 entries receive scatter list: > 1. The first will point to 40 bytes generic buffer (allocated once per > netdevice). All receive buffer will point to this buffer. As Roland > suggested before, this will save us the skb_pull on the GRH. > > 2. 
A 128 bytes buffer which comes from the linear part of the SKB - we > can align this buffer to ensure IP is aligned at 16 byte boundary. > > 3. A 4K page to in the first fragment. > We can then check when the packet is received whether the overall > packet > length is small enough such that it did not touch the page. If it did > not we can use this page for the newly posted buffer. > > ** the above 128 bytes value can be a macro and we can determine what > is > the correct value. Are you saying we also do this for 2K MTU? Otherwise the if condition check can't not be avoid. And I don't know how much performance gain from this approach. Thanks Shirley From eli at mellanox.co.il Mon Feb 4 07:44:47 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Mon, 04 Feb 2008 17:44:47 +0200 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1202103073.4502.2.camel@localhost.localdomain> References: <1202133245.18209.46.camel@mtls03> <1202103073.4502.2.camel@localhost.localdomain> Message-ID: <1202139887.18209.65.camel@mtls03> On Sun, 2008-02-03 at 21:31 -0800, Shirley Ma wrote: > Are you saying we also do this for 2K MTU? Otherwise the if condition > check can't not be avoid. And I don't know how much performance gain > from this approach. > Hi Shirley, I think it we can do it for 2K MTU is well and avoid all the if . But first let's get this to ofed 1.3 and then work on the changes. Unfortunately you'll have to build again your patches on top of the current ofed tree. Can you do it today? From xma at us.ibm.com Mon Feb 4 08:01:15 2008 From: xma at us.ibm.com (Shirley Ma) Date: Mon, 4 Feb 2008 08:01:15 -0800 Subject: [ofa-general] Please send all patches for OFED 1.3 rc4 by end of Monday (Feb 4) In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90340E13C@mtlexch01.mtl.com> Message-ID: Hello Tziporet, Eli has reviewed the IPoIB-UD 4K mtu patchset. 
He suggested an alternative way (reserve one buffer for all header including GRH, IPoIB-ethernet, IP header, TCP header, and leave the user data in other buffer) for the implementation but This approach can't avoid if condition check without changing IPoIB-2K MTU implementation. And I am not sure whether or how much performance gain from this approach, it's pretty risky to change IPoIB-2K MTU at this moment. I would like to keep my approach, and will limit to one "if need-S/G check" in fast path. and I have passed stress test for both mthca and ehca in the past few days. I don't see any issues for OFED-1.3 RC3. Do you agree to include this IPoIB-UD 4K MTU patch into OFED-1.3 RC4? I need to regerate this patch today since Eli's small messages size patch has been in Git tree. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From mashirle at us.ibm.com Sun Feb 3 22:03:41 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sun, 03 Feb 2008 22:03:41 -0800 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1202139887.18209.65.camel@mtls03> References: <1202133245.18209.46.camel@mtls03> <1202103073.4502.2.camel@localhost.localdomain> <1202139887.18209.65.camel@mtls03> Message-ID: <1202105021.4502.5.camel@localhost.localdomain> On Mon, 2008-02-04 at 17:44 +0200, Eli Cohen wrote: > On Sun, 2008-02-03 at 21:31 -0800, Shirley Ma wrote: > > > Are you saying we also do this for 2K MTU? Otherwise the if condition > > check can't not be avoid. And I don't know how much performance gain > > from this approach. > > > > Hi Shirley, > > I think it we can do it for 2K MTU is well and avoid all the if . But > first let's get this to ofed 1.3 and then work on the changes. > Unfortunately you'll have to build again your patches on top of the > current ofed tree. Can you do it today? Thanks Eli. I will pull git tree and do it today. 
I will limit the need-S/G check in one in fast path. thanks Shirley From eli at dev.mellanox.co.il Mon Feb 4 08:02:44 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Mon, 04 Feb 2008 18:02:44 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: References: Message-ID: <1202140964.18209.69.camel@mtls03> On Sun, 2008-02-03 at 09:10 -0800, Shirley Ma wrote: > Hello Eli, > > Can you send me a combined attachment patch for RC3-2.6.24 kernel so i > can validate for both mthca and ehca here? > > Thanks > Shirley It is already in the latest ofed build and also in the attached files. -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib_0180_split_cq.patch Type: text/x-patch Size: 10750 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib_0190_unsig_udqp.patch Type: text/x-patch Size: 11358 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib_0210_draft_wr.patch Type: text/x-patch Size: 2882 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib_0220_ud_post_list.patch Type: text/x-patch Size: 4933 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib_0230_srq_post_n.patch Type: text/x-patch Size: 5941 bytes Desc: not available URL: From jlentini at netapp.com Mon Feb 4 08:09:43 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 4 Feb 2008 11:09:43 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? 
In-Reply-To: <20080204152858.GA25343@cefeid.wcss.wroc.pl> References: <20080126193035.GA21209@cefeid.wcss.wroc.pl> <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> Message-ID: On Mon, 4 Feb 2008, Pawel Dziekonski wrote: > On Mon, 04 Feb 2008 at 09:59:50AM -0500, James Lentini wrote: > > > > > > On Fri, 1 Feb 2008, Pawel Dziekonski wrote: > > > > > On Fri, 01 Feb 2008 at 10:56:55AM -0500, James Lentini wrote: > > > > > # mount 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 Unsupported nfs > > > > > mount option: rdma > > > > > > > > > > looks like I definitelly need a rdma-enabled mount, which comes in > > > > > http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so: > > > > > > > > If you are using the NFS/RDMA client in 2.6.24, you need the version > > > > of the mount.nfs command in nfs-utils-1.1.1 or greater. > > > > > > > > The "Unsupported nfs mount option: rdma" error message makes me > > > > suspect you are not using the correct version of mount.nfs. What is > > > > the output of "mount.nfs -V" and "mount -V"? It may be that the > > > > version of mount you are using does not automatically invoke mount.nfs > > > > for nfs mounts. I'd suggest specifying mount.nfs in the command above: > > > > > > # mount.nfs -V > > > mount.nfs (linux nfs-utils 1.1.1) > > > > > > > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 > > > > > > # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > > > mount.nfs: timeout set for Fri Feb 1 23:48:08 2008 > > > mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' > > > mount.nfs: internal error > > > > > > :( > > > > I'm going to guess that the "internal error" message means that your > > kernel does not support the NFS string mount API. Are you sure your > > kernel you are using is an exact copy of Tom Tucker's git tree? 
It > > can NOT have the OFED 1.2 package installed. > > kernel was pulled from Tom's tree, Jan 25th. > > # cat .git/config > [core] > repositoryformatversion = 0 > filemode = true > bare = false > logallrefupdates = true > [remote "origin"] > url = git://git.linux-nfs.org/projects/tomtucker/xprt-switch-2.6.git > fetch = +refs/heads/*:refs/remotes/origin/* > [branch "merged-fixes"] > remote = origin > merge = refs/heads/merged-fixes > > > config: https://cefeid.wcss.wroc.pl/d/tmp/c-2.6.24-rc6 > > What do you mean 'It can NOT have the OFED 1.2 package installed.'? > I can not install OFED 1.2 on this machine at all?! I'm refering to the OFED 1.2 NFS/RDMA package you cited above: http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so You can NOT install that on Tom Tucker's kernel. That is an old version of the NFS/RDMA software that will conflict with the latest code. If the OFED version you want to use is supported on Tom Tucker's kernel (2.6.24-rc6), there should be no problem using OFED. The OFED 1.2 NFS/RDMA release is a different story. 
> > If the kernel is correct, can you send the output of: > > > > strace -e trace=all 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 > > execve("/sbin/mount.nfs", ["mount.nfs", "10.2.2.1:/scratch", "/mnt", "-i", "-o", "rdma,port=2050"], [/* 28 vars */]) = 0 > uname({sys="Linux", node="ib2", ...}) = 0 > brk(0) = 0x613000 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b68b8a78000 > access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) > open("/opt/intel/mkl/10.0.1.014/lib/em64t/tls/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/mkl/10.0.1.014/lib/em64t/tls/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/mkl/10.0.1.014/lib/em64t/tls/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/mkl/10.0.1.014/lib/em64t/tls", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/mkl/10.0.1.014/lib/em64t/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/mkl/10.0.1.014/lib/em64t/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/mkl/10.0.1.014/lib/em64t/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/mkl/10.0.1.014/lib/em64t", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 > open("/opt/intel/fce/10.1.011/lib/tls/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/fce/10.1.011/lib/tls/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/fce/10.1.011/lib/tls/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/fce/10.1.011/lib/tls", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/fce/10.1.011/lib/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/fce/10.1.011/lib/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > 
open("/opt/intel/fce/10.1.011/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/fce/10.1.011/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 > open("/opt/intel/cce/10.1.011/lib/tls/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/cce/10.1.011/lib/tls/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/cce/10.1.011/lib/tls/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/cce/10.1.011/lib/tls", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/cce/10.1.011/lib/x86_64/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/cce/10.1.011/lib/x86_64", 0x7ffff202db10) = -1 ENOENT (No such file or directory) > open("/opt/intel/cce/10.1.011/lib/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory) > stat("/opt/intel/cce/10.1.011/lib", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 > open("/etc/ld.so.cache", O_RDONLY) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=38220, ...}) = 0 > mmap(NULL, 38220, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b68b8a79000 > close(3) = 0 > open("/lib64/tls/libc.so.6", O_RDONLY) = 3 > read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240\304"..., 832) = 832 > fstat(3, {st_mode=S_IFREG|0755, st_size=1622600, ...}) = 0 > mmap(0x33f2600000, 2314184, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x33f2600000 > mprotect(0x33f272c000, 1085384, PROT_NONE) = 0 > mmap(0x33f282c000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12c000) = 0x33f282c000 > mmap(0x33f2831000, 16328, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x33f2831000 > close(3) = 0 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b68b8a83000 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b68b8a84000 > mprotect(0x33f282c000, 12288, PROT_READ) = 0 > mprotect(0x33f2514000, 
4096, PROT_READ) = 0 > arch_prctl(ARCH_SET_FS, 0x2b68b8a83b00) = 0 > munmap(0x2b68b8a79000, 38220) = 0 > getuid() = 0 > uname({sys="Linux", node="ib2", ...}) = 0 > uname({sys="Linux", node="ib2", ...}) = 0 > brk(0) = 0x613000 > brk(0x634000) = 0x634000 > lstat("/mnt", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 > stat("/mnt", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 > access("/mnt", X_OK) = 0 > mount("10.2.2.1:/scratch", "/mnt", "nfs", 0, "rdma,port=2050,addr=10.2.2.1") = -1 EIO (Input/output error) The above makes me suspect that the OFED 1.2 NFS/RDMA package is installed. If that is not the case, can you turn on NFS mount debugging? dmesg -c > /dev/null echo 1024 > /proc/sys/sunrpc/nfs_debug mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 dmesg > output Please send the contents of "output". > write(2, "mount.nfs: internal error\n", 26mount.nfs: internal error > ) = 26 > exit_group(32) = ? > Process 32607 detached > > > -- > Pawel Dziekonski > Wroclaw Centre for Networking & Supercomputing, HPC Department > Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND > phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From tziporet at dev.mellanox.co.il Mon Feb 4 08:14:08 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 04 Feb 2008 18:14:08 +0200 Subject: [ewg] Re: [ofa-general] Please send all patches for OFED 1.3 rc4 by end of Monday (Feb 4) In-Reply-To: References: Message-ID: <47A739D0.5090605@mellanox.co.il> Shirley Ma wrote: > > Hello Tziporet, > > Eli has reviewed the IPoIB-UD 4K mtu patchset. He suggested an > alternative way (reserve one buffer for all header including GRH, > IPoIB-ethernet, IP header, TCP header, and leave the user data in > other buffer) for the implementation but This approach can't avoid if > condition check without changing IPoIB-2K MTU implementation.
And I am > not sure whether or how much performance gain from this approach, it's > pretty risky to change IPoIB-2K MTU at this moment. > I think the way Eli proposed is better but the point you raised (do not touch the 2K MTU) is also important for stability. > > I would like to keep my approach, and will limit to one "if need-S/G > check" in fast path. and I have passed stress test for both mthca and > ehca in the past few days. I don't see any issues for OFED-1.3 RC3. Do > you agree to include this IPoIB-UD 4K MTU patch into OFED-1.3 RC4? I > need to regerate this patch today since Eli's small messages size > patch has been in Git tree. > OK - go ahead and regenerate patch and we will be able to include it in RC4 BTW - how did you test it with mthca? It does not support 4K MTU. You can test it with ConnectX since it does supports 4K MTU (with a special burning configuration). Please let me know if you have ConnectX and you wish to test it with 4K MTU Tziporet From eli at mellanox.co.il Mon Feb 4 08:31:21 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Mon, 04 Feb 2008 18:31:21 +0200 Subject: [ofa-general] comparison between 4K mtu and none 4K mtu Message-ID: <1202142681.18209.77.camel@mtls03> FYI I have made a few experiments with the 4K mtu patches based on Roland's tree. The results are in the attached execl file. I used netperf to take these results. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: mtu_4k.xls Type: application/vnd.ms-excel Size: 104960 bytes Desc: not available URL: From tziporet at dev.mellanox.co.il Mon Feb 4 08:37:02 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 04 Feb 2008 18:37:02 +0200 Subject: [ofa-general] OFED meeting agenda on 1.3-rc3 status and rc4 readiness Message-ID: <47A73F2E.4080401@mellanox.co.il> This is the agenda to OFED meeting today on 1.3-rc3 status and rc4 readiness Reminder for the release schedule * RC3 - done (30-Jan) * RC4 - Feb 6 or 7 * RC5 - Feb 18 <== Gold (is this a vacation day in US?) * GA - Feb 25 Agenda: 1. Status update - all 2. Agree on the above schedual 3. Critical/Major bugs review: 874 critical jeremy.brown at qlogic.com Intel MPI (IMB test) hangs intermittently on the qlogic HCA 846 critical jim at mellanox.com SDP crash on RHEL5 ppc64 running netserver 878 critical monis at voltaire.com slow failover with bonding and connected mode 888 critical pasha at mellanox.co.il OSU latency benchmark (old version with iteration and message size parameter) stuck sometime 887 critical pasha at mellanox.co.il IMB benchmark stuck 760 major eli at mellanox.co.il UDP performance on Rx is lower than Tx 736 major tziporet at mellanox.co.il IBV_WC_RETRY_EXC_ERR errors with local rdma_reads -------------- next part -------------- An HTML attachment was scrubbed... URL: From tziporet at mellanox.co.il Mon Feb 4 08:39:25 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 4 Feb 2008 18:39:25 +0200 Subject: [ofa-general] OFED meeting agenda on 1.3-rc3 status and rc4 readiness Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E541@mtlexch01.mtl.com> This is the agenda to OFED meeting today on 1.3-rc3 status and rc4 readiness Reminder for the release schedule * RC3 - done (30-Jan) * RC4 - Feb 6 or 7 * RC5 - Feb 18 <== Gold (is this a vacation day in US?) * GA - Feb 25 Agenda: 1. Status update - all 2. Agree on the above schedule 3. 
Critical/Major bugs review: 874 critical jeremy.brown at qlogic.com Intel MPI (IMB test) hangs intermittently on the qlogic HCA 846 critical jim at mellanox.com SDP crash on RHEL5 ppc64 running netserver 878 critical monis at voltaire.com slow failover with bonding and connected mode 888 critical pasha at mellanox.co.il OSU latency benchmark (old version with iteration and message size parameter) stuck sometime 887 critical pasha at mellanox.co.il IMB benchmark stuck 760 major eli at mellanox.co.il UDP performance on Rx is lower than Tx 736 major tziporet at mellanox.co.il IBV_WC_RETRY_EXC_ERR errors with local rdma_reads Tziporet -------------- next part -------------- An HTML attachment was scrubbed... URL: From swise at opengridcomputing.com Mon Feb 4 08:45:19 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 04 Feb 2008 10:45:19 -0600 Subject: [ofa-general] Re: [ewg] OFED meeting agenda on 1.3-rc3 status and rc4 readiness In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E541@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C90282E541@mtlexch01.mtl.com> Message-ID: <47A7411F.6090406@opengridcomputing.com> Tziporet, I cannot attend today's call. I have status below: Tziporet Koren wrote: > > This is the agenda to OFED meeting today on 1.3-rc3 status and rc4 readiness > > Reminder for the release schedule > > * RC3 - done (30-Jan) > * RC4 - Feb 6 or 7 > * RC5 - Feb 18 <== Gold (is this a vacation day in US?) > * GA - Feb 25 > > > Agenda: > 1. Status update - all Uncovered a cxgb3 bug that we need fixed for ofed-1.3. I just opened 890 to track this. I hope to have a fix today or tomorrow... Also, I posted a trivial change to rmda_lat to enable it on chelsio devices. This was an oversight that should have been fixed a while ago. > > 2. Agree on the above schedule Agree on the schedule, but I need bug 890 in. Most likely it'll have to go in RC5. steve. 
From conangdatinh88 at yahoo.com Mon Feb 4 08:52:45 2008 From: conangdatinh88 at yahoo.com (HANOI-FASHION) Date: Mon, 4 Feb 2008 23:52:45 +0700 Subject: [ofa-general] HANOI-FASHION: CHUYEN VESTON NAM NU CAO CAP Message-ID: <20080204165249.0DEAFE6094F@openfabrics.org> An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: VESTON NAM NU.JPG Type: image/jpeg Size: 202377 bytes Desc: not available URL: From xma at us.ibm.com Mon Feb 4 08:53:12 2008 From: xma at us.ibm.com (Shirley Ma) Date: Mon, 4 Feb 2008 08:53:12 -0800 Subject: [ofa-general] comparison between 4K mtu and none 4K mtu In-Reply-To: <1202142681.18209.77.camel@mtls03> Message-ID: Hello Eli, What's the CPU utilization? I do see much better performance (40-100%) regarding ehca. I wonder whether CPU is the limitation there. Have you tried multiple streams, duplex? Thanks Shirley Eli Cohen mashirle at linux.vnet.ibm.com, Tziporet Koren Sent by: , openfabrics general-b ounces at li cc sts.openf abrics.or Subject g [ofa-general] comparison between 4K mtu and none 4K mtu 02/04/08 08:31 AM FYI I have made a few experiments with the 4K mtu patches based on Roland's tree. The results are in the attached execl file. I used netperf to take these results. [attachment "mtu_4k.xls" deleted by Shirley Ma/Beaverton/IBM] _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: graycol.gif Type: image/gif Size: 105 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: pic13533.gif Type: image/gif Size: 1255 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: From eli at mellanox.co.il Mon Feb 4 09:00:33 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Mon, 04 Feb 2008 19:00:33 +0200 Subject: [ofa-general] comparison between 4K mtu and none 4K mtu In-Reply-To: References: Message-ID: <1202144433.18209.80.camel@mtls03> On Mon, 2008-02-04 at 08:53 -0800, Shirley Ma wrote: > Hello Eli, > > What's the CPU utilization? I do see much better performance (40-100%) > regarding ehca. I wonder whether CPU is the limitation there. Have you > tried multiple streams, duplex? > > Thanks > Shirley > > I did not measure CPU utilization but for small UDP messages I expect it to be 100% and so CPU would be limiting the throughput. I might take more measurements tomorrow though. From sean.hefty at intel.com Mon Feb 4 09:05:21 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 4 Feb 2008 09:05:21 -0800 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <200802031758.53692.dotanb@dev.mellanox.co.il> References: <200802031758.53692.dotanb@dev.mellanox.co.il> Message-ID: <000101c86750$1ffce090$a937170a@amr.corp.intel.com> >+.SH "DESCRIPTION" >+This library is an implementation of the verbs according to the Infiniband >specification volume 1.2. It handles the control path of creating, modifying, >querying and destroying resources such as Protection Domains (PD), Completion >Queues (CQ), Queue-Pairs (QP), Shared Receive Queues (SRQ), Address Handles >(AH), Memory Regions (MR). It also handles sending and receiving data posted to >QPs and SRQs, getting completions from CQs using polling and completions >events. The verbs also support iWarp devices and are not necessarily restricted to the 1.2 IB spec definitions. 
It might make sense to state that the IB implementation is based on the 1.2 spec in an IB specific section, but keep the general documentation transport neutral at this point. - Sean From ardavis at ichips.intel.com Mon Feb 4 09:10:12 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Mon, 04 Feb 2008 09:10:12 -0800 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: References: Message-ID: <47A746F4.2020607@ichips.intel.com> Mahmoud Hanafi wrote: > > How do I ensure that local_sa_cache is enabled? > cat /sys/module/ib_sa/paths_per_dest should return 7F if it is enabled and running. -arlin From xma at us.ibm.com Mon Feb 4 08:49:34 2008 From: xma at us.ibm.com (Shirley Ma) Date: Mon, 4 Feb 2008 08:49:34 -0800 Subject: ***SPAM*** Re: [ewg] Re: [ofa-general] Please send all patches for OFED 1.3 rc4 by end of Monday (Feb 4) In-Reply-To: <47A739D0.5090605@mellanox.co.il> Message-ID: Tziporet Koren wrote on 02/04/2008 08:14:08 AM: > OK - go ahead and regenerate patch and we will be able to include it in RC4 > BTW - how did you test it with mthca? It does not support 4K MTU. You > can test it with ConnectX since it does supports 4K MTU (with a special > burning configuration). Please let me know if you have ConnectX and you > wish to test it with 4K MTU > > Tziporet Thanks Tziporet. I would like to test ConnectX. But I can't test it right now since the switch connected to ConnectX is configured as 2K MTU and the test team has other test tasks to finish. But I can suggest the test team to include 4K MTU test as part of their system validation. Please send me the instructions on how to enable it for ConnectX. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed...
URL: From kilian at stanford.edu Mon Feb 4 09:31:45 2008 From: kilian at stanford.edu (Kilian CAVALOTTI) Date: Mon, 4 Feb 2008 09:31:45 -0800 Subject: [ofa-general] ENOMEM In-Reply-To: <47A584DE.2020308@lfbs.rwth-aachen.de> References: <47A352FC.8090604@lfbs.rwth-aachen.de> <200802011705.51202.kilian@stanford.edu> <47A584DE.2020308@lfbs.rwth-aachen.de> Message-ID: <200802040931.45952.kilian@stanford.edu> Hi Ruben, On Sunday 03 February 2008 01:09:50 am Ruben Niederhagen wrote: > For 'ulimit -l' I get 512 - as root as well as usual user; shouldn't > that be enough? The only way to be sure is to increase it and see if it changes anything. :) > How do I enlarge this limit? The line > # > * hard locks 1024 > in /etc/security/limits.conf (+reboot) didn't do the trick... I believe the relevant type is "memlock", and you can set the value up to "unlimited". Setting a hard limit won't change the default user limit (set a "soft" limit for this) but will affect users' ability to increase this limit. So, with: * hard memlock unlimited in /etc/security/limits.conf, you should be able to do, as a user: $ ulimit -l unlimited and try your ibv_srq_pingpong again. HTH.
Cheers, -- Kilian From hrosenstock at xsigo.com Mon Feb 4 09:39:22 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 04 Feb 2008 09:39:22 -0800 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UD RX S/G supportfor 4K MTU In-Reply-To: <1202058449.18209.17.camel@mtls03> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> <1201996963.19565.240.camel@localhost.localdomain> <1202033249.5839.17.camel@mtls03> <1201998120.19565.245.camel@localhost.localdomain> <6C2C79E72C305246B504CBA17B5500C9033B0F44@mtlexch01.mtl.com> <1201998622.19565.250.camel@localhost.localdomain> <1202058449.18209.17.camel@mtls03> Message-ID: <1202146762.11210.235.camel@hrosenstock-ws.xsigo.com> Eli, On Sun, 2008-02-03 at 19:07 +0200, Eli Cohen wrote: > Hi Shirley, > > I have reviewed the patches against Roland's tree and have the following > comments: > > 1. I see that there are a few if statements added on the fast pass and I > am concerned they might hurt performance of slow UDP messages. > Unfortunately I have not been able to test with an SM defining the > broadcast group to 4K MTU (currently opensm uses 2K). The default is 2K (mtu=4). You can get opensm to make it 4K if you want as follows: /etc/ofa/opensm-partitions.conf: Default=0x7fff,ipoib,mtu=5:ALL=full; -- Hal > 2. The usage of ipoib_ud_skb_put_frags() seems to be redundant and will > only hurt performance since you would never reuse anything from the old > SKB. This is because the headlen is 40 bytes for GRH and the rest of the > data is in the first (and only) fragment. > > 3. I think it would be better to allocate room for real data in the head > of the SKB since the tcp/ip stack seems to have less overhead if the > headers are on the linear data. > > 4. 
I would consider using a pre-allocated buffer for the GRH of all > received data (not as part of the SKB). > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From jim at mellanox.com Mon Feb 4 10:34:41 2008 From: jim at mellanox.com (Jim Mott) Date: Mon, 4 Feb 2008 10:34:41 -0800 Subject: [ofa-general] RE: [ewg] Not seeing any SDP performance changes References: <47445630.10000@dev.mellanox.co.il> <000001c8338c$206e80d0$614b8270$@rr.com> <4798A9F5.7030109@gmail.com> <005301c85f5d$e03e36b0$a0baa410$@rr.com> Message-ID: Hi, I am back in the office and have installed a fresh Rhat4U4 system on a test machine that was running Rhat5. The only non-default options I used were: - No fireware - Disable SELinux Then I built Netperf 2.4.3 on the new system. (./configure; make; make install) Then I downloaded and installed today's OFED 1.3 release. At this point I have two identical hardware platforms running Rhat4U4 (2.6.9-42.ELsmp) kernel right off the install media. They are both running Netperf 2.4.3 and today's OFED stack. Both are using ConnectX cards with 2.3 firmware. 
Running as root on both (netserver and netperf) sides my little shell script pulled out the following bandwidth numbers: 64K 128K 1M SDP 8215.17 6429.09 6862.66 BZCOPY 8748.00 9997.07 9847.76 Looking at uS/KB transferred we see: 64K 128K 1M LCL RMT LCL RMT LCL RMT SDP 1.025 1.243 1.391 1.493 1.274 1.407 BZCOPY 0.966 1.148 0.838 1.014 0.603 0.984 The output of "lspci -vv" for the HCA is: 0a:00.0 InfiniBand: Mellanox Technologies: Unknown device 634a (rev a0) Subsystem: Mellanox Technologies: Unknown device 634a Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- /sys/module/ib_sdp/sdp_zcopy_thresh # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---r 64K 87380 16384 16384 60.00 7106.72 13.32 14.87 1.228 1.370 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---r 128K 87380 16384 16384 60.00 6906.18 14.02 15.18 1.330 1.441 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---r 1M 87380 16384 16384 60.00 7030.98 13.97 15.13 1.303 1.410 # echo 1 > /sys/module/ib_sdp/sdp_zcopy_thresh # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---r 64K 87380 16384 16384 60.00 6491.93 13.83 14.90 1.396 1.504 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---r 128K 87380 16384 16384 60.00 6536.61 14.19 14.80 1.423 1.484 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---r 1M 87380 16384 16384 60.00 6623.94 13.68 14.82 1.353 1.466 Now these numbers look like what you report. The problem here is that we are giving SDP data in 16K chunks (Send Message Size bytes is 16384), and the overhead of pinning 16K, sending it, and unpinning it is too high to give us any benefit. 
Rerunning the whole test with -m instead of -r give me the numbers that I keep reporting: # echo $LD_PRELOAD libsdp.so # echo 0 > /sys/module/ib_sdp/sdp_zcopy_thresh # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---m 64K 87380 16384 65536 60.00 8323.20 12.96 13.64 1.020 1.074 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---m 128K 87380 16384 131072 60.00 6661.77 13.74 13.41 1.352 1.320 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---m 1M 87380 16384 1048576 60.00 6691.83 13.39 13.58 1.312 1.330 # echo 1 > /sys/module/ib_sdp/sdp_zcopy_thresh # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---m 64K 87380 16384 65536 60.00 9052.22 12.88 14.18 0.932 1.027 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---m 128K 87380 16384 131072 60.00 10294.87 12.70 13.44 0.808 0.855 # netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 ---m 1M 87380 16384 1048576 60.00 10254.89 7.74 13.07 0.495 0.835 Maybe this is the problem? My tests are giving sdp_sendmsg() enough data to sink its teeth into. When you send a big buffer, instead of lots of little ones, you can see the benefit. Could you guys try the "-m size" instead of "-r size" and see if that works better? Thanks, JIm Jim Mott Mellanox Technologies Ltd. mail: jim at mellanox.com Phone: 512-294-5481 -----Original Message----- From: Jim Mott Sent: Friday, January 25, 2008 4:07 PM To: 'Scott Weitzenkamp (sweitzen)'; Weikuan Yu Cc: general at lists.openfabrics.org Subject: RE: [ofa-general] RE: [ewg] Not seeing any SDP performance changes inOFED 1.3 beta, and I get Oops when enabling sdp_zcopy_thresh Not today, but I will give it a shot next time I get a free machine. I have tested between Rhat4u4 MLX4 and Rhat4u4 mthca and seen the same trend though. Thanks, JIm Jim Mott Mellanox Technologies Ltd. 
mail: jim at mellanox.com Phone: 512-294-5481 -----Original Message----- From: Scott Weitzenkamp (sweitzen) [mailto:sweitzen at cisco.com] Sent: Friday, January 25, 2008 4:03 PM To: Jim Mott; Weikuan Yu Cc: general at lists.openfabrics.org Subject: RE: [ofa-general] RE: [ewg] Not seeing any SDP performance changes inOFED 1.3 beta, and I get Oops when enabling sdp_zcopy_thresh Is there any way you can make sender and receiver the same RHEL kernel? > -----Original Message----- > From: Jim Mott [mailto:jim at mellanox.com] > Sent: Friday, January 25, 2008 1:58 PM > To: Scott Weitzenkamp (sweitzen); Weikuan Yu > Cc: general at lists.openfabrics.org > Subject: RE: [ofa-general] RE: [ewg] Not seeing any SDP > performance changes inOFED 1.3 beta, and I get Oops when > enabling sdp_zcopy_thresh > > Receive side: > - 2.6.23.8 kernel.org kernel on Rhat5 distro > - HCA is MLX4 with 2.3.914 > I get the same number on released 2.3 firmware > > Send side: > - 2.6.9-42.ELsmp x86_64 (Rhat4u4) > - HCA is MLX4 with 2.3.914 > > I get the same trends (SDP < BZCOPY if message_size > 64K) on > unmodifed > Rhat5, Rhat4u4, and SLES10-SP1-RT distros. I also see it on > kernel.org > kernels 2.6.23.12, 2.6.24-rc2, 2.6.23, and 2.6.22.9. I am in > the midst > of testing some things, so I do not have all the machines available > right now to repeat most of the tests though. > > > Thanks, > JIm > > Jim Mott > Mellanox Technologies Ltd. > mail: jim at mellanox.com > Phone: 512-294-5481 > > > -----Original Message----- > From: Scott Weitzenkamp (sweitzen) [mailto:sweitzen at cisco.com] > Sent: Friday, January 25, 2008 3:39 PM > To: Jim Mott; Weikuan Yu > Cc: general at lists.openfabrics.org > Subject: RE: [ofa-general] RE: [ewg] Not seeing any SDP performance > changes inOFED 1.3 beta, and I get Oops when enabling sdp_zcopy_thresh > > Jim, what kernel and HCA are these numbers for? 
> > Scott > > > > > -----Original Message----- > > From: Jim Mott [mailto:jim at mellanox.com] > > Sent: Friday, January 25, 2008 11:09 AM > > To: Scott Weitzenkamp (sweitzen); Weikuan Yu > > Cc: general at lists.openfabrics.org > > Subject: RE: [ofa-general] RE: [ewg] Not seeing any SDP > > performance changes inOFED 1.3 beta, and I get Oops when > > enabling sdp_zcopy_thresh > > > > Right you are (as usual). > > > > Hunting around these systems shows that I have been using > > netperf-2.4.3 > > for testing. No configuration options; just ./configure; make; make > > install. > > > > To try and understand version differences, I installed 2.4.1 (your > > version?), 2.4.3, and 2.4.4. Built them with default > options and ran > > the tests using each. > > > > Using netperf-2.4.1 and reran "netperf -v2 -4 -H > > 193.168.10.143 -l 30 -t > > TCP_STREAM -c -C -- -m size" with target AMD and driver as > > 8-processor > > Intel: > > > > 64K 128K 1M > > SDP 7749.66 6925.68 6281.17 > > BZCOPY 8492.85 9867.06 11105.50 > > > > I tried running these tests a few times and saw a lot of > > variance in the > > reported results. Reloading 2.4.3 and running the same tests: > > > > 64K 128K 1M > > SDP 7553.77 6747.58 5986.42 > > BZCOPY 8839.46 9572.49 10654.52 > > > > and finally, I tried 2.4.4 and running the same tests: > > > > 64K 128K 1M > > SDP 7935.97 6325.69 7682.65 > > BZCOPY 8905.94 9935.45 10615.03 > > > > At this point, I am confused. The difference between SDP with and > > without Bzcopy is obvious in all three sets of numbers. I can not > > explain why you see something different. > > > > If you could try a vanilla netperf build, it would be > > interesting to see > > if you get any different results. > > > > Thanks, > > JIm > > > > Jim Mott > > Mellanox Technologies Ltd. 
> > mail: jim at mellanox.com > > Phone: 512-294-5481 > > > > > > -----Original Message----- > > From: Scott Weitzenkamp (sweitzen) [mailto:sweitzen at cisco.com] > > Sent: Friday, January 25, 2008 10:36 AM > > To: Jim Mott; Jim Mott; Weikuan Yu > > Cc: general at lists.openfabrics.org > > Subject: RE: [ofa-general] RE: [ewg] Not seeing any SDP performance > > changes inOFED 1.3 beta, and I get Oops when enabling > sdp_zcopy_thresh > > > > > So I see your results (sort of). I have been using the > > > netperf that ships with the OS (Rhat4u4 and Rhat5 mostly) or > > > is built with > > > default options. Maybe that is the difference. > > > > Jim, AFAIK Red Hat does not ship netperf with RHEL. > > > > Scott > > > From sashak at voltaire.com Mon Feb 4 10:48:26 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 4 Feb 2008 18:48:26 +0000 Subject: [ofa-general] Re: [PATCH] opensm/man: partition cfg file location In-Reply-To: <47A6D462.7090904@dev.mellanox.co.il> References: <47A6D462.7090904@dev.mellanox.co.il> Message-ID: <20080204184826.GF1392@sashak.voltaire.com> Hi Yevgeny, On 11:01 Mon 04 Feb , Yevgeny Kliteynik wrote: > Hi Sasha, > > Fixing a wrong partition cfg file location in opensm man page. > > Please apply to ofed_1_3 and master. > > Signed-off-by: Yevgeny Kliteynik > --- > opensm/man/opensm.8 | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 > index ab7fb8e..460e948 100644 > --- a/opensm/man/opensm.8 > +++ b/opensm/man/opensm.8 > @@ -200,7 +200,7 @@ is accumulative. > .TP > \fB\-P\fR, \fB\-\-Pconfig\fR > This option defines the optional partition configuration file. > -The default name is \'/etc/opensm/opensm-partitions.conf\'. > +The default name is \'/etc/ofa/opensm-partitions.conf\'. It is also wrong name - partition config file name is configurable with OpenSM (look at './configure --help') and default default value is '/opensm/partitions.conf'. 'opensm -h' shows valid value. 
It would probably be better to have all config file names configurable in the man pages? Thoughts? Sasha From sashak at voltaire.com Mon Feb 4 10:51:19 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 4 Feb 2008 18:51:19 +0000 Subject: [ofa-general] Re: [PATCH] opensm/man: partition cfg file location In-Reply-To: <20080204184826.GF1392@sashak.voltaire.com> References: <47A6D462.7090904@dev.mellanox.co.il> <20080204184826.GF1392@sashak.voltaire.com> Message-ID: <20080204185119.GG1392@sashak.voltaire.com> On 18:48 Mon 04 Feb , Sasha Khapyorsky wrote: > > --- a/opensm/man/opensm.8 > > +++ b/opensm/man/opensm.8 > > @@ -200,7 +200,7 @@ is accumulative. > > .TP > > \fB\-P\fR, \fB\-\-Pconfig\fR > > This option defines the optional partition configuration file. > > -The default name is \'/etc/opensm/opensm-partitions.conf\'. > > +The default name is \'/etc/ofa/opensm-partitions.conf\'. > > It is also wrong name - partition config file name is configurable with > OpenSM (look at './configure --help') and default default value is > '/opensm/partitions.conf'. 'opensm -h' shows valid value. BTW in OFED-1.3 it is '/etc/opensm/partitions.conf'.
Sasha From eli at mellanox.co.il Mon Feb 4 10:55:47 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Mon, 4 Feb 2008 20:55:47 +0200 Subject: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UDRX S/G supportfor 4K MTU In-Reply-To: <1202146762.11210.235.camel@hrosenstock-ws.xsigo.com> References: <1201718540.6850.41.camel@localhost.localdomain> <1201725009.6850.54.camel@localhost.localdomain> <1201978345.19565.222.camel@localhost.localdomain> <1201988154.19565.229.camel@localhost.localdomain> <1201996963.19565.240.camel@localhost.localdomain> <1202033249.5839.17.camel@mtls03> <1201998120.19565.245.camel@localhost.localdomain> <6C2C79E72C305246B504CBA17B5500C9033B0F44@mtlexch01.mtl.com> <1201998622.19565.250.camel@localhost.localdomain> <1202058449.18209.17.camel@mtls03> <1202146762.11210.235.camel@hrosenstock-ws.xsigo.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C90340E2A5@mtlexch01.mtl.com> Thanks. -----Original Message----- From: Hal Rosenstock [mailto:hrosenstock at xsigo.com] Sent: ב 04 פברואר 2008 19:39 To: Eli Cohen Cc: Shirley Ma; Roland Dreier; general at lists.openfabrics.org; sashak at voltaire.com Subject: Re: [ofa-general] RE: [UPDATE] [V3] [PATCH 3/3] ib/ipoib: IPoIB-UDRX S/G supportfor 4K MTU Eli, On Sun, 2008-02-03 at 19:07 +0200, Eli Cohen wrote: > Hi Shirley, > > I have reviewed the patches against Roland's tree and have the > following > comments: > > 1. I see that there are a few if statements added on the fast pass and > I am concerned they might hurt performance of slow UDP messages. > Unfortunately I have not been able to test with an SM defining the > broadcast group to 4K MTU (currently opensm uses 2K). The default is 2K (mtu=4). You can get opensm to make it 4K if you want as follows: /etc/ofa/opensm-partitions.conf: Default=0x7fff,ipoib,mtu=5:ALL=full; -- Hal > 2. The usage of ipoib_ud_skb_put_frags() seems to be redundant and > will only hurt performance since you would never reuse anything from > the old SKB. 
This is because the headlen is 40 bytes for GRH and the > rest of the data is in the first (and only) fragment. > > 3. I think it would be better to allocate room for real data in the > head of the SKB since the tcp/ip stack seems to have less overhead if > the headers are on the linear data. > > 4. I would consider using a pre-allocated buffer for the GRH of all > received data (not as part of the SKB). > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From pradeeps at linux.vnet.ibm.com Mon Feb 4 12:24:14 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Mon, 04 Feb 2008 12:24:14 -0800 Subject: [ofa-general] Oops with today's OFED 1.3 Message-ID: <47A7746E.6030303@linux.vnet.ibm.com> I pulled today's (Feb 4th) OFED build and saw the following Oops while touch testing on ehca1 on a 2.6.24 kernel. 
Modules linked in: ib_ipoib ib_cm ib_sa ib_uverbs ib_umad ib_ehca ib_mthca ib_mad ib_core joydev st ide_cd ipv6 sg pdc202xx_new e1000 ibmveth dm_mod ipr libata firmware_class sr_mod cdrom sd_mod scsi_mod NIP: d000000000299ca8 LR: d000000000299a70 CTR: d00000000015ec04 REGS: c0000001cc85f3b0 TRAP: 0300 Not tainted (2.6.23-ppc64) MSR: 8000000000009032 CR: 24022424 XER: 00000020 DAR: 000000000000002c, DSISR: 0000000042000000 TASK = c0000001d883d4a0[17052] 'modprobe' THREAD: c0000001cc85c000 CPU: 2 GPR00: 0000000000000000 c0000001cc85f630 d0000000002b5cf0 ffffffffffffffda GPR04: c0000001cc85f760 ffffffffffffffda d0000000002a7eb0 0000000000000000 GPR08: 0000000000000000 0000000000000000 0000000000000001 00000000001b4800 GPR12: d00000000029ef30 c0000000005a8280 c0000001d895aa20 0000000000000000 GPR16: 0000000000000008 0000000000000000 0000000000000000 d00000000040f27e GPR20: 0000000000000211 0000000000000000 0000000000000000 c0000001cd1e0000 GPR24: 0000000000000000 d0000000002ad9d8 d0000000002a7eb0 0000000000000001 GPR28: c0000001cc85f760 0000000000000000 d0000000002b4ce0 c0000001cd1e0780 NIP [d000000000299ca8] .ipoib_cm_dev_init+0x440/0x63c [ib_ipoib] LR [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] Call Trace: [c0000001cc85f630] [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] (unreliable) [c0000001cc85f7d0] [d000000000297f4c] .ipoib_transport_dev_init+0x120/0x458 [ib_ipoib] [c0000001cc85f930] [d00000000029463c] .ipoib_ib_dev_init+0x44/0xb8 [ib_ipoib] [c0000001cc85f9c0] [d0000000002902ec] .ipoib_dev_init+0xe0/0x138 [ib_ipoib] [c0000001cc85fa60] [d000000000290544] .ipoib_add_one+0x200/0x424 [ib_ipoib] [c0000001cc85fb20] [d0000000001610e4] .ib_register_client+0x94/0xf4 [ib_core] [c0000001cc85fbb0] [d00000000029dcac] .ipoib_init_module+0x1f8/0x246c [ib_ipoib] [c0000001cc85fc70] [c0000000000905f0] .sys_init_module+0x176c/0x187c [c0000001cc85fe30] [c00000000000852c] syscall_exit+0x0/0x40 Instruction dump: 801f0f20 3b600000 2f800000 409d0040 
e81f0f30 e97f04f0 7b6926e4 395b0001 7d5b07b4 7c080214 816b0018 7d290214 <9169002c> 60000000 60000000 60000000 I tracked this down to the following area of code: + for (j = 0; j < ipoib_recvq_size; ++j) { + for (i = 0; i < priv->cm.num_frags; ++i) + priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey; in ipoib_0230_srq_post_n.patch. Removing this patch and re-running the touch test seems to solve the problem. Pradeep From eitan at mellanox.co.il Mon Feb 4 12:40:08 2008 From: eitan at mellanox.co.il (Eitan Zahavi) Date: Mon, 4 Feb 2008 22:40:08 +0200 Subject: [ofa-general] IBTA Management Working Group: Extending SubnMgmt attributes Message-ID: <6C2C79E72C305246B504CBA17B5500C90340E307@mtlexch01.mtl.com> Dear IB Management Developer, This mail serves as a notice to the IB management developers community. During its work the MgtWG has discovered that some of the SubnMgmt class attributes were defined such that the space after the last attribute component was not explicitly reserved. This space is important to allow extension of existing attributes with new optional components - enabling future enhancements to the specification. Although the use of the "spare" space was ratified by IBTA committees the MgtWG decided to approach the developer community before the first standardized extension makes use of that space. Not only will the SubnMgmt attribute (e.g. SwitchInfo) get extended, but its occurrences in the response SubnAdmin Record (e.g. SwitchInfoRecord) need to be updated too. Note that the existing specification enables modification of the size of the embedded attributes - by defining the AttributeOffset component of the RMPP header. But it is the responsibility of the RMPP requestor and responder implementations to coordinate the new size of the attributes. Please send feedback to mgtwg at infinibandta.org or directly to me. Thanks Eitan Zahavi IBTA MgtWG Co-Chair -------------- next part -------------- An HTML attachment was scrubbed...
URL: From arlin.r.davis at intel.com Mon Feb 4 12:58:49 2008 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 4 Feb 2008 12:58:49 -0800 Subject: [ofa-general] [PATCH] DAPL v2 - fix package to build against and target /dat2/include Message-ID: <000001c86770$bd944cb0$9f97070a@amr.corp.intel.com> Fix OFED v2 package to build against and target /dat2/include directory. Signed-off by: Arlin Davis diff --git a/Makefile.am b/Makefile.am index c4cb1bf..60b3db6 100755 --- a/Makefile.am +++ b/Makefile.am @@ -183,17 +183,17 @@ dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0 $(daplofa_version_script) libdatincludedir = $(includedir)/dat2 -libdatinclude_HEADERS = dat/include/dat/dat.h \ - dat/include/dat/dat_error.h \ - dat/include/dat/dat_platform_specific.h \ - dat/include/dat/dat_redirection.h \ - dat/include/dat/dat_registry.h \ - dat/include/dat/dat_vendor_specific.h \ - dat/include/dat/udat_config.h \ - dat/include/dat/udat.h \ - dat/include/dat/udat_redirection.h \ - dat/include/dat/udat_vendor_specific.h \ - dat/include/dat/dat_ib_extensions.h +libdatinclude_HEADERS = dat/include/dat2/dat.h \ + dat/include/dat2/dat_error.h \ + dat/include/dat2/dat_platform_specific.h \ + dat/include/dat2/dat_redirection.h \ + dat/include/dat2/dat_registry.h \ + dat/include/dat2/dat_vendor_specific.h \ + dat/include/dat2/udat_config.h \ + dat/include/dat2/udat.h \ + dat/include/dat2/udat_redirection.h \ + dat/include/dat2/udat_vendor_specific.h \ + dat/include/dat2/dat_ib_extensions.h man_MANS = man/dtest.1 man/dapltest.1 @@ -203,16 +203,16 @@ EXTRA_DIST = dat/common/dat_dictionary.h \ dat/common/dat_sr.h \ dat/udat/udat_sr_parser.h \ dat/udat/linux/dat_osd.h \ - dat/include/dat/dat.h \ - dat/include/dat/dat_error.h \ - dat/include/dat/dat_platform_specific.h \ - dat/include/dat/dat_redirection.h \ - dat/include/dat/dat_registry.h \ - dat/include/dat/dat_vendor_specific.h \ - dat/include/dat/udat_config.h \ - dat/include/dat/udat.h \ - dat/include/dat/udat_redirection.h \ 
- dat/include/dat/udat_vendor_specific.h \ + dat/include/dat2/dat.h \ + dat/include/dat2/dat_error.h \ + dat/include/dat2/dat_platform_specific.h \ + dat/include/dat2/dat_redirection.h \ + dat/include/dat2/dat_registry.h \ + dat/include/dat2/dat_vendor_specific.h \ + dat/include/dat2/udat_config.h \ + dat/include/dat2/udat.h \ + dat/include/dat2/udat_redirection.h \ + dat/include/dat2/udat_vendor_specific.h \ dapl/common/dapl_adapter_util.h \ dapl/common/dapl_cno_util.h \ dapl/common/dapl_cookie.h \ diff --git a/dapl/ibal/dapl_ibal_dto.h b/dapl/ibal/dapl_ibal_dto.h index be5686f..283fd91 100644 --- a/dapl/ibal/dapl_ibal_dto.h +++ b/dapl/ibal/dapl_ibal_dto.h @@ -52,7 +52,7 @@ #include "dapl_ibal_util.h" #ifdef DAT_EXTENSIONS -#include +#include #endif extern DAT_RETURN diff --git a/dapl/include/dapl.h b/dapl/include/dapl.h index 49a3f49..42db81e 100755 --- a/dapl/include/dapl.h +++ b/dapl/include/dapl.h @@ -45,11 +45,11 @@ #define _DAPL_H_ #if defined(__KERNEL__) -#include +#include #else -#include +#include #endif /* defined(__KERNEL__) */ -#include +#include #include "dapl_osd.h" #include "dapl_debug.h" diff --git a/dapl/openib_cma/dapl_ib_dto.h b/dapl/openib_cma/dapl_ib_dto.h index 09cef14..cea989b 100644 --- a/dapl/openib_cma/dapl_ib_dto.h +++ b/dapl/openib_cma/dapl_ib_dto.h @@ -51,7 +51,7 @@ #include "dapl_ib_util.h" #ifdef DAT_EXTENSIONS -#include +#include #endif #define DEFAULT_DS_ENTRIES 8 diff --git a/dapl/udapl/dapl_init.c b/dapl/udapl/dapl_init.c index 94a4619..ce92f9f 100644 --- a/dapl/udapl/dapl_init.c +++ b/dapl/udapl/dapl_init.c @@ -37,7 +37,7 @@ **********************************************************************/ #include "dapl.h" -#include /* Provider API function prototypes */ +#include /* Provider API function prototypes */ #include "dapl_hca_util.h" #include "dapl_init.h" #include "dapl_provider.h" diff --git a/dapl/udapl/linux/dapl_osd.h b/dapl/udapl/linux/dapl_osd.h index e932ae0..caf971f 100644 --- a/dapl/udapl/linux/dapl_osd.h +++ 
b/dapl/udapl/linux/dapl_osd.h @@ -54,7 +54,7 @@ #endif -#include +#include #include #include #include diff --git a/dat/common/dat_api.c b/dat/common/dat_api.c index a381f05..cb4105d 100755 --- a/dat/common/dat_api.c +++ b/dat/common/dat_api.c @@ -47,9 +47,10 @@ * $Id: dat_api.c 1326 2005-05-20 22:25:31Z jlentini $ **********************************************************************/ +#include +#include #include "dat_osd.h" #include "dat_init.h" -#include /* * structure to deal with IA handles diff --git a/dat/common/dat_dr.c b/dat/common/dat_dr.c index 640d808..6d79829 100644 --- a/dat/common/dat_dr.c +++ b/dat/common/dat_dr.c @@ -37,9 +37,8 @@ * $Id: dat_dr.c,v 1.17 2005/03/24 05:58:27 jlentini Exp $ **********************************************************************/ - +#include #include "dat_dr.h" - #include "dat_dictionary.h" diff --git a/dat/common/dat_dr.h b/dat/common/dat_dr.h index 752042e..3012252 100644 --- a/dat/common/dat_dr.h +++ b/dat/common/dat_dr.h @@ -42,7 +42,7 @@ #include "dat_osd.h" -#include /* Provider API function prototypes */ +#include /* Provider API function prototypes */ /********************************************************************* diff --git a/dat/common/dat_init.c b/dat/common/dat_init.c index 20b3746..56dd11d 100644 --- a/dat/common/dat_init.c +++ b/dat/common/dat_init.c @@ -38,8 +38,8 @@ * $Id: dat_init.c,v 1.18 2005/03/24 05:58:27 jlentini Exp $ **********************************************************************/ +#include #include "dat_init.h" - #include "dat_dr.h" #include "dat_osd.h" diff --git a/dat/common/dat_sr.h b/dat/common/dat_sr.h index efdc898..86be8a0 100644 --- a/dat/common/dat_sr.h +++ b/dat/common/dat_sr.h @@ -41,8 +41,8 @@ #define _DAT_SR_H_ -#include -#include +#include +#include #include "dat_osd.h" diff --git a/dat/common/dat_strerror.c b/dat/common/dat_strerror.c index 885a261..d14b60f 100644 --- a/dat/common/dat_strerror.c +++ b/dat/common/dat_strerror.c @@ -38,9 +38,9 @@ 
**********************************************************************/ #ifdef __KDAPL__ -#include +#include #else /*__UDAPL__*/ -#include +#include #endif /* __UDAPL__ */ /********************************************************************* diff --git a/dat/include/dat2/dat.h b/dat/include/dat2/dat.h index ed0ac1e..d4e4cea 100755 --- a/dat/include/dat2/dat.h +++ b/dat/include/dat2/dat.h @@ -56,7 +56,7 @@ #ifndef _DAT_H_ #define _DAT_H_ -#include +#include #ifdef __cplusplus extern "C" diff --git a/dat/include/dat2/dat_platform_specific.h b/dat/include/dat2/dat_platform_specific.h index a058301..b46097e 100644 --- a/dat/include/dat2/dat_platform_specific.h +++ b/dat/include/dat2/dat_platform_specific.h @@ -90,7 +90,7 @@ * #include * #include * #include - * #include + * #include * * struct sockaddr_in6 addr; * DAT_IA_ADDRESS_PTR ia_addr; diff --git a/dat/include/dat2/dat_registry.h b/dat/include/dat2/dat_registry.h index 80c3801..2c0edcb 100644 --- a/dat/include/dat2/dat_registry.h +++ b/dat/include/dat2/dat_registry.h @@ -65,9 +65,9 @@ extern "C" #endif #if defined(_UDAT_H_) -#include +#include #elif defined(_KDAT_H_) -#include +#include #else #error Must include udat.h or kdat.h #endif diff --git a/dat/include/dat2/kdat.h b/dat/include/dat2/kdat.h index 848a22e..704c1cb 100644 --- a/dat/include/dat2/kdat.h +++ b/dat/include/dat2/kdat.h @@ -57,9 +57,9 @@ #ifndef _KDAT_H_ #define _KDAT_H_ -#include +#include -#include +#include #if 1 #define EXPORT_SYMBOL_NOVERS(sym) EXPORT_SYMBOL(sym) @@ -109,7 +109,7 @@ typedef enum dat_evd_param_mask typedef DAT_UINT64 DAT_PROVIDER_ATTR_MASK; -#include +#include typedef DAT_CONTEXT DAT_LMR_COOKIE; @@ -313,7 +313,7 @@ struct dat_ia_attr /* General Provider attributes. kdat specific. */ -#include +#include /* Provider should support merging of all event stream types. Provider * attribute specify support for merging different event stream types. 
diff --git a/dat/include/dat2/kdat_redirection.h b/dat/include/dat2/kdat_redirection.h index 038acb3..d7e5628 100644 --- a/dat/include/dat2/kdat_redirection.h +++ b/dat/include/dat2/kdat_redirection.h @@ -220,7 +220,7 @@ typedef DAT_RETURN (*DAT_IA_RESERVED_LMR_FUNC) ( OUT DAT_LMR_HANDLE *, /* lmr_handle */ OUT DAT_LMR_CONTEXT * ); /* lmr_context */ -#include +#include struct dat_provider { diff --git a/dat/include/dat2/kdat_vendor_specific.h b/dat/include/dat2/kdat_vendor_specific.h index fe8e873..3a89d19 100644 --- a/dat/include/dat2/kdat_vendor_specific.h +++ b/dat/include/dat2/kdat_vendor_specific.h @@ -50,7 +50,7 @@ #ifndef _KDAT_VENDOR_SPECIFIC_H_ #define _KDAT_VENDOR_SPECIFIC_H_ -#include +#include /* Vendor-specific extensions */ diff --git a/dat/include/dat2/udat.h b/dat/include/dat2/udat.h index a9bb2ac..4ea491e 100755 --- a/dat/include/dat2/udat.h +++ b/dat/include/dat2/udat.h @@ -56,9 +56,8 @@ #ifndef _UDAT_H_ #define _UDAT_H_ -#include - -#include +#include +#include #ifdef __cplusplus extern "C" @@ -139,7 +138,7 @@ enum dat_lmr_param_mask DAT_LMR_FIELD_ALL = 0x7FF }; -#include +#include typedef DAT_HANDLE DAT_CNO_HANDLE; @@ -323,7 +322,7 @@ typedef enum dat_pz_support DAT_PZ_SHAREABLE } DAT_PZ_SUPPORT; -#include +#include /* Provider should support merging of all event stream types. Provider * attribute specify support for merging different event stream types. 
diff --git a/dat/include/dat2/udat_redirection.h b/dat/include/dat2/udat_redirection.h index d73f9bd..f180417 100755 --- a/dat/include/dat2/udat_redirection.h +++ b/dat/include/dat2/udat_redirection.h @@ -237,7 +237,7 @@ typedef DAT_RETURN (DAT_API *DAT_EVD_CLEAR_UNWAITABLE_FUNC) ( IN DAT_EVD_HANDLE); /* evd_handle */ -#include +#include struct dat_provider { diff --git a/dat/include/dat2/udat_vendor_specific.h b/dat/include/dat2/udat_vendor_specific.h index 2a8bd94..dd955f8 100644 --- a/dat/include/dat2/udat_vendor_specific.h +++ b/dat/include/dat2/udat_vendor_specific.h @@ -50,7 +50,7 @@ #ifndef _UDAT_VENDOR_SPECIFIC_H_ #define _UDAT_VENDOR_SPECIFIC_H_ -#include +#include /* Vendor-specific extensions */ diff --git a/dat/udat/udat.c b/dat/udat/udat.c index 2bf5678..bb1c580 100755 --- a/dat/udat/udat.c +++ b/dat/udat/udat.c @@ -37,8 +37,8 @@ * $Id: udat.c,v 1.22 2005/03/24 05:58:35 jlentini Exp $ **********************************************************************/ -#include -#include /* Provider API function prototypes */ +#include +#include /* Provider API function prototypes */ #include "dat_dr.h" #include "dat_init.h" diff --git a/dat/udat/udat_api.c b/dat/udat/udat_api.c index 9a9ea4b..a77d42b 100644 --- a/dat/udat/udat_api.c +++ b/dat/udat/udat_api.c @@ -46,8 +46,9 @@ * $Id: udat_api.c 1301 2005-03-24 05:58:55Z jlentini $ **********************************************************************/ +#include +#include #include "dat_osd.h" -#include #include "dat_init.h" #define UDAT_IS_BAD_HANDLE(h) ( NULL == (p) ) diff --git a/dat/udat/udat_sr_parser.c b/dat/udat/udat_sr_parser.c index 84b5b9d..5761e3b 100644 --- a/dat/udat/udat_sr_parser.c +++ b/dat/udat/udat_sr_parser.c @@ -37,7 +37,7 @@ * $Id: udat_sr_parser.c,v 1.6 2005/03/24 05:58:36 jlentini Exp $ **********************************************************************/ - +#include #include "udat_sr_parser.h" #include "dat_sr.h" diff --git a/test/dapltest/Makefile.am b/test/dapltest/Makefile.am index 
18660c8..bf3fc2b 100755 --- a/test/dapltest/Makefile.am +++ b/test/dapltest/Makefile.am @@ -8,7 +8,7 @@ dapltest_CFLAGS = $(XFLAGS) INCLUDES = -I include \ -I mdep/linux \ - -I $(srcdir)/../../dat/include + -I $(srcdir)/../../dat2/include bin_PROGRAMS = dapltest diff --git a/test/dapltest/include/dapl_proto.h b/test/dapltest/include/dapl_proto.h index 98785eb..9de42e2 100644 --- a/test/dapltest/include/dapl_proto.h +++ b/test/dapltest/include/dapl_proto.h @@ -32,9 +32,9 @@ #define __DAPL_PROTO_H__ #ifdef __KERNEL__ -#include +#include #else -#include +#include #include #include diff --git a/test/dapltest/mdep/linux/dapl_mdep_kernel.h b/test/dapltest/mdep/linux/dapl_mdep_kernel.h index 13a4d39..c0734e9 100644 --- a/test/dapltest/mdep/linux/dapl_mdep_kernel.h +++ b/test/dapltest/mdep/linux/dapl_mdep_kernel.h @@ -40,7 +40,7 @@ # include # include # include -# include +# include # include "kdapl_ioctl.h" #ifndef UPCALL_FROM_IRQ #include /* for spin_lock_bh */ diff --git a/test/dtest/Makefile.am b/test/dtest/Makefile.am index aabd026..f4b86ea 100755 --- a/test/dtest/Makefile.am +++ b/test/dtest/Makefile.am @@ -9,6 +9,6 @@ dtestx_CFLAGS = -DDAT_EXTENSIONS dtestx_LDADD = $(srcdir)/../../dat/udat/libdat2.la endif -INCLUDES = -I $(srcdir)/../../dat/include +INCLUDES = -I $(srcdir)/../../dat2/include dtest_LDADD = $(srcdir)/../../dat/udat/libdat2.la diff --git a/test/dtest/dtest.c b/test/dtest/dtest.c index 314dcdf..57b5790 100755 --- a/test/dtest/dtest.c +++ b/test/dtest/dtest.c @@ -115,7 +115,7 @@ #define MAX_PROCS 1000 /* Header files needed for DAT/uDAPL */ -#include "dat/udat.h" +#include "dat2/udat.h" /* definitions */ #define SERVER_CONN_QUAL 45248 diff --git a/test/dtest/dtestx.c b/test/dtest/dtestx.c index 568b09a..58fa85f 100755 --- a/test/dtest/dtestx.c +++ b/test/dtest/dtestx.c @@ -59,8 +59,8 @@ #endif -#include "dat/udat.h" -#include "dat/dat_ib_extensions.h" +#include "dat2/udat.h" +#include "dat2/dat_ib_extensions.h" int disconnect_ep(void); From 
pawel.dziekonski at pwr.wroc.pl Mon Feb 4 13:19:08 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Mon, 4 Feb 2008 22:19:08 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: References: <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> Message-ID: <20080204211908.GB15115@cefeid.wcss.wroc.pl> On Mon, 04 Feb 2008 at 11:09:43AM -0500, James Lentini wrote: > I'm referring to the OFED 1.2 NFS/RDMA package you cited above: > > http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so > > You can NOT install that on Tom Tucker's kernel. That is an old > version of the NFS/RDMA software that will conflict with the latest > code. > > If the OFED version you want to use is supported on Tom Tucker's > kernel (2.6.24-rc6), there should be no problem using OFED. The OFED > 1.2 NFS/RDMA release is a different story. OK! I do not insist on this version of OFED. I was trying to use it because of the lack of information about which OFED is compatible with Tom's tree, and because OFED-1.2-NFS-RDMA.gz has RDMA in its name. ;) I want to use ANY version of OFED that would allow me to use nfs/rdma TOGETHER with IPoIB, MPIoIB and SDP - please suggest which version that would be. I really appreciate your help! Pawel -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From weiny2 at llnl.gov Mon Feb 4 13:20:51 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 4 Feb 2008 13:20:51 -0800 Subject: [ofa-general] [PATCH] Update man page for configurable partition and prefix-routes file.
(WAS: Re: [PATCH] opensm/man: partition cfg file location) In-Reply-To: <20080204185119.GG1392@sashak.voltaire.com> References: <47A6D462.7090904@dev.mellanox.co.il> <20080204184826.GF1392@sashak.voltaire.com> <20080204185119.GG1392@sashak.voltaire.com> Message-ID: <20080204132051.61e28322.weiny2@llnl.gov> This is my bad. When I changed the config file locations and "configurability" I should have updated the man page. This patch fixes this to reflect the location and names chosen at configure time. There will be a follow-up patch (in another email) which adds the other config files to the "FILES" section. It depends on this change. Sorry, Ira On Mon, 4 Feb 2008 18:51:19 +0000 Sasha Khapyorsky wrote: > On 18:48 Mon 04 Feb , Sasha Khapyorsky wrote: > > > --- a/opensm/man/opensm.8 > > > +++ b/opensm/man/opensm.8 > > > @@ -200,7 +200,7 @@ is accumulative. > > > .TP > > > \fB\-P\fR, \fB\-\-Pconfig\fR > > > This option defines the optional partition configuration file. > > > -The default name is \'/etc/opensm/opensm-partitions.conf\'. > > > +The default name is \'/etc/ofa/opensm-partitions.conf\'. > > > > It is also the wrong name - the partition config file name is configurable with > > OpenSM (look at './configure --help') and the default value is > > '/opensm/partitions.conf'. 'opensm -h' shows the valid value. > > BTW in OFED-1.3 it is '/etc/opensm/partitions.conf'. > > Sasha >From cd1344594a988f2f18a903ac454ae858f42490c0 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Mon, 4 Feb 2008 13:05:16 -0800 Subject: [PATCH] Update man page for configurable partition and prefix-routes file. This changes the man page to be auto-generated based on the chosen configure options. Signed-off-by: Ira K.
Weiny --- opensm/configure.in | 4 + opensm/man/opensm.8 | 941 ------------------------------------------------ opensm/man/opensm.8.in | 941 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 945 insertions(+), 941 deletions(-) delete mode 100644 opensm/man/opensm.8 create mode 100644 opensm/man/opensm.8.in diff --git a/opensm/configure.in b/opensm/configure.in index 79a914e..455630e 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -95,6 +95,7 @@ CONF_DIR="`eval echo $CONF_DIR_TMP2`" AC_DEFINE_UNQUOTED(OPENSM_CONFIG_DIR, ["$CONF_DIR"], [Define OpenSM config directory]) +AC_SUBST(CONF_DIR) dnl Check for a different default node name map file NODENAMEMAPFILE=ib-node-name-map @@ -135,6 +136,7 @@ AC_MSG_RESULT(${withpartitionsconf=no}) AC_DEFINE_UNQUOTED(HAVE_DEFAULT_PARTITION_CONFIG_FILE, ["$CONF_DIR/$PARTITION_CONFIG_FILE"], [Define a QOS policy config file]) +AC_SUBST(PARTITION_CONFIG_FILE) dnl Check for a different QOS policy file QOS_POLICY_FILE=qos-policy.conf @@ -172,5 +174,7 @@ OPENIB_APP_OSMV_CHECK_LIB # overrides. 
CFLAGS=$ac_env_CFLAGS_value +AC_CONFIG_FILES([man/opensm.8]) + dnl Create the following Makefiles AC_OUTPUT([include/opensm/osm_version.h Makefile include/Makefile complib/Makefile libvendor/Makefile opensm/Makefile osmeventplugin/Makefile osmtest/Makefile opensm.spec]) diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 deleted file mode 100644 index ab7fb8e..0000000 --- a/opensm/man/opensm.8 +++ /dev/null @@ -1,941 +0,0 @@ -.TH OPENSM 8 "Aug 16, 2007" "OpenIB" "OpenIB Management" - -.SH NAME -opensm \- InfiniBand subnet manager and administration (SM/SA) - -.SH SYNOPSIS -.B opensm -[\-c(ache-options)] [\-g(uid)[=]] [\-l(mc) ] -[\-p(riority) ] [\-smkey ] [\-r(eassign_lids)] -[\-R | \-\-routing_engine ] -[\-z | \-\-connect_roots] -[\-M | \-\-lid_matrix_file ] -[\-U | \-\-ucast_file ] -[\-S | \-\-sadb_file ] [\-a | \-\-root_guid_file ] -[\-u | \-\-cn_guid_file ] [\-o(nce)] [\-s(weep) ] -[\-t(imeout) ] [\-maxsmps ] -[\-console [off | local | socket | loopback]] [\-console-port ] -[\-i(gnore-guids) ] [\-f | \-\-log_file] -[\-L | \-\-log_limit ] [\-e(rase_log_file)] [\-P(config)] -[\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] -[\-B | \-\-daemon] [\-I | \-\-inactive] -[\-\-perfmgr] [\-\-perfmgr_sweep_time_s ] -[\-\-prefix_routes_file ] -[\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] - -.SH DESCRIPTION -.PP -opensm is an InfiniBand compliant Subnet Manager and Administration, -and runs on top of OpenIB. - -opensm provides an implementation of an InfiniBand Subnet Manager and -Administration. Such a software entity is required to run for in order -to initialize the InfiniBand hardware (at least one per each -InfiniBand subnet). - -opensm also now contains an experimental version of a performance -manager as well. - -opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes. Thus, in this default mode, opensm will scan the IB -fabric, initialize it, and sweep occasionally for changes. 
- -opensm attaches to a specific IB port on the local machine and configures only -the fabric connected to it. (If the local machine has other IB ports, -opensm will ignore the fabrics connected to those other ports). If no port is -specified, it will select the first "best" available port. - -opensm can present the available ports and prompt for a port number to -attach to. - -By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log. -The first file will register only general major events, whereas the second -will include details of reported errors. All errors reported in this second -file should be treated as indicators of IB fabric health issues. -(Note that when a fatal and non-recoverable error occurs, opensm will exit.) -Both log files should include the message "SUBNET UP" if opensm was able to -setup the subnet correctly. - -.SH OPTIONS - -.PP -.TP -\fB\-c\fR, \fB\-\-cache-options\fR -Write out a list of all tunable OpenSM parameters, -including their current values from the command line -as well as defaults for others, into the file -OSM_CACHE_DIR/opensm.opts (OSM_CACHE_DIR defaults to -/var/cache/opensm if the corresponding environment -variable is not set). The options file is then -used for subsequent OpenSM invocations but any -command line options take precedence. -.TP -\fB\-g\fR, \fB\-\-guid\fR -This option specifies the local port GUID value -with which OpenSM should bind. OpenSM may be -bound to 1 port at a time. -If GUID given is 0, OpenSM displays a list -of possible port GUIDs and waits for user input. -Without -g, OpenSM tries to use the default port. -.TP -\fB\-l\fR, \fB\-\-lmc\fR -This option specifies the subnet's LMC value. -The number of LIDs assigned to each port is 2^LMC. -The LMC value must be in the range 0-7. -LMC values > 0 allow multiple paths between ports. -LMC values > 0 should only be used if the subnet -topology actually provides multiple paths between -ports, i.e. 
multiple interconnects between switches. -Without -l, OpenSM defaults to LMC = 0, which allows -one path between any two ports. -.TP -\fB\-p\fR, \fB\-\-priority\fR -This option specifies the SM\'s PRIORITY. -This will effect the handover cases, where master -is chosen by priority and GUID. Range goes from 0 -(default and lowest priority) to 15 (highest). -.TP -\fB\-smkey\fR -This option specifies the SM\'s SM_Key (64 bits). -This will effect SM authentication. -.TP -\fB\-r\fR, \fB\-\-reassign_lids\fR -This option causes OpenSM to reassign LIDs to all -end nodes. Specifying -r on a running subnet -may disrupt subnet traffic. -Without -r, OpenSM attempts to preserve existing -LID assignments resolving multiple use of same LID. -.TP -\fB\-R\fR, \fB\-\-routing_engine\fR -This option chooses routing engine instead of Min Hop -algorithm (default). -Supported engines: minhop, updn, file, ftree, lash, dor -.TP -\fB\-z\fR, \fB\-\-connect_roots\fR -This option enforces a routing engine (currently up/down -only) to make connectivity between root switches and in -this way to be fully IBA complaint. In many cases this can -violate "pure" deadlock free algorithm, so use it carefully. -.TP -\fB\-M\fR, \fB\-\-lid_matrix_file\fR -This option specifies the name of the lid matrix dump file -from where switch lid matrices (min hops tables will be -loaded. -.TP -\fB\-U\fR, \fB\-\-ucast_file\fR -This option specifies the name of the unicast dump file -from where switch forwarding tables will be loaded. -.TP -\fB\-S\fR, \fB\-\-sadb_file\fR -This option specifies the name of the SA DB dump file -from where SA database will be loaded. -.TP -\fB\-a\fR, \fB\-\-root_guid_file\fR -Set the root nodes for the Up/Down or Fat-Tree routing -algorithm to the guids provided in the given file (one to a line). -.TP -\fB\-u\fR, \fB\-\-cn_guid_file\fR -Set the compute nodes for the Fat-Tree routing algorithm -to the guids provided in the given file (one to a line). 
-.TP -\fB\-o\fR, \fB\-\-once\fR -This option causes OpenSM to configure the subnet -once, then exit. Ports remain in the ACTIVE state. -.TP -\fB\-s\fR, \fB\-\-sweep\fR -This option specifies the number of seconds between -subnet sweeps. Specifying -s 0 disables sweeping. -Without -s, OpenSM defaults to a sweep interval of -10 seconds. -.TP -\fB\-t\fR, \fB\-\-timeout\fR -This option specifies the time in milliseconds -used for transaction timeouts. -Specifying -t 0 disables timeouts. -Without -t, OpenSM defaults to a timeout value of -200 milliseconds. -.TP -\fB\-maxsmps\fR -This option specifies the number of VL15 SMP MADs -allowed on the wire at any one time. -Specifying -maxsmps 0 allows unlimited outstanding -SMPs. -Without -maxsmps, OpenSM defaults to a maximum of -4 outstanding SMPs. -.TP -\fB\-console [off | local | socket | loopback]\fR -This option brings up the OpenSM console (default off). -Note that the socket and loopback options will only be available -if OpenSM was built with --enable-console-socket. -.TP -\fB\-console-port\fR -Specify an alternate telnet port for the socket console (default 10000). -Note that this option only appears if OpenSM was built with ---enable-console-socket. -.TP -\fB\-i\fR, \fB\-ignore-guids\fR -This option provides the means to define a set of ports -(by guid) that will be ignored by the link load -equalization algorithm. -.TP -\fB\-x\fR, \fB\-\-honor_guid2lid\fR -This option forces OpenSM to honor the guid2lid file, -when it comes out of Standby state, if such file exists -under OSM_CACHE_DIR, and is valid. -By default, this is FALSE. -.TP -\fB\-f\fR, \fB\-\-log_file\fR -This option defines the log to be the given file. -By default, the log goes to /var/log/opensm.log. -For the log to go to standard output use -f stdout. -.TP -\fB\-L\fR, \fB\-\-log_limit\fR -This option defines maximal log file size in MB. When -specified the log file will be truncated upon reaching -this limit. 
-.TP -\fB\-e\fR, \fB\-\-erase_log_file\fR -This option will cause deletion of the log file -(if it previously exists). By default, the log file -is accumulative. -.TP -\fB\-P\fR, \fB\-\-Pconfig\fR -This option defines the optional partition configuration file. -The default name is \'/etc/opensm/opensm-partitions.conf\'. -.TP -.BI --prefix_routes_file= path -Prefix routes control how the SA responds to path record queries for -off-subnet DGIDs. By default, the SA fails such queries. The -.B PREFIX ROUTES -section below describes the format of the configuration file. -The default path is \fB\%/etc/ofa/opensm\-prefix\-routes.conf\fP. -.TP -\fB\-Q\fR, \fB\-\-qos\fR -This option enables QoS setup. It is disabled by default. -.TP -\fB\-N\fR, \fB\-\-no_part_enforce\fR -This option disables partition enforcement on switch external ports. -.TP -\fB\-y\fR, \fB\-\-stay_on_fatal\fR -This option will cause SM not to exit on fatal initialization -issues: if SM discovers duplicated guids or a 12x link with -lane reversal badly configured. -By default, the SM will exit on these errors. -.TP -\fB\-B\fR, \fB\-\-daemon\fR -Run in daemon mode - OpenSM will run in the background. -.TP -\fB\-I\fR, \fB\-\-inactive\fR -Start SM in inactive rather than init SM state. This -option can be used in conjunction with the perfmgr so as to -run a standalone performance manager without SM/SA. However, -this is NOT currently implemented in the performance manager. -.TP -\fB\-perfmgr\fR -Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at -configure time. -.TP -\fB\-perfmgr_sweep_time_s\fR -Specify the sweep time for the performance manager in seconds -(default is 180 seconds). Only takes -effect if --enable-perfmgr was specified at configure time. -.TP -.BI --consolidate_ipv6_snm_reqests -Consolidate IPv6 Solicited Node Multicast group joins into 1 IB multicast -group. -.TP -\fB\-v\fR, \fB\-\-verbose\fR -This option increases the log verbosity level. 
-The -v option may be specified multiple times -to further increase the verbosity level. -See the -D option for more information about -log verbosity. -.TP -\fB\-V\fR -This option sets the maximum verbosity level and -forces log flushing. -The -V option is equivalent to \'-D 0xFF -d 2\'. -See the -D option for more information about -log verbosity. -.TP -\fB\-D\fR -This option sets the log verbosity level. -A flags field must follow the -D option. -A bit set/clear in the flags enables/disables a -specific log level as follows: - - BIT LOG LEVEL ENABLED - ---- ----------------- - 0x01 - ERROR (error messages) - 0x02 - INFO (basic messages, low volume) - 0x04 - VERBOSE (interesting stuff, moderate volume) - 0x08 - DEBUG (diagnostic, high volume) - 0x10 - FUNCS (function entry/exit, very high volume) - 0x20 - FRAMES (dumps all SMP and GMP frames) - 0x40 - ROUTING (dump FDB routing information) - 0x80 - currently unused. - -Without -D, OpenSM defaults to ERROR + INFO (0x3). -Specifying -D 0 disables all messages. -Specifying -D 0xFF enables all messages (see -V). -High verbosity levels may require increasing -the transaction timeout with the -t option. -.TP -\fB\-d\fR, \fB\-\-debug\fR -This option specifies a debug option. -These options are not normally needed. -The number following -d selects the debug -option to enable as follows: - - OPT Description - --- ----------------- - -d0 - Ignore other SM nodes - -d1 - Force single threaded dispatching - -d2 - Force log flushing after each log message - -d3 - Disable multicast support -.TP -\fB\-h\fR, \fB\-\-help\fR -Display this usage info then exit. -.TP -\fB\-?\fR -Display this usage info then exit. - -.SH ENVIRONMENT VARIABLES -.PP -The following environment variables control opensm behavior: - -OSM_TMP_DIR - controls the directory in which the temporary files generated by -opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and -opensm.mcfdbs. By default, this directory is /var/log. 
- -OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent -runs are consistent. The default directory used is /var/cache/opensm. -The following files are included in it: - - guid2lid - stores the LID range assigned to each GUID - - opensm.opts - an optional file that holds a complete set of opensm - configuration options - -.SH NOTES -.PP -When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found. -.PP -Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for -logrotate purposes. - -.SH PARTITION CONFIGURATION -.PP -The default name of OpenSM partitions configuration file is -\'/etc/ofa/opensm-partitions.conf\'. The default may be changed by using ---Pconfig (-P) option with OpenSM. - -The default partition will be created by OpenSM unconditionally even -when partition configuration file does not exist or cannot be accessed. - -The default partition has P_Key value 0x7fff. OpenSM\'s port will have -full membership in default partition. All other end ports will have -partial membership. - -File Format - -Comments: - -Line content followed after \'#\' character is comment and ignored by -parser. - -General file format: - -: ; - -Partition Definition: - -[PartitionName][=PKey][,flag[=value]][,defmember=full|limited] - - PartitionName - string, will be used with logging. When omitted - empty string will be used. - PKey - P_Key value for this partition. Only low 15 bits will - be used. When omitted will be autogenerated. - flag - used to indicate IPoIB capability of this partition. - defmember=full|limited - specifies default membership for port guid - list. Default is limited. - -Currently recognized flags are: - - ipoib - indicates that this partition may be used for IPoIB, as - result IPoIB capable MC group will be created. 
- rate= - specifies rate for this IPoIB MC group - (default is 3 (10GBps)) - mtu= - specifies MTU for this IPoIB MC group - (default is 4 (2048)) - sl= - specifies SL for this IPoIB MC group - (default is 0) - scope= - specifies scope for this IPoIB MC group - (default is 2 (link local)). Multiple scope settings - are permitted for a partition. - -Note that values for rate, mtu, and scope should be specified as -defined in the IBTA specification (for example, mtu=4 for 2048). - -PortGUIDs list: - - PortGUID - GUID of partition member EndPort. Hexadecimal - numbers should start from 0x, decimal numbers - are accepted too. - full or limited - indicates full or limited membership for this - port. When omitted (or unrecognized) limited - membership is assumed. - -There are two useful keywords for PortGUID definition: - - - 'ALL' means all end ports in this subnet. - - 'SELF' means subnet manager's port. - -Empty list means no ports in this partition. - -Notes: - -White space is permitted between delimiters ('=', ',',':',';'). - -The line can be wrapped after ':' followed after Partition Definition and -between. - -PartitionName does not need to be unique, PKey does need to be unique. -If PKey is repeated then those partition configurations will be merged -and first PartitionName will be used (see also next note). - -It is possible to split partition configuration in more than one -definition, but then PKey should be explicitly specified (otherwise -different PKey values will be generated for those definitions). 
- -Examples: - - Default=0x7fff : ALL, SELF=full ; - - NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; - - YetAnotherOne = 0x300 : SELF=full ; - YetAnotherOne = 0x300 : ALL=limited ; - - ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; - # 0x123453, 0x123454 will be limited - ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; - # 0x123456, 0x123457 will be limited - ShareIO = 0x80 : defmember=limited : 0x123456, 0x123457, 0x123458=full; - ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; - ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; - - -Note: - -The following rule is equivalent to how OpenSM used to run prior to the -partition manager: - - Default=0x7fff,ipoib:ALL=full; - -.SH QOS CONFIGURATION -.PP -There are a set of QoS related low-level configuration parameters. -All these parameter names are prefixed by "qos_" string. Here is a full -list of these parameters: - - qos_max_vls - The maximum number of VLs that will be on the subnet - qos_high_limit - The limit of High Priority component of VL - Arbitration table (IBA 7.6.9) - qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) - template - qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) - template - Both VL arbitration templates are pairs of - VL and weight - qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. It is - a list of VLs corresponding to SLs 0-15 (Note - that VL15 used here means drop this SL) - -Typical default values (hard-coded in OpenSM initialization) are: - - qos_max_vls=15 - qos_high_limit=0 - qos_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 - qos_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 - qos_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 - -The syntax is compatible with rest of OpenSM configuration options and -values may be stored in OpenSM config file (cached options file). 
- -In addition to the above, we may define separate QoS configuration -parameters sets for various target types. As targets, we currently support -CAs, routers, switch external ports, and switch's enhanced port 0. The -names of such specialized parameters are prefixed by "qos__" -string. Here is a full list of the currently supported sets: - - qos_ca_ - QoS configuration parameters set for CAs. - qos_rtr_ - parameters set for routers. - qos_sw0_ - parameters set for switches' port 0. - qos_swe_ - parameters set for switches' external ports. - -Examples: - qos_sw0_max_vls=2 - qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, - qos_swe_high_limit=0 - -.SH PREFIX ROUTES -.PP -Prefix routes control how the SA responds to path record queries for -off-subnet DGIDs. By default, the SA fails such queries. -Note that IBA does not specify how the SA should obtain off-subnet path -record information. -The prefix routes configuration is meant as a stop-gap until the -specification is completed. -.PP -Each line in the configuration file is a 64-bit prefix followed by a -64-bit GUID, separated by white space. -The GUID specifies the router port on the local subnet that will -handle the prefix. -Blank lines are ignored, as is anything between a \fB#\fP character -and the end of the line. -The prefix and GUID are both in hex, the leading 0x is optional. -Either, or both, can be wild-carded by specifying an -asterisk instead of an explicit prefix or GUID. -.PP -When responding to a path record query for an off-subnet DGID, -opensm searches for the first prefix match in the configuration file. -Therefore, the order of the lines in the configuration file is important: -a wild-carded prefix at the beginning of the configuration file renders -all subsequent lines useless. -If there is no match, then opensm fails the query. -It is legal to repeat prefixes in the configuration file, -opensm will return the path to the first available matching router. 
-A configuration file with a single line where both prefix and GUID -are wild-carded means that a path record query specifying any -off-subnet DGID should return a path to the first available router. -This configuration yields the same behaviour formerly achieved by -compiling opensm with -DROUTER_EXP. - -.SH ROUTING -.PP -OpenSM now offers five routing engines: - -1. Min Hop Algorithm - based on the minimum hops to each node where the -path length is optimized. - -2. UPDN Unicast routing algorithm - also based on the minimum hops to each -node, but it is constrained to ranking rules. This algorithm should be chosen -if the subnet is not a pure Fat Tree, and deadlock may occur due to a -loop in the subnet. - -3. Fat Tree Unicast routing algorithm - this algorithm optimizes routing -for congestion-free "shift" communication pattern. -It should be chosen if a subnet is a symmetrical Fat Trees of various types, -not just K-ary-N-Trees: non-constant K, not fully staffed, any CBB ratio. -Similar to UPDN, Fat Tree routing is constrained to ranking rules. - -4. LASH unicast routing algorithm - uses Infiniband virtual layers -(SL) to provide deadlock-free shortest-path routing while also -distributing the paths between layers. LASH is an alternative -deadlock-free topology-agnostic routing algorithm to the non-minimal -UPDN algorithm avoiding the use of a potentially congested root node. - -5. DOR Unicast routing algorithm - based on the Min Hop algorithm, but -avoids port equalization except for redundant links between the same -two switches. This provides deadlock free routes for hypercubes when -the fabric is cabled as a hypercube and for meshes when cabled as a -mesh (see details below). - -OpenSM also supports a file method which -can load routes from a table. See \'Modular Routing Engine\' for more -information on this. - -The basic routing algorithm is comprised of two stages: - -1. 
MinHop matrix calculation - How many hops are required to get from each port to each LID ? - The algorithm to fill these tables is different if you run standard -(min hop) or Up/Down. - For standard routing, a "relaxation" algorithm is used to propagate -min hop from every destination LID through neighbor switches - For Up/Down routing, a BFS from every target is used. The BFS tracks link -direction (up or down) and avoid steps that will perform up after a down -step was used. - -2. Once MinHop matrices exist, each switch is visited and for each target LID a -decision is made as to what port should be used to get to that LID. - This step is common to standard and Up/Down routing. Each port has a -counter counting the number of target LIDs going through it. - When there are multiple alternative ports with same MinHop to a LID, -the one with less previously assigned ports is selected. - If LMC > 0, more checks are added: Within each group of LIDs assigned to -same target port, - a. use only ports which have same MinHop - b. first prefer the ones that go to different systemImageGuid (then -the previous LID of the same LMC group) - c. if none - prefer those which go through another NodeGuid - d. fall back to the number of paths method (if all go to same node). - -Effect of Topology Changes - -OpenSM will preserve existing routing in any case where there is no change in -the fabric switches unless the -r (--reassign_lids) option is specified. - --r -.br ---reassign_lids - This option causes OpenSM to reassign LIDs to all - end nodes. Specifying -r on a running subnet - may disrupt subnet traffic. - Without -r, OpenSM attempts to preserve existing - LID assignments resolving multiple use of same LID. - -If a link is added or removed, OpenSM does not recalculate -the routes that do not have to change. A route has to change -if the port is no longer UP or no longer the MinHop. When routing changes -are performed, the same algorithm for balancing the routes is invoked. 
- -In the case of using the file based routing, any topology changes are -currently ignored The 'file' routing engine just loads the LFTs from the file -specified, with no reaction to real topology. Obviously, this will not be able -to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent -switches will be skipped. Multicast is not affected by 'file' routing engine -(this uses min hop tables). - - -Min Hop Algorithm - -The Min Hop algorithm is invoked when neither UPDN or the file method are -specified. - -The Min Hop algorithm is divided into two stages: computation of -min-hop tables on every switch and LFT output port assignment. Link -subscription is also equalized with the ability to override based on -port GUID. The latter is supplied by: - --i -.br --ignore-guids - This option provides the means to define a set of ports - (by guid) that will be ignored by the link load - equalization algorithm. Note that only endports (CA, - switch port 0, and router ports) and not switch external - ports are supported. - -LMC awareness routes based on (remote) system or switch basis. - - -Purpose of UPDN Algorithm - -The UPDN algorithm is designed to prevent deadlocks from occurring in loops -of the subnet. A loop-deadlock is a situation in which it is no longer -possible to send data between any two hosts connected through the loop. As -such, the UPDN routing algorithm should be used if the subnet is not a pure -Fat Tree, and one of its loops may experience a deadlock (due, for example, -to high pressure). - -The UPDN algorithm is based on the following main stages: - -1. Auto-detect root nodes - based on the CA hop length from any switch in -the subnet, a statistical histogram is built for each switch (hop num vs -number of occurrences). If the histogram reflects a specific column (higher -than others) for a certain node, then it is marked as a root node. Since -the algorithm is statistical, it may not find any root nodes. 
The list of -the root nodes found by this auto-detect stage is used by the ranking -process stage. - - Note 1: The user can override the node list manually. - Note 2: If this stage cannot find any root nodes, and the user did - not specify a guid list file, OpenSM defaults back to the - Min Hop routing algorithm. - -2. Ranking process - All root switch nodes (found in stage 1) are assigned -a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the -subnet are ranked incrementally. This ranking aids in the process of enforcing -rules that ensure loop-free paths. - -3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from -each (CA or switch) node in the subnet. During the BFS process, the FDB table -of each switch node traversed by BFS is updated, in reference to the starting -node, based on the ranking rules and guid values. - -At the end of the process, the updated FDB tables ensure loop-free paths -through the subnet. - -Note: Up/Down routing does not allow LID routing communication between -switches that are located inside spine "switch systems". -The reason is that there is no way to allow a LID route between them -that does not break the Up/Down rule. -One ramification of this is that you cannot run SM on switches other -than the leaf switches of the fabric. - - -UPDN Algorithm Usage - -Activation through OpenSM - -Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. -Use '-a ' for adding an UPDN guid file that contains the -root nodes for ranking. -If the `-a' option is not used, OpenSM uses its auto-detect root nodes -algorithm. - -Notes on the guid list file: - -1. A valid guid file specifies one guid in each line. Lines with an invalid -format will be discarded. -.br -2. The user should specify the root switch guids. However, it is also -possible to specify CA guids; OpenSM will use the guid of the switch (if -it exists) that connects the CA to the subnet as a root node. 
- - -Fat-tree Routing Algorithm - -The fat-tree algorithm optimizes routing for "shift" communication pattern. -It should be chosen if a subnet is a symmetrical or almost symmetrical -fat-tree of various types. -It supports not just K-ary-N-Trees, by handling for non-constant K, -cases where not all leafs (CAs) are present, any CBB ratio. -As in UPDN, fat-tree also prevents credit-loop-deadlocks. - -If the root guid file is not provided ('-a' or '--root_guid_file' options), -the topology has to be pure fat-tree that complies with the following rules: - - Tree rank should be between two and eight (inclusively) - - Switches of the same rank should have the same number - of UP-going port groups*, unless they are root switches, - in which case the shouldn't have UP-going ports at all. - - Switches of the same rank should have the same number - of DOWN-going port groups, unless they are leaf switches. - - Switches of the same rank should have the same number - of ports in each UP-going port group. - - Switches of the same rank should have the same number - of ports in each DOWN-going port group. - - All the CAs have to be at the same tree level (rank). - -If the root guid file is provided, the topology doesn't have to be pure -fat-tree, and it should only comply with the following rules: - - Tree rank should be between two and eight (inclusively) - - All the Compute Nodes** have to be at the same tree level (rank). - Note that non-compute node CAs are allowed here to be at different - tree ranks. - -* ports that are connected to the same remote switch are referenced as -\'port group\'. - -** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\' -OpenSM options. - -Topologies that do not comply cause a fallback to min hop routing. -Note that this can also occur on link failures which cause the topology -to no longer be "pure" fat-tree. 
- -Note that although fat-tree algorithm supports trees with non-integer CBB -ratio, the routing will not be as balanced as in case of integer CBB ratio. -In addition to this, although the algorithm allows leaf switches to have any -number of CAs, the closer the tree is to be fully populated, the more -effective the "shift" communication pattern will be. -In general, even if the root list is provided, the closer the topology to a -pure and symmetrical fat-tree, the more optimal the routing will be. - -The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) -in the same directory where the OpenSM log resides. This ordering file provides -the CN order that may be used to create efficient communication pattern, that -will match the routing tables. - -Activation through OpenSM - -Use '-R ftree' option to activate the fat-tree algorithm. -Use '-a ' to provide root nodes for ranking. If the `-a' option -is not used, routing algorithm will detect roots automatically. -Use '-u ' to provide the list of compute nodes. If the `-u' option -is not used, all the CAs are considered as compute nodes. - -Note: LMC > 0 is not supported by fat-tree routing. If this is -specified, the default routing algorithm is invoked instead. - - -LASH Routing Algorithm - -LASH is an acronym for LAyered SHortest Path Routing. It is a -deterministic shortest path routing algorithm that enables topology -agnostic deadlock-free routing within communication networks. - -When computing the routing function, LASH analyzes the network -topology for the shortest-path routes between all pairs of sources / -destinations and groups these paths into virtual layers in such a way -as to avoid deadlock. - -Note LASH analyzes routes and ensures deadlock freedom between switch -pairs. The link from HCA between and switch does not need virtual -layers as deadlock will not arise between switch and HCA. 
- -In more detail, the algorithm works as follows: - -1) LASH determines the shortest-path between all pairs of source / -destination switches. Note, LASH ensures the same SL is used for all -SRC/DST - DST/SRC pairs and there is no guarantee that the return -path for a given DST/SRC will be the reverse of the route SRC/DST. - -2) LASH then begins an SL assignment process where a route is assigned -to a layer (SL) if the addition of that route does not cause deadlock -within that layer. This is achieved by maintaining and analysing a -channel dependency graph for each layer. Once the potential addition -of a path could lead to deadlock, LASH opens a new layer and continues -the process. - -3) Once this stage has been completed, it is highly likely that the -first layers processed will contain more paths than the latter ones. -To better balance the use of layers, LASH moves paths from one layer -to another so that the number of paths in each layer averages out. - -Note, the implementation of LASH in opensm attempts to use as few layers -as possible. This number can be less than the number of actual layers -available. - -In general LASH is a very flexible algorithm. It can, for example, -reduce to Dimension Order Routing in certain topologies, it is topology -agnostic and fares well in the face of faults. - -It has been shown that for both regular and irregular topologies, LASH -outperforms Up/Down. The reason for this is that LASH distributes the -traffic more evenly through a network, avoiding the bottleneck issues -related to a root node and always routes shortest-path. - -The algorithm was developed by Simula Research Laboratory. - - -Use '-R lash -Q ' option to activate the LASH algorithm. - -Note: QoS support has to be turned on in order that SL/VL mappings are -used. - -Note: LMC > 0 is not supported by the LASH routing. If this is -specified, the default routing algorithm is invoked instead. 
- - -DOR Routing Algorithm - -The Dimension Order Routing algorithm is based on the Min Hop -algorithm and so uses shortest paths. Instead of spreading traffic -out across different paths with the same shortest distance, it chooses -among the available shortest paths based on an ordering of dimensions. -Each port must be consistently cabled to represent a hypercube -dimension or a mesh dimension. Paths are grown from a destination -back to a source using the lowest dimension (port) of available paths -at each step. This provides the ordering necessary to avoid deadlock. -When there are multiple links between any two switches, they still -represent only one dimension and traffic is balanced across them -unless port equalization is turned off. In the case of hypercubes, -the same port must be used throughout the fabric to represent the -hypercube dimension and match on both ends of the cable. In the case -of meshes, the dimension should consistently use the same pair of -ports, one port on one end of the cable, and the other port on the -other end, continuing along the mesh dimension. - -Use '-R dor' option to activate the DOR algorithm. - - -Routing References - -To learn more about deadlock-free routing, see the article -"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" -by William J Dally and Charles L Seitz (1985). - -To learn more about the up/down algorithm, see the article -"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks" -by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the -Universidad Politecnica de Valencia. - -To learn more about LASH and the flexibility behind it, the requirement -for layers, performance comparisons to other algorithms, see the -following articles: - -"Layered Routing in Irregular Networks", Lysne et al, IEEE -Transactions on Parallel and Distributed Systems, VOL.16, No12, -December 2005. - -"Routing for the ASI Fabric Manager", Solheim et al. 
IEEE -Communications Magazine, Vol.44, No.7, July 2006. - -"Layered Shortest Path (LASH) Routing in Irregular System Area -Networks", Skeie et al. IEEE Computer Society Communication -Architecture for Clusters 2002. - - -Modular Routine Engine - -Modular routing engine structure allows for the ease of -"plugging" new routing modules. - -Currently, only unicast callbacks are supported. Multicast -can be added later. - -One existing routing module is up-down "updn", which may be -activated with '-R updn' option (instead of old '-u'). - -General usage is: -$ opensm -R 'module-name' - -There is also a trivial routing module which is able -to load LFT tables from a dump file. - -Main features: - - - this will load switch LFTs and/or LID matrices (min hops tables) - - this will load switch LFTs according to the path entries introduced - in the dump file - - no additional checks will be performed (such as "is port connected", - etc.) - - in case when fabric LIDs were changed this will try to reconstruct - LFTs correctly if endport GUIDs are represented in the dump file - (in order to disable this, GUIDs may be removed from the dump file - or zeroed) - -The dump file format is compatible with output of 'ibroute' util and for -whole fabric can be generated with dump_lfts.sh script. - -To activate file based routing module, use: - - opensm -R file -U /path/to/dump_file - -If the dump_file is not found or is in error, the default routing -algorithm is utilized. - -The ability to dump switch lid matrices (aka min hops tables) to file and -later to load these is also supported. - -The usage is similar to unicast forwarding tables loading from dump -file (introduced by 'file' routing engine), but new lid matrix file -name should be specified by -M or --lid_matrix_file option. 
For example: - - opensm -R file -M ./opensm-lid-matrix.dump - -The dump file is named \'opensm-lid-matrix.dump\' and will be generated -in standard opensm dump directory (/var/log by default) when -OSM_LOG_ROUTING logging flag is set. - -When routing engine 'file' is activated, but dump file is not specified -or not cannot be open default lid matrix algorithm will be used. - -There is also a switch forwarding tables dumper which generates -a file compatible with dump_lfts.sh output. This file can be used -as input for forwarding tables loading by 'file' routing engine. -Both or one of options -U and -M can be specified together with \'-R file\'. - -.SH FILES -.TP -.B /etc/opensm/prefix-routes.conf -default prefix routes file. - -.SH AUTHORS -.TP -Hal Rosenstock -.RI < hal at xsigo.com > -.TP -Sasha Khapyorsky -.RI < sashak at voltaire.com > -.TP -Eitan Zahavi -.RI < eitan at mellanox.co.il > -.TP -Yevgeny Kliteynik -.RI < kliteyn at mellanox.co.il > -.TP -Thomas Sodring -.RI < tsodring at simula.no > diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in new file mode 100644 index 0000000..115ab56 --- /dev/null +++ b/opensm/man/opensm.8.in @@ -0,0 +1,941 @@ +.TH OPENSM 8 "Aug 16, 2007" "OpenIB" "OpenIB Management" + +.SH NAME +opensm \- InfiniBand subnet manager and administration (SM/SA) + +.SH SYNOPSIS +.B opensm +[\-c(ache-options)] [\-g(uid)[=]] [\-l(mc) ] +[\-p(riority) ] [\-smkey ] [\-r(eassign_lids)] +[\-R | \-\-routing_engine ] +[\-z | \-\-connect_roots] +[\-M | \-\-lid_matrix_file ] +[\-U | \-\-ucast_file ] +[\-S | \-\-sadb_file ] [\-a | \-\-root_guid_file ] +[\-u | \-\-cn_guid_file ] [\-o(nce)] [\-s(weep) ] +[\-t(imeout) ] [\-maxsmps ] +[\-console [off | local | socket | loopback]] [\-console-port ] +[\-i(gnore-guids) ] [\-f | \-\-log_file] +[\-L | \-\-log_limit ] [\-e(rase_log_file)] [\-P(config)] +[\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] +[\-B | \-\-daemon] [\-I | \-\-inactive] +[\-\-perfmgr] [\-\-perfmgr_sweep_time_s 
] +[\-\-prefix_routes_file ] +[\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] + +.SH DESCRIPTION +.PP +opensm is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. + +opensm provides an implementation of an InfiniBand Subnet Manager and +Administration. Such a software entity is required to run in order +to initialize the InfiniBand hardware (at least one per +InfiniBand subnet). + +opensm also now contains an experimental version of a performance +manager as well. + +opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes. Thus, in this default mode, opensm will scan the IB +fabric, initialize it, and sweep occasionally for changes. + +opensm attaches to a specific IB port on the local machine and configures only +the fabric connected to it. (If the local machine has other IB ports, +opensm will ignore the fabrics connected to those other ports). If no port is +specified, it will select the first "best" available port. + +opensm can present the available ports and prompt for a port number to +attach to. + +By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log. +The first file will register only general major events, whereas the second +will include details of reported errors. All errors reported in this second +file should be treated as indicators of IB fabric health issues. +(Note that when a fatal and non-recoverable error occurs, opensm will exit.) +Both log files should include the message "SUBNET UP" if opensm was able to +set up the subnet correctly. + +.SH OPTIONS + +.PP +.TP +\fB\-c\fR, \fB\-\-cache-options\fR +Write out a list of all tunable OpenSM parameters, +including their current values from the command line +as well as defaults for others, into the file +OSM_CACHE_DIR/opensm.opts (OSM_CACHE_DIR defaults to +/var/cache/opensm if the corresponding environment +variable is not set).
The options file is then +used for subsequent OpenSM invocations but any +command line options take precedence. +.TP +\fB\-g\fR, \fB\-\-guid\fR +This option specifies the local port GUID value +with which OpenSM should bind. OpenSM may be +bound to 1 port at a time. +If GUID given is 0, OpenSM displays a list +of possible port GUIDs and waits for user input. +Without -g, OpenSM tries to use the default port. +.TP +\fB\-l\fR, \fB\-\-lmc\fR +This option specifies the subnet's LMC value. +The number of LIDs assigned to each port is 2^LMC. +The LMC value must be in the range 0-7. +LMC values > 0 allow multiple paths between ports. +LMC values > 0 should only be used if the subnet +topology actually provides multiple paths between +ports, i.e. multiple interconnects between switches. +Without -l, OpenSM defaults to LMC = 0, which allows +one path between any two ports. +.TP +\fB\-p\fR, \fB\-\-priority\fR +This option specifies the SM\'s PRIORITY. +This will affect the handover cases, where master +is chosen by priority and GUID. Range goes from 0 +(default and lowest priority) to 15 (highest). +.TP +\fB\-smkey\fR +This option specifies the SM\'s SM_Key (64 bits). +This will affect SM authentication. +.TP +\fB\-r\fR, \fB\-\-reassign_lids\fR +This option causes OpenSM to reassign LIDs to all +end nodes. Specifying -r on a running subnet +may disrupt subnet traffic. +Without -r, OpenSM attempts to preserve existing +LID assignments resolving multiple use of same LID. +.TP +\fB\-R\fR, \fB\-\-routing_engine\fR +This option chooses routing engine instead of Min Hop +algorithm (default). +Supported engines: minhop, updn, file, ftree, lash, dor +.TP +\fB\-z\fR, \fB\-\-connect_roots\fR +This option enforces a routing engine (currently up/down +only) to make connectivity between root switches and in +this way to be fully IBA compliant. In many cases this can +violate "pure" deadlock free algorithm, so use it carefully.
+.TP +\fB\-M\fR, \fB\-\-lid_matrix_file\fR +This option specifies the name of the lid matrix dump file +from where switch lid matrices (min hops tables) will be +loaded. +.TP +\fB\-U\fR, \fB\-\-ucast_file\fR +This option specifies the name of the unicast dump file +from where switch forwarding tables will be loaded. +.TP +\fB\-S\fR, \fB\-\-sadb_file\fR +This option specifies the name of the SA DB dump file +from where SA database will be loaded. +.TP +\fB\-a\fR, \fB\-\-root_guid_file\fR +Set the root nodes for the Up/Down or Fat-Tree routing +algorithm to the guids provided in the given file (one to a line). +.TP +\fB\-u\fR, \fB\-\-cn_guid_file\fR +Set the compute nodes for the Fat-Tree routing algorithm +to the guids provided in the given file (one to a line). +.TP +\fB\-o\fR, \fB\-\-once\fR +This option causes OpenSM to configure the subnet +once, then exit. Ports remain in the ACTIVE state. +.TP +\fB\-s\fR, \fB\-\-sweep\fR +This option specifies the number of seconds between +subnet sweeps. Specifying -s 0 disables sweeping. +Without -s, OpenSM defaults to a sweep interval of +10 seconds. +.TP +\fB\-t\fR, \fB\-\-timeout\fR +This option specifies the time in milliseconds +used for transaction timeouts. +Specifying -t 0 disables timeouts. +Without -t, OpenSM defaults to a timeout value of +200 milliseconds. +.TP +\fB\-maxsmps\fR +This option specifies the number of VL15 SMP MADs +allowed on the wire at any one time. +Specifying -maxsmps 0 allows unlimited outstanding +SMPs. +Without -maxsmps, OpenSM defaults to a maximum of +4 outstanding SMPs. +.TP +\fB\-console [off | local | socket | loopback]\fR +This option brings up the OpenSM console (default off). +Note that the socket and loopback options will only be available +if OpenSM was built with --enable-console-socket. +.TP +\fB\-console-port\fR +Specify an alternate telnet port for the socket console (default 10000). +Note that this option only appears if OpenSM was built with +--enable-console-socket.
+.TP +\fB\-i\fR, \fB\-ignore-guids\fR +This option provides the means to define a set of ports +(by guid) that will be ignored by the link load +equalization algorithm. +.TP +\fB\-x\fR, \fB\-\-honor_guid2lid\fR +This option forces OpenSM to honor the guid2lid file, +when it comes out of Standby state, if such file exists +under OSM_CACHE_DIR, and is valid. +By default, this is FALSE. +.TP +\fB\-f\fR, \fB\-\-log_file\fR +This option defines the log to be the given file. +By default, the log goes to /var/log/opensm.log. +For the log to go to standard output use -f stdout. +.TP +\fB\-L\fR, \fB\-\-log_limit\fR +This option defines maximal log file size in MB. When +specified the log file will be truncated upon reaching +this limit. +.TP +\fB\-e\fR, \fB\-\-erase_log_file\fR +This option will cause deletion of the log file +(if it previously exists). By default, the log file +is accumulative. +.TP +\fB\-P\fR, \fB\-\-Pconfig\fR +This option defines the optional partition configuration file. +The default name is \fB\%@CONF_DIR@/@PARTITION_CONFIG_FILE@\fP. +.TP +.BI --prefix_routes_file= path +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. The +.B PREFIX ROUTES +section below describes the format of the configuration file. +The default path is \fB\%@CONF_DIR@/prefix\-routes.conf\fP. +.TP +\fB\-Q\fR, \fB\-\-qos\fR +This option enables QoS setup. It is disabled by default. +.TP +\fB\-N\fR, \fB\-\-no_part_enforce\fR +This option disables partition enforcement on switch external ports. +.TP +\fB\-y\fR, \fB\-\-stay_on_fatal\fR +This option will cause SM not to exit on fatal initialization +issues: if SM discovers duplicated guids or a 12x link with +lane reversal badly configured. +By default, the SM will exit on these errors. +.TP +\fB\-B\fR, \fB\-\-daemon\fR +Run in daemon mode - OpenSM will run in the background. +.TP +\fB\-I\fR, \fB\-\-inactive\fR +Start SM in inactive rather than init SM state. 
This +option can be used in conjunction with the perfmgr so as to +run a standalone performance manager without SM/SA. However, +this is NOT currently implemented in the performance manager. +.TP +\fB\-perfmgr\fR +Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at +configure time. +.TP +\fB\-perfmgr_sweep_time_s\fR +Specify the sweep time for the performance manager in seconds +(default is 180 seconds). Only takes +effect if --enable-perfmgr was specified at configure time. +.TP +.BI --consolidate_ipv6_snm_reqests +Consolidate IPv6 Solicited Node Multicast group joins into 1 IB multicast +group. +.TP +\fB\-v\fR, \fB\-\-verbose\fR +This option increases the log verbosity level. +The -v option may be specified multiple times +to further increase the verbosity level. +See the -D option for more information about +log verbosity. +.TP +\fB\-V\fR +This option sets the maximum verbosity level and +forces log flushing. +The -V option is equivalent to \'-D 0xFF -d 2\'. +See the -D option for more information about +log verbosity. +.TP +\fB\-D\fR +This option sets the log verbosity level. +A flags field must follow the -D option. +A bit set/clear in the flags enables/disables a +specific log level as follows: + + BIT LOG LEVEL ENABLED + ---- ----------------- + 0x01 - ERROR (error messages) + 0x02 - INFO (basic messages, low volume) + 0x04 - VERBOSE (interesting stuff, moderate volume) + 0x08 - DEBUG (diagnostic, high volume) + 0x10 - FUNCS (function entry/exit, very high volume) + 0x20 - FRAMES (dumps all SMP and GMP frames) + 0x40 - ROUTING (dump FDB routing information) + 0x80 - currently unused. + +Without -D, OpenSM defaults to ERROR + INFO (0x3). +Specifying -D 0 disables all messages. +Specifying -D 0xFF enables all messages (see -V). +High verbosity levels may require increasing +the transaction timeout with the -t option. +.TP +\fB\-d\fR, \fB\-\-debug\fR +This option specifies a debug option. +These options are not normally needed. 
+The number following -d selects the debug +option to enable as follows: + + OPT Description + --- ----------------- + -d0 - Ignore other SM nodes + -d1 - Force single threaded dispatching + -d2 - Force log flushing after each log message + -d3 - Disable multicast support +.TP +\fB\-h\fR, \fB\-\-help\fR +Display this usage info then exit. +.TP +\fB\-?\fR +Display this usage info then exit. + +.SH ENVIRONMENT VARIABLES +.PP +The following environment variables control opensm behavior: + +OSM_TMP_DIR - controls the directory in which the temporary files generated by +opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and +opensm.mcfdbs. By default, this directory is /var/log. + +OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent +runs are consistent. The default directory used is /var/cache/opensm. +The following files are included in it: + + guid2lid - stores the LID range assigned to each GUID + + opensm.opts - an optional file that holds a complete set of opensm + configuration options + +.SH NOTES +.PP +When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found. +.PP +Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for +logrotate purposes. + +.SH PARTITION CONFIGURATION +.PP +The default name of OpenSM partitions configuration file is +\fB\%@CONF_DIR@/@PARTITION_CONFIG_FILE@\fP. The default may be changed by using +--Pconfig (-P) option with OpenSM. + +The default partition will be created by OpenSM unconditionally even +when partition configuration file does not exist or cannot be accessed. + +The default partition has P_Key value 0x7fff. OpenSM\'s port will have +full membership in default partition. All other end ports will have +partial membership. + +File Format + +Comments: + +Line content followed after \'#\' character is comment and ignored by +parser. 
+ +General file format: + +: ; + +Partition Definition: + +[PartitionName][=PKey][,flag[=value]][,defmember=full|limited] + + PartitionName - string, will be used with logging. When omitted + empty string will be used. + PKey - P_Key value for this partition. Only low 15 bits will + be used. When omitted will be autogenerated. + flag - used to indicate IPoIB capability of this partition. + defmember=full|limited - specifies default membership for port guid + list. Default is limited. + +Currently recognized flags are: + + ipoib - indicates that this partition may be used for IPoIB, as + result IPoIB capable MC group will be created. + rate= - specifies rate for this IPoIB MC group + (default is 3 (10GBps)) + mtu= - specifies MTU for this IPoIB MC group + (default is 4 (2048)) + sl= - specifies SL for this IPoIB MC group + (default is 0) + scope= - specifies scope for this IPoIB MC group + (default is 2 (link local)). Multiple scope settings + are permitted for a partition. + +Note that values for rate, mtu, and scope should be specified as +defined in the IBTA specification (for example, mtu=4 for 2048). + +PortGUIDs list: + + PortGUID - GUID of partition member EndPort. Hexadecimal + numbers should start from 0x, decimal numbers + are accepted too. + full or limited - indicates full or limited membership for this + port. When omitted (or unrecognized) limited + membership is assumed. + +There are two useful keywords for PortGUID definition: + + - 'ALL' means all end ports in this subnet. + - 'SELF' means subnet manager's port. + +Empty list means no ports in this partition. + +Notes: + +White space is permitted between delimiters ('=', ',',':',';'). + +The line can be wrapped after ':' followed after Partition Definition and +between. + +PartitionName does not need to be unique, PKey does need to be unique. +If PKey is repeated then those partition configurations will be merged +and first PartitionName will be used (see also next note). 
+ +It is possible to split partition configuration in more than one +definition, but then PKey should be explicitly specified (otherwise +different PKey values will be generated for those definitions). + +Examples: + + Default=0x7fff : ALL, SELF=full ; + + NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; + + YetAnotherOne = 0x300 : SELF=full ; + YetAnotherOne = 0x300 : ALL=limited ; + + ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; + # 0x123453, 0x123454 will be limited + ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; + # 0x123456, 0x123457 will be limited + ShareIO = 0x80 : defmember=limited : 0x123456, 0x123457, 0x123458=full; + ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; + ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; + + +Note: + +The following rule is equivalent to how OpenSM used to run prior to the +partition manager: + + Default=0x7fff,ipoib:ALL=full; + +.SH QOS CONFIGURATION +.PP +There are a set of QoS related low-level configuration parameters. +All these parameter names are prefixed by "qos_" string. Here is a full +list of these parameters: + + qos_max_vls - The maximum number of VLs that will be on the subnet + qos_high_limit - The limit of High Priority component of VL + Arbitration table (IBA 7.6.9) + qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) + template + qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) + template + Both VL arbitration templates are pairs of + VL and weight + qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. 
It is + a list of VLs corresponding to SLs 0-15 (Note + that VL15 used here means drop this SL) + +Typical default values (hard-coded in OpenSM initialization) are: + + qos_max_vls=15 + qos_high_limit=0 + qos_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 + qos_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 + qos_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + +The syntax is compatible with rest of OpenSM configuration options and +values may be stored in OpenSM config file (cached options file). + +In addition to the above, we may define separate QoS configuration +parameters sets for various target types. As targets, we currently support +CAs, routers, switch external ports, and switch's enhanced port 0. The +names of such specialized parameters are prefixed by "qos__" +string. Here is a full list of the currently supported sets: + + qos_ca_ - QoS configuration parameters set for CAs. + qos_rtr_ - parameters set for routers. + qos_sw0_ - parameters set for switches' port 0. + qos_swe_ - parameters set for switches' external ports. + +Examples: + qos_sw0_max_vls=2 + qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, + qos_swe_high_limit=0 + +.SH PREFIX ROUTES +.PP +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. +Note that IBA does not specify how the SA should obtain off-subnet path +record information. +The prefix routes configuration is meant as a stop-gap until the +specification is completed. +.PP +Each line in the configuration file is a 64-bit prefix followed by a +64-bit GUID, separated by white space. +The GUID specifies the router port on the local subnet that will +handle the prefix. +Blank lines are ignored, as is anything between a \fB#\fP character +and the end of the line. +The prefix and GUID are both in hex, the leading 0x is optional. 
+Either, or both, can be wild-carded by specifying an +asterisk instead of an explicit prefix or GUID. +.PP +When responding to a path record query for an off-subnet DGID, +opensm searches for the first prefix match in the configuration file. +Therefore, the order of the lines in the configuration file is important: +a wild-carded prefix at the beginning of the configuration file renders +all subsequent lines useless. +If there is no match, then opensm fails the query. +It is legal to repeat prefixes in the configuration file, +opensm will return the path to the first available matching router. +A configuration file with a single line where both prefix and GUID +are wild-carded means that a path record query specifying any +off-subnet DGID should return a path to the first available router. +This configuration yields the same behaviour formerly achieved by +compiling opensm with -DROUTER_EXP. + +.SH ROUTING +.PP +OpenSM now offers five routing engines: + +1. Min Hop Algorithm - based on the minimum hops to each node where the +path length is optimized. + +2. UPDN Unicast routing algorithm - also based on the minimum hops to each +node, but it is constrained to ranking rules. This algorithm should be chosen +if the subnet is not a pure Fat Tree, and deadlock may occur due to a +loop in the subnet. + +3. Fat Tree Unicast routing algorithm - this algorithm optimizes routing +for congestion-free "shift" communication pattern. +It should be chosen if a subnet is a symmetrical Fat Trees of various types, +not just K-ary-N-Trees: non-constant K, not fully staffed, any CBB ratio. +Similar to UPDN, Fat Tree routing is constrained to ranking rules. + +4. LASH unicast routing algorithm - uses Infiniband virtual layers +(SL) to provide deadlock-free shortest-path routing while also +distributing the paths between layers. 
LASH is an alternative +deadlock-free topology-agnostic routing algorithm to the non-minimal +UPDN algorithm avoiding the use of a potentially congested root node. + +5. DOR Unicast routing algorithm - based on the Min Hop algorithm, but +avoids port equalization except for redundant links between the same +two switches. This provides deadlock free routes for hypercubes when +the fabric is cabled as a hypercube and for meshes when cabled as a +mesh (see details below). + +OpenSM also supports a file method which +can load routes from a table. See \'Modular Routing Engine\' for more +information on this. + +The basic routing algorithm is comprised of two stages: + +1. MinHop matrix calculation + How many hops are required to get from each port to each LID ? + The algorithm to fill these tables is different if you run standard +(min hop) or Up/Down. + For standard routing, a "relaxation" algorithm is used to propagate +min hop from every destination LID through neighbor switches + For Up/Down routing, a BFS from every target is used. The BFS tracks link +direction (up or down) and avoid steps that will perform up after a down +step was used. + +2. Once MinHop matrices exist, each switch is visited and for each target LID a +decision is made as to what port should be used to get to that LID. + This step is common to standard and Up/Down routing. Each port has a +counter counting the number of target LIDs going through it. + When there are multiple alternative ports with same MinHop to a LID, +the one with less previously assigned ports is selected. + If LMC > 0, more checks are added: Within each group of LIDs assigned to +same target port, + a. use only ports which have same MinHop + b. first prefer the ones that go to different systemImageGuid (then +the previous LID of the same LMC group) + c. if none - prefer those which go through another NodeGuid + d. fall back to the number of paths method (if all go to same node). 
+ +Effect of Topology Changes + +OpenSM will preserve existing routing in any case where there is no change in +the fabric switches unless the -r (--reassign_lids) option is specified. + +-r +.br +--reassign_lids + This option causes OpenSM to reassign LIDs to all + end nodes. Specifying -r on a running subnet + may disrupt subnet traffic. + Without -r, OpenSM attempts to preserve existing + LID assignments resolving multiple use of same LID. + +If a link is added or removed, OpenSM does not recalculate +the routes that do not have to change. A route has to change +if the port is no longer UP or no longer the MinHop. When routing changes +are performed, the same algorithm for balancing the routes is invoked. + +In the case of using the file based routing, any topology changes are +currently ignored. The 'file' routing engine just loads the LFTs from the file +specified, with no reaction to real topology. Obviously, this will not be able +to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent +switches will be skipped. Multicast is not affected by 'file' routing engine +(this uses min hop tables). + + +Min Hop Algorithm + +The Min Hop algorithm is invoked when neither UPDN nor the file method is +specified. + +The Min Hop algorithm is divided into two stages: computation of +min-hop tables on every switch and LFT output port assignment. Link +subscription is also equalized with the ability to override based on +port GUID. The latter is supplied by: + +-i +.br +-ignore-guids + This option provides the means to define a set of ports + (by guid) that will be ignored by the link load + equalization algorithm. Note that only endports (CA, + switch port 0, and router ports) and not switch external + ports are supported. + +LMC awareness routes based on (remote) system or switch basis. + + +Purpose of UPDN Algorithm + +The UPDN algorithm is designed to prevent deadlocks from occurring in loops +of the subnet.
A loop-deadlock is a situation in which it is no longer +possible to send data between any two hosts connected through the loop. As +such, the UPDN routing algorithm should be used if the subnet is not a pure +Fat Tree, and one of its loops may experience a deadlock (due, for example, +to high pressure). + +The UPDN algorithm is based on the following main stages: + +1. Auto-detect root nodes - based on the CA hop length from any switch in +the subnet, a statistical histogram is built for each switch (hop num vs +number of occurrences). If the histogram reflects a specific column (higher +than others) for a certain node, then it is marked as a root node. Since +the algorithm is statistical, it may not find any root nodes. The list of +the root nodes found by this auto-detect stage is used by the ranking +process stage. + + Note 1: The user can override the node list manually. + Note 2: If this stage cannot find any root nodes, and the user did + not specify a guid list file, OpenSM defaults back to the + Min Hop routing algorithm. + +2. Ranking process - All root switch nodes (found in stage 1) are assigned +a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the +subnet are ranked incrementally. This ranking aids in the process of enforcing +rules that ensure loop-free paths. + +3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from +each (CA or switch) node in the subnet. During the BFS process, the FDB table +of each switch node traversed by BFS is updated, in reference to the starting +node, based on the ranking rules and guid values. + +At the end of the process, the updated FDB tables ensure loop-free paths +through the subnet. + +Note: Up/Down routing does not allow LID routing communication between +switches that are located inside spine "switch systems". +The reason is that there is no way to allow a LID route between them +that does not break the Up/Down rule. 
+One ramification of this is that you cannot run SM on switches other +than the leaf switches of the fabric. + + +UPDN Algorithm Usage + +Activation through OpenSM + +Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. +Use '-a ' for adding an UPDN guid file that contains the +root nodes for ranking. +If the `-a' option is not used, OpenSM uses its auto-detect root nodes +algorithm. + +Notes on the guid list file: + +1. A valid guid file specifies one guid in each line. Lines with an invalid +format will be discarded. +.br +2. The user should specify the root switch guids. However, it is also +possible to specify CA guids; OpenSM will use the guid of the switch (if +it exists) that connects the CA to the subnet as a root node. + + +Fat-tree Routing Algorithm + +The fat-tree algorithm optimizes routing for "shift" communication pattern. +It should be chosen if a subnet is a symmetrical or almost symmetrical +fat-tree of various types. +It supports not just K-ary-N-Trees, by handling for non-constant K, +cases where not all leafs (CAs) are present, any CBB ratio. +As in UPDN, fat-tree also prevents credit-loop-deadlocks. + +If the root guid file is not provided ('-a' or '--root_guid_file' options), +the topology has to be pure fat-tree that complies with the following rules: + - Tree rank should be between two and eight (inclusively) + - Switches of the same rank should have the same number + of UP-going port groups*, unless they are root switches, + in which case they shouldn't have UP-going ports at all. + - Switches of the same rank should have the same number + of DOWN-going port groups, unless they are leaf switches. + - Switches of the same rank should have the same number + of ports in each UP-going port group. + - Switches of the same rank should have the same number + of ports in each DOWN-going port group. + - All the CAs have to be at the same tree level (rank).
+ +If the root guid file is provided, the topology doesn't have to be pure +fat-tree, and it should only comply with the following rules: + - Tree rank should be between two and eight (inclusively) + - All the Compute Nodes** have to be at the same tree level (rank). + Note that non-compute node CAs are allowed here to be at different + tree ranks. + +* ports that are connected to the same remote switch are referenced as +\'port group\'. + +** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\' +OpenSM options. + +Topologies that do not comply cause a fallback to min hop routing. +Note that this can also occur on link failures which cause the topology +to no longer be "pure" fat-tree. + +Note that although fat-tree algorithm supports trees with non-integer CBB +ratio, the routing will not be as balanced as in case of integer CBB ratio. +In addition to this, although the algorithm allows leaf switches to have any +number of CAs, the closer the tree is to be fully populated, the more +effective the "shift" communication pattern will be. +In general, even if the root list is provided, the closer the topology to a +pure and symmetrical fat-tree, the more optimal the routing will be. + +The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) +in the same directory where the OpenSM log resides. This ordering file provides +the CN order that may be used to create efficient communication pattern, that +will match the routing tables. + +Activation through OpenSM + +Use '-R ftree' option to activate the fat-tree algorithm. +Use '-a ' to provide root nodes for ranking. If the `-a' option +is not used, routing algorithm will detect roots automatically. +Use '-u ' to provide the list of compute nodes. If the `-u' option +is not used, all the CAs are considered as compute nodes. + +Note: LMC > 0 is not supported by fat-tree routing. If this is +specified, the default routing algorithm is invoked instead. 
+ + +LASH Routing Algorithm + +LASH is an acronym for LAyered SHortest Path Routing. It is a +deterministic shortest path routing algorithm that enables topology +agnostic deadlock-free routing within communication networks. + +When computing the routing function, LASH analyzes the network +topology for the shortest-path routes between all pairs of sources / +destinations and groups these paths into virtual layers in such a way +as to avoid deadlock. + +Note LASH analyzes routes and ensures deadlock freedom between switch +pairs. The link between an HCA and a switch does not need virtual +layers as deadlock will not arise between switch and HCA. + +In more detail, the algorithm works as follows: + +1) LASH determines the shortest-path between all pairs of source / +destination switches. Note, LASH ensures the same SL is used for all +SRC/DST - DST/SRC pairs and there is no guarantee that the return +path for a given DST/SRC will be the reverse of the route SRC/DST. + +2) LASH then begins an SL assignment process where a route is assigned +to a layer (SL) if the addition of that route does not cause deadlock +within that layer. This is achieved by maintaining and analysing a +channel dependency graph for each layer. Once the potential addition +of a path could lead to deadlock, LASH opens a new layer and continues +the process. + +3) Once this stage has been completed, it is highly likely that the +first layers processed will contain more paths than the latter ones. +To better balance the use of layers, LASH moves paths from one layer +to another so that the number of paths in each layer averages out. + +Note, the implementation of LASH in opensm attempts to use as few layers +as possible. This number can be less than the number of actual layers +available. + +In general LASH is a very flexible algorithm. It can, for example, +reduce to Dimension Order Routing in certain topologies, it is topology +agnostic and fares well in the face of faults.
+ +It has been shown that for both regular and irregular topologies, LASH +outperforms Up/Down. The reason for this is that LASH distributes the +traffic more evenly through a network, avoiding the bottleneck issues +related to a root node and always routes shortest-path. + +The algorithm was developed by Simula Research Laboratory. + + +Use '-R lash -Q ' option to activate the LASH algorithm. + +Note: QoS support has to be turned on in order that SL/VL mappings are +used. + +Note: LMC > 0 is not supported by the LASH routing. If this is +specified, the default routing algorithm is invoked instead. + + +DOR Routing Algorithm + +The Dimension Order Routing algorithm is based on the Min Hop +algorithm and so uses shortest paths. Instead of spreading traffic +out across different paths with the same shortest distance, it chooses +among the available shortest paths based on an ordering of dimensions. +Each port must be consistently cabled to represent a hypercube +dimension or a mesh dimension. Paths are grown from a destination +back to a source using the lowest dimension (port) of available paths +at each step. This provides the ordering necessary to avoid deadlock. +When there are multiple links between any two switches, they still +represent only one dimension and traffic is balanced across them +unless port equalization is turned off. In the case of hypercubes, +the same port must be used throughout the fabric to represent the +hypercube dimension and match on both ends of the cable. In the case +of meshes, the dimension should consistently use the same pair of +ports, one port on one end of the cable, and the other port on the +other end, continuing along the mesh dimension. + +Use '-R dor' option to activate the DOR algorithm. + + +Routing References + +To learn more about deadlock-free routing, see the article +"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" +by William J Dally and Charles L Seitz (1985). 
+ +To learn more about the up/down algorithm, see the article +"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks" +by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the +Universidad Politecnica de Valencia. + +To learn more about LASH and the flexibility behind it, the requirement +for layers, performance comparisons to other algorithms, see the +following articles: + +"Layered Routing in Irregular Networks", Lysne et al, IEEE +Transactions on Parallel and Distributed Systems, VOL.16, No12, +December 2005. + +"Routing for the ASI Fabric Manager", Solheim et al. IEEE +Communications Magazine, Vol.44, No.7, July 2006. + +"Layered Shortest Path (LASH) Routing in Irregular System Area +Networks", Skeie et al. IEEE Computer Society Communication +Architecture for Clusters 2002. + + +Modular Routing Engine + +Modular routing engine structure allows for the ease of +"plugging" new routing modules. + +Currently, only unicast callbacks are supported. Multicast +can be added later. + +One existing routing module is up-down "updn", which may be +activated with '-R updn' option (instead of old '-u'). + +General usage is: +$ opensm -R 'module-name' + +There is also a trivial routing module which is able +to load LFT tables from a dump file. + +Main features: + + - this will load switch LFTs and/or LID matrices (min hops tables) + - this will load switch LFTs according to the path entries introduced + in the dump file + - no additional checks will be performed (such as "is port connected", + etc.) + - in case when fabric LIDs were changed this will try to reconstruct + LFTs correctly if endport GUIDs are represented in the dump file + (in order to disable this, GUIDs may be removed from the dump file + or zeroed) + +The dump file format is compatible with output of 'ibroute' util and for +whole fabric can be generated with dump_lfts.sh script.
+ +To activate file based routing module, use: + + opensm -R file -U /path/to/dump_file + +If the dump_file is not found or is in error, the default routing +algorithm is utilized. + +The ability to dump switch lid matrices (aka min hops tables) to file and +later to load these is also supported. + +The usage is similar to unicast forwarding tables loading from dump +file (introduced by 'file' routing engine), but new lid matrix file +name should be specified by -M or --lid_matrix_file option. For example: + + opensm -R file -M ./opensm-lid-matrix.dump + +The dump file is named \'opensm-lid-matrix.dump\' and will be generated +in standard opensm dump directory (/var/log by default) when +OSM_LOG_ROUTING logging flag is set. + +When routing engine 'file' is activated, but dump file is not specified +or cannot be opened, the default lid matrix algorithm will be used. + +There is also a switch forwarding tables dumper which generates +a file compatible with dump_lfts.sh output. This file can be used +as input for forwarding tables loading by 'file' routing engine. +Both or one of options -U and -M can be specified together with \'-R file\'. + +.SH FILES +.TP +.B @CONF_DIR@/prefix-routes.conf +default prefix routes file. + +.SH AUTHORS +.TP +Hal Rosenstock +.RI < hal at xsigo.com > +.TP +Sasha Khapyorsky +.RI < sashak at voltaire.com > +.TP +Eitan Zahavi +.RI < eitan at mellanox.co.il > +.TP +Yevgeny Kliteynik +.RI < kliteyn at mellanox.co.il > +.TP +Thomas Sodring +.RI < tsodring at simula.no > -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed...
Name: 0001-Update-man-page-for-configurable-partition-and-prefi.patch Type: application/octet-stream Size: 78249 bytes Desc: not available URL: From weiny2 at llnl.gov Mon Feb 4 13:22:27 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 4 Feb 2008 13:22:27 -0800 Subject: [ofa-general] [PATCH] Add node name map, partition config, and QOS policy config files to the "FILES" section of man page (WAS: Re: [PATCH] opensm/man: partition cfg file location) In-Reply-To: <20080204185119.GG1392@sashak.voltaire.com> References: <47A6D462.7090904@dev.mellanox.co.il> <20080204184826.GF1392@sashak.voltaire.com> <20080204185119.GG1392@sashak.voltaire.com> Message-ID: <20080204132227.04ceed57.weiny2@llnl.gov> Follow on patch which updates "FILES" section of man page. Ira >From 02af1ba1288b8a1e67b8581777ac4b8ab0dbb071 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Mon, 4 Feb 2008 13:16:16 -0800 Subject: [PATCH] Add node name map, partition config, and QOS policy config files to the "FILES" section of man page. Signed-off-by: Ira K. 
Weiny --- opensm/configure.in | 2 ++ opensm/man/opensm.8.in | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 0 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index 455630e..4ba4a64 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -116,6 +116,7 @@ AC_MSG_RESULT(${withnodenamemap=no}) AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, ["$CONF_DIR/$NODENAMEMAPFILE"], [Define a default node name map file]) +AC_SUBST(NODENAMEMAPFILE) dnl Check for a different partition conf file PARTITION_CONFIG_FILE=partitions.conf @@ -157,6 +158,7 @@ AC_MSG_RESULT(${withqospolicyconf=no}) AC_DEFINE_UNQUOTED(HAVE_DEFAULT_QOS_POLICY_FILE, ["$CONF_DIR/$QOS_POLICY_FILE"], [Define a QOS policy config file]) +AC_SUBST(QOS_POLICY_FILE) dnl select example event plugin or not OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in index 115ab56..e1ad10c 100644 --- a/opensm/man/opensm.8.in +++ b/opensm/man/opensm.8.in @@ -920,6 +920,18 @@ Both or one of options -U and -M can be specified together with \'-R file\'. .SH FILES .TP +.B @CONF_DIR@/@NODENAMEMAPFILE@ +default node name map file. See ibnetdiscover for more information on format. + +.TP +.B @CONF_DIR@/@PARTITION_CONFIG_FILE@ +default partition config file + +.TP +.B @CONF_DIR@/@QOS_POLICY_FILE@ +default QOS policy config file + +.TP .B @CONF_DIR@/prefix-routes.conf default prefix routes file. @@ -939,3 +951,6 @@ Yevgeny Kliteynik .TP Thomas Sodring .RI < tsodring at simula.no > +.TP +Ira Weiny +.RI < weiny2 at llnl.gov > -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0002-Add-node-name-map-partition-config-and-QOS-policy.patch Type: application/octet-stream Size: 1902 bytes Desc: not available URL: From hrosenstock at xsigo.com Mon Feb 4 13:33:44 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 04 Feb 2008 13:33:44 -0800 Subject: [ofa-general] Re: [PATCH] Update man page for configurable partition and prefix-routes file. (WAS: Re: [PATCH] opensm/man: partition cfg file location) In-Reply-To: <20080204132051.61e28322.weiny2@llnl.gov> References: <47A6D462.7090904@dev.mellanox.co.il> <20080204184826.GF1392@sashak.voltaire.com> <20080204185119.GG1392@sashak.voltaire.com> <20080204132051.61e28322.weiny2@llnl.gov> Message-ID: <1202160824.11210.292.camel@hrosenstock-ws.xsigo.com> Ira, On Mon, 2008-02-04 at 13:20 -0800, Ira Weiny wrote: > This is my bad. When I changed the config file locations and "configurability" > I should have updated the man page. > > This patch fixes this to reflect the location and names chosen at configure > time. Seems like this patch is modifying many lines in the opensm man page aside from those affected. Is that needed ? That makes it hard to see exactly what changed at least for me. -- Hal > There will be a follow on patch (another email) which adds the other config > files to the "FILES" section. It depends on this change. > > Sorry, > Ira > > On Mon, 4 Feb 2008 18:51:19 +0000 > Sasha Khapyorsky wrote: > > > On 18:48 Mon 04 Feb , Sasha Khapyorsky wrote: > > > > --- a/opensm/man/opensm.8 > > > > +++ b/opensm/man/opensm.8 > > > > @@ -200,7 +200,7 @@ is accumulative. > > > > .TP > > > > \fB\-P\fR, \fB\-\-Pconfig\fR > > > > This option defines the optional partition configuration file. > > > > -The default name is \'/etc/opensm/opensm-partitions.conf\'. > > > > +The default name is \'/etc/ofa/opensm-partitions.conf\'. 
> > > > > > It is also wrong name - partition config file name is configurable with > > > OpenSM (look at './configure --help') and default default value is > > > '/opensm/partitions.conf'. 'opensm -h' shows valid value. > > > > BTW in OFED-1.3 it is '/etc/opensm/partitions.conf'. > > > > Sasha > > > > From cd1344594a988f2f18a903ac454ae858f42490c0 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Mon, 4 Feb 2008 13:05:16 -0800 > Subject: [PATCH] Update man page for configurable partition and prefix-routes file. > > This changes the man page to be auto-generated based on the chosen configure > options. > > Signed-off-by: Ira K. Weiny > --- > opensm/configure.in | 4 + > opensm/man/opensm.8 | 941 ------------------------------------------------ > opensm/man/opensm.8.in | 941 ++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 945 insertions(+), 941 deletions(-) > delete mode 100644 opensm/man/opensm.8 > create mode 100644 opensm/man/opensm.8.in > > diff --git a/opensm/configure.in b/opensm/configure.in > index 79a914e..455630e 100644 > --- a/opensm/configure.in > +++ b/opensm/configure.in > @@ -95,6 +95,7 @@ CONF_DIR="`eval echo $CONF_DIR_TMP2`" > AC_DEFINE_UNQUOTED(OPENSM_CONFIG_DIR, > ["$CONF_DIR"], > [Define OpenSM config directory]) > +AC_SUBST(CONF_DIR) > > dnl Check for a different default node name map file > NODENAMEMAPFILE=ib-node-name-map > @@ -135,6 +136,7 @@ AC_MSG_RESULT(${withpartitionsconf=no}) > AC_DEFINE_UNQUOTED(HAVE_DEFAULT_PARTITION_CONFIG_FILE, > ["$CONF_DIR/$PARTITION_CONFIG_FILE"], > [Define a QOS policy config file]) > +AC_SUBST(PARTITION_CONFIG_FILE) > > dnl Check for a different QOS policy file > QOS_POLICY_FILE=qos-policy.conf > @@ -172,5 +174,7 @@ OPENIB_APP_OSMV_CHECK_LIB > # overrides. 
> CFLAGS=$ac_env_CFLAGS_value > > +AC_CONFIG_FILES([man/opensm.8]) > + > dnl Create the following Makefiles > AC_OUTPUT([include/opensm/osm_version.h Makefile include/Makefile complib/Makefile libvendor/Makefile opensm/Makefile osmeventplugin/Makefile osmtest/Makefile opensm.spec]) > diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 > deleted file mode 100644 > index ab7fb8e..0000000 > --- a/opensm/man/opensm.8 > +++ /dev/null > @@ -1,941 +0,0 @@ > -.TH OPENSM 8 "Aug 16, 2007" "OpenIB" "OpenIB Management" > - > -.SH NAME > -opensm \- InfiniBand subnet manager and administration (SM/SA) > - > -.SH SYNOPSIS > -.B opensm > -[\-c(ache-options)] [\-g(uid)[=]] [\-l(mc) ] > -[\-p(riority) ] [\-smkey ] [\-r(eassign_lids)] > -[\-R | \-\-routing_engine ] > -[\-z | \-\-connect_roots] > -[\-M | \-\-lid_matrix_file ] > -[\-U | \-\-ucast_file ] > -[\-S | \-\-sadb_file ] [\-a | \-\-root_guid_file ] > -[\-u | \-\-cn_guid_file ] [\-o(nce)] [\-s(weep) ] > -[\-t(imeout) ] [\-maxsmps ] > -[\-console [off | local | socket | loopback]] [\-console-port ] > -[\-i(gnore-guids) ] [\-f | \-\-log_file] > -[\-L | \-\-log_limit ] [\-e(rase_log_file)] [\-P(config)] > -[\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] > -[\-B | \-\-daemon] [\-I | \-\-inactive] > -[\-\-perfmgr] [\-\-perfmgr_sweep_time_s ] > -[\-\-prefix_routes_file ] > -[\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] > - > -.SH DESCRIPTION > -.PP > -opensm is an InfiniBand compliant Subnet Manager and Administration, > -and runs on top of OpenIB. > - > -opensm provides an implementation of an InfiniBand Subnet Manager and > -Administration. Such a software entity is required to run for in order > -to initialize the InfiniBand hardware (at least one per each > -InfiniBand subnet). > - > -opensm also now contains an experimental version of a performance > -manager as well. > - > -opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes. 
Thus, in this default mode, opensm will scan the IB > -fabric, initialize it, and sweep occasionally for changes. > - > -opensm attaches to a specific IB port on the local machine and configures only > -the fabric connected to it. (If the local machine has other IB ports, > -opensm will ignore the fabrics connected to those other ports). If no port is > -specified, it will select the first "best" available port. > - > -opensm can present the available ports and prompt for a port number to > -attach to. > - > -By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log. > -The first file will register only general major events, whereas the second > -will include details of reported errors. All errors reported in this second > -file should be treated as indicators of IB fabric health issues. > -(Note that when a fatal and non-recoverable error occurs, opensm will exit.) > -Both log files should include the message "SUBNET UP" if opensm was able to > -setup the subnet correctly. > - > -.SH OPTIONS > - > -.PP > -.TP > -\fB\-c\fR, \fB\-\-cache-options\fR > -Write out a list of all tunable OpenSM parameters, > -including their current values from the command line > -as well as defaults for others, into the file > -OSM_CACHE_DIR/opensm.opts (OSM_CACHE_DIR defaults to > -/var/cache/opensm if the corresponding environment > -variable is not set). The options file is then > -used for subsequent OpenSM invocations but any > -command line options take precedence. > -.TP > -\fB\-g\fR, \fB\-\-guid\fR > -This option specifies the local port GUID value > -with which OpenSM should bind. OpenSM may be > -bound to 1 port at a time. > -If GUID given is 0, OpenSM displays a list > -of possible port GUIDs and waits for user input. > -Without -g, OpenSM tries to use the default port. > -.TP > -\fB\-l\fR, \fB\-\-lmc\fR > -This option specifies the subnet's LMC value. > -The number of LIDs assigned to each port is 2^LMC. > -The LMC value must be in the range 0-7. 
> -LMC values > 0 allow multiple paths between ports. > -LMC values > 0 should only be used if the subnet > -topology actually provides multiple paths between > -ports, i.e. multiple interconnects between switches. > -Without -l, OpenSM defaults to LMC = 0, which allows > -one path between any two ports. > -.TP > -\fB\-p\fR, \fB\-\-priority\fR > -This option specifies the SM\'s PRIORITY. > -This will effect the handover cases, where master > -is chosen by priority and GUID. Range goes from 0 > -(default and lowest priority) to 15 (highest). > -.TP > -\fB\-smkey\fR > -This option specifies the SM\'s SM_Key (64 bits). > -This will effect SM authentication. > -.TP > -\fB\-r\fR, \fB\-\-reassign_lids\fR > -This option causes OpenSM to reassign LIDs to all > -end nodes. Specifying -r on a running subnet > -may disrupt subnet traffic. > -Without -r, OpenSM attempts to preserve existing > -LID assignments resolving multiple use of same LID. > -.TP > -\fB\-R\fR, \fB\-\-routing_engine\fR > -This option chooses routing engine instead of Min Hop > -algorithm (default). > -Supported engines: minhop, updn, file, ftree, lash, dor > -.TP > -\fB\-z\fR, \fB\-\-connect_roots\fR > -This option enforces a routing engine (currently up/down > -only) to make connectivity between root switches and in > -this way to be fully IBA complaint. In many cases this can > -violate "pure" deadlock free algorithm, so use it carefully. > -.TP > -\fB\-M\fR, \fB\-\-lid_matrix_file\fR > -This option specifies the name of the lid matrix dump file > -from where switch lid matrices (min hops tables will be > -loaded. > -.TP > -\fB\-U\fR, \fB\-\-ucast_file\fR > -This option specifies the name of the unicast dump file > -from where switch forwarding tables will be loaded. > -.TP > -\fB\-S\fR, \fB\-\-sadb_file\fR > -This option specifies the name of the SA DB dump file > -from where SA database will be loaded. 
> -.TP > -\fB\-a\fR, \fB\-\-root_guid_file\fR > -Set the root nodes for the Up/Down or Fat-Tree routing > -algorithm to the guids provided in the given file (one to a line). > -.TP > -\fB\-u\fR, \fB\-\-cn_guid_file\fR > -Set the compute nodes for the Fat-Tree routing algorithm > -to the guids provided in the given file (one to a line). > -.TP > -\fB\-o\fR, \fB\-\-once\fR > -This option causes OpenSM to configure the subnet > -once, then exit. Ports remain in the ACTIVE state. > -.TP > -\fB\-s\fR, \fB\-\-sweep\fR > -This option specifies the number of seconds between > -subnet sweeps. Specifying -s 0 disables sweeping. > -Without -s, OpenSM defaults to a sweep interval of > -10 seconds. > -.TP > -\fB\-t\fR, \fB\-\-timeout\fR > -This option specifies the time in milliseconds > -used for transaction timeouts. > -Specifying -t 0 disables timeouts. > -Without -t, OpenSM defaults to a timeout value of > -200 milliseconds. > -.TP > -\fB\-maxsmps\fR > -This option specifies the number of VL15 SMP MADs > -allowed on the wire at any one time. > -Specifying -maxsmps 0 allows unlimited outstanding > -SMPs. > -Without -maxsmps, OpenSM defaults to a maximum of > -4 outstanding SMPs. > -.TP > -\fB\-console [off | local | socket | loopback]\fR > -This option brings up the OpenSM console (default off). > -Note that the socket and loopback options will only be available > -if OpenSM was built with --enable-console-socket. > -.TP > -\fB\-console-port\fR > -Specify an alternate telnet port for the socket console (default 10000). > -Note that this option only appears if OpenSM was built with > ---enable-console-socket. > -.TP > -\fB\-i\fR, \fB\-ignore-guids\fR > -This option provides the means to define a set of ports > -(by guid) that will be ignored by the link load > -equalization algorithm. 
> -.TP > -\fB\-x\fR, \fB\-\-honor_guid2lid\fR > -This option forces OpenSM to honor the guid2lid file, > -when it comes out of Standby state, if such file exists > -under OSM_CACHE_DIR, and is valid. > -By default, this is FALSE. > -.TP > -\fB\-f\fR, \fB\-\-log_file\fR > -This option defines the log to be the given file. > -By default, the log goes to /var/log/opensm.log. > -For the log to go to standard output use -f stdout. > -.TP > -\fB\-L\fR, \fB\-\-log_limit\fR > -This option defines maximal log file size in MB. When > -specified the log file will be truncated upon reaching > -this limit. > -.TP > -\fB\-e\fR, \fB\-\-erase_log_file\fR > -This option will cause deletion of the log file > -(if it previously exists). By default, the log file > -is accumulative. > -.TP > -\fB\-P\fR, \fB\-\-Pconfig\fR > -This option defines the optional partition configuration file. > -The default name is \'/etc/opensm/opensm-partitions.conf\'. > -.TP > -.BI --prefix_routes_file= path > -Prefix routes control how the SA responds to path record queries for > -off-subnet DGIDs. By default, the SA fails such queries. The > -.B PREFIX ROUTES > -section below describes the format of the configuration file. > -The default path is \fB\%/etc/ofa/opensm\-prefix\-routes.conf\fP. > -.TP > -\fB\-Q\fR, \fB\-\-qos\fR > -This option enables QoS setup. It is disabled by default. > -.TP > -\fB\-N\fR, \fB\-\-no_part_enforce\fR > -This option disables partition enforcement on switch external ports. > -.TP > -\fB\-y\fR, \fB\-\-stay_on_fatal\fR > -This option will cause SM not to exit on fatal initialization > -issues: if SM discovers duplicated guids or a 12x link with > -lane reversal badly configured. > -By default, the SM will exit on these errors. > -.TP > -\fB\-B\fR, \fB\-\-daemon\fR > -Run in daemon mode - OpenSM will run in the background. > -.TP > -\fB\-I\fR, \fB\-\-inactive\fR > -Start SM in inactive rather than init SM state. 
This > -option can be used in conjunction with the perfmgr so as to > -run a standalone performance manager without SM/SA. However, > -this is NOT currently implemented in the performance manager. > -.TP > -\fB\-perfmgr\fR > -Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at > -configure time. > -.TP > -\fB\-perfmgr_sweep_time_s\fR > -Specify the sweep time for the performance manager in seconds > -(default is 180 seconds). Only takes > -effect if --enable-perfmgr was specified at configure time. > -.TP > -.BI --consolidate_ipv6_snm_reqests > -Consolidate IPv6 Solicited Node Multicast group joins into 1 IB multicast > -group. > -.TP > -\fB\-v\fR, \fB\-\-verbose\fR > -This option increases the log verbosity level. > -The -v option may be specified multiple times > -to further increase the verbosity level. > -See the -D option for more information about > -log verbosity. > -.TP > -\fB\-V\fR > -This option sets the maximum verbosity level and > -forces log flushing. > -The -V option is equivalent to \'-D 0xFF -d 2\'. > -See the -D option for more information about > -log verbosity. > -.TP > -\fB\-D\fR > -This option sets the log verbosity level. > -A flags field must follow the -D option. > -A bit set/clear in the flags enables/disables a > -specific log level as follows: > - > - BIT LOG LEVEL ENABLED > - ---- ----------------- > - 0x01 - ERROR (error messages) > - 0x02 - INFO (basic messages, low volume) > - 0x04 - VERBOSE (interesting stuff, moderate volume) > - 0x08 - DEBUG (diagnostic, high volume) > - 0x10 - FUNCS (function entry/exit, very high volume) > - 0x20 - FRAMES (dumps all SMP and GMP frames) > - 0x40 - ROUTING (dump FDB routing information) > - 0x80 - currently unused. > - > -Without -D, OpenSM defaults to ERROR + INFO (0x3). > -Specifying -D 0 disables all messages. > -Specifying -D 0xFF enables all messages (see -V). > -High verbosity levels may require increasing > -the transaction timeout with the -t option. 
> -.TP > -\fB\-d\fR, \fB\-\-debug\fR > -This option specifies a debug option. > -These options are not normally needed. > -The number following -d selects the debug > -option to enable as follows: > - > - OPT Description > - --- ----------------- > - -d0 - Ignore other SM nodes > - -d1 - Force single threaded dispatching > - -d2 - Force log flushing after each log message > - -d3 - Disable multicast support > -.TP > -\fB\-h\fR, \fB\-\-help\fR > -Display this usage info then exit. > -.TP > -\fB\-?\fR > -Display this usage info then exit. > - > -.SH ENVIRONMENT VARIABLES > -.PP > -The following environment variables control opensm behavior: > - > -OSM_TMP_DIR - controls the directory in which the temporary files generated by > -opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and > -opensm.mcfdbs. By default, this directory is /var/log. > - > -OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent > -runs are consistent. The default directory used is /var/cache/opensm. > -The following files are included in it: > - > - guid2lid - stores the LID range assigned to each GUID > - > - opensm.opts - an optional file that holds a complete set of opensm > - configuration options > - > -.SH NOTES > -.PP > -When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found. > -.PP > -Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for > -logrotate purposes. > - > -.SH PARTITION CONFIGURATION > -.PP > -The default name of OpenSM partitions configuration file is > -\'/etc/ofa/opensm-partitions.conf\'. The default may be changed by using > ---Pconfig (-P) option with OpenSM. > - > -The default partition will be created by OpenSM unconditionally even > -when partition configuration file does not exist or cannot be accessed. > - > -The default partition has P_Key value 0x7fff. OpenSM\'s port will have > -full membership in default partition. 
All other end ports will have > -partial membership. > - > -File Format > - > -Comments: > - > -Line content followed after \'#\' character is comment and ignored by > -parser. > - > -General file format: > - > -: ; > - > -Partition Definition: > - > -[PartitionName][=PKey][,flag[=value]][,defmember=full|limited] > - > - PartitionName - string, will be used with logging. When omitted > - empty string will be used. > - PKey - P_Key value for this partition. Only low 15 bits will > - be used. When omitted will be autogenerated. > - flag - used to indicate IPoIB capability of this partition. > - defmember=full|limited - specifies default membership for port guid > - list. Default is limited. > - > -Currently recognized flags are: > - > - ipoib - indicates that this partition may be used for IPoIB, as > - result IPoIB capable MC group will be created. > - rate= - specifies rate for this IPoIB MC group > - (default is 3 (10GBps)) > - mtu= - specifies MTU for this IPoIB MC group > - (default is 4 (2048)) > - sl= - specifies SL for this IPoIB MC group > - (default is 0) > - scope= - specifies scope for this IPoIB MC group > - (default is 2 (link local)). Multiple scope settings > - are permitted for a partition. > - > -Note that values for rate, mtu, and scope should be specified as > -defined in the IBTA specification (for example, mtu=4 for 2048). > - > -PortGUIDs list: > - > - PortGUID - GUID of partition member EndPort. Hexadecimal > - numbers should start from 0x, decimal numbers > - are accepted too. > - full or limited - indicates full or limited membership for this > - port. When omitted (or unrecognized) limited > - membership is assumed. > - > -There are two useful keywords for PortGUID definition: > - > - - 'ALL' means all end ports in this subnet. > - - 'SELF' means subnet manager's port. > - > -Empty list means no ports in this partition. > - > -Notes: > - > -White space is permitted between delimiters ('=', ',',':',';'). 
> - > -The line can be wrapped after ':' followed after Partition Definition and > -between. > - > -PartitionName does not need to be unique, PKey does need to be unique. > -If PKey is repeated then those partition configurations will be merged > -and first PartitionName will be used (see also next note). > - > -It is possible to split partition configuration in more than one > -definition, but then PKey should be explicitly specified (otherwise > -different PKey values will be generated for those definitions). > - > -Examples: > - > - Default=0x7fff : ALL, SELF=full ; > - > - NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; > - > - YetAnotherOne = 0x300 : SELF=full ; > - YetAnotherOne = 0x300 : ALL=limited ; > - > - ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; > - # 0x123453, 0x123454 will be limited > - ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; > - # 0x123456, 0x123457 will be limited > - ShareIO = 0x80 : defmember=limited : 0x123456, 0x123457, 0x123458=full; > - ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; > - ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; > - > - > -Note: > - > -The following rule is equivalent to how OpenSM used to run prior to the > -partition manager: > - > - Default=0x7fff,ipoib:ALL=full; > - > -.SH QOS CONFIGURATION > -.PP > -There are a set of QoS related low-level configuration parameters. > -All these parameter names are prefixed by "qos_" string. Here is a full > -list of these parameters: > - > - qos_max_vls - The maximum number of VLs that will be on the subnet > - qos_high_limit - The limit of High Priority component of VL > - Arbitration table (IBA 7.6.9) > - qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) > - template > - qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) > - template > - Both VL arbitration templates are pairs of > - VL and weight > - qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. 
It is > - a list of VLs corresponding to SLs 0-15 (Note > - that VL15 used here means drop this SL) > - > -Typical default values (hard-coded in OpenSM initialization) are: > - > - qos_max_vls=15 > - qos_high_limit=0 > - qos_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 > - qos_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 > - qos_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > - > -The syntax is compatible with rest of OpenSM configuration options and > -values may be stored in OpenSM config file (cached options file). > - > -In addition to the above, we may define separate QoS configuration > -parameters sets for various target types. As targets, we currently support > -CAs, routers, switch external ports, and switch's enhanced port 0. The > -names of such specialized parameters are prefixed by "qos__" > -string. Here is a full list of the currently supported sets: > - > - qos_ca_ - QoS configuration parameters set for CAs. > - qos_rtr_ - parameters set for routers. > - qos_sw0_ - parameters set for switches' port 0. > - qos_swe_ - parameters set for switches' external ports. > - > -Examples: > - qos_sw0_max_vls=2 > - qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, > - qos_swe_high_limit=0 > - > -.SH PREFIX ROUTES > -.PP > -Prefix routes control how the SA responds to path record queries for > -off-subnet DGIDs. By default, the SA fails such queries. > -Note that IBA does not specify how the SA should obtain off-subnet path > -record information. > -The prefix routes configuration is meant as a stop-gap until the > -specification is completed. > -.PP > -Each line in the configuration file is a 64-bit prefix followed by a > -64-bit GUID, separated by white space. > -The GUID specifies the router port on the local subnet that will > -handle the prefix. > -Blank lines are ignored, as is anything between a \fB#\fP character > -and the end of the line. > -The prefix and GUID are both in hex, the leading 0x is optional. 
> -Either, or both, can be wild-carded by specifying an > -asterisk instead of an explicit prefix or GUID. > -.PP > -When responding to a path record query for an off-subnet DGID, > -opensm searches for the first prefix match in the configuration file. > -Therefore, the order of the lines in the configuration file is important: > -a wild-carded prefix at the beginning of the configuration file renders > -all subsequent lines useless. > -If there is no match, then opensm fails the query. > -It is legal to repeat prefixes in the configuration file, > -opensm will return the path to the first available matching router. > -A configuration file with a single line where both prefix and GUID > -are wild-carded means that a path record query specifying any > -off-subnet DGID should return a path to the first available router. > -This configuration yields the same behaviour formerly achieved by > -compiling opensm with -DROUTER_EXP. > - > -.SH ROUTING > -.PP > -OpenSM now offers five routing engines: > - > -1. Min Hop Algorithm - based on the minimum hops to each node where the > -path length is optimized. > - > -2. UPDN Unicast routing algorithm - also based on the minimum hops to each > -node, but it is constrained to ranking rules. This algorithm should be chosen > -if the subnet is not a pure Fat Tree, and deadlock may occur due to a > -loop in the subnet. > - > -3. Fat Tree Unicast routing algorithm - this algorithm optimizes routing > -for congestion-free "shift" communication pattern. > -It should be chosen if a subnet is a symmetrical Fat Trees of various types, > -not just K-ary-N-Trees: non-constant K, not fully staffed, any CBB ratio. > -Similar to UPDN, Fat Tree routing is constrained to ranking rules. > - > -4. LASH unicast routing algorithm - uses Infiniband virtual layers > -(SL) to provide deadlock-free shortest-path routing while also > -distributing the paths between layers. 
LASH is an alternative > -deadlock-free topology-agnostic routing algorithm to the non-minimal > -UPDN algorithm avoiding the use of a potentially congested root node. > - > -5. DOR Unicast routing algorithm - based on the Min Hop algorithm, but > -avoids port equalization except for redundant links between the same > -two switches. This provides deadlock free routes for hypercubes when > -the fabric is cabled as a hypercube and for meshes when cabled as a > -mesh (see details below). > - > -OpenSM also supports a file method which > -can load routes from a table. See \'Modular Routing Engine\' for more > -information on this. > - > -The basic routing algorithm is comprised of two stages: > - > -1. MinHop matrix calculation > - How many hops are required to get from each port to each LID ? > - The algorithm to fill these tables is different if you run standard > -(min hop) or Up/Down. > - For standard routing, a "relaxation" algorithm is used to propagate > -min hop from every destination LID through neighbor switches > - For Up/Down routing, a BFS from every target is used. The BFS tracks link > -direction (up or down) and avoid steps that will perform up after a down > -step was used. > - > -2. Once MinHop matrices exist, each switch is visited and for each target LID a > -decision is made as to what port should be used to get to that LID. > - This step is common to standard and Up/Down routing. Each port has a > -counter counting the number of target LIDs going through it. > - When there are multiple alternative ports with same MinHop to a LID, > -the one with less previously assigned ports is selected. > - If LMC > 0, more checks are added: Within each group of LIDs assigned to > -same target port, > - a. use only ports which have same MinHop > - b. first prefer the ones that go to different systemImageGuid (then > -the previous LID of the same LMC group) > - c. if none - prefer those which go through another NodeGuid > - d. 
fall back to the number of paths method (if all go to same node). > - > -Effect of Topology Changes > - > -OpenSM will preserve existing routing in any case where there is no change in > -the fabric switches unless the -r (--reassign_lids) option is specified. > - > --r > -.br > ---reassign_lids > - This option causes OpenSM to reassign LIDs to all > - end nodes. Specifying -r on a running subnet > - may disrupt subnet traffic. > - Without -r, OpenSM attempts to preserve existing > - LID assignments resolving multiple use of same LID. > - > -If a link is added or removed, OpenSM does not recalculate > -the routes that do not have to change. A route has to change > -if the port is no longer UP or no longer the MinHop. When routing changes > -are performed, the same algorithm for balancing the routes is invoked. > - > -In the case of using the file based routing, any topology changes are > -currently ignored The 'file' routing engine just loads the LFTs from the file > -specified, with no reaction to real topology. Obviously, this will not be able > -to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent > -switches will be skipped. Multicast is not affected by 'file' routing engine > -(this uses min hop tables). > - > - > -Min Hop Algorithm > - > -The Min Hop algorithm is invoked when neither UPDN or the file method are > -specified. > - > -The Min Hop algorithm is divided into two stages: computation of > -min-hop tables on every switch and LFT output port assignment. Link > -subscription is also equalized with the ability to override based on > -port GUID. The latter is supplied by: > - > --i > -.br > --ignore-guids > - This option provides the means to define a set of ports > - (by guid) that will be ignored by the link load > - equalization algorithm. Note that only endports (CA, > - switch port 0, and router ports) and not switch external > - ports are supported. > - > -LMC awareness routes based on (remote) system or switch basis. 
> - > - > -Purpose of UPDN Algorithm > - > -The UPDN algorithm is designed to prevent deadlocks from occurring in loops > -of the subnet. A loop-deadlock is a situation in which it is no longer > -possible to send data between any two hosts connected through the loop. As > -such, the UPDN routing algorithm should be used if the subnet is not a pure > -Fat Tree, and one of its loops may experience a deadlock (due, for example, > -to high pressure). > - > -The UPDN algorithm is based on the following main stages: > - > -1. Auto-detect root nodes - based on the CA hop length from any switch in > -the subnet, a statistical histogram is built for each switch (hop num vs > -number of occurrences). If the histogram reflects a specific column (higher > -than others) for a certain node, then it is marked as a root node. Since > -the algorithm is statistical, it may not find any root nodes. The list of > -the root nodes found by this auto-detect stage is used by the ranking > -process stage. > - > - Note 1: The user can override the node list manually. > - Note 2: If this stage cannot find any root nodes, and the user did > - not specify a guid list file, OpenSM defaults back to the > - Min Hop routing algorithm. > - > -2. Ranking process - All root switch nodes (found in stage 1) are assigned > -a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the > -subnet are ranked incrementally. This ranking aids in the process of enforcing > -rules that ensure loop-free paths. > - > -3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from > -each (CA or switch) node in the subnet. During the BFS process, the FDB table > -of each switch node traversed by BFS is updated, in reference to the starting > -node, based on the ranking rules and guid values. > - > -At the end of the process, the updated FDB tables ensure loop-free paths > -through the subnet. 
> - > -Note: Up/Down routing does not allow LID routing communication between > -switches that are located inside spine "switch systems". > -The reason is that there is no way to allow a LID route between them > -that does not break the Up/Down rule. > -One ramification of this is that you cannot run SM on switches other > -than the leaf switches of the fabric. > - > - > -UPDN Algorithm Usage > - > -Activation through OpenSM > - > -Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. > -Use '-a ' for adding an UPDN guid file that contains the > -root nodes for ranking. > -If the `-a' option is not used, OpenSM uses its auto-detect root nodes > -algorithm. > - > -Notes on the guid list file: > - > -1. A valid guid file specifies one guid in each line. Lines with an invalid > -format will be discarded. > -.br > -2. The user should specify the root switch guids. However, it is also > -possible to specify CA guids; OpenSM will use the guid of the switch (if > -it exists) that connects the CA to the subnet as a root node. > - > - > -Fat-tree Routing Algorithm > - > -The fat-tree algorithm optimizes routing for "shift" communication pattern. > -It should be chosen if a subnet is a symmetrical or almost symmetrical > -fat-tree of various types. > -It supports not just K-ary-N-Trees, by handling for non-constant K, > -cases where not all leafs (CAs) are present, any CBB ratio. > -As in UPDN, fat-tree also prevents credit-loop-deadlocks. > - > -If the root guid file is not provided ('-a' or '--root_guid_file' options), > -the topology has to be pure fat-tree that complies with the following rules: > - - Tree rank should be between two and eight (inclusively) > - - Switches of the same rank should have the same number > - of UP-going port groups*, unless they are root switches, > - in which case the shouldn't have UP-going ports at all. 
> - - Switches of the same rank should have the same number > - of DOWN-going port groups, unless they are leaf switches. > - - Switches of the same rank should have the same number > - of ports in each UP-going port group. > - - Switches of the same rank should have the same number > - of ports in each DOWN-going port group. > - - All the CAs have to be at the same tree level (rank). > - > -If the root guid file is provided, the topology doesn't have to be pure > -fat-tree, and it should only comply with the following rules: > - - Tree rank should be between two and eight (inclusively) > - - All the Compute Nodes** have to be at the same tree level (rank). > - Note that non-compute node CAs are allowed here to be at different > - tree ranks. > - > -* ports that are connected to the same remote switch are referenced as > -\'port group\'. > - > -** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\' > -OpenSM options. > - > -Topologies that do not comply cause a fallback to min hop routing. > -Note that this can also occur on link failures which cause the topology > -to no longer be "pure" fat-tree. > - > -Note that although fat-tree algorithm supports trees with non-integer CBB > -ratio, the routing will not be as balanced as in case of integer CBB ratio. > -In addition to this, although the algorithm allows leaf switches to have any > -number of CAs, the closer the tree is to be fully populated, the more > -effective the "shift" communication pattern will be. > -In general, even if the root list is provided, the closer the topology to a > -pure and symmetrical fat-tree, the more optimal the routing will be. > - > -The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) > -in the same directory where the OpenSM log resides. This ordering file provides > -the CN order that may be used to create efficient communication pattern, that > -will match the routing tables. 
> - > -Activation through OpenSM > - > -Use '-R ftree' option to activate the fat-tree algorithm. > -Use '-a ' to provide root nodes for ranking. If the `-a' option > -is not used, routing algorithm will detect roots automatically. > -Use '-u ' to provide the list of compute nodes. If the `-u' option > -is not used, all the CAs are considered as compute nodes. > - > -Note: LMC > 0 is not supported by fat-tree routing. If this is > -specified, the default routing algorithm is invoked instead. > - > - > -LASH Routing Algorithm > - > -LASH is an acronym for LAyered SHortest Path Routing. It is a > -deterministic shortest path routing algorithm that enables topology > -agnostic deadlock-free routing within communication networks. > - > -When computing the routing function, LASH analyzes the network > -topology for the shortest-path routes between all pairs of sources / > -destinations and groups these paths into virtual layers in such a way > -as to avoid deadlock. > - > -Note LASH analyzes routes and ensures deadlock freedom between switch > -pairs. The link from HCA between and switch does not need virtual > -layers as deadlock will not arise between switch and HCA. > - > -In more detail, the algorithm works as follows: > - > -1) LASH determines the shortest-path between all pairs of source / > -destination switches. Note, LASH ensures the same SL is used for all > -SRC/DST - DST/SRC pairs and there is no guarantee that the return > -path for a given DST/SRC will be the reverse of the route SRC/DST. > - > -2) LASH then begins an SL assignment process where a route is assigned > -to a layer (SL) if the addition of that route does not cause deadlock > -within that layer. This is achieved by maintaining and analysing a > -channel dependency graph for each layer. Once the potential addition > -of a path could lead to deadlock, LASH opens a new layer and continues > -the process. 
> - > -3) Once this stage has been completed, it is highly likely that the > -first layers processed will contain more paths than the latter ones. > -To better balance the use of layers, LASH moves paths from one layer > -to another so that the number of paths in each layer averages out. > - > -Note, the implementation of LASH in opensm attempts to use as few layers > -as possible. This number can be less than the number of actual layers > -available. > - > -In general LASH is a very flexible algorithm. It can, for example, > -reduce to Dimension Order Routing in certain topologies, it is topology > -agnostic and fares well in the face of faults. > - > -It has been shown that for both regular and irregular topologies, LASH > -outperforms Up/Down. The reason for this is that LASH distributes the > -traffic more evenly through a network, avoiding the bottleneck issues > -related to a root node and always routes shortest-path. > - > -The algorithm was developed by Simula Research Laboratory. > - > - > -Use '-R lash -Q ' option to activate the LASH algorithm. > - > -Note: QoS support has to be turned on in order that SL/VL mappings are > -used. > - > -Note: LMC > 0 is not supported by the LASH routing. If this is > -specified, the default routing algorithm is invoked instead. > - > - > -DOR Routing Algorithm > - > -The Dimension Order Routing algorithm is based on the Min Hop > -algorithm and so uses shortest paths. Instead of spreading traffic > -out across different paths with the same shortest distance, it chooses > -among the available shortest paths based on an ordering of dimensions. > -Each port must be consistently cabled to represent a hypercube > -dimension or a mesh dimension. Paths are grown from a destination > -back to a source using the lowest dimension (port) of available paths > -at each step. This provides the ordering necessary to avoid deadlock. 
> -When there are multiple links between any two switches, they still > -represent only one dimension and traffic is balanced across them > -unless port equalization is turned off. In the case of hypercubes, > -the same port must be used throughout the fabric to represent the > -hypercube dimension and match on both ends of the cable. In the case > -of meshes, the dimension should consistently use the same pair of > -ports, one port on one end of the cable, and the other port on the > -other end, continuing along the mesh dimension. > - > -Use '-R dor' option to activate the DOR algorithm. > - > - > -Routing References > - > -To learn more about deadlock-free routing, see the article > -"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" > -by William J Dally and Charles L Seitz (1985). > - > -To learn more about the up/down algorithm, see the article > -"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks" > -by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the > -Universidad Politecnica de Valencia. > - > -To learn more about LASH and the flexibility behind it, the requirement > -for layers, performance comparisons to other algorithms, see the > -following articles: > - > -"Layered Routing in Irregular Networks", Lysne et al, IEEE > -Transactions on Parallel and Distributed Systems, VOL.16, No12, > -December 2005. > - > -"Routing for the ASI Fabric Manager", Solheim et al. IEEE > -Communications Magazine, Vol.44, No.7, July 2006. > - > -"Layered Shortest Path (LASH) Routing in Irregular System Area > -Networks", Skeie et al. IEEE Computer Society Communication > -Architecture for Clusters 2002. > - > - > -Modular Routine Engine > - > -Modular routing engine structure allows for the ease of > -"plugging" new routing modules. > - > -Currently, only unicast callbacks are supported. Multicast > -can be added later. 
> - > -One existing routing module is up-down "updn", which may be > -activated with '-R updn' option (instead of old '-u'). > - > -General usage is: > -$ opensm -R 'module-name' > - > -There is also a trivial routing module which is able > -to load LFT tables from a dump file. > - > -Main features: > - > - - this will load switch LFTs and/or LID matrices (min hops tables) > - - this will load switch LFTs according to the path entries introduced > - in the dump file > - - no additional checks will be performed (such as "is port connected", > - etc.) > - - in case when fabric LIDs were changed this will try to reconstruct > - LFTs correctly if endport GUIDs are represented in the dump file > - (in order to disable this, GUIDs may be removed from the dump file > - or zeroed) > - > -The dump file format is compatible with output of 'ibroute' util and for > -whole fabric can be generated with dump_lfts.sh script. > - > -To activate file based routing module, use: > - > - opensm -R file -U /path/to/dump_file > - > -If the dump_file is not found or is in error, the default routing > -algorithm is utilized. > - > -The ability to dump switch lid matrices (aka min hops tables) to file and > -later to load these is also supported. > - > -The usage is similar to unicast forwarding tables loading from dump > -file (introduced by 'file' routing engine), but new lid matrix file > -name should be specified by -M or --lid_matrix_file option. For example: > - > - opensm -R file -M ./opensm-lid-matrix.dump > - > -The dump file is named \'opensm-lid-matrix.dump\' and will be generated > -in standard opensm dump directory (/var/log by default) when > -OSM_LOG_ROUTING logging flag is set. > - > -When routing engine 'file' is activated, but dump file is not specified > -or not cannot be open default lid matrix algorithm will be used. > - > -There is also a switch forwarding tables dumper which generates > -a file compatible with dump_lfts.sh output. 
This file can be used > -as input for forwarding tables loading by 'file' routing engine. > -Both or one of options -U and -M can be specified together with \'-R file\'. > - > -.SH FILES > -.TP > -.B /etc/opensm/prefix-routes.conf > -default prefix routes file. > - > -.SH AUTHORS > -.TP > -Hal Rosenstock > -.RI < hal at xsigo.com > > -.TP > -Sasha Khapyorsky > -.RI < sashak at voltaire.com > > -.TP > -Eitan Zahavi > -.RI < eitan at mellanox.co.il > > -.TP > -Yevgeny Kliteynik > -.RI < kliteyn at mellanox.co.il > > -.TP > -Thomas Sodring > -.RI < tsodring at simula.no > > diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in > new file mode 100644 > index 0000000..115ab56 > --- /dev/null > +++ b/opensm/man/opensm.8.in > @@ -0,0 +1,941 @@ > +.TH OPENSM 8 "Aug 16, 2007" "OpenIB" "OpenIB Management" > + > +.SH NAME > +opensm \- InfiniBand subnet manager and administration (SM/SA) > + > +.SH SYNOPSIS > +.B opensm > +[\-c(ache-options)] [\-g(uid)[=]] [\-l(mc) ] > +[\-p(riority) ] [\-smkey ] [\-r(eassign_lids)] > +[\-R | \-\-routing_engine ] > +[\-z | \-\-connect_roots] > +[\-M | \-\-lid_matrix_file ] > +[\-U | \-\-ucast_file ] > +[\-S | \-\-sadb_file ] [\-a | \-\-root_guid_file ] > +[\-u | \-\-cn_guid_file ] [\-o(nce)] [\-s(weep) ] > +[\-t(imeout) ] [\-maxsmps ] > +[\-console [off | local | socket | loopback]] [\-console-port ] > +[\-i(gnore-guids) ] [\-f | \-\-log_file] > +[\-L | \-\-log_limit ] [\-e(rase_log_file)] [\-P(config)] > +[\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] > +[\-B | \-\-daemon] [\-I | \-\-inactive] > +[\-\-perfmgr] [\-\-perfmgr_sweep_time_s ] > +[\-\-prefix_routes_file ] > +[\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] > + > +.SH DESCRIPTION > +.PP > +opensm is an InfiniBand compliant Subnet Manager and Administration, > +and runs on top of OpenIB. > + > +opensm provides an implementation of an InfiniBand Subnet Manager and > +Administration. 
Such a software entity is required to run in order > +to initialize the InfiniBand hardware (at least one per each > +InfiniBand subnet). > + > +opensm also now contains an experimental version of a performance > +manager as well. > + > +opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes. Thus, in this default mode, opensm will scan the IB > +fabric, initialize it, and sweep occasionally for changes. > + > +opensm attaches to a specific IB port on the local machine and configures only > +the fabric connected to it. (If the local machine has other IB ports, > +opensm will ignore the fabrics connected to those other ports). If no port is > +specified, it will select the first "best" available port. > + > +opensm can present the available ports and prompt for a port number to > +attach to. > + > +By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log. > +The first file will register only general major events, whereas the second > +will include details of reported errors. All errors reported in this second > +file should be treated as indicators of IB fabric health issues. > +(Note that when a fatal and non-recoverable error occurs, opensm will exit.) > +Both log files should include the message "SUBNET UP" if opensm was able to > +set up the subnet correctly. > + > +.SH OPTIONS > + > +.PP > +.TP > +\fB\-c\fR, \fB\-\-cache-options\fR > +Write out a list of all tunable OpenSM parameters, > +including their current values from the command line > +as well as defaults for others, into the file > +OSM_CACHE_DIR/opensm.opts (OSM_CACHE_DIR defaults to > +/var/cache/opensm if the corresponding environment > +variable is not set). The options file is then > +used for subsequent OpenSM invocations but any > +command line options take precedence. > +.TP > +\fB\-g\fR, \fB\-\-guid\fR > +This option specifies the local port GUID value > +with which OpenSM should bind.
OpenSM may be > +bound to 1 port at a time. > +If GUID given is 0, OpenSM displays a list > +of possible port GUIDs and waits for user input. > +Without -g, OpenSM tries to use the default port. > +.TP > +\fB\-l\fR, \fB\-\-lmc\fR > +This option specifies the subnet's LMC value. > +The number of LIDs assigned to each port is 2^LMC. > +The LMC value must be in the range 0-7. > +LMC values > 0 allow multiple paths between ports. > +LMC values > 0 should only be used if the subnet > +topology actually provides multiple paths between > +ports, i.e. multiple interconnects between switches. > +Without -l, OpenSM defaults to LMC = 0, which allows > +one path between any two ports. > +.TP > +\fB\-p\fR, \fB\-\-priority\fR > +This option specifies the SM\'s PRIORITY. > +This will affect the handover cases, where master > +is chosen by priority and GUID. Range goes from 0 > +(default and lowest priority) to 15 (highest). > +.TP > +\fB\-smkey\fR > +This option specifies the SM\'s SM_Key (64 bits). > +This will affect SM authentication. > +.TP > +\fB\-r\fR, \fB\-\-reassign_lids\fR > +This option causes OpenSM to reassign LIDs to all > +end nodes. Specifying -r on a running subnet > +may disrupt subnet traffic. > +Without -r, OpenSM attempts to preserve existing > +LID assignments resolving multiple use of same LID. > +.TP > +\fB\-R\fR, \fB\-\-routing_engine\fR > +This option chooses routing engine instead of Min Hop > +algorithm (default). > +Supported engines: minhop, updn, file, ftree, lash, dor > +.TP > +\fB\-z\fR, \fB\-\-connect_roots\fR > +This option enforces a routing engine (currently up/down > +only) to make connectivity between root switches and in > +this way to be fully IBA compliant. In many cases this can > +violate "pure" deadlock free algorithm, so use it carefully. > +.TP > +\fB\-M\fR, \fB\-\-lid_matrix_file\fR > +This option specifies the name of the lid matrix dump file > +from where switch lid matrices (min hops tables) will be > +loaded.
> +.TP > +\fB\-U\fR, \fB\-\-ucast_file\fR > +This option specifies the name of the unicast dump file > +from where switch forwarding tables will be loaded. > +.TP > +\fB\-S\fR, \fB\-\-sadb_file\fR > +This option specifies the name of the SA DB dump file > +from where SA database will be loaded. > +.TP > +\fB\-a\fR, \fB\-\-root_guid_file\fR > +Set the root nodes for the Up/Down or Fat-Tree routing > +algorithm to the guids provided in the given file (one to a line). > +.TP > +\fB\-u\fR, \fB\-\-cn_guid_file\fR > +Set the compute nodes for the Fat-Tree routing algorithm > +to the guids provided in the given file (one to a line). > +.TP > +\fB\-o\fR, \fB\-\-once\fR > +This option causes OpenSM to configure the subnet > +once, then exit. Ports remain in the ACTIVE state. > +.TP > +\fB\-s\fR, \fB\-\-sweep\fR > +This option specifies the number of seconds between > +subnet sweeps. Specifying -s 0 disables sweeping. > +Without -s, OpenSM defaults to a sweep interval of > +10 seconds. > +.TP > +\fB\-t\fR, \fB\-\-timeout\fR > +This option specifies the time in milliseconds > +used for transaction timeouts. > +Specifying -t 0 disables timeouts. > +Without -t, OpenSM defaults to a timeout value of > +200 milliseconds. > +.TP > +\fB\-maxsmps\fR > +This option specifies the number of VL15 SMP MADs > +allowed on the wire at any one time. > +Specifying -maxsmps 0 allows unlimited outstanding > +SMPs. > +Without -maxsmps, OpenSM defaults to a maximum of > +4 outstanding SMPs. > +.TP > +\fB\-console [off | local | socket | loopback]\fR > +This option brings up the OpenSM console (default off). > +Note that the socket and loopback options will only be available > +if OpenSM was built with --enable-console-socket. > +.TP > +\fB\-console-port\fR > +Specify an alternate telnet port for the socket console (default 10000). > +Note that this option only appears if OpenSM was built with > +--enable-console-socket. 
> +.TP > +\fB\-i\fR, \fB\-ignore-guids\fR > +This option provides the means to define a set of ports > +(by guid) that will be ignored by the link load > +equalization algorithm. > +.TP > +\fB\-x\fR, \fB\-\-honor_guid2lid\fR > +This option forces OpenSM to honor the guid2lid file, > +when it comes out of Standby state, if such file exists > +under OSM_CACHE_DIR, and is valid. > +By default, this is FALSE. > +.TP > +\fB\-f\fR, \fB\-\-log_file\fR > +This option defines the log to be the given file. > +By default, the log goes to /var/log/opensm.log. > +For the log to go to standard output use -f stdout. > +.TP > +\fB\-L\fR, \fB\-\-log_limit\fR > +This option defines maximal log file size in MB. When > +specified the log file will be truncated upon reaching > +this limit. > +.TP > +\fB\-e\fR, \fB\-\-erase_log_file\fR > +This option will cause deletion of the log file > +(if it previously exists). By default, the log file > +is accumulative. > +.TP > +\fB\-P\fR, \fB\-\-Pconfig\fR > +This option defines the optional partition configuration file. > +The default name is \fB\%@CONF_DIR@/@PARTITION_CONFIG_FILE@\fP. > +.TP > +.BI --prefix_routes_file= path > +Prefix routes control how the SA responds to path record queries for > +off-subnet DGIDs. By default, the SA fails such queries. The > +.B PREFIX ROUTES > +section below describes the format of the configuration file. > +The default path is \fB\%@CONF_DIR@/prefix\-routes.conf\fP. > +.TP > +\fB\-Q\fR, \fB\-\-qos\fR > +This option enables QoS setup. It is disabled by default. > +.TP > +\fB\-N\fR, \fB\-\-no_part_enforce\fR > +This option disables partition enforcement on switch external ports. > +.TP > +\fB\-y\fR, \fB\-\-stay_on_fatal\fR > +This option will cause SM not to exit on fatal initialization > +issues: if SM discovers duplicated guids or a 12x link with > +lane reversal badly configured. > +By default, the SM will exit on these errors. 
> +.TP > +\fB\-B\fR, \fB\-\-daemon\fR > +Run in daemon mode - OpenSM will run in the background. > +.TP > +\fB\-I\fR, \fB\-\-inactive\fR > +Start SM in inactive rather than init SM state. This > +option can be used in conjunction with the perfmgr so as to > +run a standalone performance manager without SM/SA. However, > +this is NOT currently implemented in the performance manager. > +.TP > +\fB\-perfmgr\fR > +Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at > +configure time. > +.TP > +\fB\-perfmgr_sweep_time_s\fR > +Specify the sweep time for the performance manager in seconds > +(default is 180 seconds). Only takes > +effect if --enable-perfmgr was specified at configure time. > +.TP > +.BI --consolidate_ipv6_snm_reqests > +Consolidate IPv6 Solicited Node Multicast group joins into 1 IB multicast > +group. > +.TP > +\fB\-v\fR, \fB\-\-verbose\fR > +This option increases the log verbosity level. > +The -v option may be specified multiple times > +to further increase the verbosity level. > +See the -D option for more information about > +log verbosity. > +.TP > +\fB\-V\fR > +This option sets the maximum verbosity level and > +forces log flushing. > +The -V option is equivalent to \'-D 0xFF -d 2\'. > +See the -D option for more information about > +log verbosity. > +.TP > +\fB\-D\fR > +This option sets the log verbosity level. > +A flags field must follow the -D option. > +A bit set/clear in the flags enables/disables a > +specific log level as follows: > + > + BIT LOG LEVEL ENABLED > + ---- ----------------- > + 0x01 - ERROR (error messages) > + 0x02 - INFO (basic messages, low volume) > + 0x04 - VERBOSE (interesting stuff, moderate volume) > + 0x08 - DEBUG (diagnostic, high volume) > + 0x10 - FUNCS (function entry/exit, very high volume) > + 0x20 - FRAMES (dumps all SMP and GMP frames) > + 0x40 - ROUTING (dump FDB routing information) > + 0x80 - currently unused. > + > +Without -D, OpenSM defaults to ERROR + INFO (0x3). 
> +Specifying -D 0 disables all messages. > +Specifying -D 0xFF enables all messages (see -V). > +High verbosity levels may require increasing > +the transaction timeout with the -t option. > +.TP > +\fB\-d\fR, \fB\-\-debug\fR > +This option specifies a debug option. > +These options are not normally needed. > +The number following -d selects the debug > +option to enable as follows: > + > + OPT Description > + --- ----------------- > + -d0 - Ignore other SM nodes > + -d1 - Force single threaded dispatching > + -d2 - Force log flushing after each log message > + -d3 - Disable multicast support > +.TP > +\fB\-h\fR, \fB\-\-help\fR > +Display this usage info then exit. > +.TP > +\fB\-?\fR > +Display this usage info then exit. > + > +.SH ENVIRONMENT VARIABLES > +.PP > +The following environment variables control opensm behavior: > + > +OSM_TMP_DIR - controls the directory in which the temporary files generated by > +opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and > +opensm.mcfdbs. By default, this directory is /var/log. > + > +OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent > +runs are consistent. The default directory used is /var/cache/opensm. > +The following files are included in it: > + > + guid2lid - stores the LID range assigned to each GUID > + > + opensm.opts - an optional file that holds a complete set of opensm > + configuration options > + > +.SH NOTES > +.PP > +When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found. > +.PP > +Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for > +logrotate purposes. > + > +.SH PARTITION CONFIGURATION > +.PP > +The default name of OpenSM partitions configuration file is > +\fB\%@CONF_DIR@/@PARTITION_CONFIG_FILE@\fP. The default may be changed by using > +--Pconfig (-P) option with OpenSM. 
> + > +The default partition will be created by OpenSM unconditionally even > +when partition configuration file does not exist or cannot be accessed. > + > +The default partition has P_Key value 0x7fff. OpenSM\'s port will have > +full membership in default partition. All other end ports will have > +partial membership. > + > +File Format > + > +Comments: > + > +Line content followed after \'#\' character is comment and ignored by > +parser. > + > +General file format: > + > +: ; > + > +Partition Definition: > + > +[PartitionName][=PKey][,flag[=value]][,defmember=full|limited] > + > + PartitionName - string, will be used with logging. When omitted > + empty string will be used. > + PKey - P_Key value for this partition. Only low 15 bits will > + be used. When omitted will be autogenerated. > + flag - used to indicate IPoIB capability of this partition. > + defmember=full|limited - specifies default membership for port guid > + list. Default is limited. > + > +Currently recognized flags are: > + > + ipoib - indicates that this partition may be used for IPoIB, as > + result IPoIB capable MC group will be created. > + rate= - specifies rate for this IPoIB MC group > + (default is 3 (10GBps)) > + mtu= - specifies MTU for this IPoIB MC group > + (default is 4 (2048)) > + sl= - specifies SL for this IPoIB MC group > + (default is 0) > + scope= - specifies scope for this IPoIB MC group > + (default is 2 (link local)). Multiple scope settings > + are permitted for a partition. > + > +Note that values for rate, mtu, and scope should be specified as > +defined in the IBTA specification (for example, mtu=4 for 2048). > + > +PortGUIDs list: > + > + PortGUID - GUID of partition member EndPort. Hexadecimal > + numbers should start from 0x, decimal numbers > + are accepted too. > + full or limited - indicates full or limited membership for this > + port. When omitted (or unrecognized) limited > + membership is assumed. 
> + > +There are two useful keywords for PortGUID definition: > + > + - 'ALL' means all end ports in this subnet. > + - 'SELF' means subnet manager's port. > + > +Empty list means no ports in this partition. > + > +Notes: > + > +White space is permitted between delimiters ('=', ',',':',';'). > + > +The line can be wrapped after ':' followed after Partition Definition and > +between. > + > +PartitionName does not need to be unique, PKey does need to be unique. > +If PKey is repeated then those partition configurations will be merged > +and first PartitionName will be used (see also next note). > + > +It is possible to split partition configuration in more than one > +definition, but then PKey should be explicitly specified (otherwise > +different PKey values will be generated for those definitions). > + > +Examples: > + > + Default=0x7fff : ALL, SELF=full ; > + > + NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; > + > + YetAnotherOne = 0x300 : SELF=full ; > + YetAnotherOne = 0x300 : ALL=limited ; > + > + ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; > + # 0x123453, 0x123454 will be limited > + ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; > + # 0x123456, 0x123457 will be limited > + ShareIO = 0x80 : defmember=limited : 0x123456, 0x123457, 0x123458=full; > + ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; > + ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; > + > + > +Note: > + > +The following rule is equivalent to how OpenSM used to run prior to the > +partition manager: > + > + Default=0x7fff,ipoib:ALL=full; > + > +.SH QOS CONFIGURATION > +.PP > +There are a set of QoS related low-level configuration parameters. > +All these parameter names are prefixed by "qos_" string. 
Here is a full > +list of these parameters: > + > + qos_max_vls - The maximum number of VLs that will be on the subnet > + qos_high_limit - The limit of High Priority component of VL > + Arbitration table (IBA 7.6.9) > + qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) > + template > + qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) > + template > + Both VL arbitration templates are pairs of > + VL and weight > + qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. It is > + a list of VLs corresponding to SLs 0-15 (Note > + that VL15 used here means drop this SL) > + > +Typical default values (hard-coded in OpenSM initialization) are: > + > + qos_max_vls=15 > + qos_high_limit=0 > + qos_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 > + qos_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 > + qos_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 > + > +The syntax is compatible with rest of OpenSM configuration options and > +values may be stored in OpenSM config file (cached options file). > + > +In addition to the above, we may define separate QoS configuration > +parameters sets for various target types. As targets, we currently support > +CAs, routers, switch external ports, and switch's enhanced port 0. The > +names of such specialized parameters are prefixed by "qos__" > +string. Here is a full list of the currently supported sets: > + > + qos_ca_ - QoS configuration parameters set for CAs. > + qos_rtr_ - parameters set for routers. > + qos_sw0_ - parameters set for switches' port 0. > + qos_swe_ - parameters set for switches' external ports. > + > +Examples: > + qos_sw0_max_vls=2 > + qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, > + qos_swe_high_limit=0 > + > +.SH PREFIX ROUTES > +.PP > +Prefix routes control how the SA responds to path record queries for > +off-subnet DGIDs. By default, the SA fails such queries. 
> +Note that IBA does not specify how the SA should obtain off-subnet path > +record information. > +The prefix routes configuration is meant as a stop-gap until the > +specification is completed. > +.PP > +Each line in the configuration file is a 64-bit prefix followed by a > +64-bit GUID, separated by white space. > +The GUID specifies the router port on the local subnet that will > +handle the prefix. > +Blank lines are ignored, as is anything between a \fB#\fP character > +and the end of the line. > +The prefix and GUID are both in hex, the leading 0x is optional. > +Either, or both, can be wild-carded by specifying an > +asterisk instead of an explicit prefix or GUID. > +.PP > +When responding to a path record query for an off-subnet DGID, > +opensm searches for the first prefix match in the configuration file. > +Therefore, the order of the lines in the configuration file is important: > +a wild-carded prefix at the beginning of the configuration file renders > +all subsequent lines useless. > +If there is no match, then opensm fails the query. > +It is legal to repeat prefixes in the configuration file, > +opensm will return the path to the first available matching router. > +A configuration file with a single line where both prefix and GUID > +are wild-carded means that a path record query specifying any > +off-subnet DGID should return a path to the first available router. > +This configuration yields the same behaviour formerly achieved by > +compiling opensm with -DROUTER_EXP. > + > +.SH ROUTING > +.PP > +OpenSM now offers five routing engines: > + > +1. Min Hop Algorithm - based on the minimum hops to each node where the > +path length is optimized. > + > +2. UPDN Unicast routing algorithm - also based on the minimum hops to each > +node, but it is constrained to ranking rules. This algorithm should be chosen > +if the subnet is not a pure Fat Tree, and deadlock may occur due to a > +loop in the subnet. > + > +3. 
Fat Tree Unicast routing algorithm - this algorithm optimizes routing > +for congestion-free "shift" communication pattern. > +It should be chosen if a subnet is a symmetrical Fat Trees of various types, > +not just K-ary-N-Trees: non-constant K, not fully staffed, any CBB ratio. > +Similar to UPDN, Fat Tree routing is constrained to ranking rules. > + > +4. LASH unicast routing algorithm - uses Infiniband virtual layers > +(SL) to provide deadlock-free shortest-path routing while also > +distributing the paths between layers. LASH is an alternative > +deadlock-free topology-agnostic routing algorithm to the non-minimal > +UPDN algorithm avoiding the use of a potentially congested root node. > + > +5. DOR Unicast routing algorithm - based on the Min Hop algorithm, but > +avoids port equalization except for redundant links between the same > +two switches. This provides deadlock free routes for hypercubes when > +the fabric is cabled as a hypercube and for meshes when cabled as a > +mesh (see details below). > + > +OpenSM also supports a file method which > +can load routes from a table. See \'Modular Routing Engine\' for more > +information on this. > + > +The basic routing algorithm is comprised of two stages: > + > +1. MinHop matrix calculation > + How many hops are required to get from each port to each LID ? > + The algorithm to fill these tables is different if you run standard > +(min hop) or Up/Down. > + For standard routing, a "relaxation" algorithm is used to propagate > +min hop from every destination LID through neighbor switches > + For Up/Down routing, a BFS from every target is used. The BFS tracks link > +direction (up or down) and avoid steps that will perform up after a down > +step was used. > + > +2. Once MinHop matrices exist, each switch is visited and for each target LID a > +decision is made as to what port should be used to get to that LID. > + This step is common to standard and Up/Down routing. 
Each port has a > +counter counting the number of target LIDs going through it. > + When there are multiple alternative ports with same MinHop to a LID, > +the one with less previously assigned ports is selected. > + If LMC > 0, more checks are added: Within each group of LIDs assigned to > +same target port, > + a. use only ports which have same MinHop > + b. first prefer the ones that go to different systemImageGuid (than > +the previous LID of the same LMC group) > + c. if none - prefer those which go through another NodeGuid > + d. fall back to the number of paths method (if all go to same node). > + > +Effect of Topology Changes > + > +OpenSM will preserve existing routing in any case where there is no change in > +the fabric switches unless the -r (--reassign_lids) option is specified. > + > +-r > +.br > +--reassign_lids > + This option causes OpenSM to reassign LIDs to all > + end nodes. Specifying -r on a running subnet > + may disrupt subnet traffic. > + Without -r, OpenSM attempts to preserve existing > + LID assignments resolving multiple use of same LID. > + > +If a link is added or removed, OpenSM does not recalculate > +the routes that do not have to change. A route has to change > +if the port is no longer UP or no longer the MinHop. When routing changes > +are performed, the same algorithm for balancing the routes is invoked. > + > +In the case of using the file based routing, any topology changes are > +currently ignored. The 'file' routing engine just loads the LFTs from the file > +specified, with no reaction to real topology. Obviously, this will not be able > +to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent > +switches will be skipped. Multicast is not affected by 'file' routing engine > +(this uses min hop tables). > + > + > +Min Hop Algorithm > + > +The Min Hop algorithm is invoked when neither UPDN nor the file method are > +specified.
> + > +The Min Hop algorithm is divided into two stages: computation of > +min-hop tables on every switch and LFT output port assignment. Link > +subscription is also equalized with the ability to override based on > +port GUID. The latter is supplied by: > + > +-i > +.br > +-ignore-guids > + This option provides the means to define a set of ports > + (by guid) that will be ignored by the link load > + equalization algorithm. Note that only endports (CA, > + switch port 0, and router ports) and not switch external > + ports are supported. > + > +LMC awareness routes based on (remote) system or switch basis. > + > + > +Purpose of UPDN Algorithm > + > +The UPDN algorithm is designed to prevent deadlocks from occurring in loops > +of the subnet. A loop-deadlock is a situation in which it is no longer > +possible to send data between any two hosts connected through the loop. As > +such, the UPDN routing algorithm should be used if the subnet is not a pure > +Fat Tree, and one of its loops may experience a deadlock (due, for example, > +to high pressure). > + > +The UPDN algorithm is based on the following main stages: > + > +1. Auto-detect root nodes - based on the CA hop length from any switch in > +the subnet, a statistical histogram is built for each switch (hop num vs > +number of occurrences). If the histogram reflects a specific column (higher > +than others) for a certain node, then it is marked as a root node. Since > +the algorithm is statistical, it may not find any root nodes. The list of > +the root nodes found by this auto-detect stage is used by the ranking > +process stage. > + > + Note 1: The user can override the node list manually. > + Note 2: If this stage cannot find any root nodes, and the user did > + not specify a guid list file, OpenSM defaults back to the > + Min Hop routing algorithm. > + > +2. Ranking process - All root switch nodes (found in stage 1) are assigned > +a rank of 0. 
Using the BFS algorithm, the rest of the switch nodes in the > +subnet are ranked incrementally. This ranking aids in the process of enforcing > +rules that ensure loop-free paths. > + > +3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from > +each (CA or switch) node in the subnet. During the BFS process, the FDB table > +of each switch node traversed by BFS is updated, in reference to the starting > +node, based on the ranking rules and guid values. > + > +At the end of the process, the updated FDB tables ensure loop-free paths > +through the subnet. > + > +Note: Up/Down routing does not allow LID routing communication between > +switches that are located inside spine "switch systems". > +The reason is that there is no way to allow a LID route between them > +that does not break the Up/Down rule. > +One ramification of this is that you cannot run SM on switches other > +than the leaf switches of the fabric. > + > + > +UPDN Algorithm Usage > + > +Activation through OpenSM > + > +Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. > +Use '-a ' for adding an UPDN guid file that contains the > +root nodes for ranking. > +If the `-a' option is not used, OpenSM uses its auto-detect root nodes > +algorithm. > + > +Notes on the guid list file: > + > +1. A valid guid file specifies one guid in each line. Lines with an invalid > +format will be discarded. > +.br > +2. The user should specify the root switch guids. However, it is also > +possible to specify CA guids; OpenSM will use the guid of the switch (if > +it exists) that connects the CA to the subnet as a root node. > + > + > +Fat-tree Routing Algorithm > + > +The fat-tree algorithm optimizes routing for "shift" communication pattern. > +It should be chosen if a subnet is a symmetrical or almost symmetrical > +fat-tree of various types. > +It supports not just K-ary-N-Trees, by handling for non-constant K, > +cases where not all leafs (CAs) are present, any CBB ratio. 
> +As in UPDN, fat-tree also prevents credit-loop-deadlocks. > + > +If the root guid file is not provided ('-a' or '--root_guid_file' options), > +the topology has to be pure fat-tree that complies with the following rules: > + - Tree rank should be between two and eight (inclusively) > + - Switches of the same rank should have the same number > + of UP-going port groups*, unless they are root switches, > + in which case they shouldn't have UP-going ports at all. > + - Switches of the same rank should have the same number > + of DOWN-going port groups, unless they are leaf switches. > + - Switches of the same rank should have the same number > + of ports in each UP-going port group. > + - Switches of the same rank should have the same number > + of ports in each DOWN-going port group. > + - All the CAs have to be at the same tree level (rank). > + > +If the root guid file is provided, the topology doesn't have to be pure > +fat-tree, and it should only comply with the following rules: > + - Tree rank should be between two and eight (inclusively) > + - All the Compute Nodes** have to be at the same tree level (rank). > + Note that non-compute node CAs are allowed here to be at different > + tree ranks. > + > +* ports that are connected to the same remote switch are referenced as > +\'port group\'. > + > +** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\' > +OpenSM options. > + > +Topologies that do not comply cause a fallback to min hop routing. > +Note that this can also occur on link failures which cause the topology > +to no longer be "pure" fat-tree. > + > +Note that although fat-tree algorithm supports trees with non-integer CBB > +ratio, the routing will not be as balanced as in case of integer CBB ratio. > +In addition to this, although the algorithm allows leaf switches to have any > +number of CAs, the closer the tree is to be fully populated, the more > +effective the "shift" communication pattern will be.
> +In general, even if the root list is provided, the closer the topology to a > +pure and symmetrical fat-tree, the more optimal the routing will be. > + > +The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) > +in the same directory where the OpenSM log resides. This ordering file provides > +the CN order that may be used to create efficient communication pattern, that > +will match the routing tables. > + > +Activation through OpenSM > + > +Use '-R ftree' option to activate the fat-tree algorithm. > +Use '-a ' to provide root nodes for ranking. If the `-a' option > +is not used, routing algorithm will detect roots automatically. > +Use '-u ' to provide the list of compute nodes. If the `-u' option > +is not used, all the CAs are considered as compute nodes. > + > +Note: LMC > 0 is not supported by fat-tree routing. If this is > +specified, the default routing algorithm is invoked instead. > + > + > +LASH Routing Algorithm > + > +LASH is an acronym for LAyered SHortest Path Routing. It is a > +deterministic shortest path routing algorithm that enables topology > +agnostic deadlock-free routing within communication networks. > + > +When computing the routing function, LASH analyzes the network > +topology for the shortest-path routes between all pairs of sources / > +destinations and groups these paths into virtual layers in such a way > +as to avoid deadlock. > + > +Note LASH analyzes routes and ensures deadlock freedom between switch > +pairs. The link between HCA and switch does not need virtual > +layers as deadlock will not arise between switch and HCA. > + > +In more detail, the algorithm works as follows: > + > +1) LASH determines the shortest-path between all pairs of source / > +destination switches. Note, LASH ensures the same SL is used for all > +SRC/DST - DST/SRC pairs and there is no guarantee that the return > +path for a given DST/SRC will be the reverse of the route SRC/DST.
> + > +2) LASH then begins an SL assignment process where a route is assigned > +to a layer (SL) if the addition of that route does not cause deadlock > +within that layer. This is achieved by maintaining and analysing a > +channel dependency graph for each layer. Once the potential addition > +of a path could lead to deadlock, LASH opens a new layer and continues > +the process. > + > +3) Once this stage has been completed, it is highly likely that the > +first layers processed will contain more paths than the latter ones. > +To better balance the use of layers, LASH moves paths from one layer > +to another so that the number of paths in each layer averages out. > + > +Note, the implementation of LASH in opensm attempts to use as few layers > +as possible. This number can be less than the number of actual layers > +available. > + > +In general LASH is a very flexible algorithm. It can, for example, > +reduce to Dimension Order Routing in certain topologies, it is topology > +agnostic and fares well in the face of faults. > + > +It has been shown that for both regular and irregular topologies, LASH > +outperforms Up/Down. The reason for this is that LASH distributes the > +traffic more evenly through a network, avoiding the bottleneck issues > +related to a root node and always routes shortest-path. > + > +The algorithm was developed by Simula Research Laboratory. > + > + > +Use '-R lash -Q ' option to activate the LASH algorithm. > + > +Note: QoS support has to be turned on in order that SL/VL mappings are > +used. > + > +Note: LMC > 0 is not supported by the LASH routing. If this is > +specified, the default routing algorithm is invoked instead. > + > + > +DOR Routing Algorithm > + > +The Dimension Order Routing algorithm is based on the Min Hop > +algorithm and so uses shortest paths. Instead of spreading traffic > +out across different paths with the same shortest distance, it chooses > +among the available shortest paths based on an ordering of dimensions. 
> +Each port must be consistently cabled to represent a hypercube > +dimension or a mesh dimension. Paths are grown from a destination > +back to a source using the lowest dimension (port) of available paths > +at each step. This provides the ordering necessary to avoid deadlock. > +When there are multiple links between any two switches, they still > +represent only one dimension and traffic is balanced across them > +unless port equalization is turned off. In the case of hypercubes, > +the same port must be used throughout the fabric to represent the > +hypercube dimension and match on both ends of the cable. In the case > +of meshes, the dimension should consistently use the same pair of > +ports, one port on one end of the cable, and the other port on the > +other end, continuing along the mesh dimension. > + > +Use '-R dor' option to activate the DOR algorithm. > + > + > +Routing References > + > +To learn more about deadlock-free routing, see the article > +"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" > +by William J Dally and Charles L Seitz (1985). > + > +To learn more about the up/down algorithm, see the article > +"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks" > +by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the > +Universidad Politecnica de Valencia. > + > +To learn more about LASH and the flexibility behind it, the requirement > +for layers, performance comparisons to other algorithms, see the > +following articles: > + > +"Layered Routing in Irregular Networks", Lysne et al, IEEE > +Transactions on Parallel and Distributed Systems, VOL.16, No12, > +December 2005. > + > +"Routing for the ASI Fabric Manager", Solheim et al. IEEE > +Communications Magazine, Vol.44, No.7, July 2006. > + > +"Layered Shortest Path (LASH) Routing in Irregular System Area > +Networks", Skeie et al. IEEE Computer Society Communication > +Architecture for Clusters 2002. 
> + > + > +Modular Routing Engine > + > +Modular routing engine structure allows for the ease of > +"plugging" new routing modules. > + > +Currently, only unicast callbacks are supported. Multicast > +can be added later. > + > +One existing routing module is up-down "updn", which may be > +activated with '-R updn' option (instead of old '-u'). > + > +General usage is: > +$ opensm -R 'module-name' > + > +There is also a trivial routing module which is able > +to load LFT tables from a dump file. > + > +Main features: > + > + - this will load switch LFTs and/or LID matrices (min hops tables) > + - this will load switch LFTs according to the path entries introduced > + in the dump file > + - no additional checks will be performed (such as "is port connected", > + etc.) > + - in case when fabric LIDs were changed this will try to reconstruct > + LFTs correctly if endport GUIDs are represented in the dump file > + (in order to disable this, GUIDs may be removed from the dump file > + or zeroed) > + > +The dump file format is compatible with output of 'ibroute' util and for > +whole fabric can be generated with dump_lfts.sh script. > + > +To activate file based routing module, use: > + > + opensm -R file -U /path/to/dump_file > + > +If the dump_file is not found or is in error, the default routing > +algorithm is utilized. > + > +The ability to dump switch lid matrices (aka min hops tables) to file and > +later to load these is also supported. > + > +The usage is similar to unicast forwarding tables loading from dump > +file (introduced by 'file' routing engine), but new lid matrix file > +name should be specified by -M or --lid_matrix_file option. For example: > + > + opensm -R file -M ./opensm-lid-matrix.dump > + > +The dump file is named \'opensm-lid-matrix.dump\' and will be generated > +in standard opensm dump directory (/var/log by default) when > +OSM_LOG_ROUTING logging flag is set.
> + > +When routing engine 'file' is activated, but dump file is not specified > +or not cannot be open default lid matrix algorithm will be used. > + > +There is also a switch forwarding tables dumper which generates > +a file compatible with dump_lfts.sh output. This file can be used > +as input for forwarding tables loading by 'file' routing engine. > +Both or one of options -U and -M can be specified together with \'-R file\'. > + > +.SH FILES > +.TP > +.B @CONF_DIR@/prefix-routes.conf > +default prefix routes file. > + > +.SH AUTHORS > +.TP > +Hal Rosenstock > +.RI < hal at xsigo.com > > +.TP > +Sasha Khapyorsky > +.RI < sashak at voltaire.com > > +.TP > +Eitan Zahavi > +.RI < eitan at mellanox.co.il > > +.TP > +Yevgeny Kliteynik > +.RI < kliteyn at mellanox.co.il > > +.TP > +Thomas Sodring > +.RI < tsodring at simula.no > From bugme-daemon at bugzilla.kernel.org Mon Feb 4 13:34:56 2008 From: bugme-daemon at bugzilla.kernel.org (bugme-daemon at bugzilla.kernel.org) Date: Mon, 4 Feb 2008 13:34:56 -0800 (PST) Subject: [ofa-general] Your Bugzilla password. Message-ID: <20080204213456.E5F85108017@picon.linux-foundation.org> To use the wonders of Bugzilla, you can use the following: E-mail address: general at lists.openfabrics.org Password: 7EN0cRJPlY To change your password, go to: http://bugzilla.kernel.org/userprefs.cgi From rdreier at cisco.com Mon Feb 4 13:40:34 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 04 Feb 2008 13:40:34 -0800 Subject: [ofa-general] IPoIB performance bug to investigate Message-ID: If you're interested: http://bugzilla.kernel.org/show_bug.cgi?id=9883 From weiny2 at llnl.gov Mon Feb 4 14:21:30 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 4 Feb 2008 14:21:30 -0800 Subject: [ofa-general] Re: [PATCH] Update man page for configurable partition and prefix-routes file. 
(WAS: Re: [PATCH] opensm/man: partition cfg file location) In-Reply-To: <1202160824.11210.292.camel@hrosenstock-ws.xsigo.com> References: <47A6D462.7090904@dev.mellanox.co.il> <20080204184826.GF1392@sashak.voltaire.com> <20080204185119.GG1392@sashak.voltaire.com> <20080204132051.61e28322.weiny2@llnl.gov> <1202160824.11210.292.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080204142130.78b04471.weiny2@llnl.gov> On Mon, 04 Feb 2008 13:33:44 -0800 Hal Rosenstock wrote: > Ira, > > On Mon, 2008-02-04 at 13:20 -0800, Ira Weiny wrote: > > This is my bad. When I changed the config file locations and "configurability" > > I should have updated the man page. > > > > This patch fixes this to reflect the location and names chosen at configure > > time. > > Seems like this patch is modifying many lines in the opensm man page > aside from those affected. Is that needed ? That makes it hard to see > exactly what changed at least for me. > I had to change the man page file to opensm.8.in to make the configure changes work. Here are 2 patches which separate the moving of the file to a *.in and then the changes. Ira >From 018ca298e8f80fbc8ea6fdbd26fb63fc2c2a0891 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Mon, 4 Feb 2008 14:14:23 -0800 Subject: [PATCH] Move opensm.8 man page in prep for making config file changes. Signed-off-by: Ira K. Weiny --- opensm/configure.in | 2 + opensm/man/opensm.8 | 941 ------------------------------------------------ opensm/man/opensm.8.in | 941 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 943 insertions(+), 941 deletions(-) delete mode 100644 opensm/man/opensm.8 create mode 100644 opensm/man/opensm.8.in diff --git a/opensm/configure.in b/opensm/configure.in index 79a914e..e8fb250 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -172,5 +172,7 @@ OPENIB_APP_OSMV_CHECK_LIB # overrides. 
CFLAGS=$ac_env_CFLAGS_value +AC_CONFIG_FILES([man/opensm.8]) + dnl Create the following Makefiles AC_OUTPUT([include/opensm/osm_version.h Makefile include/Makefile complib/Makefile libvendor/Makefile opensm/Makefile osmeventplugin/Makefile osmtest/Makefile opensm.spec]) diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 deleted file mode 100644 index ab7fb8e..0000000 --- a/opensm/man/opensm.8 +++ /dev/null @@ -1,941 +0,0 @@ -.TH OPENSM 8 "Aug 16, 2007" "OpenIB" "OpenIB Management" - -.SH NAME -opensm \- InfiniBand subnet manager and administration (SM/SA) - -.SH SYNOPSIS -.B opensm -[\-c(ache-options)] [\-g(uid)[=]] [\-l(mc) ] -[\-p(riority) ] [\-smkey ] [\-r(eassign_lids)] -[\-R | \-\-routing_engine ] -[\-z | \-\-connect_roots] -[\-M | \-\-lid_matrix_file ] -[\-U | \-\-ucast_file ] -[\-S | \-\-sadb_file ] [\-a | \-\-root_guid_file ] -[\-u | \-\-cn_guid_file ] [\-o(nce)] [\-s(weep) ] -[\-t(imeout) ] [\-maxsmps ] -[\-console [off | local | socket | loopback]] [\-console-port ] -[\-i(gnore-guids) ] [\-f | \-\-log_file] -[\-L | \-\-log_limit ] [\-e(rase_log_file)] [\-P(config)] -[\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] -[\-B | \-\-daemon] [\-I | \-\-inactive] -[\-\-perfmgr] [\-\-perfmgr_sweep_time_s ] -[\-\-prefix_routes_file ] -[\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] - -.SH DESCRIPTION -.PP -opensm is an InfiniBand compliant Subnet Manager and Administration, -and runs on top of OpenIB. - -opensm provides an implementation of an InfiniBand Subnet Manager and -Administration. Such a software entity is required to run for in order -to initialize the InfiniBand hardware (at least one per each -InfiniBand subnet). - -opensm also now contains an experimental version of a performance -manager as well. - -opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes. Thus, in this default mode, opensm will scan the IB -fabric, initialize it, and sweep occasionally for changes. 
- -opensm attaches to a specific IB port on the local machine and configures only -the fabric connected to it. (If the local machine has other IB ports, -opensm will ignore the fabrics connected to those other ports). If no port is -specified, it will select the first "best" available port. - -opensm can present the available ports and prompt for a port number to -attach to. - -By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log. -The first file will register only general major events, whereas the second -will include details of reported errors. All errors reported in this second -file should be treated as indicators of IB fabric health issues. -(Note that when a fatal and non-recoverable error occurs, opensm will exit.) -Both log files should include the message "SUBNET UP" if opensm was able to -setup the subnet correctly. - -.SH OPTIONS - -.PP -.TP -\fB\-c\fR, \fB\-\-cache-options\fR -Write out a list of all tunable OpenSM parameters, -including their current values from the command line -as well as defaults for others, into the file -OSM_CACHE_DIR/opensm.opts (OSM_CACHE_DIR defaults to -/var/cache/opensm if the corresponding environment -variable is not set). The options file is then -used for subsequent OpenSM invocations but any -command line options take precedence. -.TP -\fB\-g\fR, \fB\-\-guid\fR -This option specifies the local port GUID value -with which OpenSM should bind. OpenSM may be -bound to 1 port at a time. -If GUID given is 0, OpenSM displays a list -of possible port GUIDs and waits for user input. -Without -g, OpenSM tries to use the default port. -.TP -\fB\-l\fR, \fB\-\-lmc\fR -This option specifies the subnet's LMC value. -The number of LIDs assigned to each port is 2^LMC. -The LMC value must be in the range 0-7. -LMC values > 0 allow multiple paths between ports. -LMC values > 0 should only be used if the subnet -topology actually provides multiple paths between -ports, i.e. 
multiple interconnects between switches. -Without -l, OpenSM defaults to LMC = 0, which allows -one path between any two ports. -.TP -\fB\-p\fR, \fB\-\-priority\fR -This option specifies the SM\'s PRIORITY. -This will effect the handover cases, where master -is chosen by priority and GUID. Range goes from 0 -(default and lowest priority) to 15 (highest). -.TP -\fB\-smkey\fR -This option specifies the SM\'s SM_Key (64 bits). -This will effect SM authentication. -.TP -\fB\-r\fR, \fB\-\-reassign_lids\fR -This option causes OpenSM to reassign LIDs to all -end nodes. Specifying -r on a running subnet -may disrupt subnet traffic. -Without -r, OpenSM attempts to preserve existing -LID assignments resolving multiple use of same LID. -.TP -\fB\-R\fR, \fB\-\-routing_engine\fR -This option chooses routing engine instead of Min Hop -algorithm (default). -Supported engines: minhop, updn, file, ftree, lash, dor -.TP -\fB\-z\fR, \fB\-\-connect_roots\fR -This option enforces a routing engine (currently up/down -only) to make connectivity between root switches and in -this way to be fully IBA complaint. In many cases this can -violate "pure" deadlock free algorithm, so use it carefully. -.TP -\fB\-M\fR, \fB\-\-lid_matrix_file\fR -This option specifies the name of the lid matrix dump file -from where switch lid matrices (min hops tables will be -loaded. -.TP -\fB\-U\fR, \fB\-\-ucast_file\fR -This option specifies the name of the unicast dump file -from where switch forwarding tables will be loaded. -.TP -\fB\-S\fR, \fB\-\-sadb_file\fR -This option specifies the name of the SA DB dump file -from where SA database will be loaded. -.TP -\fB\-a\fR, \fB\-\-root_guid_file\fR -Set the root nodes for the Up/Down or Fat-Tree routing -algorithm to the guids provided in the given file (one to a line). -.TP -\fB\-u\fR, \fB\-\-cn_guid_file\fR -Set the compute nodes for the Fat-Tree routing algorithm -to the guids provided in the given file (one to a line). 
-.TP -\fB\-o\fR, \fB\-\-once\fR -This option causes OpenSM to configure the subnet -once, then exit. Ports remain in the ACTIVE state. -.TP -\fB\-s\fR, \fB\-\-sweep\fR -This option specifies the number of seconds between -subnet sweeps. Specifying -s 0 disables sweeping. -Without -s, OpenSM defaults to a sweep interval of -10 seconds. -.TP -\fB\-t\fR, \fB\-\-timeout\fR -This option specifies the time in milliseconds -used for transaction timeouts. -Specifying -t 0 disables timeouts. -Without -t, OpenSM defaults to a timeout value of -200 milliseconds. -.TP -\fB\-maxsmps\fR -This option specifies the number of VL15 SMP MADs -allowed on the wire at any one time. -Specifying -maxsmps 0 allows unlimited outstanding -SMPs. -Without -maxsmps, OpenSM defaults to a maximum of -4 outstanding SMPs. -.TP -\fB\-console [off | local | socket | loopback]\fR -This option brings up the OpenSM console (default off). -Note that the socket and loopback options will only be available -if OpenSM was built with --enable-console-socket. -.TP -\fB\-console-port\fR -Specify an alternate telnet port for the socket console (default 10000). -Note that this option only appears if OpenSM was built with ---enable-console-socket. -.TP -\fB\-i\fR, \fB\-ignore-guids\fR -This option provides the means to define a set of ports -(by guid) that will be ignored by the link load -equalization algorithm. -.TP -\fB\-x\fR, \fB\-\-honor_guid2lid\fR -This option forces OpenSM to honor the guid2lid file, -when it comes out of Standby state, if such file exists -under OSM_CACHE_DIR, and is valid. -By default, this is FALSE. -.TP -\fB\-f\fR, \fB\-\-log_file\fR -This option defines the log to be the given file. -By default, the log goes to /var/log/opensm.log. -For the log to go to standard output use -f stdout. -.TP -\fB\-L\fR, \fB\-\-log_limit\fR -This option defines maximal log file size in MB. When -specified the log file will be truncated upon reaching -this limit. 
-.TP -\fB\-e\fR, \fB\-\-erase_log_file\fR -This option will cause deletion of the log file -(if it previously exists). By default, the log file -is accumulative. -.TP -\fB\-P\fR, \fB\-\-Pconfig\fR -This option defines the optional partition configuration file. -The default name is \'/etc/opensm/opensm-partitions.conf\'. -.TP -.BI --prefix_routes_file= path -Prefix routes control how the SA responds to path record queries for -off-subnet DGIDs. By default, the SA fails such queries. The -.B PREFIX ROUTES -section below describes the format of the configuration file. -The default path is \fB\%/etc/ofa/opensm\-prefix\-routes.conf\fP. -.TP -\fB\-Q\fR, \fB\-\-qos\fR -This option enables QoS setup. It is disabled by default. -.TP -\fB\-N\fR, \fB\-\-no_part_enforce\fR -This option disables partition enforcement on switch external ports. -.TP -\fB\-y\fR, \fB\-\-stay_on_fatal\fR -This option will cause SM not to exit on fatal initialization -issues: if SM discovers duplicated guids or a 12x link with -lane reversal badly configured. -By default, the SM will exit on these errors. -.TP -\fB\-B\fR, \fB\-\-daemon\fR -Run in daemon mode - OpenSM will run in the background. -.TP -\fB\-I\fR, \fB\-\-inactive\fR -Start SM in inactive rather than init SM state. This -option can be used in conjunction with the perfmgr so as to -run a standalone performance manager without SM/SA. However, -this is NOT currently implemented in the performance manager. -.TP -\fB\-perfmgr\fR -Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at -configure time. -.TP -\fB\-perfmgr_sweep_time_s\fR -Specify the sweep time for the performance manager in seconds -(default is 180 seconds). Only takes -effect if --enable-perfmgr was specified at configure time. -.TP -.BI --consolidate_ipv6_snm_reqests -Consolidate IPv6 Solicited Node Multicast group joins into 1 IB multicast -group. -.TP -\fB\-v\fR, \fB\-\-verbose\fR -This option increases the log verbosity level. 
-The -v option may be specified multiple times -to further increase the verbosity level. -See the -D option for more information about -log verbosity. -.TP -\fB\-V\fR -This option sets the maximum verbosity level and -forces log flushing. -The -V option is equivalent to \'-D 0xFF -d 2\'. -See the -D option for more information about -log verbosity. -.TP -\fB\-D\fR -This option sets the log verbosity level. -A flags field must follow the -D option. -A bit set/clear in the flags enables/disables a -specific log level as follows: - - BIT LOG LEVEL ENABLED - ---- ----------------- - 0x01 - ERROR (error messages) - 0x02 - INFO (basic messages, low volume) - 0x04 - VERBOSE (interesting stuff, moderate volume) - 0x08 - DEBUG (diagnostic, high volume) - 0x10 - FUNCS (function entry/exit, very high volume) - 0x20 - FRAMES (dumps all SMP and GMP frames) - 0x40 - ROUTING (dump FDB routing information) - 0x80 - currently unused. - -Without -D, OpenSM defaults to ERROR + INFO (0x3). -Specifying -D 0 disables all messages. -Specifying -D 0xFF enables all messages (see -V). -High verbosity levels may require increasing -the transaction timeout with the -t option. -.TP -\fB\-d\fR, \fB\-\-debug\fR -This option specifies a debug option. -These options are not normally needed. -The number following -d selects the debug -option to enable as follows: - - OPT Description - --- ----------------- - -d0 - Ignore other SM nodes - -d1 - Force single threaded dispatching - -d2 - Force log flushing after each log message - -d3 - Disable multicast support -.TP -\fB\-h\fR, \fB\-\-help\fR -Display this usage info then exit. -.TP -\fB\-?\fR -Display this usage info then exit. - -.SH ENVIRONMENT VARIABLES -.PP -The following environment variables control opensm behavior: - -OSM_TMP_DIR - controls the directory in which the temporary files generated by -opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and -opensm.mcfdbs. By default, this directory is /var/log. 
- -OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent -runs are consistent. The default directory used is /var/cache/opensm. -The following files are included in it: - - guid2lid - stores the LID range assigned to each GUID - - opensm.opts - an optional file that holds a complete set of opensm - configuration options - -.SH NOTES -.PP -When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found. -.PP -Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for -logrotate purposes. - -.SH PARTITION CONFIGURATION -.PP -The default name of OpenSM partitions configuration file is -\'/etc/ofa/opensm-partitions.conf\'. The default may be changed by using ---Pconfig (-P) option with OpenSM. - -The default partition will be created by OpenSM unconditionally even -when partition configuration file does not exist or cannot be accessed. - -The default partition has P_Key value 0x7fff. OpenSM\'s port will have -full membership in default partition. All other end ports will have -partial membership. - -File Format - -Comments: - -Line content followed after \'#\' character is comment and ignored by -parser. - -General file format: - -: ; - -Partition Definition: - -[PartitionName][=PKey][,flag[=value]][,defmember=full|limited] - - PartitionName - string, will be used with logging. When omitted - empty string will be used. - PKey - P_Key value for this partition. Only low 15 bits will - be used. When omitted will be autogenerated. - flag - used to indicate IPoIB capability of this partition. - defmember=full|limited - specifies default membership for port guid - list. Default is limited. - -Currently recognized flags are: - - ipoib - indicates that this partition may be used for IPoIB, as - result IPoIB capable MC group will be created. 
- rate= - specifies rate for this IPoIB MC group - (default is 3 (10GBps)) - mtu= - specifies MTU for this IPoIB MC group - (default is 4 (2048)) - sl= - specifies SL for this IPoIB MC group - (default is 0) - scope= - specifies scope for this IPoIB MC group - (default is 2 (link local)). Multiple scope settings - are permitted for a partition. - -Note that values for rate, mtu, and scope should be specified as -defined in the IBTA specification (for example, mtu=4 for 2048). - -PortGUIDs list: - - PortGUID - GUID of partition member EndPort. Hexadecimal - numbers should start from 0x, decimal numbers - are accepted too. - full or limited - indicates full or limited membership for this - port. When omitted (or unrecognized) limited - membership is assumed. - -There are two useful keywords for PortGUID definition: - - - 'ALL' means all end ports in this subnet. - - 'SELF' means subnet manager's port. - -Empty list means no ports in this partition. - -Notes: - -White space is permitted between delimiters ('=', ',',':',';'). - -The line can be wrapped after ':' followed after Partition Definition and -between. - -PartitionName does not need to be unique, PKey does need to be unique. -If PKey is repeated then those partition configurations will be merged -and first PartitionName will be used (see also next note). - -It is possible to split partition configuration in more than one -definition, but then PKey should be explicitly specified (otherwise -different PKey values will be generated for those definitions). 
- -Examples: - - Default=0x7fff : ALL, SELF=full ; - - NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; - - YetAnotherOne = 0x300 : SELF=full ; - YetAnotherOne = 0x300 : ALL=limited ; - - ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; - # 0x123453, 0x123454 will be limited - ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; - # 0x123456, 0x123457 will be limited - ShareIO = 0x80 : defmember=limited : 0x123456, 0x123457, 0x123458=full; - ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; - ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; - - -Note: - -The following rule is equivalent to how OpenSM used to run prior to the -partition manager: - - Default=0x7fff,ipoib:ALL=full; - -.SH QOS CONFIGURATION -.PP -There are a set of QoS related low-level configuration parameters. -All these parameter names are prefixed by "qos_" string. Here is a full -list of these parameters: - - qos_max_vls - The maximum number of VLs that will be on the subnet - qos_high_limit - The limit of High Priority component of VL - Arbitration table (IBA 7.6.9) - qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) - template - qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) - template - Both VL arbitration templates are pairs of - VL and weight - qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. It is - a list of VLs corresponding to SLs 0-15 (Note - that VL15 used here means drop this SL) - -Typical default values (hard-coded in OpenSM initialization) are: - - qos_max_vls=15 - qos_high_limit=0 - qos_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 - qos_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 - qos_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 - -The syntax is compatible with rest of OpenSM configuration options and -values may be stored in OpenSM config file (cached options file). 
- -In addition to the above, we may define separate QoS configuration -parameters sets for various target types. As targets, we currently support -CAs, routers, switch external ports, and switch's enhanced port 0. The -names of such specialized parameters are prefixed by "qos__" -string. Here is a full list of the currently supported sets: - - qos_ca_ - QoS configuration parameters set for CAs. - qos_rtr_ - parameters set for routers. - qos_sw0_ - parameters set for switches' port 0. - qos_swe_ - parameters set for switches' external ports. - -Examples: - qos_sw0_max_vls=2 - qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, - qos_swe_high_limit=0 - -.SH PREFIX ROUTES -.PP -Prefix routes control how the SA responds to path record queries for -off-subnet DGIDs. By default, the SA fails such queries. -Note that IBA does not specify how the SA should obtain off-subnet path -record information. -The prefix routes configuration is meant as a stop-gap until the -specification is completed. -.PP -Each line in the configuration file is a 64-bit prefix followed by a -64-bit GUID, separated by white space. -The GUID specifies the router port on the local subnet that will -handle the prefix. -Blank lines are ignored, as is anything between a \fB#\fP character -and the end of the line. -The prefix and GUID are both in hex, the leading 0x is optional. -Either, or both, can be wild-carded by specifying an -asterisk instead of an explicit prefix or GUID. -.PP -When responding to a path record query for an off-subnet DGID, -opensm searches for the first prefix match in the configuration file. -Therefore, the order of the lines in the configuration file is important: -a wild-carded prefix at the beginning of the configuration file renders -all subsequent lines useless. -If there is no match, then opensm fails the query. -It is legal to repeat prefixes in the configuration file, -opensm will return the path to the first available matching router. 
-A configuration file with a single line where both prefix and GUID -are wild-carded means that a path record query specifying any -off-subnet DGID should return a path to the first available router. -This configuration yields the same behaviour formerly achieved by -compiling opensm with -DROUTER_EXP. - -.SH ROUTING -.PP -OpenSM now offers five routing engines: - -1. Min Hop Algorithm - based on the minimum hops to each node where the -path length is optimized. - -2. UPDN Unicast routing algorithm - also based on the minimum hops to each -node, but it is constrained to ranking rules. This algorithm should be chosen -if the subnet is not a pure Fat Tree, and deadlock may occur due to a -loop in the subnet. - -3. Fat Tree Unicast routing algorithm - this algorithm optimizes routing -for congestion-free "shift" communication pattern. -It should be chosen if a subnet is a symmetrical Fat Trees of various types, -not just K-ary-N-Trees: non-constant K, not fully staffed, any CBB ratio. -Similar to UPDN, Fat Tree routing is constrained to ranking rules. - -4. LASH unicast routing algorithm - uses Infiniband virtual layers -(SL) to provide deadlock-free shortest-path routing while also -distributing the paths between layers. LASH is an alternative -deadlock-free topology-agnostic routing algorithm to the non-minimal -UPDN algorithm avoiding the use of a potentially congested root node. - -5. DOR Unicast routing algorithm - based on the Min Hop algorithm, but -avoids port equalization except for redundant links between the same -two switches. This provides deadlock free routes for hypercubes when -the fabric is cabled as a hypercube and for meshes when cabled as a -mesh (see details below). - -OpenSM also supports a file method which -can load routes from a table. See \'Modular Routing Engine\' for more -information on this. - -The basic routing algorithm is comprised of two stages: - -1. 
MinHop matrix calculation - How many hops are required to get from each port to each LID ? - The algorithm to fill these tables is different if you run standard -(min hop) or Up/Down. - For standard routing, a "relaxation" algorithm is used to propagate -min hop from every destination LID through neighbor switches - For Up/Down routing, a BFS from every target is used. The BFS tracks link -direction (up or down) and avoid steps that will perform up after a down -step was used. - -2. Once MinHop matrices exist, each switch is visited and for each target LID a -decision is made as to what port should be used to get to that LID. - This step is common to standard and Up/Down routing. Each port has a -counter counting the number of target LIDs going through it. - When there are multiple alternative ports with same MinHop to a LID, -the one with less previously assigned ports is selected. - If LMC > 0, more checks are added: Within each group of LIDs assigned to -same target port, - a. use only ports which have same MinHop - b. first prefer the ones that go to different systemImageGuid (then -the previous LID of the same LMC group) - c. if none - prefer those which go through another NodeGuid - d. fall back to the number of paths method (if all go to same node). - -Effect of Topology Changes - -OpenSM will preserve existing routing in any case where there is no change in -the fabric switches unless the -r (--reassign_lids) option is specified. - --r -.br ---reassign_lids - This option causes OpenSM to reassign LIDs to all - end nodes. Specifying -r on a running subnet - may disrupt subnet traffic. - Without -r, OpenSM attempts to preserve existing - LID assignments resolving multiple use of same LID. - -If a link is added or removed, OpenSM does not recalculate -the routes that do not have to change. A route has to change -if the port is no longer UP or no longer the MinHop. When routing changes -are performed, the same algorithm for balancing the routes is invoked. 
- -In the case of using the file based routing, any topology changes are -currently ignored The 'file' routing engine just loads the LFTs from the file -specified, with no reaction to real topology. Obviously, this will not be able -to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent -switches will be skipped. Multicast is not affected by 'file' routing engine -(this uses min hop tables). - - -Min Hop Algorithm - -The Min Hop algorithm is invoked when neither UPDN or the file method are -specified. - -The Min Hop algorithm is divided into two stages: computation of -min-hop tables on every switch and LFT output port assignment. Link -subscription is also equalized with the ability to override based on -port GUID. The latter is supplied by: - --i -.br --ignore-guids - This option provides the means to define a set of ports - (by guid) that will be ignored by the link load - equalization algorithm. Note that only endports (CA, - switch port 0, and router ports) and not switch external - ports are supported. - -LMC awareness routes based on (remote) system or switch basis. - - -Purpose of UPDN Algorithm - -The UPDN algorithm is designed to prevent deadlocks from occurring in loops -of the subnet. A loop-deadlock is a situation in which it is no longer -possible to send data between any two hosts connected through the loop. As -such, the UPDN routing algorithm should be used if the subnet is not a pure -Fat Tree, and one of its loops may experience a deadlock (due, for example, -to high pressure). - -The UPDN algorithm is based on the following main stages: - -1. Auto-detect root nodes - based on the CA hop length from any switch in -the subnet, a statistical histogram is built for each switch (hop num vs -number of occurrences). If the histogram reflects a specific column (higher -than others) for a certain node, then it is marked as a root node. Since -the algorithm is statistical, it may not find any root nodes. 
The list of -the root nodes found by this auto-detect stage is used by the ranking -process stage. - - Note 1: The user can override the node list manually. - Note 2: If this stage cannot find any root nodes, and the user did - not specify a guid list file, OpenSM defaults back to the - Min Hop routing algorithm. - -2. Ranking process - All root switch nodes (found in stage 1) are assigned -a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the -subnet are ranked incrementally. This ranking aids in the process of enforcing -rules that ensure loop-free paths. - -3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from -each (CA or switch) node in the subnet. During the BFS process, the FDB table -of each switch node traversed by BFS is updated, in reference to the starting -node, based on the ranking rules and guid values. - -At the end of the process, the updated FDB tables ensure loop-free paths -through the subnet. - -Note: Up/Down routing does not allow LID routing communication between -switches that are located inside spine "switch systems". -The reason is that there is no way to allow a LID route between them -that does not break the Up/Down rule. -One ramification of this is that you cannot run SM on switches other -than the leaf switches of the fabric. - - -UPDN Algorithm Usage - -Activation through OpenSM - -Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. -Use '-a ' for adding an UPDN guid file that contains the -root nodes for ranking. -If the `-a' option is not used, OpenSM uses its auto-detect root nodes -algorithm. - -Notes on the guid list file: - -1. A valid guid file specifies one guid in each line. Lines with an invalid -format will be discarded. -.br -2. The user should specify the root switch guids. However, it is also -possible to specify CA guids; OpenSM will use the guid of the switch (if -it exists) that connects the CA to the subnet as a root node. 
- - -Fat-tree Routing Algorithm - -The fat-tree algorithm optimizes routing for "shift" communication pattern. -It should be chosen if a subnet is a symmetrical or almost symmetrical -fat-tree of various types. -It supports not just K-ary-N-Trees, by handling for non-constant K, -cases where not all leafs (CAs) are present, any CBB ratio. -As in UPDN, fat-tree also prevents credit-loop-deadlocks. - -If the root guid file is not provided ('-a' or '--root_guid_file' options), -the topology has to be pure fat-tree that complies with the following rules: - - Tree rank should be between two and eight (inclusively) - - Switches of the same rank should have the same number - of UP-going port groups*, unless they are root switches, - in which case the shouldn't have UP-going ports at all. - - Switches of the same rank should have the same number - of DOWN-going port groups, unless they are leaf switches. - - Switches of the same rank should have the same number - of ports in each UP-going port group. - - Switches of the same rank should have the same number - of ports in each DOWN-going port group. - - All the CAs have to be at the same tree level (rank). - -If the root guid file is provided, the topology doesn't have to be pure -fat-tree, and it should only comply with the following rules: - - Tree rank should be between two and eight (inclusively) - - All the Compute Nodes** have to be at the same tree level (rank). - Note that non-compute node CAs are allowed here to be at different - tree ranks. - -* ports that are connected to the same remote switch are referenced as -\'port group\'. - -** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\' -OpenSM options. - -Topologies that do not comply cause a fallback to min hop routing. -Note that this can also occur on link failures which cause the topology -to no longer be "pure" fat-tree. 
- -Note that although fat-tree algorithm supports trees with non-integer CBB -ratio, the routing will not be as balanced as in case of integer CBB ratio. -In addition to this, although the algorithm allows leaf switches to have any -number of CAs, the closer the tree is to be fully populated, the more -effective the "shift" communication pattern will be. -In general, even if the root list is provided, the closer the topology to a -pure and symmetrical fat-tree, the more optimal the routing will be. - -The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) -in the same directory where the OpenSM log resides. This ordering file provides -the CN order that may be used to create efficient communication pattern, that -will match the routing tables. - -Activation through OpenSM - -Use '-R ftree' option to activate the fat-tree algorithm. -Use '-a ' to provide root nodes for ranking. If the `-a' option -is not used, routing algorithm will detect roots automatically. -Use '-u ' to provide the list of compute nodes. If the `-u' option -is not used, all the CAs are considered as compute nodes. - -Note: LMC > 0 is not supported by fat-tree routing. If this is -specified, the default routing algorithm is invoked instead. - - -LASH Routing Algorithm - -LASH is an acronym for LAyered SHortest Path Routing. It is a -deterministic shortest path routing algorithm that enables topology -agnostic deadlock-free routing within communication networks. - -When computing the routing function, LASH analyzes the network -topology for the shortest-path routes between all pairs of sources / -destinations and groups these paths into virtual layers in such a way -as to avoid deadlock. - -Note LASH analyzes routes and ensures deadlock freedom between switch -pairs. The link from HCA between and switch does not need virtual -layers as deadlock will not arise between switch and HCA. 
- -In more detail, the algorithm works as follows: - -1) LASH determines the shortest-path between all pairs of source / -destination switches. Note, LASH ensures the same SL is used for all -SRC/DST - DST/SRC pairs and there is no guarantee that the return -path for a given DST/SRC will be the reverse of the route SRC/DST. - -2) LASH then begins an SL assignment process where a route is assigned -to a layer (SL) if the addition of that route does not cause deadlock -within that layer. This is achieved by maintaining and analysing a -channel dependency graph for each layer. Once the potential addition -of a path could lead to deadlock, LASH opens a new layer and continues -the process. - -3) Once this stage has been completed, it is highly likely that the -first layers processed will contain more paths than the latter ones. -To better balance the use of layers, LASH moves paths from one layer -to another so that the number of paths in each layer averages out. - -Note, the implementation of LASH in opensm attempts to use as few layers -as possible. This number can be less than the number of actual layers -available. - -In general LASH is a very flexible algorithm. It can, for example, -reduce to Dimension Order Routing in certain topologies, it is topology -agnostic and fares well in the face of faults. - -It has been shown that for both regular and irregular topologies, LASH -outperforms Up/Down. The reason for this is that LASH distributes the -traffic more evenly through a network, avoiding the bottleneck issues -related to a root node and always routes shortest-path. - -The algorithm was developed by Simula Research Laboratory. - - -Use '-R lash -Q ' option to activate the LASH algorithm. - -Note: QoS support has to be turned on in order that SL/VL mappings are -used. - -Note: LMC > 0 is not supported by the LASH routing. If this is -specified, the default routing algorithm is invoked instead. 
- - -DOR Routing Algorithm - -The Dimension Order Routing algorithm is based on the Min Hop -algorithm and so uses shortest paths. Instead of spreading traffic -out across different paths with the same shortest distance, it chooses -among the available shortest paths based on an ordering of dimensions. -Each port must be consistently cabled to represent a hypercube -dimension or a mesh dimension. Paths are grown from a destination -back to a source using the lowest dimension (port) of available paths -at each step. This provides the ordering necessary to avoid deadlock. -When there are multiple links between any two switches, they still -represent only one dimension and traffic is balanced across them -unless port equalization is turned off. In the case of hypercubes, -the same port must be used throughout the fabric to represent the -hypercube dimension and match on both ends of the cable. In the case -of meshes, the dimension should consistently use the same pair of -ports, one port on one end of the cable, and the other port on the -other end, continuing along the mesh dimension. - -Use '-R dor' option to activate the DOR algorithm. - - -Routing References - -To learn more about deadlock-free routing, see the article -"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" -by William J Dally and Charles L Seitz (1985). - -To learn more about the up/down algorithm, see the article -"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks" -by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the -Universidad Politecnica de Valencia. - -To learn more about LASH and the flexibility behind it, the requirement -for layers, performance comparisons to other algorithms, see the -following articles: - -"Layered Routing in Irregular Networks", Lysne et al, IEEE -Transactions on Parallel and Distributed Systems, VOL.16, No12, -December 2005. - -"Routing for the ASI Fabric Manager", Solheim et al. 
IEEE -Communications Magazine, Vol.44, No.7, July 2006. - -"Layered Shortest Path (LASH) Routing in Irregular System Area -Networks", Skeie et al. IEEE Computer Society Communication -Architecture for Clusters 2002. - - -Modular Routine Engine - -Modular routing engine structure allows for the ease of -"plugging" new routing modules. - -Currently, only unicast callbacks are supported. Multicast -can be added later. - -One existing routing module is up-down "updn", which may be -activated with '-R updn' option (instead of old '-u'). - -General usage is: -$ opensm -R 'module-name' - -There is also a trivial routing module which is able -to load LFT tables from a dump file. - -Main features: - - - this will load switch LFTs and/or LID matrices (min hops tables) - - this will load switch LFTs according to the path entries introduced - in the dump file - - no additional checks will be performed (such as "is port connected", - etc.) - - in case when fabric LIDs were changed this will try to reconstruct - LFTs correctly if endport GUIDs are represented in the dump file - (in order to disable this, GUIDs may be removed from the dump file - or zeroed) - -The dump file format is compatible with output of 'ibroute' util and for -whole fabric can be generated with dump_lfts.sh script. - -To activate file based routing module, use: - - opensm -R file -U /path/to/dump_file - -If the dump_file is not found or is in error, the default routing -algorithm is utilized. - -The ability to dump switch lid matrices (aka min hops tables) to file and -later to load these is also supported. - -The usage is similar to unicast forwarding tables loading from dump -file (introduced by 'file' routing engine), but new lid matrix file -name should be specified by -M or --lid_matrix_file option. 
For example: - - opensm -R file -M ./opensm-lid-matrix.dump - -The dump file is named \'opensm-lid-matrix.dump\' and will be generated -in standard opensm dump directory (/var/log by default) when -OSM_LOG_ROUTING logging flag is set. - -When routing engine 'file' is activated, but dump file is not specified -or not cannot be open default lid matrix algorithm will be used. - -There is also a switch forwarding tables dumper which generates -a file compatible with dump_lfts.sh output. This file can be used -as input for forwarding tables loading by 'file' routing engine. -Both or one of options -U and -M can be specified together with \'-R file\'. - -.SH FILES -.TP -.B /etc/opensm/prefix-routes.conf -default prefix routes file. - -.SH AUTHORS -.TP -Hal Rosenstock -.RI < hal at xsigo.com > -.TP -Sasha Khapyorsky -.RI < sashak at voltaire.com > -.TP -Eitan Zahavi -.RI < eitan at mellanox.co.il > -.TP -Yevgeny Kliteynik -.RI < kliteyn at mellanox.co.il > -.TP -Thomas Sodring -.RI < tsodring at simula.no > diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in new file mode 100644 index 0000000..ab7fb8e --- /dev/null +++ b/opensm/man/opensm.8.in @@ -0,0 +1,941 @@ +.TH OPENSM 8 "Aug 16, 2007" "OpenIB" "OpenIB Management" + +.SH NAME +opensm \- InfiniBand subnet manager and administration (SM/SA) + +.SH SYNOPSIS +.B opensm +[\-c(ache-options)] [\-g(uid)[=]] [\-l(mc) ] +[\-p(riority) ] [\-smkey ] [\-r(eassign_lids)] +[\-R | \-\-routing_engine ] +[\-z | \-\-connect_roots] +[\-M | \-\-lid_matrix_file ] +[\-U | \-\-ucast_file ] +[\-S | \-\-sadb_file ] [\-a | \-\-root_guid_file ] +[\-u | \-\-cn_guid_file ] [\-o(nce)] [\-s(weep) ] +[\-t(imeout) ] [\-maxsmps ] +[\-console [off | local | socket | loopback]] [\-console-port ] +[\-i(gnore-guids) ] [\-f | \-\-log_file] +[\-L | \-\-log_limit ] [\-e(rase_log_file)] [\-P(config)] +[\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] +[\-B | \-\-daemon] [\-I | \-\-inactive] +[\-\-perfmgr] [\-\-perfmgr_sweep_time_s 
] +[\-\-prefix_routes_file ] +[\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] + +.SH DESCRIPTION +.PP +opensm is an InfiniBand compliant Subnet Manager and Administration, +and runs on top of OpenIB. + +opensm provides an implementation of an InfiniBand Subnet Manager and +Administration. Such a software entity is required to run in order +to initialize the InfiniBand hardware (at least one per each +InfiniBand subnet). + +opensm also now contains an experimental version of a performance +manager as well. + +opensm defaults were designed to meet the common case usage on clusters with up to a few hundred nodes. Thus, in this default mode, opensm will scan the IB +fabric, initialize it, and sweep occasionally for changes. + +opensm attaches to a specific IB port on the local machine and configures only +the fabric connected to it. (If the local machine has other IB ports, +opensm will ignore the fabrics connected to those other ports). If no port is +specified, it will select the first "best" available port. + +opensm can present the available ports and prompt for a port number to +attach to. + +By default, the run is logged to two files: /var/log/messages and /var/log/opensm.log. +The first file will register only general major events, whereas the second +will include details of reported errors. All errors reported in this second +file should be treated as indicators of IB fabric health issues. +(Note that when a fatal and non-recoverable error occurs, opensm will exit.) +Both log files should include the message "SUBNET UP" if opensm was able to +set up the subnet correctly. + +.SH OPTIONS + +.PP +.TP +\fB\-c\fR, \fB\-\-cache-options\fR +Write out a list of all tunable OpenSM parameters, +including their current values from the command line +as well as defaults for others, into the file +OSM_CACHE_DIR/opensm.opts (OSM_CACHE_DIR defaults to +/var/cache/opensm if the corresponding environment +variable is not set).
The options file is then +used for subsequent OpenSM invocations but any +command line options take precedence. +.TP +\fB\-g\fR, \fB\-\-guid\fR +This option specifies the local port GUID value +with which OpenSM should bind. OpenSM may be +bound to 1 port at a time. +If GUID given is 0, OpenSM displays a list +of possible port GUIDs and waits for user input. +Without -g, OpenSM tries to use the default port. +.TP +\fB\-l\fR, \fB\-\-lmc\fR +This option specifies the subnet's LMC value. +The number of LIDs assigned to each port is 2^LMC. +The LMC value must be in the range 0-7. +LMC values > 0 allow multiple paths between ports. +LMC values > 0 should only be used if the subnet +topology actually provides multiple paths between +ports, i.e. multiple interconnects between switches. +Without -l, OpenSM defaults to LMC = 0, which allows +one path between any two ports. +.TP +\fB\-p\fR, \fB\-\-priority\fR +This option specifies the SM\'s PRIORITY. +This will affect the handover cases, where master +is chosen by priority and GUID. Range goes from 0 +(default and lowest priority) to 15 (highest). +.TP +\fB\-smkey\fR +This option specifies the SM\'s SM_Key (64 bits). +This will affect SM authentication. +.TP +\fB\-r\fR, \fB\-\-reassign_lids\fR +This option causes OpenSM to reassign LIDs to all +end nodes. Specifying -r on a running subnet +may disrupt subnet traffic. +Without -r, OpenSM attempts to preserve existing +LID assignments resolving multiple use of same LID. +.TP +\fB\-R\fR, \fB\-\-routing_engine\fR +This option chooses routing engine instead of Min Hop +algorithm (default). +Supported engines: minhop, updn, file, ftree, lash, dor +.TP +\fB\-z\fR, \fB\-\-connect_roots\fR +This option enforces a routing engine (currently up/down +only) to make connectivity between root switches and in +this way to be fully IBA compliant. In many cases this can +violate "pure" deadlock free algorithm, so use it carefully.
+.TP +\fB\-M\fR, \fB\-\-lid_matrix_file\fR +This option specifies the name of the lid matrix dump file +from where switch lid matrices (min hops tables) will be +loaded. +.TP +\fB\-U\fR, \fB\-\-ucast_file\fR +This option specifies the name of the unicast dump file +from where switch forwarding tables will be loaded. +.TP +\fB\-S\fR, \fB\-\-sadb_file\fR +This option specifies the name of the SA DB dump file +from where SA database will be loaded. +.TP +\fB\-a\fR, \fB\-\-root_guid_file\fR +Set the root nodes for the Up/Down or Fat-Tree routing +algorithm to the guids provided in the given file (one to a line). +.TP +\fB\-u\fR, \fB\-\-cn_guid_file\fR +Set the compute nodes for the Fat-Tree routing algorithm +to the guids provided in the given file (one to a line). +.TP +\fB\-o\fR, \fB\-\-once\fR +This option causes OpenSM to configure the subnet +once, then exit. Ports remain in the ACTIVE state. +.TP +\fB\-s\fR, \fB\-\-sweep\fR +This option specifies the number of seconds between +subnet sweeps. Specifying -s 0 disables sweeping. +Without -s, OpenSM defaults to a sweep interval of +10 seconds. +.TP +\fB\-t\fR, \fB\-\-timeout\fR +This option specifies the time in milliseconds +used for transaction timeouts. +Specifying -t 0 disables timeouts. +Without -t, OpenSM defaults to a timeout value of +200 milliseconds. +.TP +\fB\-maxsmps\fR +This option specifies the number of VL15 SMP MADs +allowed on the wire at any one time. +Specifying -maxsmps 0 allows unlimited outstanding +SMPs. +Without -maxsmps, OpenSM defaults to a maximum of +4 outstanding SMPs. +.TP +\fB\-console [off | local | socket | loopback]\fR +This option brings up the OpenSM console (default off). +Note that the socket and loopback options will only be available +if OpenSM was built with --enable-console-socket. +.TP +\fB\-console-port\fR +Specify an alternate telnet port for the socket console (default 10000). +Note that this option only appears if OpenSM was built with +--enable-console-socket.
+.TP +\fB\-i\fR, \fB\-ignore-guids\fR +This option provides the means to define a set of ports +(by guid) that will be ignored by the link load +equalization algorithm. +.TP +\fB\-x\fR, \fB\-\-honor_guid2lid\fR +This option forces OpenSM to honor the guid2lid file, +when it comes out of Standby state, if such file exists +under OSM_CACHE_DIR, and is valid. +By default, this is FALSE. +.TP +\fB\-f\fR, \fB\-\-log_file\fR +This option defines the log to be the given file. +By default, the log goes to /var/log/opensm.log. +For the log to go to standard output use -f stdout. +.TP +\fB\-L\fR, \fB\-\-log_limit\fR +This option defines maximal log file size in MB. When +specified the log file will be truncated upon reaching +this limit. +.TP +\fB\-e\fR, \fB\-\-erase_log_file\fR +This option will cause deletion of the log file +(if it previously exists). By default, the log file +is accumulative. +.TP +\fB\-P\fR, \fB\-\-Pconfig\fR +This option defines the optional partition configuration file. +The default name is \'/etc/opensm/opensm-partitions.conf\'. +.TP +.BI --prefix_routes_file= path +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. The +.B PREFIX ROUTES +section below describes the format of the configuration file. +The default path is \fB\%/etc/ofa/opensm\-prefix\-routes.conf\fP. +.TP +\fB\-Q\fR, \fB\-\-qos\fR +This option enables QoS setup. It is disabled by default. +.TP +\fB\-N\fR, \fB\-\-no_part_enforce\fR +This option disables partition enforcement on switch external ports. +.TP +\fB\-y\fR, \fB\-\-stay_on_fatal\fR +This option will cause SM not to exit on fatal initialization +issues: if SM discovers duplicated guids or a 12x link with +lane reversal badly configured. +By default, the SM will exit on these errors. +.TP +\fB\-B\fR, \fB\-\-daemon\fR +Run in daemon mode - OpenSM will run in the background. 
+.TP +\fB\-I\fR, \fB\-\-inactive\fR +Start SM in inactive rather than init SM state. This +option can be used in conjunction with the perfmgr so as to +run a standalone performance manager without SM/SA. However, +this is NOT currently implemented in the performance manager. +.TP +\fB\-perfmgr\fR +Enable the perfmgr. Only takes effect if --enable-perfmgr was specified at +configure time. +.TP +\fB\-perfmgr_sweep_time_s\fR +Specify the sweep time for the performance manager in seconds +(default is 180 seconds). Only takes +effect if --enable-perfmgr was specified at configure time. +.TP +.BI --consolidate_ipv6_snm_reqests +Consolidate IPv6 Solicited Node Multicast group joins into 1 IB multicast +group. +.TP +\fB\-v\fR, \fB\-\-verbose\fR +This option increases the log verbosity level. +The -v option may be specified multiple times +to further increase the verbosity level. +See the -D option for more information about +log verbosity. +.TP +\fB\-V\fR +This option sets the maximum verbosity level and +forces log flushing. +The -V option is equivalent to \'-D 0xFF -d 2\'. +See the -D option for more information about +log verbosity. +.TP +\fB\-D\fR +This option sets the log verbosity level. +A flags field must follow the -D option. +A bit set/clear in the flags enables/disables a +specific log level as follows: + + BIT LOG LEVEL ENABLED + ---- ----------------- + 0x01 - ERROR (error messages) + 0x02 - INFO (basic messages, low volume) + 0x04 - VERBOSE (interesting stuff, moderate volume) + 0x08 - DEBUG (diagnostic, high volume) + 0x10 - FUNCS (function entry/exit, very high volume) + 0x20 - FRAMES (dumps all SMP and GMP frames) + 0x40 - ROUTING (dump FDB routing information) + 0x80 - currently unused. + +Without -D, OpenSM defaults to ERROR + INFO (0x3). +Specifying -D 0 disables all messages. +Specifying -D 0xFF enables all messages (see -V). +High verbosity levels may require increasing +the transaction timeout with the -t option. 
+.TP +\fB\-d\fR, \fB\-\-debug\fR +This option specifies a debug option. +These options are not normally needed. +The number following -d selects the debug +option to enable as follows: + + OPT Description + --- ----------------- + -d0 - Ignore other SM nodes + -d1 - Force single threaded dispatching + -d2 - Force log flushing after each log message + -d3 - Disable multicast support +.TP +\fB\-h\fR, \fB\-\-help\fR +Display this usage info then exit. +.TP +\fB\-?\fR +Display this usage info then exit. + +.SH ENVIRONMENT VARIABLES +.PP +The following environment variables control opensm behavior: + +OSM_TMP_DIR - controls the directory in which the temporary files generated by +opensm are created. These files are: opensm-subnet.lst, opensm.fdbs, and +opensm.mcfdbs. By default, this directory is /var/log. + +OSM_CACHE_DIR - opensm stores certain data to the disk such that subsequent +runs are consistent. The default directory used is /var/cache/opensm. +The following files are included in it: + + guid2lid - stores the LID range assigned to each GUID + + opensm.opts - an optional file that holds a complete set of opensm + configuration options + +.SH NOTES +.PP +When opensm receives a HUP signal, it starts a new heavy sweep as if a trap was received or a topology change was found. +.PP +Also, SIGUSR1 can be used to trigger a reopen of /var/log/opensm.log for +logrotate purposes. + +.SH PARTITION CONFIGURATION +.PP +The default name of OpenSM partitions configuration file is +\'/etc/ofa/opensm-partitions.conf\'. The default may be changed by using +--Pconfig (-P) option with OpenSM. + +The default partition will be created by OpenSM unconditionally even +when partition configuration file does not exist or cannot be accessed. + +The default partition has P_Key value 0x7fff. OpenSM\'s port will have +full membership in default partition. All other end ports will have +partial membership. 
+ +File Format + +Comments: + +Line content followed after \'#\' character is comment and ignored by +parser. + +General file format: + +: ; + +Partition Definition: + +[PartitionName][=PKey][,flag[=value]][,defmember=full|limited] + + PartitionName - string, will be used with logging. When omitted + empty string will be used. + PKey - P_Key value for this partition. Only low 15 bits will + be used. When omitted will be autogenerated. + flag - used to indicate IPoIB capability of this partition. + defmember=full|limited - specifies default membership for port guid + list. Default is limited. + +Currently recognized flags are: + + ipoib - indicates that this partition may be used for IPoIB, as + result IPoIB capable MC group will be created. + rate= - specifies rate for this IPoIB MC group + (default is 3 (10GBps)) + mtu= - specifies MTU for this IPoIB MC group + (default is 4 (2048)) + sl= - specifies SL for this IPoIB MC group + (default is 0) + scope= - specifies scope for this IPoIB MC group + (default is 2 (link local)). Multiple scope settings + are permitted for a partition. + +Note that values for rate, mtu, and scope should be specified as +defined in the IBTA specification (for example, mtu=4 for 2048). + +PortGUIDs list: + + PortGUID - GUID of partition member EndPort. Hexadecimal + numbers should start from 0x, decimal numbers + are accepted too. + full or limited - indicates full or limited membership for this + port. When omitted (or unrecognized) limited + membership is assumed. + +There are two useful keywords for PortGUID definition: + + - 'ALL' means all end ports in this subnet. + - 'SELF' means subnet manager's port. + +Empty list means no ports in this partition. + +Notes: + +White space is permitted between delimiters ('=', ',',':',';'). + +The line can be wrapped after ':' followed after Partition Definition and +between. + +PartitionName does not need to be unique, PKey does need to be unique. 
+If PKey is repeated then those partition configurations will be merged +and first PartitionName will be used (see also next note). + +It is possible to split partition configuration in more than one +definition, but then PKey should be explicitly specified (otherwise +different PKey values will be generated for those definitions). + +Examples: + + Default=0x7fff : ALL, SELF=full ; + + NewPartition , ipoib : 0x123456=full, 0x3456789034=limi, 0x2134af2306 ; + + YetAnotherOne = 0x300 : SELF=full ; + YetAnotherOne = 0x300 : ALL=limited ; + + ShareIO = 0x80 , defmember=full : 0x123451, 0x123452; + # 0x123453, 0x123454 will be limited + ShareIO = 0x80 : 0x123453, 0x123454, 0x123455=full; + # 0x123456, 0x123457 will be limited + ShareIO = 0x80 : defmember=limited : 0x123456, 0x123457, 0x123458=full; + ShareIO = 0x80 , defmember=full : 0x123459, 0x12345a; + ShareIO = 0x80 , defmember=full : 0x12345b, 0x12345c=limited, 0x12345d; + + +Note: + +The following rule is equivalent to how OpenSM used to run prior to the +partition manager: + + Default=0x7fff,ipoib:ALL=full; + +.SH QOS CONFIGURATION +.PP +There are a set of QoS related low-level configuration parameters. +All these parameter names are prefixed by "qos_" string. Here is a full +list of these parameters: + + qos_max_vls - The maximum number of VLs that will be on the subnet + qos_high_limit - The limit of High Priority component of VL + Arbitration table (IBA 7.6.9) + qos_vlarb_low - Low priority VL Arbitration table (IBA 7.6.9) + template + qos_vlarb_high - High priority VL Arbitration table (IBA 7.6.9) + template + Both VL arbitration templates are pairs of + VL and weight + qos_sl2vl - SL2VL Mapping table (IBA 7.6.6) template. 
It is + a list of VLs corresponding to SLs 0-15 (Note + that VL15 used here means drop this SL) + +Typical default values (hard-coded in OpenSM initialization) are: + + qos_max_vls=15 + qos_high_limit=0 + qos_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 + qos_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 + qos_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 + +The syntax is compatible with rest of OpenSM configuration options and +values may be stored in OpenSM config file (cached options file). + +In addition to the above, we may define separate QoS configuration +parameters sets for various target types. As targets, we currently support +CAs, routers, switch external ports, and switch's enhanced port 0. The +names of such specialized parameters are prefixed by "qos__" +string. Here is a full list of the currently supported sets: + + qos_ca_ - QoS configuration parameters set for CAs. + qos_rtr_ - parameters set for routers. + qos_sw0_ - parameters set for switches' port 0. + qos_swe_ - parameters set for switches' external ports. + +Examples: + qos_sw0_max_vls=2 + qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, + qos_swe_high_limit=0 + +.SH PREFIX ROUTES +.PP +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. +Note that IBA does not specify how the SA should obtain off-subnet path +record information. +The prefix routes configuration is meant as a stop-gap until the +specification is completed. +.PP +Each line in the configuration file is a 64-bit prefix followed by a +64-bit GUID, separated by white space. +The GUID specifies the router port on the local subnet that will +handle the prefix. +Blank lines are ignored, as is anything between a \fB#\fP character +and the end of the line. +The prefix and GUID are both in hex, the leading 0x is optional. 
+Either, or both, can be wild-carded by specifying an +asterisk instead of an explicit prefix or GUID. +.PP +When responding to a path record query for an off-subnet DGID, +opensm searches for the first prefix match in the configuration file. +Therefore, the order of the lines in the configuration file is important: +a wild-carded prefix at the beginning of the configuration file renders +all subsequent lines useless. +If there is no match, then opensm fails the query. +It is legal to repeat prefixes in the configuration file, +opensm will return the path to the first available matching router. +A configuration file with a single line where both prefix and GUID +are wild-carded means that a path record query specifying any +off-subnet DGID should return a path to the first available router. +This configuration yields the same behaviour formerly achieved by +compiling opensm with -DROUTER_EXP. + +.SH ROUTING +.PP +OpenSM now offers five routing engines: + +1. Min Hop Algorithm - based on the minimum hops to each node where the +path length is optimized. + +2. UPDN Unicast routing algorithm - also based on the minimum hops to each +node, but it is constrained to ranking rules. This algorithm should be chosen +if the subnet is not a pure Fat Tree, and deadlock may occur due to a +loop in the subnet. + +3. Fat Tree Unicast routing algorithm - this algorithm optimizes routing +for congestion-free "shift" communication pattern. +It should be chosen if a subnet is a symmetrical Fat Trees of various types, +not just K-ary-N-Trees: non-constant K, not fully staffed, any CBB ratio. +Similar to UPDN, Fat Tree routing is constrained to ranking rules. + +4. LASH unicast routing algorithm - uses Infiniband virtual layers +(SL) to provide deadlock-free shortest-path routing while also +distributing the paths between layers. 
LASH is an alternative +deadlock-free topology-agnostic routing algorithm to the non-minimal +UPDN algorithm avoiding the use of a potentially congested root node. + +5. DOR Unicast routing algorithm - based on the Min Hop algorithm, but +avoids port equalization except for redundant links between the same +two switches. This provides deadlock free routes for hypercubes when +the fabric is cabled as a hypercube and for meshes when cabled as a +mesh (see details below). + +OpenSM also supports a file method which +can load routes from a table. See \'Modular Routing Engine\' for more +information on this. + +The basic routing algorithm is comprised of two stages: + +1. MinHop matrix calculation + How many hops are required to get from each port to each LID ? + The algorithm to fill these tables is different if you run standard +(min hop) or Up/Down. + For standard routing, a "relaxation" algorithm is used to propagate +min hop from every destination LID through neighbor switches + For Up/Down routing, a BFS from every target is used. The BFS tracks link +direction (up or down) and avoid steps that will perform up after a down +step was used. + +2. Once MinHop matrices exist, each switch is visited and for each target LID a +decision is made as to what port should be used to get to that LID. + This step is common to standard and Up/Down routing. Each port has a +counter counting the number of target LIDs going through it. + When there are multiple alternative ports with same MinHop to a LID, +the one with less previously assigned ports is selected. + If LMC > 0, more checks are added: Within each group of LIDs assigned to +same target port, + a. use only ports which have same MinHop + b. first prefer the ones that go to different systemImageGuid (then +the previous LID of the same LMC group) + c. if none - prefer those which go through another NodeGuid + d. fall back to the number of paths method (if all go to same node). 
+ +Effect of Topology Changes + +OpenSM will preserve existing routing in any case where there is no change in +the fabric switches unless the -r (--reassign_lids) option is specified. + +-r +.br +--reassign_lids + This option causes OpenSM to reassign LIDs to all + end nodes. Specifying -r on a running subnet + may disrupt subnet traffic. + Without -r, OpenSM attempts to preserve existing + LID assignments resolving multiple use of same LID. + +If a link is added or removed, OpenSM does not recalculate +the routes that do not have to change. A route has to change +if the port is no longer UP or no longer the MinHop. When routing changes +are performed, the same algorithm for balancing the routes is invoked. + +In the case of using the file based routing, any topology changes are +currently ignored. The 'file' routing engine just loads the LFTs from the file +specified, with no reaction to real topology. Obviously, this will not be able +to recheck LIDs (by GUID) for disconnected nodes, and LFTs for non-existent +switches will be skipped. Multicast is not affected by 'file' routing engine +(this uses min hop tables). + + +Min Hop Algorithm + +The Min Hop algorithm is invoked when neither UPDN nor the file method is +specified. + +The Min Hop algorithm is divided into two stages: computation of +min-hop tables on every switch and LFT output port assignment. Link +subscription is also equalized with the ability to override based on +port GUID. The latter is supplied by: + +-i +.br +-ignore-guids + This option provides the means to define a set of ports + (by guid) that will be ignored by the link load + equalization algorithm. Note that only endports (CA, + switch port 0, and router ports) and not switch external + ports are supported. + +LMC awareness routes based on (remote) system or switch basis. + + +Purpose of UPDN Algorithm + +The UPDN algorithm is designed to prevent deadlocks from occurring in loops +of the subnet.
A loop-deadlock is a situation in which it is no longer +possible to send data between any two hosts connected through the loop. As +such, the UPDN routing algorithm should be used if the subnet is not a pure +Fat Tree, and one of its loops may experience a deadlock (due, for example, +to high pressure). + +The UPDN algorithm is based on the following main stages: + +1. Auto-detect root nodes - based on the CA hop length from any switch in +the subnet, a statistical histogram is built for each switch (hop num vs +number of occurrences). If the histogram reflects a specific column (higher +than others) for a certain node, then it is marked as a root node. Since +the algorithm is statistical, it may not find any root nodes. The list of +the root nodes found by this auto-detect stage is used by the ranking +process stage. + + Note 1: The user can override the node list manually. + Note 2: If this stage cannot find any root nodes, and the user did + not specify a guid list file, OpenSM defaults back to the + Min Hop routing algorithm. + +2. Ranking process - All root switch nodes (found in stage 1) are assigned +a rank of 0. Using the BFS algorithm, the rest of the switch nodes in the +subnet are ranked incrementally. This ranking aids in the process of enforcing +rules that ensure loop-free paths. + +3. Min Hop Table setting - after ranking is done, a BFS algorithm is run from +each (CA or switch) node in the subnet. During the BFS process, the FDB table +of each switch node traversed by BFS is updated, in reference to the starting +node, based on the ranking rules and guid values. + +At the end of the process, the updated FDB tables ensure loop-free paths +through the subnet. + +Note: Up/Down routing does not allow LID routing communication between +switches that are located inside spine "switch systems". +The reason is that there is no way to allow a LID route between them +that does not break the Up/Down rule. 
+One ramification of this is that you cannot run SM on switches other +than the leaf switches of the fabric. + + +UPDN Algorithm Usage + +Activation through OpenSM + +Use '-R updn' option (instead of old '-u') to activate the UPDN algorithm. +Use '-a ' for adding an UPDN guid file that contains the +root nodes for ranking. +If the `-a' option is not used, OpenSM uses its auto-detect root nodes +algorithm. + +Notes on the guid list file: + +1. A valid guid file specifies one guid in each line. Lines with an invalid +format will be discarded. +.br +2. The user should specify the root switch guids. However, it is also +possible to specify CA guids; OpenSM will use the guid of the switch (if +it exists) that connects the CA to the subnet as a root node. + + +Fat-tree Routing Algorithm + +The fat-tree algorithm optimizes routing for "shift" communication pattern. +It should be chosen if a subnet is a symmetrical or almost symmetrical +fat-tree of various types. +It supports not just K-ary-N-Trees, by handling for non-constant K, +cases where not all leafs (CAs) are present, any CBB ratio. +As in UPDN, fat-tree also prevents credit-loop-deadlocks. + +If the root guid file is not provided ('-a' or '--root_guid_file' options), +the topology has to be pure fat-tree that complies with the following rules: + - Tree rank should be between two and eight (inclusively) + - Switches of the same rank should have the same number + of UP-going port groups*, unless they are root switches, + in which case they shouldn't have UP-going ports at all. + - Switches of the same rank should have the same number + of DOWN-going port groups, unless they are leaf switches. + - Switches of the same rank should have the same number + of ports in each UP-going port group. + - Switches of the same rank should have the same number + of ports in each DOWN-going port group. + - All the CAs have to be at the same tree level (rank).
+ +If the root guid file is provided, the topology doesn't have to be pure +fat-tree, and it should only comply with the following rules: + - Tree rank should be between two and eight (inclusively) + - All the Compute Nodes** have to be at the same tree level (rank). + Note that non-compute node CAs are allowed here to be at different + tree ranks. + +* ports that are connected to the same remote switch are referenced as +\'port group\'. + +** list of compute nodes (CNs) can be specified by \'-u\' or \'--cn_guid_file\' +OpenSM options. + +Topologies that do not comply cause a fallback to min hop routing. +Note that this can also occur on link failures which cause the topology +to no longer be "pure" fat-tree. + +Note that although fat-tree algorithm supports trees with non-integer CBB +ratio, the routing will not be as balanced as in case of integer CBB ratio. +In addition to this, although the algorithm allows leaf switches to have any +number of CAs, the closer the tree is to be fully populated, the more +effective the "shift" communication pattern will be. +In general, even if the root list is provided, the closer the topology to a +pure and symmetrical fat-tree, the more optimal the routing will be. + +The algorithm also dumps compute node ordering file (opensm-ftree-ca-order.dump) +in the same directory where the OpenSM log resides. This ordering file provides +the CN order that may be used to create efficient communication pattern, that +will match the routing tables. + +Activation through OpenSM + +Use '-R ftree' option to activate the fat-tree algorithm. +Use '-a ' to provide root nodes for ranking. If the `-a' option +is not used, routing algorithm will detect roots automatically. +Use '-u ' to provide the list of compute nodes. If the `-u' option +is not used, all the CAs are considered as compute nodes. + +Note: LMC > 0 is not supported by fat-tree routing. If this is +specified, the default routing algorithm is invoked instead. 
+ + +LASH Routing Algorithm + +LASH is an acronym for LAyered SHortest Path Routing. It is a +deterministic shortest path routing algorithm that enables topology +agnostic deadlock-free routing within communication networks. + +When computing the routing function, LASH analyzes the network +topology for the shortest-path routes between all pairs of sources / +destinations and groups these paths into virtual layers in such a way +as to avoid deadlock. + +Note LASH analyzes routes and ensures deadlock freedom between switch +pairs. The link between an HCA and a switch does not need virtual +layers as deadlock will not arise between switch and HCA. + +In more detail, the algorithm works as follows: + +1) LASH determines the shortest-path between all pairs of source / +destination switches. Note, LASH ensures the same SL is used for all +SRC/DST - DST/SRC pairs and there is no guarantee that the return +path for a given DST/SRC will be the reverse of the route SRC/DST. + +2) LASH then begins an SL assignment process where a route is assigned +to a layer (SL) if the addition of that route does not cause deadlock +within that layer. This is achieved by maintaining and analysing a +channel dependency graph for each layer. Once the potential addition +of a path could lead to deadlock, LASH opens a new layer and continues +the process. + +3) Once this stage has been completed, it is highly likely that the +first layers processed will contain more paths than the latter ones. +To better balance the use of layers, LASH moves paths from one layer +to another so that the number of paths in each layer averages out. + +Note, the implementation of LASH in opensm attempts to use as few layers +as possible. This number can be less than the number of actual layers +available. + +In general LASH is a very flexible algorithm. It can, for example, +reduce to Dimension Order Routing in certain topologies, it is topology +agnostic and fares well in the face of faults.
+ +It has been shown that for both regular and irregular topologies, LASH +outperforms Up/Down. The reason for this is that LASH distributes the +traffic more evenly through a network, avoiding the bottleneck issues +related to a root node and always routes shortest-path. + +The algorithm was developed by Simula Research Laboratory. + + +Use '-R lash -Q ' option to activate the LASH algorithm. + +Note: QoS support has to be turned on in order that SL/VL mappings are +used. + +Note: LMC > 0 is not supported by the LASH routing. If this is +specified, the default routing algorithm is invoked instead. + + +DOR Routing Algorithm + +The Dimension Order Routing algorithm is based on the Min Hop +algorithm and so uses shortest paths. Instead of spreading traffic +out across different paths with the same shortest distance, it chooses +among the available shortest paths based on an ordering of dimensions. +Each port must be consistently cabled to represent a hypercube +dimension or a mesh dimension. Paths are grown from a destination +back to a source using the lowest dimension (port) of available paths +at each step. This provides the ordering necessary to avoid deadlock. +When there are multiple links between any two switches, they still +represent only one dimension and traffic is balanced across them +unless port equalization is turned off. In the case of hypercubes, +the same port must be used throughout the fabric to represent the +hypercube dimension and match on both ends of the cable. In the case +of meshes, the dimension should consistently use the same pair of +ports, one port on one end of the cable, and the other port on the +other end, continuing along the mesh dimension. + +Use '-R dor' option to activate the DOR algorithm. + + +Routing References + +To learn more about deadlock-free routing, see the article +"Deadlock Free Message Routing in Multiprocessor Interconnection Networks" +by William J Dally and Charles L Seitz (1985). 
+ +To learn more about the up/down algorithm, see the article +"Effective Strategy to Compute Forwarding Tables for InfiniBand Networks" +by Jose Carlos Sancho, Antonio Robles, and Jose Duato at the +Universidad Politecnica de Valencia. + +To learn more about LASH and the flexibility behind it, the requirement +for layers, performance comparisons to other algorithms, see the +following articles: + +"Layered Routing in Irregular Networks", Lysne et al, IEEE +Transactions on Parallel and Distributed Systems, VOL.16, No12, +December 2005. + +"Routing for the ASI Fabric Manager", Solheim et al. IEEE +Communications Magazine, Vol.44, No.7, July 2006. + +"Layered Shortest Path (LASH) Routing in Irregular System Area +Networks", Skeie et al. IEEE Computer Society Communication +Architecture for Clusters 2002. + + +Modular Routing Engine + +Modular routing engine structure allows for the ease of +"plugging" new routing modules. + +Currently, only unicast callbacks are supported. Multicast +can be added later. + +One existing routing module is up-down "updn", which may be +activated with '-R updn' option (instead of old '-u'). + +General usage is: +$ opensm -R 'module-name' + +There is also a trivial routing module which is able +to load LFT tables from a dump file. + +Main features: + + - this will load switch LFTs and/or LID matrices (min hops tables) + - this will load switch LFTs according to the path entries introduced + in the dump file + - no additional checks will be performed (such as "is port connected", + etc.) + - in case when fabric LIDs were changed this will try to reconstruct + LFTs correctly if endport GUIDs are represented in the dump file + (in order to disable this, GUIDs may be removed from the dump file + or zeroed) + +The dump file format is compatible with output of 'ibroute' util and for +whole fabric can be generated with dump_lfts.sh script.
+ +To activate file based routing module, use: + + opensm -R file -U /path/to/dump_file + +If the dump_file is not found or is in error, the default routing +algorithm is utilized. + +The ability to dump switch lid matrices (aka min hops tables) to file and +later to load these is also supported. + +The usage is similar to unicast forwarding tables loading from dump +file (introduced by 'file' routing engine), but new lid matrix file +name should be specified by -M or --lid_matrix_file option. For example: + + opensm -R file -M ./opensm-lid-matrix.dump + +The dump file is named \'opensm-lid-matrix.dump\' and will be generated +in standard opensm dump directory (/var/log by default) when +OSM_LOG_ROUTING logging flag is set. + +When routing engine 'file' is activated, but dump file is not specified +or cannot be opened, the default lid matrix algorithm will be used. + +There is also a switch forwarding tables dumper which generates +a file compatible with dump_lfts.sh output. This file can be used +as input for forwarding tables loading by 'file' routing engine. +Both or one of options -U and -M can be specified together with \'-R file\'. + +.SH FILES +.TP +.B /etc/opensm/prefix-routes.conf +default prefix routes file. + +.SH AUTHORS +.TP +Hal Rosenstock +.RI < hal at xsigo.com > +.TP +Sasha Khapyorsky +.RI < sashak at voltaire.com > +.TP +Eitan Zahavi +.RI < eitan at mellanox.co.il > +.TP +Yevgeny Kliteynik +.RI < kliteyn at mellanox.co.il > +.TP +Thomas Sodring +.RI < tsodring at simula.no > -- 1.5.1 >From b55b4018cd581815093d1ee4b8e4214a629d9cfc Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Mon, 4 Feb 2008 14:19:02 -0800 Subject: [PATCH] Update man page for configurable partition and prefix-routes file Signed-off-by: Ira K.
Weiny --- opensm/configure.in | 2 ++ opensm/man/opensm.8.in | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index e8fb250..455630e 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -95,6 +95,7 @@ CONF_DIR="`eval echo $CONF_DIR_TMP2`" AC_DEFINE_UNQUOTED(OPENSM_CONFIG_DIR, ["$CONF_DIR"], [Define OpenSM config directory]) +AC_SUBST(CONF_DIR) dnl Check for a different default node name map file NODENAMEMAPFILE=ib-node-name-map @@ -135,6 +136,7 @@ AC_MSG_RESULT(${withpartitionsconf=no}) AC_DEFINE_UNQUOTED(HAVE_DEFAULT_PARTITION_CONFIG_FILE, ["$CONF_DIR/$PARTITION_CONFIG_FILE"], [Define a QOS policy config file]) +AC_SUBST(PARTITION_CONFIG_FILE) dnl Check for a different QOS policy file QOS_POLICY_FILE=qos-policy.conf diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in index ab7fb8e..115ab56 100644 --- a/opensm/man/opensm.8.in +++ b/opensm/man/opensm.8.in @@ -200,14 +200,14 @@ is accumulative. .TP \fB\-P\fR, \fB\-\-Pconfig\fR This option defines the optional partition configuration file. -The default name is \'/etc/opensm/opensm-partitions.conf\'. +The default name is \fB\%@CONF_DIR@/@PARTITION_CONFIG_FILE@\fP. .TP .BI --prefix_routes_file= path Prefix routes control how the SA responds to path record queries for off-subnet DGIDs. By default, the SA fails such queries. The .B PREFIX ROUTES section below describes the format of the configuration file. -The default path is \fB\%/etc/ofa/opensm\-prefix\-routes.conf\fP. +The default path is \fB\%@CONF_DIR@/prefix\-routes.conf\fP. .TP \fB\-Q\fR, \fB\-\-qos\fR This option enables QoS setup. It is disabled by default. @@ -326,7 +326,7 @@ logrotate purposes. .SH PARTITION CONFIGURATION .PP The default name of OpenSM partitions configuration file is -\'/etc/ofa/opensm-partitions.conf\'. The default may be changed by using +\fB\%@CONF_DIR@/@PARTITION_CONFIG_FILE@\fP. The default may be changed by using --Pconfig (-P) option with OpenSM. 
The default partition will be created by OpenSM unconditionally even @@ -920,7 +920,7 @@ Both or one of options -U and -M can be specified together with \'-R file\'. .SH FILES .TP -.B /etc/opensm/prefix-routes.conf +.B @CONF_DIR@/prefix-routes.conf default prefix routes file. .SH AUTHORS -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Move-opensm.8-man-page-in-prep-for-making-config-fil.patch Type: application/octet-stream Size: 77587 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 0002-Update-man-page-for-configurable-partition-and-prefi.patch Type: application/octet-stream Size: 2623 bytes Desc: not available URL: From jlentini at netapp.com Mon Feb 4 14:39:25 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 4 Feb 2008 17:39:25 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: <20080204211908.GB15115@cefeid.wcss.wroc.pl> References: <20080129003731.GA30262@cefeid.wcss.wroc.pl> <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> Message-ID: On Mon, 4 Feb 2008, Pawel Dziekonski wrote: > On Mon, 04 Feb 2008 at 11:09:43AM -0500, James Lentini wrote: > > > I'm refering to the OFED 1.2 NFS/RDMA package you cited above: > > > > http://www.mellanox.com/downloads/NFSoRDMA/OFED-1.2-NFS-RDMA.gz, so > > > > You can NOT install that on Tom Tucker's kernel. That is an old > > version of the NFS/RDMA software that will conflict with the latest > > code. > > > > If the OFED version you want to use is supported on Tom Tucker's > > kernel (2.6.24-rc6), there should be no problem using OFED. The OFED > > 1.2 NFS/RDMA release is a different story. > > OK! > > I do not insist on this version of OFED. 
I was trying to use it > because of lack of info which OFED is compatible with Tom's tree and > OFED-1.2-NFS-RDMA.gz has RDMA in name. ;) > > I want to use ANY version of OFED that would allow me to use > nfs/rdma TOGETHER with IPoIB, MPIoIB and SDP - please suggest which > version it would be. > > I really appreciate your help! Pawel I don't use OFED, but I'll share with you what I know. OFED 1.3 is based on 2.6.24, which is the same kernel Tom Tucker's git tree is based on. I would assume the two are compatible, but I have not tried it. Do you have NFS/RDMA working at this point? From meier3 at llnl.gov Mon Feb 4 15:01:12 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Mon, 04 Feb 2008 15:01:12 -0800 Subject: [ofa-general] [PATCH] opensm: separated current loop in main into its own function Message-ID: <47A79938.8020306@llnl.gov> Sasha, I am finally getting back to adding the OpenSSL option to the console. This patch, as well as the next one, are just a little cleanup to prepare for that effort. From e1cd363fe9a24e7d88b0b4354b0467a191627073 Mon Sep 17 00:00:00 2001 From: Tim Meier Date: Fri, 1 Feb 2008 16:43:47 -0800 Subject: [PATCH] opensm: separated current loop in main into its own function Put the endless while loop in "main" into its own function to support decoupling the osm_console from opensm. 
Signed-off-by: Tim Meier --- opensm/opensm/main.c | 66 ++++++++++++++++++++++++++++--------------------- 1 files changed, 38 insertions(+), 28 deletions(-) diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 7c435a0..239de84 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -551,6 +551,40 @@ static int daemonize(osm_opensm_t * osm) /********************************************************************** **********************************************************************/ +int osm_manager_loop(osm_subn_opt_t * p_opt, osm_opensm_t * p_osm) +{ + osm_console_init(p_opt, p_osm); + + /* + Sit here forever + */ + while (!osm_exit_flag) { + if (strcmp(p_opt->console, OSM_LOCAL_CONSOLE) == 0 +#ifdef ENABLE_OSM_CONSOLE_SOCKET + || strcmp(p_opt->console, OSM_REMOTE_CONSOLE) == 0 + || strcmp(p_opt->console, OSM_LOOPBACK_CONSOLE) == 0 +#endif + ) + osm_console(p_osm); + else + cl_thread_suspend(10000); + + if (osm_usr1_flag) { + osm_usr1_flag = 0; + osm_log_reopen_file(&(p_osm->log)); + } + if (osm_hup_flag) { + osm_hup_flag = 0; + /* a HUP signal should only start a new heavy sweep */ + p_osm->subn.force_heavy_sweep = TRUE; + osm_opensm_sweep(p_osm); + } + } + osm_console_close_socket(p_osm); + return 0; +} +/********************************************************************** + **********************************************************************/ int main(int argc, char *argv[]) { osm_opensm_t osm; @@ -1010,34 +1044,10 @@ int main(int argc, char *argv[]) osm_exit_flag = 1; } } else { - osm_console_init(&opt, &osm); - - /* - Sit here forever - */ - while (!osm_exit_flag) { - if (strcmp(opt.console, OSM_LOCAL_CONSOLE) == 0 -#ifdef ENABLE_OSM_CONSOLE_SOCKET - || strcmp(opt.console, OSM_REMOTE_CONSOLE) == 0 - || strcmp(opt.console, OSM_LOOPBACK_CONSOLE) == 0 -#endif - ) - osm_console(&osm); - else - cl_thread_suspend(10000); - - if (osm_usr1_flag) { - osm_usr1_flag = 0; - osm_log_reopen_file(&osm.log); - } - if (osm_hup_flag) { - osm_hup_flag = 0; - 
/* a HUP signal should only start a new heavy sweep */ - osm.subn.force_heavy_sweep = TRUE; - osm_opensm_sweep(&osm); - } - } - osm_console_close_socket(&osm); + /* + * Sit here until signaled to exit + */ + osm_manager_loop(&opt, &osm); } if (osm.mad_pool.mads_out) { -- 1.5.1 -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: 0001-opensm-separated-current-loop-in-main-into-its-own.patch URL: From meier3 at llnl.gov Mon Feb 4 15:03:31 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Mon, 04 Feb 2008 15:03:31 -0800 Subject: [ofa-general] [PATCH] opensm: osm_console - cleanup in preparation for adding OpenSSL option Message-ID: <47A799C3.7000207@llnl.gov> Sasha, the second one. From 7a82f221e5e02ddc660aa917dc95256774fdc508 Mon Sep 17 00:00:00 2001 From: Tim Meier Date: Mon, 4 Feb 2008 14:49:34 -0800 Subject: [PATCH] opensm: osm_console - cleanup in preparation for adding OpenSSL option Trivial reorganization and cleanup, no new functionality. This is to help minimize the impact (on existing code) of adding new features to the Console (such as OpenSSL). 
Signed-off-by: Tim Meier --- opensm/include/opensm/osm_console.h | 5 +-- opensm/opensm/main.c | 15 ++++------- opensm/opensm/osm_console.c | 45 ++++++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/opensm/include/opensm/osm_console.h b/opensm/include/opensm/osm_console.h index 33e41e7..8b31d6a 100644 --- a/opensm/include/opensm/osm_console.h +++ b/opensm/include/opensm/osm_console.h @@ -61,8 +61,7 @@ BEGIN_C_DECLS void osm_console_init(osm_subn_opt_t * opt, osm_opensm_t * p_osm); void osm_console(osm_opensm_t * p_osm); -void osm_console_prompt(FILE * out); -void osm_console_close_socket(osm_opensm_t * p_osm); - +void osm_console_exit(osm_opensm_t * p_osm); +int is_console_enabled(osm_subn_opt_t *p_opt); END_C_DECLS #endif /* _OSM_CONSOLE_H_ */ diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 239de84..3663232 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -553,18 +553,14 @@ static int daemonize(osm_opensm_t * osm) **********************************************************************/ int osm_manager_loop(osm_subn_opt_t * p_opt, osm_opensm_t * p_osm) { - osm_console_init(p_opt, p_osm); + if(is_console_enabled(p_opt)) + osm_console_init(p_opt, p_osm); /* - Sit here forever + Sit here forever - dwell or do console i/o & cmds */ while (!osm_exit_flag) { - if (strcmp(p_opt->console, OSM_LOCAL_CONSOLE) == 0 -#ifdef ENABLE_OSM_CONSOLE_SOCKET - || strcmp(p_opt->console, OSM_REMOTE_CONSOLE) == 0 - || strcmp(p_opt->console, OSM_LOOPBACK_CONSOLE) == 0 -#endif - ) + if (is_console_enabled(p_opt)) osm_console(p_osm); else cl_thread_suspend(10000); @@ -580,7 +576,8 @@ int osm_manager_loop(osm_subn_opt_t * p_opt, osm_opensm_t * p_osm) osm_opensm_sweep(p_osm); } } - osm_console_close_socket(p_osm); + if(is_console_enabled(p_opt)) + osm_console_exit(p_osm); return 0; } /********************************************************************** diff --git a/opensm/opensm/osm_console.c 
b/opensm/opensm/osm_console.c index d0a632f..63ee1de 100644 --- a/opensm/opensm/osm_console.c +++ b/opensm/opensm/osm_console.c @@ -773,6 +773,42 @@ static void portstatus_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) fprintf(out, "\n"); } +static int is_local(char* str) +{ + // convenience - checks if just stdin/stdout + if(str) + return (strcmp(str, OSM_LOCAL_CONSOLE) == 0); +return 0; +} + +static int is_loopback(char* str) +{ + // convenience - checks if socket based connection + if(str) + return (strcmp(str, OSM_LOOPBACK_CONSOLE) == 0); +return 0; +} + +static int is_remote(char* str) +{ + // convenience - checks if socket based connection + if(str) + return (strcmp(str, OSM_REMOTE_CONSOLE) == 0) + || is_loopback(str); +return 0; +} + +int is_console_enabled(osm_subn_opt_t *p_opt) +{ + // checks for a variety of types of consoles - default is off or 0 + if(p_opt) + return (is_local(p_opt->console) + || is_loopback(p_opt->console) + || is_remote(p_opt->console)); +return 0; +} + + #ifdef ENABLE_OSM_PERF_MGR static void perfmgr_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) { @@ -924,7 +960,7 @@ static void parse_cmd_line(char *line, osm_opensm_t * p_osm) } } -void osm_console_prompt(FILE * out) +static void osm_console_prompt(FILE * out) { if (out) { fprintf(out, "OpenSM %s", OSM_COMMAND_PROMPT); @@ -989,6 +1025,13 @@ void osm_console_init(osm_subn_opt_t * opt, osm_opensm_t * p_osm) } } +/* clean up and release resouces */ +void osm_console_exit(osm_opensm_t * p_osm) +{ + // clean up and release resouces, currently just close the socket + osm_console_close_socket(p_osm); +} + #ifdef ENABLE_OSM_CONSOLE_SOCKET static void handle_osm_connection(osm_opensm_t * p_osm, int new_fd, char *client_ip, char *client_hn) -- 1.5.1 -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... 
Name: 0002-opensm-osm_console-cleanup-in-preparation-for-add.patch URL: From mashirle at us.ibm.com Mon Feb 4 06:23:41 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Mon, 04 Feb 2008 06:23:41 -0800 Subject: [ofa-general] Re: [ewg] Oops with today's OFED 1.3 In-Reply-To: <47A7746E.6030303@linux.vnet.ibm.com> References: <47A7746E.6030303@linux.vnet.ibm.com> Message-ID: <1202135021.4502.40.camel@localhost.localdomain> Eli, Please look at this issues ASAP. Without your patch everything works well. Thanks Shirley From arlin.r.davis at intel.com Mon Feb 4 16:44:37 2008 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 4 Feb 2008 16:44:37 -0800 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release Message-ID: <000101c86790$48ff6db0$9f97070a@amr.corp.intel.com> New release for dapl 2.0 available on the OFA download page and in my git tree. - changes to correctly build against and target /dat2/include directory. md5sum: c0ad3d8b5df252ff5ce47f616a462f4e dapl-2.0.6.tar.gz Vlad, please pull new v2 release into OFED 1.3 RC4 and install the following dapl packages: dapl-1.2.4-1 dapl-devel-1.2.4-1 dapl-2.0.6-1 dapl-utils-2.0.6-1 dapl-devel-2.0.6-1 dapl-debuginfo-2.0.6-1 See http://www.openfabrics.org/downloads/dapl/README.html for details. -arlin -------------- next part -------------- An HTML attachment was scrubbed... URL: From YJia at tmriusa.com Mon Feb 4 16:51:32 2008 From: YJia at tmriusa.com (Yicheng Jia) Date: Mon, 4 Feb 2008 18:51:32 -0600 Subject: [ofa-general] completion order of RDMA write? Message-ID: Hi Folks, I have a question regarding to the completion order of RDMA write. Since RDMA write without Immediate is not handled by responder's RQ WQEs, if two or more HCAs write to a single HCA simultaneously with this operation, what the completion order of these RDMA writes would be? Thanks! Yicheng -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ralph.campbell at qlogic.com Mon Feb 4 17:17:27 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Mon, 04 Feb 2008 17:17:27 -0800 Subject: [ofa-general] completion order of RDMA write? In-Reply-To: References: Message-ID: <1202174247.27464.224.camel@brick.pathscale.com> On Mon, 2008-02-04 at 18:51 -0600, Yicheng Jia wrote: > > Hi Folks, > > I have a question regarding to the completion order of RDMA write. > Since RDMA write without Immediate is not handled by responder's RQ > WQEs, if two or more HCAs write to a single HCA simultaneously with > this operation, what the completion order of these RDMA writes would > be? > > Thanks! > Yicheng There are no ordering guarantees. Two senders can interleave packets at the receiver in any order. If the two RDMA write ranges overlap and are larger than the path MTU, the resulting memory contents might have parts of each RDMA write. The RDMA with or without immediate doesn't matter. From kliteyn at mellanox.co.il Mon Feb 4 17:21:31 2008 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 5 Feb 2008 03:21:31 +0200 Subject: [ofa-general] nightly osm_sim report 2008-02-05:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2008-02-04 OpenSM git rev = Sun_Feb_3_11:49:31_2008 [6d4b76c4a28bcc8e57549d46a1ceaa4ca64e06ce] ibutils git rev = Mon_Dec_24_10:42:01_2007 [675bec82306d6920555dd0b5e2f664983e27e60f] Total=400 Pass=399 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree gnu-stallion-64.topo 10 FatTree blend-4-ary-2-tree.topo 10 
FatTree RhinoDDR.topo 10 FatTree FullGnu.topo 10 FatTree 4-ary-2-tree.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FTreeFail 4-ary-2-tree-missing-sw-link.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 10 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 9 LidMgr IS3-128.topo Failures: 1 LidMgr IS3-128.topo From mashirle at us.ibm.com Mon Feb 4 09:35:28 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Mon, 04 Feb 2008 09:35:28 -0800 Subject: [ofa-general] Re: [PATCH 5/5] IB/ipoib: IPOIB rx post list In-Reply-To: <1201873280.6677.9.camel@eli-laptop> References: <1201873280.6677.9.camel@eli-laptop> Message-ID: <1202146528.4502.54.camel@localhost.localdomain> Hello Eli, I have seen heavily memory leak (system run out of memory) and very bad performance when errors after your patch checked in. I am back looking at the code. Below place has memory leak. When ib_post_recv() has problem, only one skb gets freed. And this kind of post_recv() could hurt the performance significantly if one fails. Please correct me if I understand incorrectly. 
Thanks Shirley +static int ipoib_ib_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int ret = 0; + int i = priv->rx_outst; + + priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; + if (++priv->rx_outst == UD_POST_RCV_COUNT) { + ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); + + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (% d)\n", id, ret); + while (bad_wr) { + id = bad_wr->wr_id & ~IPOIB_OP_RECV; + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } + } + priv->rx_outst = 0; + } return ret; } From dwrocketdownloadm at rocketdownload.com Mon Feb 4 20:30:45 2008 From: dwrocketdownloadm at rocketdownload.com (Mathew Quintanilla) Date: Tue, 5 Feb 2008 10:00:45 +0530 Subject: [ofa-general] We hope this information will help you to make the right choice. Message-ID: <01c867dd$f96a8480$0acd94ca@dwrocketdownloadm> Everyone has heard about lower-cost drugs from abroad drugstore. The difficulty is to find the reliable one. �CanadianPharmacy� is an experienced, trusted, and fully-licensed Canadian online drugstore. Make significant savings buying medications in Canada! Mathew Quintanilla From rdreier at cisco.com Mon Feb 4 20:43:33 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 04 Feb 2008 20:43:33 -0800 Subject: [ofa-general] Re: [PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: (Shirley Ma's message of "Sat, 2 Feb 2008 08:16:51 -0800") References: Message-ID: > My patch has been passed the stress test for both PPC and Intel > architechture against OFED-1.3-RC2 bit for a couple days. And I didn't see > performance imapct for 2K mtu. But I rethink about your suggestion here > yesterday night. 
I can modify my patch to meet your thoughts here by > keeping current implementation of 2K mtu and using if condition check for > the new code. I will submit a new version of patchset today for review. > Since I only have two days for my patch to be integred into OFED-1.3-RC3 > for Distros to pick up. I would like to see your ack here for this approach > as soon as possible. I will compare two different implementation's > performance. Sorry, I've kind of lost the plot here with so many versions of the patches flying around. In any case this is not something I am going to pick up for 2.6.25. I don't have any control over OFED or distros, although I would probably hold off on adding a feature at this late stage of the release process; but the OFED maintainers don't seem to be as conservative as I am. - R. From rdreier at cisco.com Mon Feb 4 20:49:20 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 04 Feb 2008 20:49:20 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get a second batch of InfiniBand/RDMA batches. In addition to the usual motley crew of changes, this pull includes a new driver for NetEffect RNICs in drivers/infiniband/hw/nes. The code could use some further cleaning, but I don't think it's worth holding off on the merge. 
David Dillow (1): IB/srp: Retry stale connections Eli Cohen (2): IB/mthca: Remove checks for srq->first_free < 0 IB/ib_mthca: Pre-link receive WQEs in Tavor mode Glenn Streiff (1): RDMA/nes: Add a driver for NetEffect RNICs Hoang-Nam Nguyen (1): IB/ehca: Add PMA support Jack Morgenstein (2): IB/mthca: Don't read reserved fields in mthca_QUERY_ADAPTER() mlx4_core: Don't read reserved fields in mlx4_QUERY_ADAPTER() Joachim Fenkes (2): IB/ehca: Prevent sending UD packets to QP0 IB/ehca: Update sma_attr also in case of disruptive config change Olaf Kirch (1): IB/mthca: Return proper error codes from mthca_fmr_alloc() Or Gerlitz (3): IPoIB: Handle bonding failover race for connected neighbours too IPoIB: Remove a misleading debug print IB/fmr_pool: Allocate page list for pool FMRs only when caching enabled Roland Dreier (4): mlx4_core: Fix more section mismatches IB/mthca: Fix and simplify page size calculation in mthca_reg_phys_mr() IB/mlx4: Actually print out the driver version IB: Avoid marking __devinitdata as const Sean Hefty (1): IB/cm: Add interim support for routed paths MAINTAINERS | 10 + drivers/infiniband/Kconfig | 2 +- drivers/infiniband/Makefile | 1 + drivers/infiniband/core/cm.c | 89 +- drivers/infiniband/core/fmr_pool.c | 7 +- drivers/infiniband/hw/ehca/ehca_classes.h | 1 + drivers/infiniband/hw/ehca/ehca_irq.c | 2 + drivers/infiniband/hw/ehca/ehca_iverbs.h | 5 + drivers/infiniband/hw/ehca/ehca_main.c | 2 +- drivers/infiniband/hw/ehca/ehca_reqs.c | 4 + drivers/infiniband/hw/ehca/ehca_sqp.c | 91 + drivers/infiniband/hw/mlx4/main.c | 10 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 11 +- drivers/infiniband/hw/mthca/mthca_main.c | 5 +- drivers/infiniband/hw/mthca/mthca_mr.c | 8 +- drivers/infiniband/hw/mthca/mthca_provider.c | 22 +- drivers/infiniband/hw/mthca/mthca_qp.c | 13 +- drivers/infiniband/hw/mthca/mthca_srq.c | 47 +- drivers/infiniband/hw/nes/Kconfig | 16 + drivers/infiniband/hw/nes/Makefile | 3 + drivers/infiniband/hw/nes/nes.c | 1152 ++++++++ 
drivers/infiniband/hw/nes/nes.h | 560 ++++ drivers/infiniband/hw/nes/nes_cm.c | 3088 ++++++++++++++++++++ drivers/infiniband/hw/nes/nes_cm.h | 433 +++ drivers/infiniband/hw/nes/nes_context.h | 193 ++ drivers/infiniband/hw/nes/nes_hw.c | 3080 ++++++++++++++++++++ drivers/infiniband/hw/nes/nes_hw.h | 1206 ++++++++ drivers/infiniband/hw/nes/nes_nic.c | 1703 +++++++++++ drivers/infiniband/hw/nes/nes_user.h | 112 + drivers/infiniband/hw/nes/nes_utils.c | 917 ++++++ drivers/infiniband/hw/nes/nes_verbs.c | 3917 ++++++++++++++++++++++++++ drivers/infiniband/hw/nes/nes_verbs.h | 169 ++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 19 +- drivers/infiniband/ulp/srp/ib_srp.c | 53 +- drivers/infiniband/ulp/srp/ib_srp.h | 1 + drivers/net/mlx4/fw.c | 6 - drivers/net/mlx4/fw.h | 3 - drivers/net/mlx4/main.c | 11 +- drivers/net/mlx4/mr.c | 2 +- 39 files changed, 16848 insertions(+), 126 deletions(-) create mode 100644 drivers/infiniband/hw/nes/Kconfig create mode 100644 drivers/infiniband/hw/nes/Makefile create mode 100644 drivers/infiniband/hw/nes/nes.c create mode 100644 drivers/infiniband/hw/nes/nes.h create mode 100644 drivers/infiniband/hw/nes/nes_cm.c create mode 100644 drivers/infiniband/hw/nes/nes_cm.h create mode 100644 drivers/infiniband/hw/nes/nes_context.h create mode 100644 drivers/infiniband/hw/nes/nes_hw.c create mode 100644 drivers/infiniband/hw/nes/nes_hw.h create mode 100644 drivers/infiniband/hw/nes/nes_nic.c create mode 100644 drivers/infiniband/hw/nes/nes_user.h create mode 100644 drivers/infiniband/hw/nes/nes_utils.c create mode 100644 drivers/infiniband/hw/nes/nes_verbs.c create mode 100644 drivers/infiniband/hw/nes/nes_verbs.h From xma at us.ibm.com Mon Feb 4 21:21:08 2008 From: xma at us.ibm.com (Shirley Ma) Date: Mon, 4 Feb 2008 21:21:08 -0800 Subject: [ofa-general] Re: [PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: Message-ID: Thanks Roland. We can review this patch for upper stream later. Eli has reviewed it. 
This patch is going to be OFED-1.3. I am testing current OFED-1.3 Git tree + this patch now. It seems everything works well for a few hours. I will let the test running overnight to see any issues tomorrow. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From eli at mellanox.co.il Mon Feb 4 21:21:36 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 5 Feb 2008 07:21:36 +0200 Subject: [ofa-general] RE: [ewg] Oops with today's OFED 1.3 In-Reply-To: <1202135021.4502.40.camel@localhost.localdomain> References: <47A7746E.6030303@linux.vnet.ibm.com> <1202135021.4502.40.camel@localhost.localdomain> Message-ID: <6C2C79E72C305246B504CBA17B5500C90340E39A@mtlexch01.mtl.com> I have already addressed this issue it should have been fixed by now. Please check if your git tree is updated - the fix should be there. -----Original Message----- From: Shirley Ma [mailto:mashirle at us.ibm.com] Sent: ב 04 פברואר 2008 16:24 To: Eli Cohen Cc: openfabrics-ewg at openib.org; OpenFabrics General; Pradeep Satyanarayana Subject: Re: [ewg] Oops with today's OFED 1.3 Eli, Please look at this issues ASAP. Without your patch everything works well. Thanks Shirley From jackm at dev.mellanox.co.il Mon Feb 4 22:17:05 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 5 Feb 2008 08:17:05 +0200 Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC receive-only qp support In-Reply-To: References: <200802030949.44943.jackm@dev.mellanox.co.il> Message-ID: <200802050817.06380.jackm@dev.mellanox.co.il> On Sunday 03 February 2008 19:30, Tang, Changqing wrote: > > > > In addition, the patch implements the foundation for > > distributing XRC-receive-only QP events to userspace > > processes registered with that QP. > > If r1 is on node1, r2 and r3 are on node2, r1 and r2 have the > XRC conection established, r1 knows SRQ number from both r2 and > r3, but r3 does not register the recv QP created by r2, can r3 > still receive message from r1 ? 
Yes, it can. However, when r2 either unregisters the recv QP, or exits, that QP will be destroyed, and r3 will no longer be able to receive messages via the recv QP created by r2. If, however, r3 does register with the recv QP, that QP will not be destroyed until r3 unregisters (or exits). - Jack From jackm at dev.mellanox.co.il Mon Feb 4 22:30:58 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 5 Feb 2008 08:30:58 +0200 Subject: [ofa-general] ENOMEM In-Reply-To: <47A584DE.2020308@lfbs.rwth-aachen.de> References: <47A352FC.8090604@lfbs.rwth-aachen.de> <200802011705.51202.kilian@stanford.edu> <47A584DE.2020308@lfbs.rwth-aachen.de> Message-ID: <200802050830.58514.jackm@dev.mellanox.co.il> On Sunday 03 February 2008 11:09, Ruben Niederhagen wrote: > How do I enlarge this limit? The line > #                 > *               hard    locks           1024 > in /etc/security/limits.conf (+reboot) didn't do the trick... > should be: * hard memlock 1024 * soft memlock 1024 You used "locks", which refers to file locks, not memory locks. - Jack From akstcallenfrancemnsdgs at allenfrance.com Mon Feb 4 23:26:34 2008 From: akstcallenfrancemnsdgs at allenfrance.com (Allison Price) Date: Tue, 5 Feb 2008 16:26:34 +0900 Subject: [ofa-general] Small Cap gems like this are rare Message-ID: <01c86813$df91c9c0$3b724b3d@akstcallenfrancemnsdgs> Equity monster stock PERMANENTTECH (Other OTC:PERT.PK) From xma at us.ibm.com Mon Feb 4 23:22:03 2008 From: xma at us.ibm.com (Shirley Ma) Date: Mon, 4 Feb 2008 23:22:03 -0800 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202140964.18209.69.camel@mtls03> Message-ID: Hello Eli, Thanks. I have tested your patch + 4K mtu patch. I do see better performance for mthca when CPU is not 100% used and no errors. I will measure ehca performance tomorrow. 
I also see some issues, like: ib_mthca 0000:04:00.0: SQ 0c0404 full (2641376 head, 2641312 tail, 64 max, 0 nreq) Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From eli at dev.mellanox.co.il Mon Feb 4 23:26:50 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 05 Feb 2008 09:26:50 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: References: Message-ID: <1202196410.18209.87.camel@mtls03> On Mon, 2008-02-04 at 23:22 -0800, Shirley Ma wrote: > Hello Eli, > > Thanks. I have tested your patch + 4K mtu patch. I do see better > performance for mthca when CPU is not 100% used and no errors. I will > measure ehca performance tomorrow. I also see some issues, like: > ib_mthca 0000:04:00.0: SQ 0c0404 full (2641376 head, 2641312 tail, 64 > max, 0 nreq) > Can you send what do you do to cause this error? Also, can you check if the current ofed git tree eliminates the crashes you've been seeing? From xma at us.ibm.com Mon Feb 4 23:38:34 2008 From: xma at us.ibm.com (Shirley Ma) Date: Mon, 4 Feb 2008 23:38:34 -0800 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202196410.18209.87.camel@mtls03> Message-ID: Hello Eli, > Can you send what do you do to cause this error? I wasn't be able to reproduce this right now. I saw these errors in /var/log/messages after I did numerous tests. unfortunately I couldn't tell which one triggered this. The current tests are happy running. Once I can reproduce this I will let you know. > Also, can you check if the current ofed git tree eliminates the crashes > you've been seeing? I have forwarded your email for the further validation to the team. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ruben at lfbs.RWTH-Aachen.DE Tue Feb 5 00:12:56 2008 From: ruben at lfbs.RWTH-Aachen.DE (Ruben Niederhagen) Date: Tue, 05 Feb 2008 09:12:56 +0100 Subject: [ofa-general] ENOMEM In-Reply-To: <200802050830.58514.jackm@dev.mellanox.co.il> References: <47A352FC.8090604@lfbs.rwth-aachen.de> <200802011705.51202.kilian@stanford.edu> <47A584DE.2020308@lfbs.rwth-aachen.de> <200802050830.58514.jackm@dev.mellanox.co.il> Message-ID: <47A81A88.1080809@lfbs.rwth-aachen.de> Jack Morgenstein wrote: > On Sunday 03 February 2008 11:09, Ruben Niederhagen wrote: >> How do I enlarge this limit? The line >> # >> * hard locks 1024 >> in /etc/security/limits.conf (+reboot) didn't do the trick... >> > should be: > > * hard memlock 1024 > * soft memlock 1024 > > You used "locks", which refers to file locks, not memory locks. Thanks a lot, setting the value to 1024 leads to $ ibv_srq_pingpong Couldn't create QP[13] with 2048 everything works fine! Merci! Ruben From eli at dev.mellanox.co.il Tue Feb 5 00:14:08 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 05 Feb 2008 10:14:08 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: References: Message-ID: <1202199248.18209.91.camel@mtls03> Thanks. On Mon, 2008-02-04 at 23:38 -0800, Shirley Ma wrote: > Hello Eli, > > > Can you send what do you do to cause this error? > > I wasn't be able to reproduce this right now. I saw these errors > in /var/log/messages after I did numerous tests. unfortunately I > couldn't tell which one triggered this. The current tests are happy > running. Once I can reproduce this I will let you know. > > > Also, can you check if the current ofed git tree eliminates the > crashes > > you've been seeing? > > I have forwarded your email for the further validation to the team. 
> > Thanks > Shirley > From xma at us.ibm.com Tue Feb 5 00:29:57 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 00:29:57 -0800 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202199248.18209.91.camel@mtls03> Message-ID: Hello Eli, Did you see my another email regarding the memory leak issue in post_receive()? +static int ipoib_ib_post_receive(struct net_device *dev, int id) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + struct ib_recv_wr *bad_wr; + int ret = 0; + int i = priv->rx_outst; + + priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; + if (++priv->rx_outst == UD_POST_RCV_COUNT) { + ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); + + if (unlikely(ret)) { + ipoib_warn(priv, "receive failed for buf %d (% d)\n", id, ret); + while (bad_wr) { + id = bad_wr->wr_id & ~IPOIB_OP_RECV; + ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, + IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_ring[id].skb); + priv->rx_ring[id].skb = NULL; + } 16 skbs need to be freed here when errors, not just this one. + } + priv->rx_outst = 0; + } return ret; } Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From eli at dev.mellanox.co.il Tue Feb 5 00:38:00 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 05 Feb 2008 10:38:00 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: References: Message-ID: <1202200680.18209.100.camel@mtls03> On Tue, 2008-02-05 at 00:29 -0800, Shirley Ma wrote: > Hello Eli, > > Did you see my another email regarding the memory leak issue in > post_receive()? No, I saw you suggested using dynamic allocations for the list of receive WQEs but nothing re memory leak. Can you be more specific? 
> > +static int ipoib_ib_post_receive(struct net_device *dev, int id) > +{ > + struct ipoib_dev_priv *priv = netdev_priv(dev); > + struct ib_recv_wr *bad_wr; > + int ret = 0; > + int i = priv->rx_outst; > + > + priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; > + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; > + if (++priv->rx_outst == UD_POST_RCV_COUNT) { > + ret = ib_post_recv(priv->qp, priv->rx_wr_draft, > &bad_wr); > + > + if (unlikely(ret)) { > + ipoib_warn(priv, "receive failed for buf %d (% > d)\n", id, ret); > + while (bad_wr) { > + id = bad_wr->wr_id & ~IPOIB_OP_RECV; > + ib_dma_unmap_single(priv->ca, > priv->rx_ring[id].mapping, > + IPOIB_BUF_SIZE, > DMA_FROM_DEVICE); > + > dev_kfree_skb_any(priv->rx_ring[id].skb); > + priv->rx_ring[id].skb = NULL; > + } > > 16 skbs need to be freed here when errors, not just this one. > > + } > + priv->rx_outst = 0; > + } > > return ret; > } > > Thanks > Shirley > > From xma at us.ibm.com Tue Feb 5 00:53:00 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 00:53:00 -0800 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202200680.18209.100.camel@mtls03> Message-ID: Hello Eli, If i undersand your code correctly, you post_receive 16 skb's at once. when ib_post_receive returns error, all these 16 skb's would fail, not just this skb failure, am I right? If all failure, then we should free these 16 skb's not just one. Also the same issue in ipoib-cm code. Thanks Shirley Eli Cohen To Shirley Ma/Beaverton/IBM at IBMUS 02/05/08 12:38 AM cc openfabrics , Roland Dreier eli at dev.mellanox. , co.il tziporet at dev.mellanox.co.il Subject Re: [ofa-general] [PATCH 0/5]: Improve small UDP messages On Tue, 2008-02-05 at 00:29 -0800, Shirley Ma wrote: > Hello Eli, > > Did you see my another email regarding the memory leak issue in > post_receive()? No, I saw you suggested using dynamic allocations for the list of receive WQEs but nothing re memory leak. Can you be more specific? 
> > +static int ipoib_ib_post_receive(struct net_device *dev, int id) > +{ > + struct ipoib_dev_priv *priv = netdev_priv(dev); > + struct ib_recv_wr *bad_wr; > + int ret = 0; > + int i = priv->rx_outst; > + > + priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; > + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; > + if (++priv->rx_outst == UD_POST_RCV_COUNT) { > + ret = ib_post_recv(priv->qp, priv->rx_wr_draft, > &bad_wr); > + > + if (unlikely(ret)) { > + ipoib_warn(priv, "receive failed for buf %d (% > d)\n", id, ret); > + while (bad_wr) { > + id = bad_wr->wr_id & ~IPOIB_OP_RECV; > + ib_dma_unmap_single(priv->ca, > priv->rx_ring[id].mapping, > + IPOIB_BUF_SIZE, > DMA_FROM_DEVICE); > + > dev_kfree_skb_any(priv->rx_ring[id].skb); > + priv->rx_ring[id].skb = NULL; > + } > > 16 skbs need to be freed here when errors, not just this one. > > + } > + priv->rx_outst = 0; > + } > > return ret; > } > > Thanks > Shirley > > -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: graycol.gif Type: image/gif Size: 105 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: pic01959.gif Type: image/gif Size: 1255 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: From eli at dev.mellanox.co.il Tue Feb 5 01:01:13 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 05 Feb 2008 11:01:13 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: References: Message-ID: <1202202073.18209.104.camel@mtls03> On Tue, 2008-02-05 at 00:53 -0800, Shirley Ma wrote: > Hello Eli, > > If i undersand your code correctly, you post_receive 16 skb's at once. > when ib_post_receive returns error, all these 16 skb's would fail, not > just this skb failure, am I right? 
If all failure, then we should free > these 16 skb's not just one. Also the same issue in ipoib-cm code. > When you post a list of receive WQEs, it may fail somewhere in the middle. The errant work request is denoted by bad_wr and all the rest following it are not posted. So I free them all. From xma at us.ibm.com Tue Feb 5 01:09:26 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 01:09:26 -0800 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202202073.18209.104.camel@mtls03> Message-ID: Hello Eli, > When you post a list of receive WQEs, it may fail somewhere in the > middle. The errant work request is denoted by bad_wr and all the rest > following it are not posted. So I free them all. OK, so bad_wr returns the rest of the list not being able to post. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From dotanb at dev.mellanox.co.il Tue Feb 5 01:12:08 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 05 Feb 2008 11:12:08 +0200 Subject: [ofa-general] ENOMEM In-Reply-To: <47A81A88.1080809@lfbs.rwth-aachen.de> References: <47A352FC.8090604@lfbs.rwth-aachen.de> <200802011705.51202.kilian@stanford.edu> <47A584DE.2020308@lfbs.rwth-aachen.de> <200802050830.58514.jackm@dev.mellanox.co.il> <47A81A88.1080809@lfbs.rwth-aachen.de> Message-ID: <47A82868.7070600@dev.mellanox.co.il> Ruben Niederhagen wrote: > Jack Morgenstein wrote: > >> On Sunday 03 February 2008 11:09, Ruben Niederhagen wrote: >> >>> How do I enlarge this limit? The line >>> # >>> * hard locks 1024 >>> in /etc/security/limits.conf (+reboot) didn't do the trick... >>> >>> >> should be: >> >> * hard memlock 1024 >> * soft memlock 1024 >> >> You used "locks", which refers to file locks, not memory locks. >> > > Thanks a lot, setting the value to 1024 leads to > > $ ibv_srq_pingpong > Couldn't create QP[13] > > with 2048 everything works fine! 
> Please pay attention: the amount of memory pages that you need to be able to lock depends on the amount of the objects (for example: number of QPs) and the size of those objects (to continue the previous example: in a QP, number of WRs and number of S/Gs). Dotan From eli at dev.mellanox.co.il Tue Feb 5 01:13:50 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 05 Feb 2008 11:13:50 +0200 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: References: Message-ID: <1202202830.18209.106.camel@mtls03> On Tue, 2008-02-05 at 01:09 -0800, Shirley Ma wrote: > Hello Eli, > > > When you post a list of receive WQEs, it may fail somewhere in the > > middle. The errant work request is denoted by bad_wr and all the > rest > > following it are not posted. So I free them all. > > OK, so bad_wr returns the rest of the list not being able to post. > Yes. From dentsn at cityairterminal.de Tue Feb 5 01:50:12 2008 From: dentsn at cityairterminal.de (Ethel Myrick) Date: Tue, 5 Feb 2008 01:50:12 -0800 Subject: [ofa-general] Millions of customers can't be wrong! Message-ID: <027135367.28302963627238@cityairterminal.de> Ps fde ycho xu logi ncn cal Ben fy efits of a Bi zc gg qs er P oao e rur ni ki s VThe face of a man is usually an open book to most women. They can read practically anything in our faces. I’ve known ladies who could tell the s gy iz na e of my p cqf en cb i jg s, bank account and the status of my s dhy e ul x life by taking only one look at me. Of course, the look was followed by the lady’s departure in the opposite direction.PXLCLI nv CK HE xeb RE!!! -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ogerlitz at voltaire.com Tue Feb 5 02:19:54 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 05 Feb 2008 12:19:54 +0200 Subject: [ofa-general] [GIT PULL] please pull infiniband.git In-Reply-To: References: Message-ID: <47A8384A.8030001@voltaire.com> Roland Dreier wrote: > This will get a second batch of InfiniBand/RDMA batches. In addition > to the usual motley crew of changes, this pull includes a new driver > for NetEffect RNICs in drivers/infiniband/hw/nes. The code could use > some further cleaning, but I don't think it's worth holding off on the > merge. Hi Roland, So what is the status of the stateless offload patchset? thanks, Or. From dwsmidtsm at smidts.nl Tue Feb 5 02:47:34 2008 From: dwsmidtsm at smidts.nl (Jessica Hogan) Date: Tue, 5 Feb 2008 18:47:34 +0800 Subject: [ofa-general] Reputable online casino! Message-ID: <01c86827$91d92700$931b1976@dwsmidtsm> Online gambling is not only fun and exciting. It can bring real money! All you have to do is to download free software, receive great $2400 welcome bonus and start playing. Enjoy the real casino atmosphere with Golden Gate Casino! Among our advantages are: fast payouts, high degree of security, all around the clock customer support. These are few reasons why Golden Gate casino is so popular http://geocities.com/irmanoble310/ Play casino games any time you like. 
From vlad at lists.openfabrics.org Tue Feb 5 02:59:43 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 5 Feb 2008 02:59:43 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080205-0200 daily build status Message-ID: <20080205105943.4387DE60193@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with 
linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.19 Failed: From ogerlitz at voltaire.com Tue Feb 5 03:00:03 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 05 Feb 2008 13:00:03 +0200 Subject: [ofa-general] Re: [PATCH 0/3] ib/ipoib: Enable IPoIB-UD 4K MTU support In-Reply-To: References: Message-ID: <47A841B3.5010708@voltaire.com> Roland Dreier wrote: > Sorry, I've kind of lost the plot here with so many versions of the > patches flying around. In any case this is not something I am going > to pick up for 2.6.25. I don't have any control over OFED or distros, > although I would probably hold off on adding a feature at this late > stage of the release process; but the OFED maintainers don't seem to > be as conservative as I am. Same goes for me on both points: I was totally lost between all the posts you have made, and it prevents me from reviewing the patches. Also, the integration into ofed of patches which were --not reviewed-- (nor accepted) for upstream inclusion is totally unclear to me. To remove doubt, this policy has been present in ofed from day one, so it's not specific to your patches. Or.
From jackm at dev.mellanox.co.il Tue Feb 5 04:38:44 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 5 Feb 2008 14:38:44 +0200 Subject: [ofa-general] OFED 1.3 rc2 kernel panic: trying to enable SA cache In-Reply-To: <47A59737.9060207@mellanox.co.il> References: <47A59737.9060207@mellanox.co.il> Message-ID: <200802051438.44688.jackm@dev.mellanox.co.il> On Sunday 03 February 2008 12:28, Tziporet Koren wrote: > please look at this issue with mthca > Problem is that a change in sa_query.c (by Roland) was not reflected in Sean's patches. Roland's commit: ========== commit 2aec5c602c6a44e2a3a173339a9ab94549658e4b Author: Sean Hefty Date: Mon Jun 18 11:03:58 2007 -0700 IB/sa: Make sure SA queries use default P_Key MADs sent to the SA should use the the default P_Key (0x7fff/0xffff). There's no requirement that the default P_Key is stored at index 0 in the local P_Key table, so add code to the sa_query module to look up the index of the default P_Key when creating an address handle for the SA (which is done any time the P_Key table might change), and use this index for all SA queries. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier ========= moved initializing the ah in a MAD query from procedure send_mad (in file sa_query.c) to a new procedure alloc_mad(). However, the informinfo query in patch "kernel_patches/fixes/sean_local_sa_1_notifications.patch" was not updated as well to reflect the above change. As a result, the ah field remained zeroed out in the informinfo query; when, down the stack, mlx4_ib_post_send gets invoked, the Oops occurs. This oversight probably occurred when we migrated patches from ofed 1.2.5 over to the ofed 1.3 tree. I guess this is the first time anyone has tried to activate the local sa cache. I'll post a fix today. 
- Jack From eli at mellanox.co.il Tue Feb 5 04:44:09 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 05 Feb 2008 14:44:09 +0200 Subject: [ofa-general] 4k MTU patch Message-ID: <1202215449.18209.130.camel@mtls03> Hi Shirley, I see the following problems with the patch: 1.static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_UD_RX_SG]) +{ + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); ==> use ib_dma_unmap_page() + ib_dma_unmap_single(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); +} 2. When you allocate an SKB with ipoib_sg_alloc_rx_skb(), you allocate and map both the linear data and the first fragment (in the case of 4K mtu and 4K page size). But then you call ipoib_ud_skb_put_frags() to potentially take the first fragment from the SKB for which a packet has just been received. This can cause a leak of one page (although I think this case should never happen since the length of the packet is likely to exceed the linear data of the SKB). 3. The if (ipoib_ud_need_sg(priv->max_ib_mtu)) { in ipoib_ib_handle_rx_wc() can be eliminated - most of the code is identical. From customer.care at nwolb.com Tue Feb 5 01:56:53 2008 From: customer.care at nwolb.com (NatWest Internet Banking) Date: Tue, 5 Feb 2008 10:56:53 +0100 (CET) Subject: [ofa-general] Customer Notices : Your NatWest Online Bank Account Is About To Expire Message-ID: An HTML attachment was scrubbed... URL: From rpune at yahoo.com Mon Feb 4 23:05:32 2008 From: rpune at yahoo.com (ravindra kulkarni) Date: Mon, 4 Feb 2008 23:05:32 -0800 (PST) Subject: [ofa-general] ***SPAM*** help request:-mmap Message-ID: <559008.49325.qm@web50407.mail.re2.yahoo.com> Hi I am using mellanox hcaIII on RHEL5, i386. Can some one help me doing this. Want to map the HCA local memory BAR4 to user space and create the data buffer in that, so that it becomes faster to do transmit.
thanks in advance ____________________________________________________________________________________ Looking for last minute shopping deals? Find them fast with Yahoo! Search. http://tools.search.yahoo.com/newsearch/category.php?category=shopping From ogerlitz at voltaire.com Tue Feb 5 06:20:24 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 05 Feb 2008 16:20:24 +0200 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <000101c86750$1ffce090$a937170a@amr.corp.intel.com> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> Message-ID: <47A870A8.5050409@voltaire.com> Sean Hefty wrote: > The verbs also support iWarp devices and are not necessarily restricted to the > 1.2 IB spec definitions. It might make sense to state that the IB > implementation is based on the 1.2 spec in an IB specific section, but keep the > general documentation transport neutral at this point. Sure, the page would be changed to reflect that. Or. From pawel.dziekonski at pwr.wroc.pl Tue Feb 5 06:40:02 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Tue, 5 Feb 2008 15:40:02 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: References: <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> Message-ID: <20080205144002.GB4754@cefeid.wcss.wroc.pl> On Mon, 04 Feb 2008 at 05:39:25PM -0500, James Lentini wrote: > I don't use OFED, but I'll share with you what I know. OFED 1.3 is > based on 2.6.24, which is the same kernel Tom Tucker's git tree is > based on. I would assume the two are compatible, but I have not tried > it. > > Do you have NFS/RDMA working at this point? no - internal error again. :( I have uninstalled OFED completely and I'm running pure Tom Tucker's kernel. 
yum install opensm /etc/init.d/opensmd start cat /sys/class/infiniband/mthca0/ports/1/state 4: ACTIVE ifconfig ib0 ib0 Link encap:UNSPEC HWaddr 80-00-04-04-FE-80-00-00-00-00-00-00-00-00-00-00 inet addr:10.2.2.1 Bcast:10.2.2.255 Mask:255.255.255.0 UP BROADCAST RUNNING MULTICAST MTU:2044 Metric:1 RX packets:8 errors:0 dropped:0 overruns:0 frame:0 TX packets:8 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:128 RX bytes:784 (784.0 b) TX bytes:552 (552.0 b) rpm -e nfs-utils cd nfs-utils-1.1.1/ ./configure --prefix=/usr/local --disable-gss --disable-nfs4 make make install cd .. cat /etc/exports /scratch 10.255.255.222(no_subtree_check,rw,async) 10.2.2.2(no_subtree_check,rw,async,insecure,no_root_squash) cat NFS_START_SERVER #!/bin/sh export PATH="/usr/local/sbin:/usr/local/bin:$PATH" /etc/rc.d/init.d/portmap restart modprobe svcrdma modprobe nfs umount /proc/fs/nfsd >&/dev/null mount -t nfsd nfsd /proc/fs/nfsd >&/dev/null mount |grep /proc/fs/nfsd exportfs -av rpc.mountd rpc.statd --no-notify rpc.nfsd sm-notify echo rdma 2050 > /proc/fs/nfsd/portlist ./NFS_START_SERVER same done on client. cat NFS_START_CLIENT #!/bin/sh export PATH="/usr/local/sbin:/usr/local/bin:$PATH" /etc/rc.d/init.d/portmap restart modprobe svcrdma modprobe nfs sm-notify ./NFS_START_CLIENT mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v mount.nfs: timeout set for Tue Feb 5 14:50:47 2008 mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' mount.nfs: internal error mount("10.2.2.1:/scratch", "/mnt", "nfs", 0, "rdma,port=2050,addr=10.2.2.1") = -1 EIO (Input/output error) write(2, "mount.nfs: internal error\n", 26mount.nfs: internal error ) = 26 exit_group(32) = ? Process 13170 detached server logs: Feb 5 14:45:46 ib1 mountd[19614]: authenticated mount request from 10.2.2.2:988 for /scratch (/scratch) :( -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. 
D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From erezz at Voltaire.COM Tue Feb 5 06:41:10 2008 From: erezz at Voltaire.COM (Erez Zilber) Date: Tue, 05 Feb 2008 16:41:10 +0200 Subject: [ofa-general] [ANNOUNCE] open iSCSI over iSER target RPM is available Message-ID: <47A87586.6010904@Voltaire.COM> stgt (SCSI target) is an open-source framework for storage target drivers. It supports iSCSI over iSER among other storage target drivers. Voltaire added a git tree for stgt that will be added to OFED 1.4: http://www2.openfabrics.org/git/?p=~dorons/tgt.git;a=summary Until OFED 1.4 gets released, it is possible to install the stgt RPM on top of OFED 1.3. For more details about how to install and use stgt, please refer to https://wiki.openfabrics.org/tiki-index.php?page=ISER-target Some performance numbers that were measured by OSC (using SDR cards): * READ: 920 MB/sec * WRITE: 850 MB/sec We hope to have DDR measurements numbers soon. -- ____________________________________________________________ Erez Zilber | 972-9-971-7689 Software Engineer, Storage Solutions Voltaire – _The Grid Backbone_ www.voltaire.com From dotanb at dev.mellanox.co.il Tue Feb 5 06:51:34 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 05 Feb 2008 16:51:34 +0200 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <47A870A8.5050409@voltaire.com> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> <47A870A8.5050409@voltaire.com> Message-ID: <47A877F6.6010605@dev.mellanox.co.il> Or Gerlitz wrote: > Sean Hefty wrote: >> The verbs also support iWarp devices and are not necessarily >> restricted to the >> 1.2 IB spec definitions. It might make sense to state that the IB >> implementation is based on the 1.2 spec in an IB specific section, >> but keep the >> general documentation transport neutral at this point. 
> > Sure, the page would be changed to reflect that. > > Or. This man page (as is) will be added to OFED 1.3 rc4 and I'll later send a patch to the XRC man pages as well and will change the text according to this comment. Dotan From jlentini at netapp.com Tue Feb 5 07:04:46 2008 From: jlentini at netapp.com (James Lentini) Date: Tue, 5 Feb 2008 10:04:46 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: <20080205144002.GB4754@cefeid.wcss.wroc.pl> References: <20080130161924.GA31154@cefeid.wcss.wroc.pl> <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> Message-ID: On Tue, 5 Feb 2008, Pawel Dziekonski wrote: > On Mon, 04 Feb 2008 at 05:39:25PM -0500, James Lentini wrote: > > > I don't use OFED, but I'll share with you what I know. OFED 1.3 is > > based on 2.6.24, which is the same kernel Tom Tucker's git tree is > > based on. I would assume the two are compatible, but I have not tried > > it. > > > > Do you have NFS/RDMA working at this point? > > no - internal error again. :( > > I have uninstalled OFED completely and I'm running pure Tom Tucker's > kernel. > > yum install opensm > /etc/init.d/opensmd start > cat /sys/class/infiniband/mthca0/ports/1/state > 4: ACTIVE > > ifconfig ib0 > ib0 Link encap:UNSPEC HWaddr 80-00-04-04-FE-80-00-00-00-00-00-00-00-00-00-00 > inet addr:10.2.2.1 Bcast:10.2.2.255 Mask:255.255.255.0 > UP BROADCAST RUNNING MULTICAST MTU:2044 Metric:1 > RX packets:8 errors:0 dropped:0 overruns:0 frame:0 > TX packets:8 errors:0 dropped:0 overruns:0 carrier:0 > collisions:0 txqueuelen:128 > RX bytes:784 (784.0 b) TX bytes:552 (552.0 b) Is this ib0 interface on the server? Can you ping the server's IPoIB interface from your client? Based on the output below, I think the answer to these questions is yes.
> rpm -e nfs-utils > cd nfs-utils-1.1.1/ > ./configure --prefix=/usr/local --disable-gss --disable-nfs4 > make > make install > cd .. > > cat /etc/exports > /scratch 10.255.255.222(no_subtree_check,rw,async) 10.2.2.2(no_subtree_check,rw,async,insecure,no_root_squash) > > cat NFS_START_SERVER > #!/bin/sh > export PATH="/usr/local/sbin:/usr/local/bin:$PATH" > /etc/rc.d/init.d/portmap restart > modprobe svcrdma > modprobe nfs > umount /proc/fs/nfsd >&/dev/null > mount -t nfsd nfsd /proc/fs/nfsd >&/dev/null > mount |grep /proc/fs/nfsd > exportfs -av > rpc.mountd > rpc.statd --no-notify > rpc.nfsd > sm-notify > echo rdma 2050 > /proc/fs/nfsd/portlist > > ./NFS_START_SERVER > > same done on client. What is the same on the client? > cat NFS_START_CLIENT > #!/bin/sh > export PATH="/usr/local/sbin:/usr/local/bin:$PATH" > /etc/rc.d/init.d/portmap restart > modprobe svcrdma You don't need to load the svcrdma module on the client. The svcrdma module is only needed on the server. The client needs to have the xprtrdma code loaded. You appear to have built nfs as a module, so the xprtrdma code would also have been built as a module. Instead of loading svcrdma, you should run this here: modprobe xprtrdma Did you see this message on the client? RPC: transport (256) not supported > modprobe nfs > sm-notify > > ./NFS_START_CLIENT > > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > mount.nfs: timeout set for Tue Feb 5 14:50:47 2008 > mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' > mount.nfs: internal error > > mount("10.2.2.1:/scratch", "/mnt", "nfs", 0, "rdma,port=2050,addr=10.2.2.1") = -1 EIO (Input/output error) > write(2, "mount.nfs: internal error\n", 26mount.nfs: internal error > ) = 26 > exit_group(32) = ?
> Process 13170 detached > > server logs: > Feb 5 14:45:46 ib1 mountd[19614]: authenticated mount request from 10.2.2.2:988 for /scratch (/scratch) If loading the xprtrdma module doesn't solve your problem, send the output from these commands: dmesg -c > /dev/null echo 1024 > /proc/sys/sunrpc/nfs_debug mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v dmesg > output > :( > > -- > Pawel Dziekonski > Wroclaw Centre for Networking & Supercomputing, HPC Department > Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND > phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl > From krause at cup.hp.com Tue Feb 5 07:18:56 2008 From: krause at cup.hp.com (Michael Krause) Date: Tue, 05 Feb 2008 07:18:56 -0800 Subject: [ofa-general] SA code? In-Reply-To: References: <1201909575.11210.125.camel@hrosenstock-ws.xsigo.com> Message-ID: <6.2.0.14.2.20080205071553.07606e50@esmail.cup.hp.com> An HTML attachment was scrubbed... URL: From jackm at dev.mellanox.co.il Tue Feb 5 07:30:27 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 5 Feb 2008 17:30:27 +0200 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: References: Message-ID: <200802051730.27500.jackm@dev.mellanox.co.il> On Monday 04 February 2008 06:18, Mahmoud Hanafi wrote: > How do I ensure that local_sa_cache is enabled? > > I have tried all the other suggestions but I am still getting the error. > There was a bug in the local sa implementation (the informinfo query did not incorporate a required change). This bug has been fixed, and the fix will be in the next OFED daily build. The fix will also be in RC4.
- Jack From xma at us.ibm.com Tue Feb 5 07:34:05 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 07:34:05 -0800 Subject: [ofa-general] 4k MTU patch In-Reply-To: <1202215449.18209.130.camel@mtls03> Message-ID: general-bounces at lists.openfabrics.org wrote on 02/05/2008 04:44:09 AM: > Hi Shirley, > > I see the following problems with the patch: > > 1.static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv *priv, > + u64 mapping[IPOIB_UD_RX_SG]) > +{ > + ib_dma_unmap_single(priv->ca, mapping[0], > IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); > > ==> use ib_dma_unmap_page() > + ib_dma_unmap_single(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); > +} Good catch, I will fix it. > 2. When you allocate an SKB with ipoib_sg_alloc_rx_skb(), you allocate > and map both the linear data and the first fragment (in the case of 4K > mtu and 4K page size). But then you call ipoib_ud_skb_put_frags() to > potentially take the first fragment from the SKB for which a packet has > just received. This can cause a leak of one page (although I think this > case should never happen since the the length of packet is likely to > exceed the linear data of the SKB. The first buffer only has GRH + IPoIB header = 44 bytes, the first fragment has the IP payload, the length can never be zero. Actually I can remove that code from taking the first fragment. It's useless here. > 3. The > if (ipoib_ud_need_sg(priv->max_ib_mtu)) { > > in ipoib_ib_handle_rx_wc() can be eliminated - most of the code is > identical. The skb free and allocation need this check, the rest of code can share between 2K and 4K. I am trying to use one condition check here. Can you please tell me how to totally eliminate it in details? thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From pawel.dziekonski at pwr.wroc.pl Tue Feb 5 07:41:16 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Tue, 5 Feb 2008 16:41:16 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: References: <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> Message-ID: <20080205154116.GA19804@cefeid.wcss.wroc.pl> On Tue, 05 Feb 2008 at 10:04:46AM -0500, James Lentini wrote: > > ifconfig ib0 > > ib0 Link encap:UNSPEC HWaddr 80-00-04-04-FE-80-00-00-00-00-00-00-00-00-00-00 > > inet addr:10.2.2.1 Bcast:10.2.2.255 Mask:255.255.255.0 > > UP BROADCAST RUNNING MULTICAST MTU:2044 Metric:1 > > RX packets:8 errors:0 dropped:0 overruns:0 frame:0 > > TX packets:8 errors:0 dropped:0 overruns:0 carrier:0 > > collisions:0 txqueuelen:128 > > RX bytes:784 (784.0 b) TX bytes:552 (552.0 b) > > Is this ib0 interface on the server? yes. > Can you ping the server's IPoIB interface from your client? > Based on the output below, I think to these questions is yes. yes. # ping -c 3 10.2.2.2 PING 10.2.2.2 (10.2.2.2) 56(84) bytes of data. 64 bytes from 10.2.2.2: icmp_seq=0 ttl=64 time=1.91 ms 64 bytes from 10.2.2.2: icmp_seq=1 ttl=64 time=0.033 ms 64 bytes from 10.2.2.2: icmp_seq=2 ttl=64 time=0.025 ms --- 10.2.2.2 ping statistics --- 3 packets transmitted, 3 received, 0% packet loss, time 2001ms rtt min/avg/max/mdev = 0.025/0.657/1.915/0.889 ms, pipe 2 > > rpm -e nfs-utils > > cd nfs-utils-1.1.1/ > > ./configure --prefix=/usr/local --disable-gss --disable-nfs4 > > make > > make install > > cd .. > > same done on client. > > What is the same on the client? nfs-utils installed in the very same way. > > modprobe svcrdma > > You don't need to load the svcrdma module on the client. The svcrdma > module is only needed on the server. > > The client needs to have the xprtrdma code loaded. 
You appear to have > built nfs as a module, so the xprtrdma code would also have been built > as a module. Instead of loading scvrdma, you should run this here: > > modprobe xprtrdma ok! > Did you see this message on the client? > RPC: transport (256) not supported no. > If loading the xprtrdma module doesn't solve your problem, send the > output from these commands: > > dmesc -c > /dev/null > echo 1024 > /proc/sys/sunrpc/nfs_debug > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > dmesg > output # modprobe xprtrdma # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v mount.nfs: timeout set for Tue Feb 5 16:31:19 2008 mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' mount.nfs: internal error # dmesg NFS: nfs mount opts='rdma,port=2050,addr=10.2.2.1' NFS: parsing nfs mount option 'rdma' NFS: parsing nfs mount option 'port=2050' NFS: parsing nfs mount option 'addr=10.2.2.1' NFS: sending MNT request for 10.2.2.1:/scratch NFS: MNT request succeeded -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. 
D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From eli at mellanox.co.il Tue Feb 5 07:45:36 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 05 Feb 2008 17:45:36 +0200 Subject: [ofa-general] 4k MTU patch In-Reply-To: References: Message-ID: <1202226336.18209.140.camel@mtls03> On Tue, 2008-02-05 at 07:34 -0800, Shirley Ma wrote: > general-bounces at lists.openfabrics.org wrote on 02/05/2008 04:44:09 AM: > > > Hi Shirley, > > > > I see the following problems with the patch: > > > > 1.static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv > *priv, > > + u64 > mapping[IPOIB_UD_RX_SG]) > > +{ > > + ib_dma_unmap_single(priv->ca, mapping[0], > > IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); > > > > ==> use ib_dma_unmap_page() > > + ib_dma_unmap_single(priv->ca, mapping[1], PAGE_SIZE, > DMA_FROM_DEVICE); > > +} > > Good catch, I will fix it. > > > 2. When you allocate an SKB with ipoib_sg_alloc_rx_skb(), you > allocate > > and map both the linear data and the first fragment (in the case of > 4K > > mtu and 4K page size). But then you call ipoib_ud_skb_put_frags() to > > potentially take the first fragment from the SKB for which a packet > has > > just received. This can cause a leak of one page (although I think > this > > case should never happen since the the length of packet is likely to > > exceed the linear data of the SKB. > > The first buffer only has GRH + IPoIB header = 44 bytes, the first > fragment has the IP payload, the length can never be zero. Actually I > can remove that code from taking the first fragment. It's useless > here. > > > 3. The > > if (ipoib_ud_need_sg(priv->max_ib_mtu)) { > > > > in ipoib_ib_handle_rx_wc() can be eliminated - most of the code is > > identical. > > The skb free and allocation need this check, the rest of code can > share between 2K and 4K. I am trying to use one condition check here. > Can you please tell me how to totally eliminate it in details? 
> I think it would be better to use a single alloc and free for the receive SKB and do the if inside those functions. For example, you can do: +static inline void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 *mapping) +{ + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); + } else + ib_dma_unmap_single(priv->ca, mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); +} and something similar for allocating. From bart.vanassche at gmail.com Tue Feb 5 07:48:22 2008 From: bart.vanassche at gmail.com (Bart Van Assche) Date: Tue, 5 Feb 2008 16:48:22 +0100 Subject: [ofa-general] Re: [Stgt-devel] [ANNOUNCE] open iSCSI over iSER target RPM is available In-Reply-To: <47A87586.6010904@Voltaire.COM> References: <47A87586.6010904@Voltaire.COM> Message-ID: On Feb 5, 2008 3:41 PM, Erez Zilber wrote: > stgt (SCSI target) is an open-source framework for storage target > drivers. It supports iSCSI over iSER among other storage target drivers. > > Voltaire added a git tree for stgt that will be added to OFED 1.4: > http://www2.openfabrics.org/git/?p=~dorons/tgt.git;a=summary > > Until OFED 1.4 gets released, it is possible to install the stgt RPM on > top of OFED 1.3. For more details about how to install and use stgt, > please refer to https://wiki.openfabrics.org/tiki-index.php?page=ISER-target > > Some performance numbers that were measured by OSC (using SDR cards): > > * READ: 920 MB/sec > * WRITE: 850 MB/sec > > We hope to have DDR measurements numbers soon. Hello Erez, Can you please post more information about how these numbers were obtained (test program and configuration parameters) ? Bart Van Assche. From jlentini at netapp.com Tue Feb 5 07:51:54 2008 From: jlentini at netapp.com (James Lentini) Date: Tue, 5 Feb 2008 10:51:54 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ?
In-Reply-To: <20080205154116.GA19804@cefeid.wcss.wroc.pl> References: <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> <20080205154116.GA19804@cefeid.wcss.wroc.pl> Message-ID: On Tue, 5 Feb 2008, Pawel Dziekonski wrote: > On Tue, 05 Feb 2008 at 10:04:46AM -0500, James Lentini wrote: > > > If loading the xprtrdma module doesn't solve your problem, send the > > output from these commands: > > > > dmesc -c > /dev/null > > echo 1024 > /proc/sys/sunrpc/nfs_debug > > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > > dmesg > output > > # modprobe xprtrdma > # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > mount.nfs: timeout set for Tue Feb 5 16:31:19 2008 > mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' > mount.nfs: internal error > # dmesg > NFS: nfs mount opts='rdma,port=2050,addr=10.2.2.1' > NFS: parsing nfs mount option 'rdma' > NFS: parsing nfs mount option 'port=2050' > NFS: parsing nfs mount option 'addr=10.2.2.1' > NFS: sending MNT request for 10.2.2.1:/scratch > NFS: MNT request succeeded Looks like we'll need more data. Can you try the mount again, but this time with: echo 32767 > /proc/sys/sunrpc/rpc_debug echo 65535 > /proc/sys/sunrpc/nfs_debug This should produce a lot of data. From changquing.tang at hp.com Tue Feb 5 07:54:54 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 5 Feb 2008 15:54:54 +0000 Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC receive-only qp support In-Reply-To: <200802050817.06380.jackm@dev.mellanox.co.il> References: <200802030949.44943.jackm@dev.mellanox.co.il> <200802050817.06380.jackm@dev.mellanox.co.il> Message-ID: Thanks, Jack. 
In High Availability (HA) mode, every rank will register the recvQP, but in non-HA mode, if any rank dies, the whole application will tear down, so no one registers the recvQP (the creator registers it implicitly, right?). I have another question about ibv_open_xrc_domain(,fd,). All ranks on a node get the 'fd' to the same unique file, since this is a temp file, we need to remove it. Can I remove the file right after ibv_open_xrc_domain() returns ? If so, when I start another MPI job, the same temp file may be generated again for that job, is it possible that this job attaches to the old job's domain ? Thanks. --CQ > -----Original Message----- > From: Jack Morgenstein [mailto:jackm at dev.mellanox.co.il] > Sent: Tuesday, February 05, 2008 12:17 AM > To: Tang, Changqing > Cc: Roland Dreier; general at lists.openfabrics.org > Subject: Re: [ofa-general] [PATCH 7/8 V3] core: Add XRC > receive-only qp support > > On Sunday 03 February 2008 19:30, Tang, Changqing wrote: > > > > > > In addition, the patch implements the foundation for distributing > > > XRC-receive-only QP events to userspace processes registered with > > > that QP. > > > > If r1 is on node1, r2 and r3 are on node2, r1 and r2 have the XRC > > connection established, r1 knows SRQ number from both r2 and > r3, but r3 > > does not register the recv QP created by r2, can r3 still receive > > message from r1 ? > > Yes, it can. However, when r2 either unregisters the recv > QP, or exits, that QP will be destroyed, and r3 will no > longer be able to receive messages via the recv QP created by r2. > > If, however, r3 does register with the recv QP, that QP will > not be destroyed until r3 unregisters (or exits).
> > - Jack > From tziporet at dev.mellanox.co.il Tue Feb 5 07:59:48 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 05 Feb 2008 17:59:48 +0200 Subject: [ofa-general] [GIT PULL] please pull infiniband.git In-Reply-To: References: Message-ID: <47A887F4.70801@mellanox.co.il> Roland Dreier wrote: > Linus, please pull from > > master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus > > This tree is also available from kernel.org mirrors at: > > git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus > > This will get a second batch of InfiniBand/RDMA batches. In addition > to the usual motley crew of changes, this pull includes a new driver > for NetEffect RNICs in drivers/infiniband/hw/nes. The code could use > some further cleaning, but I don't think it's worth holding off on the > merge. > > David Dillow (1): > IB/srp: Retry stale connections > > Eli Cohen (2): > IB/mthca: Remove checks for srq->first_free < 0 > IB/ib_mthca: Pre-link receive WQEs in Tavor mode > > Glenn Streiff (1): > RDMA/nes: Add a driver for NetEffect RNICs > > Hoang-Nam Nguyen (1): > IB/ehca: Add PMA support > > Jack Morgenstein (2): > IB/mthca: Don't read reserved fields in mthca_QUERY_ADAPTER() > mlx4_core: Don't read reserved fields in mlx4_QUERY_ADAPTER() > > Joachim Fenkes (2): > IB/ehca: Prevent sending UD packets to QP0 > IB/ehca: Update sma_attr also in case of disruptive config change > > Olaf Kirch (1): > IB/mthca: Return proper error codes from mthca_fmr_alloc() > > Or Gerlitz (3): > IPoIB: Handle bonding failover race for connected neighbours too > IPoIB: Remove a misleading debug print > IB/fmr_pool: Allocate page list for pool FMRs only when caching enabled > > Roland Dreier (4): > mlx4_core: Fix more section mismatches > IB/mthca: Fix and simplify page size calculation in mthca_reg_phys_mr() > IB/mlx4: Actually print out the driver version > IB: Avoid marking __devinitdata as const > > Sean Hefty (1): > IB/cm: Add 
interim support for routed paths > > > Does this mean ConnectX WQE BB and stateless offload will not be in for 2.6.25? Tziporet From mashirle at us.ibm.com Mon Feb 4 22:05:46 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Mon, 04 Feb 2008 22:05:46 -0800 Subject: [ofa-general] 4k MTU patch In-Reply-To: <1202226336.18209.140.camel@mtls03> References: <1202226336.18209.140.camel@mtls03> Message-ID: <1202191546.22678.8.camel@localhost.localdomain> I looked at the patch file; it was ib_dma_unmap_page() before. I wonder why all my tests have passed without any problem. The mistake was made when manually merging it. Sorry for the trouble at the last minute. > +static inline void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, > + u64 *mapping) > +{ > + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { > + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); > + ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); > + } else > + ib_dma_unmap_single(priv->ca, mapping[0], > + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), > + DMA_FROM_DEVICE); > +} > > and something similar for allocating. Ok, I can change it but it doesn't save anything. Thanks Shirley From jlentini at netapp.com Tue Feb 5 08:16:59 2008 From: jlentini at netapp.com (James Lentini) Date: Tue, 5 Feb 2008 11:16:59 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ?
In-Reply-To: References: <20080201151902.GA16264@cefeid.wcss.wroc.pl> <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> <20080205154116.GA19804@cefeid.wcss.wroc.pl> Message-ID: On Tue, 5 Feb 2008, James Lentini wrote: > > > On Tue, 5 Feb 2008, Pawel Dziekonski wrote: > > > On Tue, 05 Feb 2008 at 10:04:46AM -0500, James Lentini wrote: > > > > > If loading the xprtrdma module doesn't solve your problem, send the > > > output from these commands: > > > > > > dmesc -c > /dev/null > > > echo 1024 > /proc/sys/sunrpc/nfs_debug > > > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > > > dmesg > output > > > > # modprobe xprtrdma > > # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > > mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > > mount.nfs: timeout set for Tue Feb 5 16:31:19 2008 > > mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' > > mount.nfs: internal error > > # dmesg > > NFS: nfs mount opts='rdma,port=2050,addr=10.2.2.1' > > NFS: parsing nfs mount option 'rdma' > > NFS: parsing nfs mount option 'port=2050' > > NFS: parsing nfs mount option 'addr=10.2.2.1' > > NFS: sending MNT request for 10.2.2.1:/scratch > > NFS: MNT request succeeded > > Looks like we'll need more data. Can you try the mount again, but this > time with: > > echo 32767 > /proc/sys/sunrpc/rpc_debug > echo 65535 > /proc/sys/sunrpc/nfs_debug > > This should produce a lot of data. One more question. Looking back at your NFS_START_SERVER script, I noticed that you are starting the NFS server "by hand" and not using your distro's scripts. I'm wondering if there if there is some aspect of the server configuration missing. Are you able to do a normal TCP or UDP mount of the server? 
From mashirle at us.ibm.com Mon Feb 4 22:20:05 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Mon, 04 Feb 2008 22:20:05 -0800 Subject: [ofa-general] 4k MTU patch In-Reply-To: <1202191546.22678.8.camel@localhost.localdomain> References: <1202226336.18209.140.camel@mtls03> <1202191546.22678.8.camel@localhost.localdomain> Message-ID: <1202192405.22678.12.camel@localhost.localdomain> merging 4K mtu buffer allocation with 2K will cause more if additional check since for 4K mtu we use one 44 bytes buffer and one 4K buff, for 2K mtu or PAGE_SIZE > 44 bytes + 4K mtu, we use payload size buffer. Are you sure you want me to change it? thanks Shirey From tziporet at dev.mellanox.co.il Tue Feb 5 08:19:51 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 05 Feb 2008 18:19:51 +0200 Subject: [ofa-general] Re: [ewg] [UPDATE][PATCH] IPoIB-UD 4K MTU patch against 2.6.24 ofed-1.3-git tree In-Reply-To: <1202148892.4502.63.camel@localhost.localdomain> References: <1202113449.4502.16.camel@localhost.localdomain> <1202133925.4502.36.camel@localhost.localdomain> <1202148892.4502.63.camel@localhost.localdomain> Message-ID: <47A88CA7.8000909@mellanox.co.il> Shirley Ma wrote: > I found one one line was out side "for loop" when merging this patch > with current git-tree. This caused UD_POST_RCV_COUNT = 16 wrong. I have > fixed it. This is the updated patch. > > Thanks > Shirley > > > > Hi Shirley, Its seems to me that 4K MTU patch is not "cooked" enough for RC4. 
I appreciate your hard work to push it but so many changes, possible leaks and not enough time for review and testing means too high risk for now Tziporet From eli at mellanox.co.il Tue Feb 5 08:24:38 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 5 Feb 2008 18:24:38 +0200 Subject: [ofa-general] 4k MTU patch In-Reply-To: <1202192405.22678.12.camel@localhost.localdomain> References: <1202226336.18209.140.camel@mtls03> <1202191546.22678.8.camel@localhost.localdomain> <1202192405.22678.12.camel@localhost.localdomain> Message-ID: <6C2C79E72C305246B504CBA17B5500C90340EA13@mtlexch01.mtl.com> It looks cleaner to me. -----Original Message----- From: Shirley Ma [mailto:mashirle at us.ibm.com] Sent: ג 05 פברואר 2008 08:20 To: Eli Cohen Cc: mashirle at linux.vnet.ibm.com; general-bounces at lists.openfabrics.org; openfabrics; Roland Dreier Subject: Re: [ofa-general] 4k MTU patch merging 4K mtu buffer allocation with 2K will cause more if additional check since for 4K mtu we use one 44 bytes buffer and one 4K buff, for 2K mtu or PAGE_SIZE > 44 bytes + 4K mtu, we use payload size buffer. Are you sure you want me to change it? thanks Shirey From eli at mellanox.co.il Tue Feb 5 08:24:38 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 5 Feb 2008 18:24:38 +0200 Subject: [ofa-general] 4k MTU patch In-Reply-To: <1202192405.22678.12.camel@localhost.localdomain> References: <1202226336.18209.140.camel@mtls03> <1202191546.22678.8.camel@localhost.localdomain> <1202192405.22678.12.camel@localhost.localdomain> Message-ID: <6C2C79E72C305246B504CBA17B5500C90340EA14@mtlexch01.mtl.com> It looks cleaner to me. 
-----Original Message----- From: Shirley Ma [mailto:mashirle at us.ibm.com] Sent: ג 05 פברואר 2008 08:20 To: Eli Cohen Cc: mashirle at linux.vnet.ibm.com; general-bounces at lists.openfabrics.org; openfabrics; Roland Dreier Subject: Re: [ofa-general] 4k MTU patch merging 4K mtu buffer allocation with 2K will cause more if additional check since for 4K mtu we use one 44 bytes buffer and one 4K buff, for 2K mtu or PAGE_SIZE > 44 bytes + 4K mtu, we use payload size buffer. Are you sure you want me to change it? thanks Shirey From xma at us.ibm.com Tue Feb 5 08:31:35 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 08:31:35 -0800 Subject: [ofa-general] Re: [ewg] [UPDATE][PATCH] IPoIB-UD 4K MTU patch against 2.6.24 ofed-1.3-git tree In-Reply-To: <47A88CA7.8000909@mellanox.co.il> Message-ID: Hello Tziporet, The problem was because of the last check in of small UDP performance patch. It changed the receiving path completely. And I only got less than one day to merge/test the patch with that patch on both intel and PPC platform. The patch was in good/stable shape before this patch. It has passed stress test for both intel and PPC platform. I have tested the whole night of the new patch yesterday night. It works well and passes the stress test without any problem. Regarding Eli's comments, I have sent out. I am sorry for the minor mistake because of the rushing, but I don't see any risk from my test results. Please reconsider this patch to be in OFED-1.3. thanks Shirley Tziporet Koren Eli Cohen , Sent by: ewg at lists.openfabrics.org, OpenFabrics general-b General ounces at li Subject sts.openf [ofa-general] Re: [ewg] [UPDATE][PATCH] abrics.or IPoIB-UD 4K MTU patch against 2.6.24 g ofed-1.3-git tree 02/05/08 08:19 AM Shirley Ma wrote: > I found one one line was out side "for loop" when merging this patch > with current git-tree. This caused UD_POST_RCV_COUNT = 16 wrong. I have > fixed it. This is the updated patch. 
> > Thanks > Shirley > > > > Hi Shirley, Its seems to me that 4K MTU patch is not "cooked" enough for RC4. I appreciate your hard work to push it but so many changes, possible leaks and not enough time for review and testing means too high risk for now Tziporet _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: graycol.gif Type: image/gif Size: 105 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: pic23340.gif Type: image/gif Size: 1255 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: From tziporet at dev.mellanox.co.il Tue Feb 5 08:56:18 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 05 Feb 2008 18:56:18 +0200 Subject: [ofa-general] Re: [ewg] [UPDATE][PATCH] IPoIB-UD 4K MTU patch against 2.6.24 ofed-1.3-git tree In-Reply-To: References: Message-ID: <47A89532.6080202@mellanox.co.il> Shirley Ma wrote: > > Hello Tziporet, > > The problem was because of the last check in of small UDP performance > patch. It changed the receiving path completely. And I only got less > than one day to merge/test the patch with that patch on both intel and > PPC platform. The patch was in good/stable shape before this patch. It > has passed stress test for both intel and PPC platform. I have tested > the whole night of the new patch yesterday night. It works well and > passes the stress test without any problem. > Which OS have you tested? > > > Regarding Eli's comments, I have sent out. 
I am sorry for the minor > mistake because of the rushing, but I don't see any risk from my test > results. Please reconsider this patch to be in OFED-1.3. > OK - we will do this - we will run one set of our regression with your patch now, and also check that it passes compilation on all kernels. If both will be OK we will take it. I cross fingers for you :-) Tziporet From mashirle at us.ibm.com Mon Feb 4 23:12:35 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Mon, 04 Feb 2008 23:12:35 -0800 Subject: [ofa-general] Re: [ewg] [UPDATE][PATCH] IPoIB-UD 4K MTU patch against 2.6.24 ofed-1.3-git tree In-Reply-To: <47A89532.6080202@mellanox.co.il> References: <47A89532.6080202@mellanox.co.il> Message-ID: <1202195555.22678.27.camel@localhost.localdomain> Hello Tziporet, On Tue, 2008-02-05 at 18:56 +0200, Tziporet Koren wrote: > Shirley Ma wrote: > > > > Hello Tziporet, > > > > The problem was because of the last check in of small UDP performance > > patch. It changed the receiving path completely. And I only got less > > than one day to merge/test the patch with that patch on both intel and > > PPC platform. The patch was in good/stable shape before this patch. It > > has passed stress test for both intel and PPC platform. I have tested > > the whole night of the new patch yesterday night. It works well and > > passes the stress test without any problem. > > > Which OS have you tested? 2.6.24 kernel, and I am going to test SLES10SP2 kernel. It has passed stress test the whole night for 2K MTU test suites. > > Regarding Eli's comments, I have sent out. I am sorry for the minor > > mistake because of the rushing, but I don't see any risk from my test > > results. Please reconsider this patch to be in OFED-1.3. > > > OK - we will do this - we will run one set of our regression with your > patch now, and also check that it pass compilation on all kernels. > If both will be OK we will take it.
> > I cross fingers for you :-) > > ziporet Appreciate you, Vlad and Eli's help here! There is one line change needed for backporting ++priv->stats and ++dev->stats. I didn't create the backport patch for this. thanks Shirley From xma at us.ibm.com Tue Feb 5 09:17:26 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 09:17:26 -0800 Subject: [ofa-general] 4k MTU patch In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90340EA14@mtlexch01.mtl.com> Message-ID: Hello Eli, I am working on it. I will resubmit the patch when I pass my tests. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From sean.hefty at intel.com Tue Feb 5 09:17:24 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 5 Feb 2008 09:17:24 -0800 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: <200802051730.27500.jackm@dev.mellanox.co.il> References: <200802051730.27500.jackm@dev.mellanox.co.il> Message-ID: <000001c8681a$f9bfe3c0$3c98070a@amr.corp.intel.com> >There was a bug in the local sa implementation (the informinfo query did >not incorporate a required change). > >This bug has been fixed, and the fix will be in the next OFED daily build. > >The fix will also be in RC4. Thanks, Jack From dwsmswym at smswy.com Tue Feb 5 10:00:42 2008 From: dwsmswym at smswy.com (Michael Croft) Date: Tue, 5 Feb 2008 19:00:42 +0100 Subject: [ofa-general] High prices don't mean high quality. Purchase high quality medications at low cost in Canada. Message-ID: <01c86829$67c91b80$f5f83d53@dwsmswym> We are glad to offer you trusted, experienced, and fully-licensed Canadian ŤCanadianPharmacyť online drugstore. ŤCanadianPharmacyť is happy to provide their customers with a large selection of quality medications at surprisingly low prices.Visit our "CanadianPharmacy" site Make significant savings buying medications in Canada! http://geocities.com/judithkent222/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From sean.hefty at intel.com Tue Feb 5 11:16:35 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 5 Feb 2008 11:16:35 -0800 Subject: [ofa-general] new releases of libibcm and librdmacm libraries Message-ID: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> I've pushed out new releases: libibcm 1.0.2 librdmacm 1.0.6 to my git tree, and the OFA download pages. Please pull both packages into OFED 1.3. Major changes from previous release: libibcm - removes obsolete simple.c test program librdmacm - updates to build, fix setting QP attributes for RDMA reads - Sean From tziporet at dev.mellanox.co.il Tue Feb 5 12:07:28 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 05 Feb 2008 22:07:28 +0200 Subject: [ofa-general] Re: [ewg] [UPDATE][PATCH] IPoIB-UD 4K MTU patch against 2.6.24 ofed-1.3-git tree In-Reply-To: <1202195555.22678.27.camel@localhost.localdomain> References: <47A89532.6080202@mellanox.co.il> <1202195555.22678.27.camel@localhost.localdomain> Message-ID: <47A8C200.9090404@mellanox.co.il> Shirley Ma wrote: > 2.6.24 kernel, and I am going to test SLES10SP2 kernel. It has passed > stress test the whole night for 2K MTU test suites. > > Please test on RHEL 5 too What are your stress tests? > > Appreciate you, Vlad and Eli's help here! There is one line change > needed for backporting ++priv->stats and ++dev->stats. I didn't create > the backport patch for this.
> > > Please send this backport patch and specify to which kernels its needed Tziporet From tziporet at dev.mellanox.co.il Tue Feb 5 12:08:26 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 05 Feb 2008 22:08:26 +0200 Subject: [ofa-general] Re: [ewg] new releases of libibcm and librdmacm libraries In-Reply-To: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> References: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> Message-ID: <47A8C23A.9050003@mellanox.co.il> Sean Hefty wrote: > I've pushed out new releases: > > libibcm 1.0.2 > librdmacm 1.0.6 > > to my git tree, and the OFA download pages. > > Please pull both packages into OFED 1.3. Major changes from previous release: > > libibcm - removes obsolete simple.c test program > librdmacm - updates to build, fix setting QP attributes for RDMA reads > > Can you put a tag with the name ofed_1_3 on these git trees too Thanks Tziporet From sean.hefty at intel.com Tue Feb 5 12:15:15 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 5 Feb 2008 12:15:15 -0800 Subject: [ofa-general] RE: [ewg] new releases of libibcm and librdmacm libraries In-Reply-To: <47A8C23A.9050003@mellanox.co.il> References: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> <47A8C23A.9050003@mellanox.co.il> Message-ID: <000b01c86833$d20fc9d0$3c98070a@amr.corp.intel.com> >Can you put a tag with the name ofed_1_3 on these git trees too I have tags of v1.0.2 and v1.0.6. Can these just be used instead? From prjl at bowern8.plus.com Tue Feb 5 12:29:36 2008 From: prjl at bowern8.plus.com (Jim Anderson) Date: Tue, 5 Feb 2008 14:29:36 -0600 Subject: [ofa-general] USA representatives required Message-ID: <01c86803$88b987b0$53d3a6d0@prjl> Hello. My name is Jim Anderson, I represent Jorque Development Inc. company. Jorque Development Inc. mainly develops securely web databases for USA companies and we are seekeing for representative for administrative/representative job in USA. 
We need representatives in USA for full and part-time jobs (2 positions are available). We do not ask for any money, we are reputable company operating in Netherlands. Job benefits: - 5000 USD guaranteed monthly income for full-time job - 3500 USD guaranteed monthly income for part-time job - Comprehensive medical and life insurance for you and your dependents. You - will be receiving the Jorque Development Inc. Medicine card and all the paperwork in 2 weeks after successfully completing your probation period. GENERAL REQUIREMENTS: - You have to be honest,loyal, responsible and hard-working. - You have to comply with all reasonable and lawful instructions provided to - You by our company. - Minimal 5-7 hours during the week for communication, this work is considered to be homework and shall take no more then 2-3 hrs a day. - Computer w/internet connection. We do not ask any money and the job is 100% legal. Please, reply to job at jorquedevelopment.com if you are interested and company manager will contact you shortly with job details. Thanks Jim Anderson Jorque Development Inc. From gege11 at juno.com Tue Feb 5 12:28:26 2008 From: gege11 at juno.com (Brain Allen) Date: Tue, 5 Feb 2008 21:28:26 +0100 Subject: [ofa-general] No doctor appointment is necessary Message-ID: <258844929.47238201087422@juno.com> Dear valued member.Fed up with overpaying for drugs? Want to find a cheaper alternative without a quality loss? There is one way to do it - check it out in CanadianPharmacy.Subsidiary branches of world's major pharmaceutical companies located in Canada are able to produce drugs of same high quality at a much lower prime cost - that's what makes it so advantageous to buy them there.12 free bonus pills will be received with any order over $300.Hurry up to get hold of our New Specials - 20% discounts won't wait for too long. 
http://geocities.com/mitchellholloway664/Thank You for Your time and for your attention Yours faithfully, Brain Allen -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Tue Feb 5 12:34:01 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 05 Feb 2008 12:34:01 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git In-Reply-To: <47A887F4.70801@mellanox.co.il> (Tziporet Koren's message of "Tue, 05 Feb 2008 17:59:48 +0200") References: <47A887F4.70801@mellanox.co.il> Message-ID: > Does this means ConnectX WQE BB and stateless offload will not be in > for 2.6.25? No, I will do one more merge before the 2.6.25 merge window closes. I expect the ConnectX WQE BB stuff to be in, and at least some of the IPoIB changes. From eli at dev.mellanox.co.il Tue Feb 5 12:34:54 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 5 Feb 2008 22:34:54 +0200 Subject: [ofa-general] Oops with today's OFED 1.3 In-Reply-To: <47A7746E.6030303@linux.vnet.ibm.com> References: <47A7746E.6030303@linux.vnet.ibm.com> Message-ID: <4e6a6b3c0802051234k42a8bcbfr75d897658d7913f@mail.gmail.com> Pradeep, Can you check if this is resolved? On 2/4/08, Pradeep Satyanarayana wrote: > I pulled today's (Feb 4th) OFED build and saw the following Oops while touch testing > on ehca1 on a 2.6.24 kernel. 
> > Modules linked in: ib_ipoib ib_cm ib_sa ib_uverbs ib_umad ib_ehca ib_mthca ib_mad ib_core joydev st ide_cd ipv6 sg pdc202xx_new e1000 ibmveth dm_mod ipr libata firmware_class sr_mod cdrom sd_mod scsi_mod > NIP: d000000000299ca8 LR: d000000000299a70 CTR: d00000000015ec04 > REGS: c0000001cc85f3b0 TRAP: 0300 Not tainted (2.6.23-ppc64) > MSR: 8000000000009032 CR: 24022424 XER: 00000020 > DAR: 000000000000002c, DSISR: 0000000042000000 > TASK = c0000001d883d4a0[17052] 'modprobe' THREAD: c0000001cc85c000 CPU: 2 > GPR00: 0000000000000000 c0000001cc85f630 d0000000002b5cf0 ffffffffffffffda > GPR04: c0000001cc85f760 ffffffffffffffda d0000000002a7eb0 0000000000000000 > GPR08: 0000000000000000 0000000000000000 0000000000000001 00000000001b4800 > GPR12: d00000000029ef30 c0000000005a8280 c0000001d895aa20 0000000000000000 > GPR16: 0000000000000008 0000000000000000 0000000000000000 d00000000040f27e > GPR20: 0000000000000211 0000000000000000 0000000000000000 c0000001cd1e0000 > GPR24: 0000000000000000 d0000000002ad9d8 d0000000002a7eb0 0000000000000001 > GPR28: c0000001cc85f760 0000000000000000 d0000000002b4ce0 c0000001cd1e0780 > NIP [d000000000299ca8] .ipoib_cm_dev_init+0x440/0x63c [ib_ipoib] > LR [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] > Call Trace: > [c0000001cc85f630] [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] (unreliable) > [c0000001cc85f7d0] [d000000000297f4c] .ipoib_transport_dev_init+0x120/0x458 [ib_ipoib] > [c0000001cc85f930] [d00000000029463c] .ipoib_ib_dev_init+0x44/0xb8 [ib_ipoib] > [c0000001cc85f9c0] [d0000000002902ec] .ipoib_dev_init+0xe0/0x138 [ib_ipoib] > [c0000001cc85fa60] [d000000000290544] .ipoib_add_one+0x200/0x424 [ib_ipoib] > [c0000001cc85fb20] [d0000000001610e4] .ib_register_client+0x94/0xf4 [ib_core] > [c0000001cc85fbb0] [d00000000029dcac] .ipoib_init_module+0x1f8/0x246c [ib_ipoib] > [c0000001cc85fc70] [c0000000000905f0] .sys_init_module+0x176c/0x187c > [c0000001cc85fe30] [c00000000000852c] syscall_exit+0x0/0x40 > 
Instruction dump: > 801f0f20 3b600000 2f800000 409d0040 e81f0f30 e97f04f0 7b6926e4 395b0001 > 7d5b07b4 7c080214 816b0018 7d290214 <9169002c> 60000000 60000000 60000000 > > > I tracked this down to the following area of code: > + for (j = 0; j < ipoib_recvq_size; ++j) { > + for (i = 0; i < priv->cm.num_frags; ++i) > + priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey; > > > in ipoib_0230_srq_post_n.patch. > > Touch tested after removing this patch seems to solve the problem. > > Pradeep > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From dwpuridiomm at puridiom.com Tue Feb 5 12:32:57 2008 From: dwpuridiomm at puridiom.com (Jed Fisher) Date: Tue, 5 Feb 2008 21:32:57 +0100 Subject: [ofa-general] Get your free 2400$ welcome bonus and win much more! Message-ID: <01c8683e$ac6aca80$45af7b54@dwpuridiomm> Play the most popular casino games at home! Black Jack, Slots, Roulette, Poker, Craps! Just download easy to use free software, register free account and play your favorite game. Receive free $2400 bonus to start play with! 100% fair gaming guaranteed! Maximum security of your information and fast response on your requests is guaranteed as our customer support is available 24/7. http://geocities.com/elviamendoza851/ Gamble at home without worry and rush! From pradeeps at linux.vnet.ibm.com Tue Feb 5 12:47:15 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Tue, 05 Feb 2008 12:47:15 -0800 Subject: [ofa-general] Oops with today's OFED 1.3 In-Reply-To: <4e6a6b3c0802051234k42a8bcbfr75d897658d7913f@mail.gmail.com> References: <47A7746E.6030303@linux.vnet.ibm.com> <4e6a6b3c0802051234k42a8bcbfr75d897658d7913f@mail.gmail.com> Message-ID: <47A8CB53.9030107@linux.vnet.ibm.com> Eli Cohen wrote: > Pradeep, > Can you check if this is resolved? 
> > On 2/4/08, Pradeep Satyanarayana wrote: >> I pulled today's (Feb 4th) OFED build and saw the following Oops while touch testing >> on ehca1 on a 2.6.24 kernel. >> >> NIP [d000000000299ca8] .ipoib_cm_dev_init+0x440/0x63c [ib_ipoib] >> LR [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] >> Call Trace: >> [c0000001cc85f630] [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] (unreliable) >> [c0000001cc85f7d0] [d000000000297f4c] .ipoib_transport_dev_init+0x120/0x458 [ib_ipoib] >> [c0000001cc85f930] [d00000000029463c] .ipoib_ib_dev_init+0x44/0xb8 [ib_ipoib] >> [c0000001cc85f9c0] [d0000000002902ec] .ipoib_dev_init+0xe0/0x138 [ib_ipoib] >> [c0000001cc85fa60] [d000000000290544] .ipoib_add_one+0x200/0x424 [ib_ipoib] >> [c0000001cc85fb20] [d0000000001610e4] .ib_register_client+0x94/0xf4 [ib_core] >> [c0000001cc85fbb0] [d00000000029dcac] .ipoib_init_module+0x1f8/0x246c [ib_ipoib] >> [c0000001cc85fc70] [c0000000000905f0] .sys_init_module+0x176c/0x187c >> [c0000001cc85fe30] [c00000000000852c] syscall_exit+0x0/0x40 >> Instruction dump: >> 801f0f20 3b600000 2f800000 409d0040 e81f0f30 e97f04f0 7b6926e4 395b0001 >> 7d5b07b4 7c080214 816b0018 7d290214 <9169002c> 60000000 60000000 60000000 Hello Eli, Yes, this particular issue has been solved. However, I do see some other issues. I am seeing some new messages (not seen previously) in dmesg relating to ib_cq_destroy() (on ehca): ib0: ib_cq_destroy failed ib_destroy_srq failed: -16 ib_dealloc_pd failed This happens after some network tests and an rmmod of ib_ehca. At this point my guess is that this has to do with the split CQ patch. I have not had enough cycles to state that with absolute certainty. Can you please take a look too?
Pradeep From xma at us.ibm.com Tue Feb 5 12:49:27 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 12:49:27 -0800 Subject: [ofa-general] [PATCH 0/5]: Improve small UDP messages In-Reply-To: <1202196410.18209.87.camel@mtls03> Message-ID: Hello Eli, Eli Cohen wrote on 02/04/2008 11:26:50 PM: > On Mon, 2008-02-04 at 23:22 -0800, Shirley Ma wrote: > > Hello Eli, > > > > Thanks. I have tested your patch + 4K mtu patch. I do see better > > performance for mthca when CPU is not 100% used and no errors. I will > > measure ehca performance tomorrow. I also see some issues, like: > > ib_mthca 0000:04:00.0: SQ 0c0404 full (2641376 head, 2641312 tail, 64 > > max, 0 nreq) > > > Can you send what do you do to cause this error? I have successfully created this by running bidirectional netperf with 20 streams. This problem can be avoided by increasing send queue size. thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From xma at us.ibm.com Tue Feb 5 12:54:35 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 5 Feb 2008 12:54:35 -0800 Subject: [ofa-general] Re: [ewg] [UPDATE][PATCH] IPoIB-UD 4K MTU patch against 2.6.24 ofed-1.3-git tree In-Reply-To: <47A8C200.9090404@mellanox.co.il> Message-ID: Tziporet Koren wrote on 02/05/2008 12:07:28 PM: > Please test on RHREL 5 too > What are your stress tests? Ok. The stress test is similar to netperf/netserver. But it's bi-directional multiple streams. I have stressed the stream to 150, duplex running overnight. > Please send this backport patch and specify to which kernels its needed > > Tziporet Ok. It might be out tonight. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From chai.15 at osu.edu Tue Feb 5 13:05:04 2008 From: chai.15 at osu.edu (LEI CHAI) Date: Tue, 05 Feb 2008 16:05:04 -0500 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release Message-ID: <125bf18c14.18c14125bf@osu.edu> Hi Arlin, When I ran programs with dapl 2.0.6 libraries I got this error by setting DAPL_DBG_TYPE=0xffff and DAT_DBG_TYPE=0xffff: libdaplofa.so.2: undefined symbol: dapl_extensions I don't have this problem if I use the libraries in dapl 2.0.5 (though had to compile with dapl 2.0.6 header files). Thanks, Lei From tziporet at dev.mellanox.co.il Tue Feb 5 13:06:03 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 05 Feb 2008 23:06:03 +0200 Subject: [ofa-general] Re: [ewg] new releases of libibcm and librdmacm libraries In-Reply-To: <000b01c86833$d20fc9d0$3c98070a@amr.corp.intel.com> References: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> <47A8C23A.9050003@mellanox.co.il> <000b01c86833$d20fc9d0$3c98070a@amr.corp.intel.com> Message-ID: <47A8CFBB.3090600@mellanox.co.il> Sean Hefty wrote: >> Can you put a tag with the name ofed_1_3 on these git trees too >> > > I have tags of v1.0.2 and v1.0.6. Can these just be used instead? 
> > > It's just easier for us when we do diffs to have also the ofed_1_3 as all other git trees After some time passes it's hard to remember which version was part of which release :-( Tziporet From sean.hefty at intel.com Tue Feb 5 14:05:58 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 5 Feb 2008 14:05:58 -0800 Subject: [ofa-general] RE: [ewg] new releases of libibcm and librdmacm libraries In-Reply-To: <47A8CFBB.3090600@mellanox.co.il> References: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> <47A8C23A.9050003@mellanox.co.il> <000b01c86833$d20fc9d0$3c98070a@amr.corp.intel.com> <47A8CFBB.3090600@mellanox.co.il> Message-ID: <000d01c86843$496bacb0$3c98070a@amr.corp.intel.com> >Its just easier for us when we do diffs to have also the ofed_1_3 as all >other git trees >After some time pass its hard to remember which version was part of >which release :-( I've added ofed_1_3 tags. Note that if there's a need to update the libraries again, the tag will need to be deleted and re-created to move it forward. I don't anticipate other changes for OFED 1.3 however. - Sean From rdreier at cisco.com Tue Feb 5 15:02:45 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 05 Feb 2008 15:02:45 -0800 Subject: [ofa-general] new releases of libibcm and librdmacm libraries In-Reply-To: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> (Sean Hefty's message of "Tue, 5 Feb 2008 11:16:35 -0800") References: <000a01c8682b$9fe05220$3c98070a@amr.corp.intel.com> Message-ID: FWIW, I've updated the packages in my Ubuntu PPA; if you're using it, you should get the packages automatically with a normal update once the builds complete. I've also updated the packages I've proposed for inclusion in Debian.
From calling-bumper.mpeg at zippyvideos.com Tue Feb 5 15:24:22 2008 From: calling-bumper.mpeg at zippyvideos.com (Aileen Bouchard) Date: Wed, 6 Feb 2008 00:24:22 +0100 Subject: [ofa-general] Some useful advices for your health Message-ID: <579792722.13112696865400@zippyvideos.com> Dear valued member. Looking for cheap drugs? What about 20% discount for extremely high quality products? Don't hesitate to purchase products from a reliable source at incredibly low prices.CanadianPharmacy offers high quality Canadian products meeting all Pharmaceutical Standards. Wide selection of products which are cheaper than American ones are available to order online. Easy, secure and confidential ordering process.You receive 12 bonus pills for free if your order is over $300.Save time and money with CanadianPharmacy. http://geocities.com/manuelalarsen691/Thank You for Your time and for your attention. -------------- next part -------------- An HTML attachment was scrubbed... URL: From kliteyn at mellanox.co.il Tue Feb 5 17:18:15 2008 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 6 Feb 2008 03:18:15 +0200 Subject: [ofa-general] nightly osm_sim report 2008-02-06:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2008-02-05 OpenSM git rev = Sun_Feb_3_11:49:31_2008 [6d4b76c4a28bcc8e57549d46a1ceaa4ca64e06ce] ibutils git rev = Mon_Dec_24_10:42:01_2007 [675bec82306d6920555dd0b5e2f664983e27e60f] Total=400 Pass=399 Fail=1 Pass: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree gnu-stallion-64.topo 10 FatTree 
blend-4-ary-2-tree.topo 10 FatTree RhinoDDR.topo 10 FatTree FullGnu.topo 10 FatTree 4-ary-2-tree.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FTreeFail 4-ary-2-tree-missing-sw-link.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 10 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 9 Stability IS3-loop.topo Failures: 1 Stability IS3-loop.topo From batuchtanz.yana-naima at t-online.de Tue Feb 5 17:41:31 2008 From: batuchtanz.yana-naima at t-online.de (Alexa Terrell) Date: Tue, 5 Feb 2008 22:41:31 -0300 Subject: [ofa-general] Best way to cure yourself Message-ID: <477021821.22051687157332@t-online.de> Dear valued member.Why spend more when there's an opportunity to get a product of the same quality at a lower price? Check out the New Offers at CanadianPharmacy. The drugs we offer are made at Canadian plants belonging to major USA pharmaceutical companies such as Pfizer, Johnson & Johnson and others. The quality of such drugs is not any lower than that of their American analogues but the price The price is something worth seeing.If your order is $300+, you will receive 12 bonus pills.Visit our online storeto find out about our great prices. http://geocities.com/antoniocase160/Thank You for Your time and for your attention Yours faithfully, Alexa Terrell -------------- next part -------------- An HTML attachment was scrubbed... URL: From pradeeps at linux.vnet.ibm.com Tue Feb 5 22:18:18 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Tue, 05 Feb 2008 22:18:18 -0800 Subject: [ofa-general] Oops with today's OFED 1.3 In-Reply-To: <47A8CB53.9030107@linux.vnet.ibm.com> References: <47A7746E.6030303@linux.vnet.ibm.com> <4e6a6b3c0802051234k42a8bcbfr75d897658d7913f@mail.gmail.com> <47A8CB53.9030107@linux.vnet.ibm.com> Message-ID: <47A9512A.1050004@linux.vnet.ibm.com> Pradeep Satyanarayana wrote: > Eli Cohen wrote: >> Pradeep, >> Can you check if this is resolved? 
>> >> On 2/4/08, Pradeep Satyanarayana wrote: >>> I pulled today's (Feb 4th) OFED build and saw the following Oops while touch testing >>> on ehca1 on a 2.6.24 kernel. >>> > > > > >>> NIP [d000000000299ca8] .ipoib_cm_dev_init+0x440/0x63c [ib_ipoib] >>> LR [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] >>> Call Trace: >>> [c0000001cc85f630] [d000000000299a70] .ipoib_cm_dev_init+0x208/0x63c [ib_ipoib] (unreliable) >>> [c0000001cc85f7d0] [d000000000297f4c] .ipoib_transport_dev_init+0x120/0x458 [ib_ipoib] >>> [c0000001cc85f930] [d00000000029463c] .ipoib_ib_dev_init+0x44/0xb8 [ib_ipoib] >>> [c0000001cc85f9c0] [d0000000002902ec] .ipoib_dev_init+0xe0/0x138 [ib_ipoib] >>> [c0000001cc85fa60] [d000000000290544] .ipoib_add_one+0x200/0x424 [ib_ipoib] >>> [c0000001cc85fb20] [d0000000001610e4] .ib_register_client+0x94/0xf4 [ib_core] >>> [c0000001cc85fbb0] [d00000000029dcac] .ipoib_init_module+0x1f8/0x246c [ib_ipoib] >>> [c0000001cc85fc70] [c0000000000905f0] .sys_init_module+0x176c/0x187c >>> [c0000001cc85fe30] [c00000000000852c] syscall_exit+0x0/0x40 >>> Instruction dump: >>> 801f0f20 3b600000 2f800000 409d0040 e81f0f30 e97f04f0 7b6926e4 395b0001 >>> 7d5b07b4 7c080214 816b0018 7d290214 <9169002c> 60000000 60000000 60000000 > > Hello Eli, > > Yes, this particular issue has been solved. However, I do see some other issues. > > I seeing some new messages (not seen previously) in dmesg relating to > ib_cq_destroy() (on ehca): > > ib0: ib_cq_destroy failed > ib_destroy_srq failed: -16 > ib_dealloc_pd failed > > This happens after some network tests and an rmmod of ib_ehca. > > At this point my guess is that this has to do with the split CQ patch. I have not > had enough cycles to state that with absolute certainty. Can you please take a look too? > > Pradeep > I looked at this some more. This error occurs because ib_cq_destroy() for rcq failed. After that there are a series of cascading failures. 
Pradeep From SCIENCEnewsletter-owner at yahoogroups.com Tue Feb 5 22:44:48 2008 From: SCIENCEnewsletter-owner at yahoogroups.com (SCIENCEnewsletter moderator) Date: 6 Feb 2008 06:44:48 -0000 Subject: [ofa-general] Yahoo! Groups: You're invited! Join SCIENCEnewsletter today. Message-ID: <1202280288.961.83675.w104@yahoogroups.com> science-news at idxc.org has invited you to join SCIENCEnewsletter [http://groups.yahoo.com/group/SCIENCEnewsletter/] on Yahoo! Groups, the best way to discover and share information and advice with others. All for free. A personal invitation from science-news at idxc.org: Hello! We'd like to offer you a free subscription to a new email newsletter that contains some of the world's most exciting science news and photos -- often long before they're in the papers. The newsletter comes out approximately every 3-10 days and presents the top science news in an understandable way. People of all backgrounds, from Nobel Prize-winning scientists to students, subscribe to it. A few stories that appeared first in this newsletter the past few months: * Monkeys found to "baby talk" young * For first time, planet outside our Solar System judged possibly habitable * Human evolution speeding up drastically, researchers say * Genes may help predict infidelity, study finds The subscription will be free for as long as you desire it. Canceling it will also be easy at any time. Your email will never be shared with or sold to anyone. TO JOIN: Just send an email to this exact address: science at email-news.org Type the word "join" anywhere in the subject line. If you have another email address where you'd prefer us to send the newsletter, send us the email from that address. (It is also possible to join by clicking the gray button below, but following the instructions above is better for prompt receipt of the newsletter.) 
We don't plan on emailing you again; but if you wish to make absolutely sure of this, send an email to "science at email-news.org" with "remove" in the subject line. Best wishes, WS News, P.0. Box 61 New York City, NY 10028 U.S.A. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * See for yourself why millions of people are members of Yahoo! Groups. But hurry. This invitation expires in 30 days. Join this group: http://groups.yahoo.com/i?i=zcR-Z8aEgWnoDF0ylhfYTEGSGpM&e=openib-general%40openib%2Eorg ------------------------------------------------------------------------ Yahoo! Groups is a free service that allows you to stay in touch with friends and family or meet new people who share your interests. Yahoo! Groups values your privacy. It is a violation of our service rules for Groups members to abuse this invitation feature. If you feel this has happened, please notify us: http://help.yahoo.com/fast/help/us/groups/cgi_abuse You may also change your email preferences to stop receiving group invitations in the future. To do so, please go here: http://groups.yahoo.com/s?tag=WqphKp6bTfXh0Ep-BEdoXYwgA7SDOUxEC06VzE10hBVMeH5Lhx6okahk-nRTkoGokgwF9rGGYXbGMUstmM6tLw Your use of Yahoo! Groups is subject to: http://docs.yahoo.com/info/terms/ -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: bg_lblue_top.gif Type: image/gif Size: 50 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: bg_lblue_bottom.gif Type: image/gif Size: 50 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: bg_lblue_left.gif Type: image/gif Size: 54 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: bg_lblue_right.gif Type: image/gif Size: 54 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: cr_m_lblue_nw.gif Type: image/gif Size: 52 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: cr_m_lblue_ne.gif Type: image/gif Size: 52 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: cr_m_lblue_sw.gif Type: image/gif Size: 52 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: cr_m_lblue_se.gif Type: image/gif Size: 52 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: bg_blue_left.gif Type: image/gif Size: 51 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: bg_blue_right.gif Type: image/gif Size: 51 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: bg_blue_bottom.gif Type: image/gif Size: 50 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: cr_blue_sw.gif Type: image/gif Size: 90 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: cr_blue_se.gif Type: image/gif Size: 90 bytes Desc: not available URL: From jackm at dev.mellanox.co.il Tue Feb 5 23:21:05 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 6 Feb 2008 09:21:05 +0200 Subject: [ofa-general] ofed1.2.5rc2 and intel mpi error In-Reply-To: <000001c8681a$f9bfe3c0$3c98070a@amr.corp.intel.com> References: <200802051730.27500.jackm@dev.mellanox.co.il> <000001c8681a$f9bfe3c0$3c98070a@amr.corp.intel.com> Message-ID: <200802060921.05908.jackm@dev.mellanox.co.il> On Tuesday 05 February 2008 19:17, Sean Hefty wrote: > >There was a bug in the local sa implementation (the informinfo query did > >not incorporate a required change). > > > >This bug has been fixed, and the fix will be in the next OFED daily build. > > > >The fix will also be in RC4. > > Thanks, Jack > Thanks for not getting annoyed. The fix was a simple one, and I wanted the fix to make it into OFED 1.3 RC4. - Jack P.S., this change is also needed for anyone using the driver built in to kernels 2.6.23 and above. P.P.S., I'll post the changed patch now. I used quilt, though, to generate the patch (rather than git) -- it was MUCH easier for me, but the side effect is many cosmetic changes. 
The only relevant change though is (in patch file kernel_patches/fixes/sean_local_sa_1_notifications.patch): @@ -1025,13 +1030,10 @@ index 6469406..369fe60 100644 + if (!query) + return -ENOMEM; + -+ query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0, -+ 0, IB_MGMT_SA_HDR, -+ IB_MGMT_SA_DATA, gfp_mask); -+ if (!query->sa_query.mad_buf) { -+ ret = -ENOMEM; ++ query->sa_query.port = port; ++ ret = alloc_mad(&query->sa_query, gfp_mask); ++ if (ret) + goto err1; -+ } From jackm at dev.mellanox.co.il Tue Feb 5 23:23:18 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 6 Feb 2008 09:23:18 +0200 Subject: [ofa-general] [PATCH 1/2 V2] ib/sa: Add InformInfo/Notice support Message-ID: <200802060923.19034.jackm@dev.mellanox.co.il> Add SA client support for notice/trap registration using InformInfo. Clients can use the ib_sa interface to register for SA events based on trap numbers, and receive SA event notification. This allows clients to receive notification, such as GID in/out of service. Signed-off-by: Sean Hefty --- drivers/infiniband/core/Makefile | 2 drivers/infiniband/core/notice.c | 749 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/sa.h | 16 + drivers/infiniband/core/sa_query.c | 316 +++++++++++++++ include/rdma/ib_sa.h | 171 ++++++++ 5 files changed, 1251 insertions(+), 3 deletions(-) V2: modified ib_sa_register_inform_info() to use alloc_mad, per changes in commit 2aec5c602c6a44e2a3a173339a9ab94549658e4b This change is also required for anyone using the infiniband driver built in to kernels 2.6.23 and above. 
Signed-off-by: Jack Morgenstein Index: ofa_1_3_dev_kernel/drivers/infiniband/core/Makefile =================================================================== --- ofa_1_3_dev_kernel.orig/drivers/infiniband/core/Makefile 2008-02-05 08:30:21.000000000 +0200 +++ ofa_1_3_dev_kernel/drivers/infiniband/core/Makefile 2008-02-05 15:10:53.000000000 +0200 @@ -13,7 +13,7 @@ ib_core-$(CONFIG_INFINIBAND_USER_MEM) += ib_mad-y := mad.o smi.o agent.o mad_rmpp.o -ib_sa-y := sa_query.o multicast.o +ib_sa-y := sa_query.o multicast.o notice.o ib_cm-y := cm.o Index: ofa_1_3_dev_kernel/drivers/infiniband/core/notice.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ ofa_1_3_dev_kernel/drivers/infiniband/core/notice.c 2008-02-05 14:57:05.000000000 +0200 @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2006 Intel Corporation.  All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "sa.h" + +MODULE_AUTHOR("Sean Hefty"); +MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling"); +MODULE_LICENSE("Dual BSD/GPL"); + +static void inform_add_one(struct ib_device *device); +static void inform_remove_one(struct ib_device *device); + +static struct ib_client inform_client = { + .name = "ib_notice", + .add = inform_add_one, + .remove = inform_remove_one +}; + +static struct ib_sa_client sa_client; +static struct workqueue_struct *inform_wq; + +struct inform_device; + +struct inform_port { + struct inform_device *dev; + spinlock_t lock; + struct rb_root table; + atomic_t refcount; + struct completion comp; + u8 port_num; +}; + +struct inform_device { + struct ib_device *device; + struct ib_event_handler event_handler; + int start_port; + int end_port; + struct inform_port port[0]; +}; + +enum inform_state { + INFORM_IDLE, + INFORM_REGISTERING, + INFORM_MEMBER, + INFORM_BUSY, + INFORM_ERROR +}; + +struct inform_member; + +struct inform_group { + u16 trap_number; + struct rb_node node; + struct inform_port *port; + spinlock_t lock; + struct work_struct work; + struct list_head pending_list; + struct list_head active_list; + struct list_head notice_list; + struct inform_member *last_join; + int members; + enum inform_state join_state; /* State relative to SA */ + atomic_t refcount; + enum inform_state state; + struct ib_sa_query *query; + int query_id; +}; + +struct inform_member { + struct ib_inform_info info; + struct ib_sa_client *client; + struct inform_group *group; + struct list_head list; + enum inform_state state; + atomic_t refcount; + struct completion comp; +}; + +struct 
inform_notice { + struct list_head list; + struct ib_sa_notice notice; +}; + +static void reg_handler(int status, struct ib_sa_inform *inform, + void *context); +static void unreg_handler(int status, struct ib_sa_inform *inform, + void *context); + +static struct inform_group *inform_find(struct inform_port *port, + u16 trap_number) +{ + struct rb_node *node = port->table.rb_node; + struct inform_group *group; + + while (node) { + group = rb_entry(node, struct inform_group, node); + if (trap_number < group->trap_number) + node = node->rb_left; + else if (trap_number > group->trap_number) + node = node->rb_right; + else + return group; + } + return NULL; +} + +static struct inform_group *inform_insert(struct inform_port *port, + struct inform_group *group) +{ + struct rb_node **link = &port->table.rb_node; + struct rb_node *parent = NULL; + struct inform_group *cur_group; + + while (*link) { + parent = *link; + cur_group = rb_entry(parent, struct inform_group, node); + if (group->trap_number < cur_group->trap_number) + link = &(*link)->rb_left; + else if (group->trap_number > cur_group->trap_number) + link = &(*link)->rb_right; + else + return cur_group; + } + rb_link_node(&group->node, parent, link); + rb_insert_color(&group->node, &port->table); + return NULL; +} + +static void deref_port(struct inform_port *port) +{ + if (atomic_dec_and_test(&port->refcount)) + complete(&port->comp); +} + +static void release_group(struct inform_group *group) +{ + struct inform_port *port = group->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + if (atomic_dec_and_test(&group->refcount)) { + rb_erase(&group->node, &port->table); + spin_unlock_irqrestore(&port->lock, flags); + kfree(group); + deref_port(port); + } else + spin_unlock_irqrestore(&port->lock, flags); +} + +static void deref_member(struct inform_member *member) +{ + if (atomic_dec_and_test(&member->refcount)) + complete(&member->comp); +} + +static void queue_reg(struct inform_member *member) 
+{ + struct inform_group *group = member->group; + unsigned long flags; + + spin_lock_irqsave(&group->lock, flags); + list_add(&member->list, &group->pending_list); + if (group->state == INFORM_IDLE) { + group->state = INFORM_BUSY; + atomic_inc(&group->refcount); + queue_work(inform_wq, &group->work); + } + spin_unlock_irqrestore(&group->lock, flags); +} + +static int send_reg(struct inform_group *group, struct inform_member *member) +{ + struct inform_port *port = group->port; + struct ib_sa_inform inform; + int ret; + + memset(&inform, 0, sizeof inform); + inform.lid_range_begin = cpu_to_be16(0xFFFF); + inform.is_generic = 1; + inform.subscribe = 1; + inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL); + inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number); + inform.trap.generic.resp_time = 19; + inform.trap.generic.producer_type = + cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL); + + group->last_join = member; + ret = ib_sa_informinfo_query(&sa_client, port->dev->device, + port->port_num, &inform, 3000, GFP_KERNEL, + reg_handler, group,&group->query); + if (ret >= 0) { + group->query_id = ret; + ret = 0; + } + return ret; +} + +static int send_unreg(struct inform_group *group) +{ + struct inform_port *port = group->port; + struct ib_sa_inform inform; + int ret; + + memset(&inform, 0, sizeof inform); + inform.lid_range_begin = cpu_to_be16(0xFFFF); + inform.is_generic = 1; + inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL); + inform.trap.generic.trap_num = cpu_to_be16(group->trap_number); + inform.trap.generic.qpn = IB_QP1; + inform.trap.generic.resp_time = 19; + inform.trap.generic.producer_type = + cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL); + + ret = ib_sa_informinfo_query(&sa_client, port->dev->device, + port->port_num, &inform, 3000, GFP_KERNEL, + unreg_handler, group, &group->query); + if (ret >= 0) { + group->query_id = ret; + ret = 0; + } + return ret; +} + +static void join_group(struct inform_group *group, struct inform_member *member) +{ + 
member->state = INFORM_MEMBER; + group->members++; + list_move(&member->list, &group->active_list); +} + +static int fail_join(struct inform_group *group, struct inform_member *member, + int status) +{ + spin_lock_irq(&group->lock); + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + return member->info.callback(status, &member->info, NULL); +} + +static void process_group_error(struct inform_group *group) +{ + struct inform_member *member; + int ret; + + spin_lock_irq(&group->lock); + while (!list_empty(&group->active_list)) { + member = list_entry(group->active_list.next, + struct inform_member, list); + atomic_inc(&member->refcount); + list_del_init(&member->list); + group->members--; + member->state = INFORM_ERROR; + spin_unlock_irq(&group->lock); + + ret = member->info.callback(-ENETRESET, &member->info, NULL); + deref_member(member); + if (ret) + ib_sa_unregister_inform_info(&member->info); + spin_lock_irq(&group->lock); + } + + group->join_state = INFORM_IDLE; + group->state = INFORM_BUSY; + spin_unlock_irq(&group->lock); +} + +/* + * Report a notice to all active subscribers. We use a temporary list to + * handle unsubscription requests while the notice is being reported, which + * avoids holding the group lock while in the user's callback. 
+ */ +static void process_notice(struct inform_group *group, + struct inform_notice *info_notice) +{ + struct inform_member *member; + struct list_head list; + int ret; + + INIT_LIST_HEAD(&list); + + spin_lock_irq(&group->lock); + list_splice_init(&group->active_list, &list); + while (!list_empty(&list)) { + + member = list_entry(list.next, struct inform_member, list); + atomic_inc(&member->refcount); + list_move(&member->list, &group->active_list); + spin_unlock_irq(&group->lock); + + ret = member->info.callback(0, &member->info, + &info_notice->notice); + deref_member(member); + if (ret) + ib_sa_unregister_inform_info(&member->info); + spin_lock_irq(&group->lock); + } + spin_unlock_irq(&group->lock); +} + +static void inform_work_handler(struct work_struct *work) +{ + struct inform_group *group; + struct inform_member *member; + struct ib_inform_info *info; + struct inform_notice *info_notice; + int status, ret; + + group = container_of(work, typeof(*group), work); +retest: + spin_lock_irq(&group->lock); + while (!list_empty(&group->pending_list) || + !list_empty(&group->notice_list) || + (group->state == INFORM_ERROR)) { + + if (group->state == INFORM_ERROR) { + spin_unlock_irq(&group->lock); + process_group_error(group); + goto retest; + } + + if (!list_empty(&group->notice_list)) { + info_notice = list_entry(group->notice_list.next, + struct inform_notice, list); + list_del(&info_notice->list); + spin_unlock_irq(&group->lock); + process_notice(group, info_notice); + kfree(info_notice); + goto retest; + } + + member = list_entry(group->pending_list.next, + struct inform_member, list); + info = &member->info; + atomic_inc(&member->refcount); + + if (group->join_state == INFORM_MEMBER) { + join_group(group, member); + spin_unlock_irq(&group->lock); + ret = info->callback(0, info, NULL); + } else { + spin_unlock_irq(&group->lock); + status = send_reg(group, member); + if (!status) { + deref_member(member); + return; + } + ret = fail_join(group, member, status); + 
} + + deref_member(member); + if (ret) + ib_sa_unregister_inform_info(&member->info); + spin_lock_irq(&group->lock); + } + + if (!group->members && (group->join_state == INFORM_MEMBER)) { + group->join_state = INFORM_IDLE; + spin_unlock_irq(&group->lock); + if (send_unreg(group)) + goto retest; + } else { + group->state = INFORM_IDLE; + spin_unlock_irq(&group->lock); + release_group(group); + } +} + +/* + * Fail a join request if it is still active - at the head of the pending queue. + */ +static void process_join_error(struct inform_group *group, int status) +{ + struct inform_member *member; + int ret; + + spin_lock_irq(&group->lock); + member = list_entry(group->pending_list.next, + struct inform_member, list); + if (group->last_join == member) { + atomic_inc(&member->refcount); + list_del_init(&member->list); + spin_unlock_irq(&group->lock); + ret = member->info.callback(status, &member->info, NULL); + deref_member(member); + if (ret) + ib_sa_unregister_inform_info(&member->info); + } else + spin_unlock_irq(&group->lock); +} + +static void reg_handler(int status, struct ib_sa_inform *inform, void *context) +{ + struct inform_group *group = context; + + if (status) + process_join_error(group, status); + else + group->join_state = INFORM_MEMBER; + + inform_work_handler(&group->work); +} + +static void unreg_handler(int status, struct ib_sa_inform *rec, void *context) +{ + struct inform_group *group = context; + + inform_work_handler(&group->work); +} + +int notice_dispatch(struct ib_device *device, u8 port_num, + struct ib_sa_notice *notice) +{ + struct inform_device *dev; + struct inform_port *port; + struct inform_group *group; + struct inform_notice *info_notice; + + dev = ib_get_client_data(device, &inform_client); + if (!dev) + return 0; /* No one to give notice to. */ + + port = &dev->port[port_num - dev->start_port]; + spin_lock_irq(&port->lock); + group = inform_find(port, __be16_to_cpu(notice->trap. 
+ generic.trap_num)); + if (!group) { + spin_unlock_irq(&port->lock); + return 0; + } + + atomic_inc(&group->refcount); + spin_unlock_irq(&port->lock); + + info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL); + if (!info_notice) { + release_group(group); + return -ENOMEM; + } + + info_notice->notice = *notice; + + spin_lock_irq(&group->lock); + list_add(&info_notice->list, &group->notice_list); + if (group->state == INFORM_IDLE) { + group->state = INFORM_BUSY; + spin_unlock_irq(&group->lock); + inform_work_handler(&group->work); + } else { + spin_unlock_irq(&group->lock); + release_group(group); + } + + return 0; +} + +static struct inform_group *acquire_group(struct inform_port *port, + u16 trap_number, gfp_t gfp_mask) +{ + struct inform_group *group, *cur_group; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + group = inform_find(port, trap_number); + if (group) + goto found; + spin_unlock_irqrestore(&port->lock, flags); + + group = kzalloc(sizeof *group, gfp_mask); + if (!group) + return NULL; + + group->port = port; + group->trap_number = trap_number; + INIT_LIST_HEAD(&group->pending_list); + INIT_LIST_HEAD(&group->active_list); + INIT_LIST_HEAD(&group->notice_list); + INIT_WORK(&group->work, inform_work_handler); + spin_lock_init(&group->lock); + + spin_lock_irqsave(&port->lock, flags); + cur_group = inform_insert(port, group); + if (cur_group) { + kfree(group); + group = cur_group; + } else + atomic_inc(&port->refcount); +found: + atomic_inc(&group->refcount); + spin_unlock_irqrestore(&port->lock, flags); + return group; +} + +/* + * We serialize all join requests to a single group to make our lives much + * easier. Otherwise, two users could try to join the same group + * simultaneously, with different configurations, one could leave while the + * join is in progress, etc., which makes locking around error recovery + * difficult. 
+ */ +struct ib_inform_info * +ib_sa_register_inform_info(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + u16 trap_number, gfp_t gfp_mask, + int (*callback)(int status, + struct ib_inform_info *info, + struct ib_sa_notice *notice), + void *context) +{ + struct inform_device *dev; + struct inform_member *member; + struct ib_inform_info *info; + int ret; + + dev = ib_get_client_data(device, &inform_client); + if (!dev) + return ERR_PTR(-ENODEV); + + member = kzalloc(sizeof *member, gfp_mask); + if (!member) + return ERR_PTR(-ENOMEM); + + ib_sa_client_get(client); + member->client = client; + member->info.trap_number = trap_number; + member->info.callback = callback; + member->info.context = context; + init_completion(&member->comp); + atomic_set(&member->refcount, 1); + member->state = INFORM_REGISTERING; + + member->group = acquire_group(&dev->port[port_num - dev->start_port], + trap_number, gfp_mask); + if (!member->group) { + ret = -ENOMEM; + goto err; + } + + /* + * The user will get the info structure in their callback. They + * could then free the info structure before we can return from + * this routine. So we save the pointer to return before queuing + * any callback. 
+ */ + info = &member->info; + queue_reg(member); + return info; + +err: + ib_sa_client_put(member->client); + kfree(member); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ib_sa_register_inform_info); + +void ib_sa_unregister_inform_info(struct ib_inform_info *info) +{ + struct inform_member *member; + struct inform_group *group; + + member = container_of(info, struct inform_member, info); + group = member->group; + + spin_lock_irq(&group->lock); + if (member->state == INFORM_MEMBER) + group->members--; + + list_del_init(&member->list); + + if (group->state == INFORM_IDLE) { + group->state = INFORM_BUSY; + spin_unlock_irq(&group->lock); + /* Continue to hold reference on group until callback */ + queue_work(inform_wq, &group->work); + } else { + spin_unlock_irq(&group->lock); + release_group(group); + } + + deref_member(member); + wait_for_completion(&member->comp); + ib_sa_client_put(member->client); + kfree(member); +} +EXPORT_SYMBOL(ib_sa_unregister_inform_info); + +static void inform_groups_lost(struct inform_port *port) +{ + struct inform_group *group; + struct rb_node *node; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + for (node = rb_first(&port->table); node; node = rb_next(node)) { + group = rb_entry(node, struct inform_group, node); + spin_lock(&group->lock); + if (group->state == INFORM_IDLE) { + atomic_inc(&group->refcount); + queue_work(inform_wq, &group->work); + } + group->state = INFORM_ERROR; + spin_unlock(&group->lock); + } + spin_unlock_irqrestore(&port->lock, flags); +} + +static void inform_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct inform_device *dev; + + dev = container_of(handler, struct inform_device, event_handler); + + switch (event->event) { + case IB_EVENT_PORT_ERR: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + inform_groups_lost(&dev->port[event->element.port_num - + dev->start_port]); + break; + default: + break; + } +} + +static 
void inform_add_one(struct ib_device *device) +{ + struct inform_device *dev; + struct inform_port *port; + int i; + + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + + dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, + GFP_KERNEL); + if (!dev) + return; + + if (device->node_type == RDMA_NODE_IB_SWITCH) + dev->start_port = dev->end_port = 0; + else { + dev->start_port = 1; + dev->end_port = device->phys_port_cnt; + } + + for (i = 0; i <= dev->end_port - dev->start_port; i++) { + port = &dev->port[i]; + port->dev = dev; + port->port_num = dev->start_port + i; + spin_lock_init(&port->lock); + port->table = RB_ROOT; + init_completion(&port->comp); + atomic_set(&port->refcount, 1); + } + + dev->device = device; + ib_set_client_data(device, &inform_client, dev); + + INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler); + ib_register_event_handler(&dev->event_handler); +} + +static void inform_remove_one(struct ib_device *device) +{ + struct inform_device *dev; + struct inform_port *port; + int i; + + dev = ib_get_client_data(device, &inform_client); + if (!dev) + return; + + ib_unregister_event_handler(&dev->event_handler); + flush_workqueue(inform_wq); + + for (i = 0; i <= dev->end_port - dev->start_port; i++) { + port = &dev->port[i]; + deref_port(port); + wait_for_completion(&port->comp); + } + + kfree(dev); +} + +int notice_init(void) +{ + int ret; + + inform_wq = create_singlethread_workqueue("ib_inform"); + if (!inform_wq) + return -ENOMEM; + + ib_sa_register_client(&sa_client); + + ret = ib_register_client(&inform_client); + if (ret) + goto err; + return 0; + +err: + ib_sa_unregister_client(&sa_client); + destroy_workqueue(inform_wq); + return ret; +} + +void notice_cleanup(void) +{ + ib_unregister_client(&inform_client); + ib_sa_unregister_client(&sa_client); + destroy_workqueue(inform_wq); +} Index: ofa_1_3_dev_kernel/drivers/infiniband/core/sa.h 
=================================================================== --- ofa_1_3_dev_kernel.orig/drivers/infiniband/core/sa.h 2008-02-05 08:30:21.000000000 +0200 +++ ofa_1_3_dev_kernel/drivers/infiniband/core/sa.h 2008-02-05 15:10:53.000000000 +0200 @@ -63,4 +63,20 @@ int ib_sa_mcmember_rec_query(struct ib_s int mcast_init(void); void mcast_cleanup(void); +int ib_sa_informinfo_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_inform *rec, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_inform *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); + +int notice_dispatch(struct ib_device *device, u8 port_num, + struct ib_sa_notice *notice); + +int notice_init(void); +void notice_cleanup(void); + #endif /* SA_H */ Index: ofa_1_3_dev_kernel/drivers/infiniband/core/sa_query.c =================================================================== --- ofa_1_3_dev_kernel.orig/drivers/infiniband/core/sa_query.c 2008-02-05 08:30:21.000000000 +0200 +++ ofa_1_3_dev_kernel/drivers/infiniband/core/sa_query.c 2008-02-05 15:11:24.000000000 +0200 @@ -62,10 +62,12 @@ struct ib_sa_sm_ah { struct ib_sa_port { struct ib_mad_agent *agent; + struct ib_mad_agent *notice_agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; spinlock_t ah_lock; u8 port_num; + struct ib_device *device; }; struct ib_sa_device { @@ -102,6 +104,12 @@ struct ib_sa_mcmember_query { struct ib_sa_query sa_query; }; +struct ib_sa_inform_query { + void (*callback)(int, struct ib_sa_inform *, void *); + void *context; + struct ib_sa_query sa_query; +}; + static void ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device); @@ -349,6 +357,110 @@ static const struct ib_field service_rec .size_bits = 2*64 }, }; +#define INFORM_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_inform, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_inform *) 0)->field, \ + 
.field_name = "sa_inform:" #field + +static const struct ib_field inform_table[] = { + { INFORM_FIELD(gid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 128 }, + { INFORM_FIELD(lid_range_begin), + .offset_words = 4, + .offset_bits = 0, + .size_bits = 16 }, + { INFORM_FIELD(lid_range_end), + .offset_words = 4, + .offset_bits = 16, + .size_bits = 16 }, + { RESERVED, + .offset_words = 5, + .offset_bits = 0, + .size_bits = 16 }, + { INFORM_FIELD(is_generic), + .offset_words = 5, + .offset_bits = 16, + .size_bits = 8 }, + { INFORM_FIELD(subscribe), + .offset_words = 5, + .offset_bits = 24, + .size_bits = 8 }, + { INFORM_FIELD(type), + .offset_words = 6, + .offset_bits = 0, + .size_bits = 16 }, + { INFORM_FIELD(trap.generic.trap_num), + .offset_words = 6, + .offset_bits = 16, + .size_bits = 16 }, + { INFORM_FIELD(trap.generic.qpn), + .offset_words = 7, + .offset_bits = 0, + .size_bits = 24 }, + { RESERVED, + .offset_words = 7, + .offset_bits = 24, + .size_bits = 3 }, + { INFORM_FIELD(trap.generic.resp_time), + .offset_words = 7, + .offset_bits = 27, + .size_bits = 5 }, + { RESERVED, + .offset_words = 8, + .offset_bits = 0, + .size_bits = 8 }, + { INFORM_FIELD(trap.generic.producer_type), + .offset_words = 8, + .offset_bits = 8, + .size_bits = 24 }, +}; + +#define NOTICE_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_notice, field), \ + .struct_size_bytes = sizeof ((struct ib_sa_notice *) 0)->field, \ + .field_name = "sa_notice:" #field + +static const struct ib_field notice_table[] = { + { NOTICE_FIELD(is_generic), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 1 }, + { NOTICE_FIELD(type), + .offset_words = 0, + .offset_bits = 1, + .size_bits = 7 }, + { NOTICE_FIELD(trap.generic.producer_type), + .offset_words = 0, + .offset_bits = 8, + .size_bits = 24 }, + { NOTICE_FIELD(trap.generic.trap_num), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 16 }, + { NOTICE_FIELD(issuer_lid), + .offset_words = 1, + .offset_bits = 16, + 
.size_bits = 16 }, + { NOTICE_FIELD(notice_toggle), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 1 }, + { NOTICE_FIELD(notice_count), + .offset_words = 2, + .offset_bits = 1, + .size_bits = 15 }, + { NOTICE_FIELD(data_details), + .offset_words = 2, + .offset_bits = 16, + .size_bits = 432 }, + { NOTICE_FIELD(issuer_gid), + .offset_words = 16, + .offset_bits = 0, + .size_bits = 128 }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -923,6 +1035,150 @@ err1: return ret; } +static void ib_sa_inform_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_inform_query *query = + container_of(sa_query, struct ib_sa_inform_query, sa_query); + + if (mad) { + struct ib_sa_inform rec; + + ib_unpack(inform_table, ARRAY_SIZE(inform_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_inform_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query)); +} + +/** + * ib_sa_informinfo_query - Start an InformInfo registration. + * @client:SA client + * @device:device to send query on + * @port_num: port number to send query on + * @rec:Inform record to send in query + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when notice handler registration completes, + * times out or is canceled + * @context:opaque user context passed to callback + * @sa_query:query context, used to cancel query + * + * This function sends inform info to register with SA to receive + * in-service notice. + * The callback function will be called when the query completes (or + * fails); status is 0 for a successful response, -EINTR if the query + * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error + * occurred sending the query. 
The resp parameter of the callback is + * only valid if status is 0. + * + * If the return value of ib_sa_informinfo_query() is negative, it is an + * error code. Otherwise it is a query ID that can be used to cancel + * the query. + */ +int ib_sa_informinfo_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_inform *rec, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_inform *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_inform_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? 
ib_sa_inform_callback : NULL; + query->sa_query.release = ib_sa_inform_release; + query->sa_query.port = port; + mad->mad_hdr.method = IB_MGMT_METHOD_SET; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_INFORM_INFO); + + ib_pack(inform_table, ARRAY_SIZE(inform_table), rec, mad->data); + + *sa_query = &query->sa_query; + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + ib_free_send_mad(query->sa_query.mad_buf); +err1: + kfree(query); + return ret; +} + +static void ib_sa_notice_resp(struct ib_sa_port *port, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_mad_send_buf *mad_buf; + struct ib_sa_mad *mad; + int ret; + + mad_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0, + IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, + GFP_KERNEL); + if (IS_ERR(mad_buf)) + return; + + mad = mad_buf->mad; + memcpy(mad, mad_recv_wc->recv_buf.mad, sizeof *mad); + mad->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP; + + spin_lock_irq(&port->ah_lock); + kref_get(&port->sm_ah->ref); + mad_buf->context[0] = &port->sm_ah->ref; + mad_buf->ah = port->sm_ah->ah; + spin_unlock_irq(&port->ah_lock); + + ret = ib_post_send_mad(mad_buf, NULL); + if (ret) + goto err; + + return; +err: + kref_put(mad_buf->context[0], free_sm_ah); + ib_free_send_mad(mad_buf); +} + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { @@ -976,9 +1232,36 @@ static void recv_handler(struct ib_mad_a ib_free_recv_mad(mad_recv_wc); } +static void notice_resp_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + kref_put(mad_send_wc->send_buf->context[0], free_sm_ah); + ib_free_send_mad(mad_send_wc->send_buf); +} + +static void notice_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc) +{ + struct ib_sa_port *port; + struct ib_sa_mad *mad; + struct ib_sa_notice notice; + + port = mad_agent->context; + mad = (struct 
ib_sa_mad *) mad_recv_wc->recv_buf.mad; + ib_unpack(notice_table, ARRAY_SIZE(notice_table), mad->data, &notice); + + if (!notice_dispatch(port->device, port->port_num, &notice)) + ib_sa_notice_resp(port, mad_recv_wc); + ib_free_recv_mad(mad_recv_wc); +} + static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; + struct ib_mad_reg_req reg_req = { + .mgmt_class = IB_MGMT_CLASS_SUBN_ADM, + .mgmt_class_version = 2 + }; int s, e, i; if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) @@ -1012,6 +1295,16 @@ static void ib_sa_add_one(struct ib_devi if (IS_ERR(sa_dev->port[i].agent)) goto err; + sa_dev->port[i].device = device; + set_bit(IB_MGMT_METHOD_REPORT, reg_req.method_mask); + sa_dev->port[i].notice_agent = + ib_register_mad_agent(device, i + s, IB_QPT_GSI, + &reg_req, 0, notice_resp_handler, + notice_handler, &sa_dev->port[i]); + + if (IS_ERR(sa_dev->port[i].notice_agent)) + goto err; + INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); } @@ -1034,8 +1327,14 @@ static void ib_sa_add_one(struct ib_devi return; err: - while (--i >= 0) - ib_unregister_mad_agent(sa_dev->port[i].agent); + while (--i >= 0) { + if (!IS_ERR(sa_dev->port[i].notice_agent)) { + ib_unregister_mad_agent(sa_dev->port[i].notice_agent); + } + if (!IS_ERR(sa_dev->port[i].agent)) { + ib_unregister_mad_agent(sa_dev->port[i].agent); + } + } kfree(sa_dev); @@ -1055,6 +1354,7 @@ static void ib_sa_remove_one(struct ib_d flush_scheduled_work(); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { + ib_unregister_mad_agent(sa_dev->port[i].notice_agent); ib_unregister_mad_agent(sa_dev->port[i].agent); kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } @@ -1083,7 +1383,15 @@ static int __init ib_sa_init(void) goto err2; } + ret = notice_init(); + if (ret) { + printk(KERN_ERR "Couldn't initialize notice handling\n"); + goto err3; + } + return 0; +err3: + mcast_cleanup(); err2: ib_unregister_client(&sa_client); err1: @@ -1093,6 +1401,7 @@ err1: static void
__exit ib_sa_cleanup(void) { mcast_cleanup(); + notice_cleanup(); ib_unregister_client(&sa_client); idr_destroy(&query_idr); } Index: ofa_1_3_dev_kernel/include/rdma/ib_sa.h =================================================================== --- ofa_1_3_dev_kernel.orig/include/rdma/ib_sa.h 2008-02-05 08:30:21.000000000 +0200 +++ ofa_1_3_dev_kernel/include/rdma/ib_sa.h 2008-02-05 10:07:01.000000000 +0200 @@ -253,6 +253,127 @@ struct ib_sa_service_rec { u64 data64[2]; }; +enum { + IB_SA_EVENT_TYPE_FATAL = 0x0, + IB_SA_EVENT_TYPE_URGENT = 0x1, + IB_SA_EVENT_TYPE_SECURITY = 0x2, + IB_SA_EVENT_TYPE_SM = 0x3, + IB_SA_EVENT_TYPE_INFO = 0x4, + IB_SA_EVENT_TYPE_EMPTY = 0x7F, + IB_SA_EVENT_TYPE_ALL = 0xFFFF +}; + +enum { + IB_SA_EVENT_PRODUCER_TYPE_CA = 0x1, + IB_SA_EVENT_PRODUCER_TYPE_SWITCH = 0x2, + IB_SA_EVENT_PRODUCER_TYPE_ROUTER = 0x3, + IB_SA_EVENT_PRODUCER_TYPE_CLASS_MANAGER = 0x4, + IB_SA_EVENT_PRODUCER_TYPE_ALL = 0xFFFFFF +}; + +enum { + IB_SA_SM_TRAP_GID_IN_SERVICE = 64, + IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65, + IB_SA_SM_TRAP_CREATE_MC_GROUP = 66, + IB_SA_SM_TRAP_DELETE_MC_GROUP = 67, + IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128, + IB_SA_SM_TRAP_LINK_INTEGRITY = 129, + IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130, + IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131, + IB_SA_SM_TRAP_BAD_M_KEY = 256, + IB_SA_SM_TRAP_BAD_P_KEY = 257, + IB_SA_SM_TRAP_BAD_Q_KEY = 258, + IB_SA_SM_TRAP_SWITCH_BAD_P_KEY = 259, + IB_SA_SM_TRAP_ALL = 0xFFFF +}; + +struct ib_sa_inform { + union ib_gid gid; + __be16 lid_range_begin; + __be16 lid_range_end; + u8 is_generic; + u8 subscribe; + __be16 type; + union { + struct { + __be16 trap_num; + __be32 qpn; + u8 resp_time; + __be32 producer_type; + } generic; + struct { + __be16 device_id; + __be32 qpn; + u8 resp_time; + __be32 vendor_id; + } vendor; + } trap; +}; + +struct ib_sa_notice { + u8 is_generic; + u8 type; + union { + struct { + __be32 producer_type; + __be16 trap_num; + } generic; + struct { + __be32 vendor_id; + __be16 device_id; + } 
vendor; + } trap; + __be16 issuer_lid; + __be16 notice_count; + u8 notice_toggle; + /* + * Align data 16 bits off 64 bit field to match InformInfo definition. + * Data contained within this field will then align properly. + * See IB spec 1.2, sections 13.4.8.2 and 14.2.5.1. + */ + u8 reserved[5]; + u8 data_details[54]; + union ib_gid issuer_gid; +}; + +/* + * SM notice data details for: + * + * IB_SA_SM_TRAP_GID_IN_SERVICE = 64 + * IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65 + * IB_SA_SM_TRAP_CREATE_MC_GROUP = 66 + * IB_SA_SM_TRAP_DELETE_MC_GROUP = 67 + */ +struct ib_sa_notice_data_gid { + u8 reserved[6]; + u8 gid[16]; + u8 padding[32]; +}; + +/* + * SM notice data details for: + * + * IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128 + */ +struct ib_sa_notice_data_port_change { + __be16 lid; + u8 padding[52]; +}; + +/* + * SM notice data details for: + * + * IB_SA_SM_TRAP_LINK_INTEGRITY = 129 + * IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130 + * IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131 + */ +struct ib_sa_notice_data_port_error { + u8 reserved[2]; + __be16 lid; + u8 port_num; + u8 padding[49]; +}; + struct ib_sa_client { atomic_t users; struct completion comp; @@ -381,4 +502,54 @@ int ib_init_ah_from_path(struct ib_devic struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr); +struct ib_inform_info { + void *context; + int (*callback)(int status, + struct ib_inform_info *info, + struct ib_sa_notice *notice); + u16 trap_number; +}; + +/** + * ib_sa_register_inform_info - Registers to receive notice events. + * @device: Device associated with the registration. + * @port_num: Port on the specified device to associate with the registration. + * @trap_number: InformInfo trap number to register for. + * @gfp_mask: GFP mask for memory allocations. + * @callback: User callback invoked once the registration completes and to + * report noticed events. + * @context: User specified context stored with the ib_inform_reg structure. 
+ * + * This call initiates a registration request with the SA for the specified + * trap number. If the operation is started successfully, it returns + * an ib_inform_info structure that is used to track the registration operation. + * Users must free this structure by calling ib_unregister_inform_info, + * even if the operation later fails. (The callback status is non-zero.) + * + * If the registration fails; status will be non-zero. If the registration + * succeeds, the callback status will be zero, but the notice parameter will + * be NULL. If the notice parameter is not NULL, a trap or notice is being + * reported to the user. + * + * A status of -ENETRESET indicates that an error occurred which requires + * reregisteration. + */ +struct ib_inform_info * +ib_sa_register_inform_info(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + u16 trap_number, gfp_t gfp_mask, + int (*callback)(int status, + struct ib_inform_info *info, + struct ib_sa_notice *notice), + void *context); + +/** + * ib_sa_unregister_inform_info - Releases an InformInfo registration. + * @info: InformInfo registration tracking structure. + * + * This call blocks until the registration request is destroyed. It may + * not be called from within the registration callback. + */ +void ib_sa_unregister_inform_info(struct ib_inform_info *info); + #endif /* IB_SA_H */ From ogerlitz at voltaire.com Tue Feb 5 23:27:54 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 06 Feb 2008 09:27:54 +0200 Subject: [ofa-general] IPOIB_FLAG_UMCAST bit default value In-Reply-To: References: Message-ID: <47A9617A.1000908@voltaire.com> Or Gerlitz wrote: > Roland, > I was asked... to check with you what's your take on having the > IPOIB_FLAG_UMCAST bit set by default? 
with the reasoning being that if > there's no rdma-cm user space multicast activity on the node for ipoib > group, then it has no influence and if there is such, the behavior of the > driver with this being flag set is typically what the user want, and where > this is not the case, they turn it off through sysfs. Roland, I did not see any comment from you on this matter, can you let me know your thinking on this? thanks, Or From nye at franceloisirs.com Tue Feb 5 21:50:55 2008 From: nye at franceloisirs.com (hermy tsung) Date: Wed, 06 Feb 2008 05:50:55 +0000 Subject: [ofa-general] Wonderful pills - special package price just 69.99 USD Message-ID: <000901c86893$03ad1a9b$9c0ad1a5@ippgyakm> Special package of mega pills just for 69.99 USD and 4 FOR FREE, till 20th of February 2008 only! Don't miss it here.. -------------- next part -------------- An HTML attachment was scrubbed... URL: From vlad at lists.openfabrics.org Wed Feb 6 03:00:59 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 6 Feb 2008 03:00:59 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080206-0200 daily build status Message-ID: <20080206110059.902B8E601E9@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 
with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.19 Failed: From moshek at voltaire.com Wed Feb 6 03:16:01 2008 From: moshek at voltaire.com (Moshe Kazir) Date: Wed, 6 Feb 2008 13:16:01 +0200 Subject: [ofa-general] OFED-1.3-rc3 in bugzilla In-Reply-To: <47A9617A.1000908@voltaire.com> References: <47A9617A.1000908@voltaire.com> Message-ID: <39C75744D164D948A170E9792AF8E7CAC5AD6F@exil.voltaire.com> How's reponsible to add OFED-1.3-rc3 to the bugzilla ? 
Moshe ____________________________________________________________ Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) Voltaire - The Grid Backbone www.voltaire.com From linrob at juno.com Wed Feb 6 03:44:54 2008 From: linrob at juno.com (Jocelyne Guerra) Date: Wed, 6 Feb 2008 06:44:54 -0500 Subject: [ofa-general] Drugs worldwide at low price Message-ID: <879329968.03340608290393@juno.com> Fill your bedroom with fire once and for all! Power up your prowess with these outstanding male products, and you'll become her #1. Order now, fast worldwide shipping!We are flexible and take care of every customer. http://geocities.com/bufordenglish634/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From pawel.dziekonski at pwr.wroc.pl Wed Feb 6 03:55:42 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Wed, 6 Feb 2008 12:55:42 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: References: <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> <20080205154116.GA19804@cefeid.wcss.wroc.pl> Message-ID: <20080206115542.GC396@cefeid.wcss.wroc.pl> On Tue, 05 Feb 2008 at 10:51:54AM -0500, James Lentini wrote: > Looks like we'll need more data. Can you try the mount again, but this > time with: > > echo 32767 > /proc/sys/sunrpc/rpc_debug > echo 65535 > /proc/sys/sunrpc/nfs_debug > This should produce a lot of data. > One more question. Looking back at your NFS_START_SERVER script, I > noticed that you are starting the NFS server "by hand" and not using > your distro's scripts. I'm wondering if there if there is some > aspect of the server configuration missing. Are you able to do a > normal TCP or UDP mount of the server? 
I'm doing this "by hand" because there is no nfs-utils 1.1.1 RPM package for my distro (Scientific Linux 4.5 - clone of RH 4u5), so I have uninstalled old nfs-utils RPM and compiled+installed new version by hand. regular nfs mount without rdma works like a charm: # mount -v -t nfs 10.2.2.1:/scratch /mnt mount.nfs: trying 10.2.2.1 prog 100003 vers 3 prot TCP port 2049 mount.nfs: trying 10.2.2.1 prog 100005 vers 3 prot UDP port 45151 10.2.2.1:/scratch on /mnt type nfs (rw) # ls -l /mnt total 16 drwx------ 2 root root 16384 Jan 25 16:29 lost+found/ # touch /mnt/qqq # ls -l /mnt total 16 drwx------ 2 root root 16384 Jan 25 16:29 lost+found/ -rw-r--r-- 1 root root 0 Feb 6 12:39 qqq # umount /mnt with rdma: # dmesg -c # echo 32767 > /proc/sys/sunrpc/rpc_debug # echo 65535 > /proc/sys/sunrpc/nfs_debug # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v mount.nfs: timeout set for Wed Feb 6 12:45:12 2008 mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' mount.nfs: internal error dmesg log: https://cefeid.wcss.wroc.pl/d/tmp/dmesg.log cheers, P -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From movey.wmv at zippyvideos.com Wed Feb 6 04:15:34 2008 From: movey.wmv at zippyvideos.com (Brant Cherry) Date: Wed, 6 Feb 2008 13:15:34 +0100 Subject: [ofa-general] Reliable software only Message-ID: <040391775.01976878064268@zippyvideos.com> An HTML attachment was scrubbed... URL: From cust.services at www3paypal.com Wed Feb 6 04:31:40 2008 From: cust.services at www3paypal.com (PayPal) Date: Wed, 06 Feb 2008 15:31:40 +0300 Subject: [ofa-general] Notification of Limited Account Access Message-ID: <1130384585.13653@paypal.com> An HTML attachment was scrubbed... 
URL: From sashak at voltaire.com Wed Feb 6 04:54:09 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 6 Feb 2008 12:54:09 +0000 Subject: [ofa-general] Re: [PATCH] Add node name map, partition config, and QOS policy config files to the "FILES" section of man page (WAS: Re: [PATCH] opensm/man: partition cfg file location) In-Reply-To: <20080204132227.04ceed57.weiny2@llnl.gov> References: <47A6D462.7090904@dev.mellanox.co.il> <20080204184826.GF1392@sashak.voltaire.com> <20080204185119.GG1392@sashak.voltaire.com> <20080204132227.04ceed57.weiny2@llnl.gov> Message-ID: <20080206125409.GA11526@sashak.voltaire.com> On 13:22 Mon 04 Feb , Ira Weiny wrote: > Follow on patch which updates "FILES" section of man page. > > Ira > > From 02af1ba1288b8a1e67b8581777ac4b8ab0dbb071 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Mon, 4 Feb 2008 13:16:16 -0800 > Subject: [PATCH] Add node name map, partition config, and QOS policy config files to the "FILES" > section of man page. > > Signed-off-by: Ira K. Weiny All applied. Thanks. Sasha From dwsouthernfrancem at southernfrance.com Wed Feb 6 06:10:01 2008 From: dwsouthernfrancem at southernfrance.com (Noel Mayfield) Date: Wed, 6 Feb 2008 21:10:01 +0700 Subject: [ofa-general] Medications that you need. Message-ID: <01c86904$a2ab9a80$6135403a@dwsouthernfrancem> Buy Must Have medications at Canada based pharmacy. No prescription at all! Save your money, buy pills immediately. Same quality! http://geocities.com/elvispennington767/ We provide confidential and secure purchase! From dietz at cox.net Wed Feb 6 07:09:25 2008 From: dietz at cox.net (Houston Norwood) Date: Wed, 6 Feb 2008 16:09:25 +0100 Subject: [ofa-general] Start using your software immediately after purchase. Message-ID: <407112303.91656039472535@cox.net> Software full line in cheap&quick OEM e-shopOur aim is to fulfil all our customers' needs by providing them with low-price PC and Mac software solutions. 
We definitely have the necessary software for you whenever you need it for your own PC, corporation or small-scale business.View what we got to propose http://geocities.com/nedcarlson284/Most popular materials in sight are:*Microsoft Windows XP Professional with SP2: Retail price now - $269.99; Our only - $49.95 *Microsoft Office 2007 Enterprise: Retail price for this time - $899.00; Our now just - $79.95 *Microsoft Plus! for Windows XP: Retail price for this time - $29.95; Our just - $10.95 *Quark XPress Passport 7.02: Retail price this day - $749.00; Our just - $69.95 *Corel Procreate KnockOut 2.0: Retail price this day - $99.99; Our only - $19.95 *Microsoft Money Home & Business 7: Retail price today - $89.90; Our just - $39.95 *Corel Procreate KPT Effects: Retail price this day - $199.00; Our only for today - $19.95 *Corel Ventura 10.0: Retail price for now - $395.00; Our only for today - $29.95Check what we have to propose http://geocities.com/nedcarlson284/ Let it go tis but a drum. I have not muchWhether dost thou. No deed at all ofthis that so. She derives her honesty and. [To BERTRAM] I dare not say I. Have whatan honest man should. Is at all timesgood must of. And worth it with addition.. -------------- next part -------------- An HTML attachment was scrubbed... URL: From jlentini at netapp.com Wed Feb 6 07:17:24 2008 From: jlentini at netapp.com (James Lentini) Date: Wed, 6 Feb 2008 10:17:24 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: <20080206115542.GC396@cefeid.wcss.wroc.pl> References: <20080201224530.GA16581@cefeid.wcss.wroc.pl> <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> <20080205154116.GA19804@cefeid.wcss.wroc.pl> <20080206115542.GC396@cefeid.wcss.wroc.pl> Message-ID: On Wed, 6 Feb 2008, Pawel Dziekonski wrote: > On Tue, 05 Feb 2008 at 10:51:54AM -0500, James Lentini wrote: > > > Looks like we'll need more data. 
Can you try the mount again, but this > > time with: > > > > echo 32767 > /proc/sys/sunrpc/rpc_debug > > echo 65535 > /proc/sys/sunrpc/nfs_debug > > This should produce a lot of data. > > > One more question. Looking back at your NFS_START_SERVER script, I > > noticed that you are starting the NFS server "by hand" and not using > > your distro's scripts. I'm wondering if there if there is some > > aspect of the server configuration missing. Are you able to do a > > normal TCP or UDP mount of the server? > > I'm doing this "by hand" because there is no nfs-utils 1.1.1 RPM > package for my distro (Scientific Linux 4.5 - clone of RH 4u5), so I > have uninstalled old nfs-utils RPM and compiled+installed new > version by hand. To be precise, you do not need nfs-utils 1.1.1 installed on your NFS server. You only need the mount.nfs command (not event the whole package) from nfs-util 1.1.1 or greater on your NFS client so you can use the "string" mount API. Our instructions should note this. I'll update them. > regular nfs mount without rdma works like a charm: > > # mount -v -t nfs 10.2.2.1:/scratch /mnt > mount.nfs: trying 10.2.2.1 prog 100003 vers 3 prot TCP port 2049 > mount.nfs: trying 10.2.2.1 prog 100005 vers 3 prot UDP port 45151 > 10.2.2.1:/scratch on /mnt type nfs (rw) > # ls -l /mnt > total 16 > drwx------ 2 root root 16384 Jan 25 16:29 lost+found/ > # touch /mnt/qqq > # ls -l /mnt > total 16 > drwx------ 2 root root 16384 Jan 25 16:29 lost+found/ > -rw-r--r-- 1 root root 0 Feb 6 12:39 qqq > # umount /mnt Good. If I remember correctly, 10.2.2.1, is the IPoIB interface on the server. It looks like you are doing the mount over IPoIB. Given that this works, it would appear that your export list is setup correctly. By the way, do these machines have ethernet interfaces? Are the Ethernet and IPoIB IPs on different subnets? 
> with rdma: > > # dmesg -c > # echo 32767 > /proc/sys/sunrpc/rpc_debug > # echo 65535 > /proc/sys/sunrpc/nfs_debug > # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v > mount.nfs: timeout set for Wed Feb 6 12:45:12 2008 > mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' > mount.nfs: internal error > > dmesg log: https://cefeid.wcss.wroc.pl/d/tmp/dmesg.log The client's connection request is being refused. This looks like a server problem. On the server, what is the output of: cat /proc/fs/nfsd/portlist cat /proc/sys/sunrpc/transports ps x | grep nfsd showmount -e 10.2.2.1 From isowo011 at yahoo.ca Wed Feb 6 07:32:08 2008 From: isowo011 at yahoo.ca (Rollin Odom) Date: Wed, 6 Feb 2008 17:32:08 +0200 Subject: [ofa-general] Drugs worldwide at low price Message-ID: <622955528.88448511235704@yahoo.ca> Todays special offers:/ I @ G R A only 1.56 per 100mg http://geocities.com/rickhall338/And special ED packs... ... and many many more.Click here to visit our NEW ONLINE MEDS STORE http://geocities.com/rickhall338/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From pawel.dziekonski at pwr.wroc.pl Wed Feb 6 07:47:01 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Wed, 6 Feb 2008 16:47:01 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: References: <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> <20080205154116.GA19804@cefeid.wcss.wroc.pl> <20080206115542.GC396@cefeid.wcss.wroc.pl> Message-ID: <20080206154701.GA11384@cefeid.wcss.wroc.pl> On Wed, 06 Feb 2008 at 10:17:24AM -0500, James Lentini wrote: > > > One more question. Looking back at your NFS_START_SERVER script, I > > > noticed that you are starting the NFS server "by hand" and not using > > > your distro's scripts. I'm wondering if there if there is some > > > aspect of the server configuration missing. 
Are you able to do a > > > normal TCP or UDP mount of the server? > > > > I'm doing this "by hand" because there is no nfs-utils 1.1.1 RPM > > package for my distro (Scientific Linux 4.5 - clone of RH 4u5), so I > > have uninstalled old nfs-utils RPM and compiled+installed new > > version by hand. > > To be precise, you do not need nfs-utils 1.1.1 installed on your NFS > server. You only need the mount.nfs command (not event the whole > package) from nfs-util 1.1.1 or greater on your NFS client so you can > use the "string" mount API. oh man, this is a good news. > > regular nfs mount without rdma works like a charm: > > > > # mount -v -t nfs 10.2.2.1:/scratch /mnt > > mount.nfs: trying 10.2.2.1 prog 100003 vers 3 prot TCP port 2049 > > mount.nfs: trying 10.2.2.1 prog 100005 vers 3 prot UDP port 45151 > > 10.2.2.1:/scratch on /mnt type nfs (rw) > > # ls -l /mnt > > total 16 > > drwx------ 2 root root 16384 Jan 25 16:29 lost+found/ > > # touch /mnt/qqq > > # ls -l /mnt > > total 16 > > drwx------ 2 root root 16384 Jan 25 16:29 lost+found/ > > -rw-r--r-- 1 root root 0 Feb 6 12:39 qqq > > # umount /mnt > > Good. If I remember correctly, 10.2.2.1, is the IPoIB interface on the > server. It looks like you are doing the mount over IPoIB. Given that > this works, it would appear that your export list is setup correctly. > > By the way, do these machines have ethernet interfaces? Are the > Ethernet and IPoIB IPs on different subnets? 
# ip a 1: lo: mtu 16436 qdisc noqueue link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo 2: eth0: mtu 1500 qdisc pfifo_fast qlen 1000 link/ether 00:30:48:7a:42:24 brd ff:ff:ff:ff:ff:ff inet 10.255.255.221/8 brd 10.255.255.255 scope global eth0 3: eth1: mtu 1500 qdisc pfifo_fast qlen 1000 link/ether 00:30:48:7a:42:25 brd ff:ff:ff:ff:ff:ff 4: ib0: mtu 2044 qdisc pfifo_fast qlen 128 link/[32] 80:00:04:04:fe:80:00:00:00:00:00:00:00:30:48:7a:42:24:00:01 brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff inet 10.2.2.1/24 brd 10.2.2.255 scope global ib0 eth0 has to be in /8 network but I can change IPs on IB network if there is such a need. > The client's connection request is being refused. This looks like a > server problem. > > On the server, what is the output of: > > cat /proc/fs/nfsd/portlist # cat /proc/fs/nfsd/portlist tcp 0.0.0.0, port=2049 udp 0.0.0.0, port=2049 # echo rdma 2050 > /proc/fs/nfsd/portlist # cat /proc/fs/nfsd/portlist tcp 0.0.0.0, port=2049 udp 0.0.0.0, port=2049 :o > cat /proc/sys/sunrpc/transports # cat /proc/sys/sunrpc/transports tcp 1048576 udp 32768 rdma 1048576 > ps x | grep nfsd # ps x | grep nfsd 5155 ? S 0:00 [nfsd] 5727 pts/0 R+ 0:00 grep nfsd > showmount -e 10.2.2.1 # showmount -e 10.2.2.1 Export list for 10.2.2.1: /scratch 10.2.2.2,10.255.255.222 -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From dwsailm at sail.it Wed Feb 6 08:03:02 2008 From: dwsailm at sail.it (Natalia Day) Date: Wed, 6 Feb 2008 18:03:02 +0200 Subject: [ofa-general] Purchase popular impotency treatment drugs in Canada for the best Net prices. 
Message-ID: <01c868ea$83a00f00$fca0ec58@dwsailm> According to the results of monitoring carried out by the Quality Research Organization, ŤCanadianPharmacyť online drugstore has the best level of service and the cheapest prices among the 50 online drugstores, while the medications are of the extremely high quality. There is a great selection of modern pharmaceutical products! The utmost care is taken about security of your information. You purchase will be 100% confidential. Prompt delivery, personal approach to each customer! http://geocities.com/chancealexander202/ Visit ŤCanadianPharmacyť drugstore and you will definitely make the order! From diffdrum at sympatico.ca Wed Feb 6 08:08:44 2008 From: diffdrum at sympatico.ca (Lionel Whalen) Date: Thu, 7 Feb 2008 00:08:44 +0800 Subject: [ofa-general] Die Software ohne Probleme mit Aufstellung und hohen Preisen Message-ID: <01c8691d$9a138e00$f88815be@diffdrum> Die Software in kurzer Zeit und fur wenig Geld bekommen, ist es moglich? Warum nicht. Hier sind die Programmen auf allen europaischen Sprachen uberlassen und fur Windows und Macintosh vorherbestimmt. Alle hier prasentierten Produkte der Software sind original und vollig.Sie stellen jedes Programm leicht auf mit der Hilfe der professionellen Konsultation des Anwenderdienstes. Wenn Sie Fragen haben, bekommen Sie schnelle Antworte. Die Ruckzahlung ist moglich. Sie kaufen nur die ausgezeichnet funktionierende Software http://geocities.com/willdyer744/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From a-andrei at ackurat.se Wed Feb 6 08:58:33 2008 From: a-andrei at ackurat.se (Ulysses Read) Date: Wed, 6 Feb 2008 13:58:33 -0300 Subject: [ofa-general] Chatting online Message-ID: <01c868c8$5c384a80$afd70fbd@a-andrei> Hello! I am tired tonight. I am nice girl that would like to chat with you. Email me at Madeleine at EHealThies.info only, because I am using my friend's email to write this. 
Will send some of my pictures From tziporet at dev.mellanox.co.il Wed Feb 6 08:25:06 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 06 Feb 2008 18:25:06 +0200 Subject: [ofa-general] OFED 1.3 rc4 update Message-ID: <47A9DF62.8090803@mellanox.co.il> Hi, We will have OFED 1.3-rc4 tomorrow after one more night of regression It will include: 1. IPoIB: Non-SRQ for CM mode 2. IPOIB: 4K MTU 3. IPoIB - Small messages improvements Note that today's latest build will include these features too if someone wants to test it today Tziporet From mashirle at us.ibm.com Tue Feb 5 22:33:42 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Tue, 05 Feb 2008 22:33:42 -0800 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <47A9DF62.8090803@mellanox.co.il> References: <47A9DF62.8090803@mellanox.co.il> Message-ID: <1202279622.6286.6.camel@localhost.localdomain> On Wed, 2008-02-06 at 18:25 +0200, Tziporet Koren wrote: > Hi, > > We will have OFED 1.3-rc4 tomorrow after one more night of regression > > It will include: > > 1. IPoIB: Non-SRQ for CM mode > 2. IPOIB: 4K MTU > 3. IPoIB - Small messages improvements > > Note that today's latest build will include these features too if > someone wants to test it today > > Tziporet Thanks Tziporet. We will test it right after it's out. Thanks Shirley From tziporet at dev.mellanox.co.il Wed Feb 6 08:45:38 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 06 Feb 2008 18:45:38 +0200 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <1202279622.6286.6.camel@localhost.localdomain> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> Message-ID: <47A9E432.7070508@mellanox.co.il> Shirley Ma wrote: > > Thanks Tziporet. We will test it right after it's out. 
> > You can start use the lates build - http://www.openfabrics.org/builds/ofed-1.3/OFED-1.3-20080206-0751.tgz Tziporet From olaf.kirch at oracle.com Wed Feb 6 08:56:10 2008 From: olaf.kirch at oracle.com (Olaf Kirch) Date: Wed, 6 Feb 2008 17:56:10 +0100 Subject: [ofa-general] Re: Standard RDS port number In-Reply-To: <479C5A45.2010701@dev.mellanox.co.il> References: <479C5A45.2010701@dev.mellanox.co.il> Message-ID: <200802061756.12092.olaf.kirch@oracle.com> Hi Yevgeny, On Sunday 27 January 2008 11:17, Yevgeny Kliteynik wrote: > I noticed the following in the rds.h: > > /* > * XXX randomly chosen, but at least seems to be unused: > * # 18464-18768 Unassigned > * We should do better. We want a reserved port to discourage unpriv'ed > * userspace from listening. > * > * port 18633 was the version that had ack frames on the wire. > */ > #define RDS_PORT 18634 > > I'm using this port number to recognize RDS > connection in QoS manager (OpenSM). > > How 'solid' is this RDS_PORT definition? > Will it be standardized somehow? > Do you have some plans to change it? There are no plans to change the port number right now. In fact, the TCP support is currently rather instable, so that we disabled it for now. Olaf -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax From eli at mellanox.co.il Wed Feb 6 09:45:40 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Wed, 06 Feb 2008 19:45:40 +0200 Subject: [ofa-general] 4K MTU patch review Message-ID: <1202319940.13132.61.camel@mtls03> Hi Shirley, I have created the following patch based on your patch series such that it touches less of the previous code and is thus smaller. Please review it and see if you think we can make any use of it. 
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-06 18:57:34.847708000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-06 19:22:02.372113000 +0200 @@ -56,11 +56,11 @@ /* constants */ enum { - IPOIB_PACKET_SIZE = 2048, - IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, - IPOIB_ENCAP_LEN = 4, + IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN, + IPOIB_UD_RX_SG = 2, /* for 4K MTU */ + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, @@ -141,9 +141,9 @@ struct ipoib_mcast { struct net_device *dev; }; -struct ipoib_rx_buf { +struct ipoib_sg_rx_buf { struct sk_buff *skb; - u64 mapping; + u64 mapping[IPOIB_UD_RX_SG]; }; struct ipoib_tx_buf { @@ -337,7 +337,7 @@ struct ipoib_dev_priv { struct net_device *dev; struct ib_recv_wr rx_wr_draft[UD_POST_RCV_COUNT]; - struct ib_sge sglist_draft[UD_POST_RCV_COUNT]; + struct ib_sge sglist_draft[UD_POST_RCV_COUNT][IPOIB_UD_RX_SG]; unsigned int rx_outst; struct napi_struct napi; @@ -378,7 +378,7 @@ struct ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; @@ -412,6 +412,7 @@ struct ipoib_dev_priv { struct ipoib_ethtool_st etool; struct timer_list poll_timer; struct ib_ah *own_ah; + int max_ib_mtu; }; struct ipoib_ah { @@ -452,6 +453,28 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES) + +static inline int ipoib_ud_need_sg(int ib_mtu) +{ + return (IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE) ? 
1 : 0; +} + +static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_UD_RX_SG]) +{ + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, + DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE, + DMA_FROM_DEVICE); + } else + ib_dma_unmap_single(priv->ca, mapping[0], + IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), + DMA_FROM_DEVICE); +} + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. The ALIGN() expression here makes Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-06 18:57:34.007682000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-06 19:23:08.903503000 +0200 @@ -96,14 +96,35 @@ static void clean_pending_receives(struc for (i = 0; i < priv->rx_outst; ++i) { id = priv->rx_wr_draft[i].wr_id & ~IPOIB_OP_RECV; - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[i].mapping); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } priv->rx_outst = 0; } +static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv, + struct sk_buff *skb, unsigned int length) +{ + unsigned int size; + + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; + + /* put header into skb */ + size = min(length, (unsigned)IPOIB_UD_HEAD_SIZE); + __skb_put(skb, size); + + length -= size; + + frag->size = length; + skb->data_len += length; + skb->len += length; + skb->truesize += length; + } else + skb_put(skb, length); +} + static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -111,7 +132,9 @@ static int ipoib_ib_post_receive(struct int ret = 0; int i = priv->rx_outst; - 
priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; + priv->sglist_draft[i][0].addr = priv->rx_ring[id].mapping[0]; + priv->sglist_draft[i][1].addr = priv->rx_ring[id].mapping[1]; + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; if (++priv->rx_outst == UD_POST_RCV_COUNT) { ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); @@ -120,8 +143,8 @@ static int ipoib_ib_post_receive(struct ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); while (bad_wr) { id = bad_wr->wr_id & ~IPOIB_OP_RECV; - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } @@ -136,11 +159,17 @@ static int ipoib_alloc_rx_skb(struct net { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; - u64 addr; + int buf_size; + u64 *mapping = priv->rx_ring[id].mapping; + + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + buf_size = IPOIB_UD_HEAD_SIZE; + else + buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) - return -ENOMEM; + skb = dev_alloc_skb(buf_size + 4); + if (unlikely(!skb)) + return -ENOMEM; /* * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte @@ -149,17 +178,33 @@ static int ipoib_alloc_rx_skb(struct net */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { - dev_kfree_skb_any(skb); - return -EIO; + mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) + goto out_free; + + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + struct page *page = alloc_page(GFP_ATOMIC); + + if (!page) + goto partial_error; + + skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE); + mapping[1] = ib_dma_map_page(priv->ca, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if 
(unlikely(ib_dma_mapping_error(priv->ca, mapping[1]))) + goto partial_error; } - priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; + priv->rx_ring[id].skb = skb; + return 0; - return 0; +partial_error: + ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE); + +out_free: + dev_kfree_skb_any(skb); + return -ENOMEM; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -186,7 +231,6 @@ static void ipoib_ib_handle_rx_wc(struct struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; struct sk_buff *skb; - u64 addr; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -198,15 +242,13 @@ static void ipoib_ib_handle_rx_wc(struct } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; @@ -231,9 +273,9 @@ static void ipoib_ib_handle_rx_wc(struct ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); - skb_put(skb, wc->byte_len); + ipoib_ud_skb_put_frags(priv, skb, wc->byte_len); skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -828,15 +870,13 @@ int ipoib_ib_dev_stop(struct net_device * all our pending work requests. 
*/ for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; + struct ipoib_sg_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; if (rx_req->skb) { - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); dev_kfree_skb_any(rx_req->skb); rx_req->skb = NULL; } Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-02-06 18:57:29.657377000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-02-06 19:25:56.553407000 +0200 @@ -193,7 +193,7 @@ static int ipoib_change_mtu(struct net_d return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -981,10 +981,6 @@ static void ipoib_setup(struct net_devic dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1130,6 +1126,7 @@ static struct net_device *ipoib_add_port struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1140,6 +1137,18 @@ static struct net_device *ipoib_add_port priv->dev->features |= NETIF_F_HIGHDMA; + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, 
port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-02-06 18:56:03.372135000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-02-06 19:22:02.386120000 +0200 @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_s return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-06 18:57:34.010682000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-06 19:22:02.373118000 +0200 @@ -151,7 +151,7 @@ int ipoib_transport_dev_init(struct net_ .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = dev->features & NETIF_F_SG ? 
MAX_SKB_FRAGS + 1 : 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_UD, @@ -227,16 +227,27 @@ int ipoib_transport_dev_init(struct net_ priv->tx_wr.send_flags = IB_SEND_SIGNALED; for (i = 0; i < UD_POST_RCV_COUNT; ++i) { - priv->sglist_draft[i].length = IPOIB_BUF_SIZE; - priv->sglist_draft[i].lkey = priv->mr->lkey; - - priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i]; - priv->rx_wr_draft[i].num_sge = 1; + priv->sglist_draft[i][0].lkey = priv->mr->lkey; + priv->sglist_draft[i][1].lkey = priv->mr->lkey; + priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i][0]; if (i < UD_POST_RCV_COUNT - 1) priv->rx_wr_draft[i].next = &priv->rx_wr_draft[i + 1]; } priv->rx_wr_draft[i].next = NULL; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + for (i = 0; i < UD_POST_RCV_COUNT; ++i) { + priv->sglist_draft[i][0].length = IPOIB_UD_HEAD_SIZE; + priv->sglist_draft[i][1].length = PAGE_SIZE; + priv->rx_wr_draft[i].num_sge = IPOIB_UD_RX_SG; + } + } else { + for (i = 0; i < UD_POST_RCV_COUNT; ++i) { + priv->sglist_draft[i][0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + priv->rx_wr_draft[i].num_sge = 1; + } + } + return 0; out_free_scq: From sweitzen at cisco.com Wed Feb 6 10:03:01 2008 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Wed, 6 Feb 2008 10:03:01 -0800 Subject: [ofa-general] OFED-1.3-rc3 in bugzilla In-Reply-To: <39C75744D164D948A170E9792AF8E7CAC5AD6F@exil.voltaire.com> References: <47A9617A.1000908@voltaire.com> <39C75744D164D948A170E9792AF8E7CAC5AD6F@exil.voltaire.com> Message-ID: I added versions 1.3rc3 and 1.3rc4 to bugzilla. 
Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems > -----Original Message----- > From: general-bounces at lists.openfabrics.org > [mailto:general-bounces at lists.openfabrics.org] On Behalf Of > Moshe Kazir > Sent: Wednesday, February 06, 2008 3:16 AM > To: general at lists.openfabrics.org > Subject: [ofa-general] OFED-1.3-rc3 in bugzilla > > > Who's responsible for adding OFED-1.3-rc3 to the bugzilla? > > Moshe > > ____________________________________________________________ > Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) > > Voltaire - The Grid Backbone > > www.voltaire.com > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From swise at opengridcomputing.com Wed Feb 6 10:05:19 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 06 Feb 2008 12:05:19 -0600 Subject: [ofa-general] [PATCH 2.6.25] cxgb3: Handle ARP completions that mark neighbors stale. Message-ID: <20080206180519.4680.34741.stgit@dell3.ogc.int> cxgb3: Handle ARP completions that mark neighbors stale. When ARP completes due to a request rather than a reply the neighbor is marked NUD_STALE instead of reachable (see arp_process()). The handler for the resulting netevent needs to check also for NUD_STALE. Failure to use the arp entry can cause RDMA connection failures. 
Signed-off-by: Steve Wise --- drivers/net/cxgb3/l2t.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/cxgb3/l2t.c b/drivers/net/cxgb3/l2t.c index d660af7..d80bbdb 100644 --- a/drivers/net/cxgb3/l2t.c +++ b/drivers/net/cxgb3/l2t.c @@ -404,7 +404,7 @@ found: if (neigh->nud_state & NUD_FAILED) { arpq = e->arpq_head; e->arpq_head = e->arpq_tail = NULL; - } else if (neigh_is_connected(neigh)) + } else if (neigh->nud_state & (NUD_CONNECTED|NUD_STALE)) setup_l2e_send_pending(dev, NULL, e); } else { e->state = neigh_is_connected(neigh) ? From xma at us.ibm.com Wed Feb 6 10:16:59 2008 From: xma at us.ibm.com (Shirley Ma) Date: Wed, 6 Feb 2008 10:16:59 -0800 Subject: [ofa-general] 4K MTU patch review In-Reply-To: <1202319940.13132.61.camel@mtls03> Message-ID: Hello Eli, I didn't see much difference with the one I submitted to you and Tziporet for review besides the lines below. What's the purpose of rewriting the patch? You can do a diff with the patch currently in the Git-tree and the one you have. + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; + + /* put header into skb */ + size = min(length, (unsigned)IPOIB_UD_HEAD_SIZE); + __skb_put(skb, size); + Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From changquing.tang at hp.com Wed Feb 6 10:14:35 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Wed, 6 Feb 2008 18:14:35 +0000 Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC receive-only qp support In-Reply-To: <200802030949.44943.jackm@dev.mellanox.co.il> References: <200802030949.44943.jackm@dev.mellanox.co.il> Message-ID: Jack: When I create the XRC send QP, I think this is a sending QP only, so I assign recv_cq to NULL. However, I got the following segfault: 0 0x00002aaaab3a68bd in ibv_cmd_create_qp (pd=0x4d226c0, qp=0x4d837b0, attr=0x7fffe3fd4f20, cmd=0x7fffe3fd4e50, cmd_size=88, resp=0x7fffe3fd4e30, resp_size=32) at src/cmd.c:633 633 cmd->recv_cq_handle = attr->recv_cq->handle; (gdb) bt #0 0x00002aaaab3a68bd in ibv_cmd_create_qp (pd=0x4d226c0, qp=0x4d837b0, attr=0x7fffe3fd4f20, cmd=0x7fffe3fd4e50, cmd_size=88, resp=0x7fffe3fd4e30, resp_size=32) at src/cmd.c:633 #1 0x00002aaaabbdb86a in mlx4_create_qp (pd=0x4d226c0, attr=0x7fffe3fd4f20) at src/verbs.c:427 #2 0x00002aaaab3aa4d2 in __ibv_create_qp (pd=0x4d226c0, qp_init_attr=0x7fffe3fd4e50) at src/verbs.c:431 #3 0x00002aaaaad86d0d in xrc_create_procqp (ibvproc=0x4d20090, proc=0x4d1f570, fl_send=1, fl_recv=1) at ../../../../../src/lib/hpmp/hpmpibvinit.c:4089 When I set recv_cq the same as send_cq, I don't see the segfault. Is this a situation the driver needs to catch? --CQ > -----Original Message----- > From: general-bounces at lists.openfabrics.org > [mailto:general-bounces at lists.openfabrics.org] On Behalf Of > Jack Morgenstein > Sent: Sunday, February 03, 2008 1:50 AM > To: Roland Dreier > Cc: general at lists.openfabrics.org > Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC > receive-only qp support > > ib/core: Implement XRC receive-only QPs for userspace apps. > > Added creation of XRC receive-only QPs for userspace, which > reside in kernel space (user cannot post-to or poll these QPs). 
> > Motivation: MPI community requires XRC receive QPs which > will not be destroyed when the creating process terminates. > > Solution: Userspace requests that a QP be created in kernel space. > Each userspace process using that QP (i.e. receiving packets > on an XRC SRQ via the qp), registers with that QP (-- the > creator is also registered, whether or not it is a user of > the QP). When the last userspace user unregisters with the > QP, it is destroyed. Unregistration is also part of > userspace process cleanup, so there is no leakage. > > This patch implements the kernel procedures to implement the following > (new) libibverbs API: > ibv_create_xrc_rcv_qp > ibv_modify_xrc_rcv_qp > ibv_query_xrc_rcv_qp > ibv_reg_xrc_rcv_qp > ibv_unreg_xrc_rcv_qp > > In addition, the patch implements the foundation for > distributing XRC-receive-only QP events to userspace > processes registered with that QP. > > Finally, the patch modifies ib_uverbs_close_xrc_domain() to > return BUSY if any resources are still in use by the process, > so that the XRC rcv-only QP cleanup can operate properly. > > V2: > Fixed bug in ib_uverbs_close_xrc_domain. > We need to allow the process to successfully close its copy > of the domain, even if it still has undestroyed XRC QPs -- > these will continue to operate, although it will not be > possible to create new ones (there will be no Oops). > > However, we need to check that there are no outstanding > xrc-qp-registrations: > the cleanup procedure for this depends on the xrc domain > still being accessible in this process in order to perform > all needed un-registrations (and thus prevent resource leakage). > > V3: > Fix thinko in ib_uverbs_reg_xrc_rcv_qp, > ib_uverbs_unreg_xrc_rcv_qp, and ib_uverbs_modify_xrc_rcv_qp: > on success, incorrectly returned 0 instead of input length. 
> > Signed-off-by: Jack Morgenstein > > Index: infiniband/include/rdma/ib_verbs.h > =================================================================== > --- infiniband.orig/include/rdma/ib_verbs.h 2008-01-28 > 12:20:55.000000000 +0200 > +++ infiniband/include/rdma/ib_verbs.h 2008-01-28 12:22:09.000000000 > +++ +0200 > @@ -285,6 +285,10 @@ enum ib_event_type { > IB_EVENT_CLIENT_REREGISTER > }; > > +enum ib_event_flags { > + IB_XRC_QP_EVENT_FLAG = 0x80000000, }; > + > struct ib_event { > struct ib_device *device; > union { > @@ -292,6 +296,7 @@ struct ib_event { > struct ib_qp *qp; > struct ib_srq *srq; > u8 port_num; > + u32 xrc_qp_num; > } element; > enum ib_event_type event; > }; > @@ -492,6 +497,7 @@ enum ib_qp_type { > > enum qp_create_flags { > QP_CREATE_LSO = 1 << 0, > + XRC_RCV_QP = 1 << 1, > }; > > struct ib_qp_init_attr { > @@ -723,6 +729,7 @@ struct ib_ucontext { > struct list_head srq_list; > struct list_head ah_list; > struct list_head xrc_domain_list; > + struct list_head xrc_reg_qp_list; > int closing; > }; > > @@ -744,6 +751,12 @@ struct ib_udata { > size_t outlen; > }; > > +struct ib_uxrc_rcv_object { > + struct list_head list; /* link to > context's list */ > + u32 qp_num; > + u32 domain_handle; > +}; > + > struct ib_pd { > struct ib_device *device; > struct ib_uobject *uobject; > @@ -1053,6 +1066,23 @@ struct ib_device { > struct > ib_ucontext *context, > struct > ib_udata *udata); > int (*dealloc_xrcd)(struct > ib_xrcd *xrcd); > + int > (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr, > + u32* qp_num); > + int > (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd, > + u32 qp_num, > + > struct ib_qp_attr *attr, > + int > attr_mask); > + int (*query_xrc_rcv_qp)(struct > ib_xrcd *xrcd, > + u32 qp_num, > + struct > ib_qp_attr *attr, > + int attr_mask, > + struct > ib_qp_init_attr *init_attr); > + int (*reg_xrc_rcv_qp)(struct > ib_xrcd *xrcd, > + void *context, > + u32 qp_num); > + int (*unreg_xrc_rcv_qp)(struct > ib_xrcd *xrcd, > + void *context, > + 
u32 qp_num); > > struct ib_dma_mapping_ops *dma_ops; > > Index: infiniband/drivers/infiniband/core/uverbs_main.c > =================================================================== > --- infiniband.orig/drivers/infiniband/core/uverbs_main.c > 2008-01-28 12:20:55.000000000 +0200 > +++ infiniband/drivers/infiniband/core/uverbs_main.c > 2008-01-28 12:20:56.000000000 +0200 > @@ -114,6 +114,11 @@ static ssize_t (*uverbs_cmd_table[])(str > [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = > ib_uverbs_create_xrc_srq, > [IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = > ib_uverbs_open_xrc_domain, > [IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = > ib_uverbs_close_xrc_domain, > + [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = > ib_uverbs_create_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = > ib_uverbs_modify_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = > ib_uverbs_query_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = > ib_uverbs_reg_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = > ib_uverbs_unreg_xrc_rcv_qp, > }; > > static struct vfsmount *uverbs_event_mnt; @@ -191,6 +196,7 > @@ static int ib_uverbs_cleanup_ucontext(st > struct ib_ucontext *context) { > struct ib_uobject *uobj, *tmp; > + struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1; > > if (!context) > return 0; > @@ -251,6 +257,13 @@ static int ib_uverbs_cleanup_ucontext(st > kfree(uobj); > } > > + list_for_each_entry_safe(xrc_qp_obj, tmp1, > &context->xrc_reg_qp_list, list) { > + list_del(&xrc_qp_obj->list); > + ib_uverbs_cleanup_xrc_rcv_qp(file, > xrc_qp_obj->domain_handle, > + xrc_qp_obj->qp_num); > + kfree(xrc_qp_obj); > + } > + > mutex_lock(&file->device->ib_dev->xrcd_table_mutex); > list_for_each_entry_safe(uobj, tmp, > &context->xrc_domain_list, list) { > struct ib_xrcd *xrcd = uobj->object; @@ > -506,6 +519,12 @@ void ib_uverbs_event_handler(struct ib_e > NULL, NULL); } > > +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, void > +*context_ptr) { > + ib_uverbs_async_handler(context_ptr, > 
event->element.xrc_qp_num, > + event->event, NULL, NULL); } > + > struct file *ib_uverbs_alloc_event_file(struct > ib_uverbs_file *uverbs_file, > int is_async, int *fd) { > Index: infiniband/drivers/infiniband/core/uverbs_cmd.c > =================================================================== > --- infiniband.orig/drivers/infiniband/core/uverbs_cmd.c > 2008-01-28 12:20:55.000000000 +0200 > +++ infiniband/drivers/infiniband/core/uverbs_cmd.c > 2008-01-28 12:20:56.000000000 +0200 > @@ -315,6 +315,7 @@ ssize_t ib_uverbs_get_context(struct ib_ > INIT_LIST_HEAD(&ucontext->srq_list); > INIT_LIST_HEAD(&ucontext->ah_list); > INIT_LIST_HEAD(&ucontext->xrc_domain_list); > + INIT_LIST_HEAD(&ucontext->xrc_reg_qp_list); > ucontext->closing = 0; > > resp.num_comp_vectors = > file->device->num_comp_vectors; @@ -1080,6 +1081,7 @@ ssize_t > ib_uverbs_create_qp(struct ib_uv > goto err_put; > } > > + attr.create_flags = 0; > attr.event_handler = ib_uverbs_qp_event_handler; > attr.qp_context = file; > attr.send_cq = scq; > @@ -2561,6 +2563,7 @@ ssize_t ib_uverbs_close_xrc_domain(struc > int out_len) { > struct ib_uverbs_close_xrc_domain cmd; > + struct ib_uxrc_rcv_object *tmp; > struct ib_uobject *uobj; > struct ib_xrcd *xrcd = NULL; > struct inode *inode = NULL; > @@ -2576,6 +2579,18 @@ ssize_t ib_uverbs_close_xrc_domain(struc > goto err_unlock_mutex; > } > > + mutex_lock(&file->mutex); > + list_for_each_entry(tmp, > &file->ucontext->xrc_reg_qp_list, list) > + if (cmd.xrcd_handle == tmp->domain_handle) { > + ret = -EBUSY; > + break; > + } > + mutex_unlock(&file->mutex); > + if (ret) { > + put_uobj_write(uobj); > + goto err_unlock_mutex; > + } > + > xrcd = (struct ib_xrcd *) (uobj->object); > inode = xrcd->inode; > > @@ -2611,7 +2626,7 @@ err_unlock_mutex: > } > > void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, > - struct ib_xrcd *xrcd) > + struct ib_xrcd *xrcd) > { > struct inode *inode = NULL; > int ret = 0; > @@ -2625,4 +2640,353 @@ void ib_uverbs_dealloc_xrcd(struct 
ib_de > xrcd_table_delete(ib_dev, inode); } > > +ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_create_xrc_rcv_qp cmd; > + struct ib_uverbs_create_xrc_rcv_qp_resp resp; > + struct ib_uxrc_rcv_object *obj; > + struct ib_qp_init_attr init_attr; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + u32 qp_num; > + int err; > + > + if (out_len < sizeof resp) > + return -ENOSPC; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + obj = kmalloc(sizeof *obj, GFP_KERNEL); > + if (!obj) > + return -ENOMEM; > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + err = -EINVAL; > + goto err_out; > + } > + > + memset(&init_attr, 0, sizeof init_attr); > + init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler; > + init_attr.qp_context = file; > + init_attr.srq = NULL; > + init_attr.sq_sig_type = cmd.sq_sig_all ? > IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; > + init_attr.qp_type = IB_QPT_XRC; > + init_attr.xrc_domain = xrcd; > + init_attr.create_flags = XRC_RCV_QP; > + > + init_attr.cap.max_send_wr = 1; > + init_attr.cap.max_recv_wr = 0; > + init_attr.cap.max_send_sge = 1; > + init_attr.cap.max_recv_sge = 0; > + init_attr.cap.max_inline_data = 0; > + > + err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num); > + if (err) > + goto err_put; > + > + memset(&resp, 0, sizeof resp); > + resp.qpn = qp_num; > + > + if (copy_to_user((void __user *) (unsigned long) cmd.response, > + &resp, sizeof resp)) { > + err = -EFAULT; > + goto err_destroy; > + } > + > + atomic_inc(&xrcd->usecnt); > + put_xrcd_read(xrcd_uobj); > + obj->qp_num = qp_num; > + obj->domain_handle = cmd.xrc_domain_handle; > + mutex_lock(&file->mutex); > + list_add_tail(&obj->list, &file->ucontext->xrc_reg_qp_list); > + mutex_unlock(&file->mutex); > + > + return in_len; > + > +err_destroy: > + xrcd->device->unreg_xrc_rcv_qp(xrcd, file, 
qp_num); > +err_put: > + put_xrcd_read(xrcd_uobj); > +err_out: > + kfree(obj); > + return err; > +} > + > +ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_modify_xrc_rcv_qp cmd; > + struct ib_qp_attr *attr; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int err; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + attr = kmalloc(sizeof *attr, GFP_KERNEL); > + if (!attr) > + return -ENOMEM; > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + kfree(attr); > + return -EINVAL; > + } > + > + memset(attr, 0, sizeof *attr); > + attr->qp_state = cmd.qp_state; > + attr->cur_qp_state = cmd.cur_qp_state; > + attr->qp_access_flags = cmd.qp_access_flags; > + attr->pkey_index = cmd.pkey_index; > + attr->port_num = cmd.port_num; > + attr->path_mtu = cmd.path_mtu; > + attr->path_mig_state = cmd.path_mig_state; > + attr->qkey = cmd.qkey; > + attr->rq_psn = cmd.rq_psn; > + attr->sq_psn = cmd.sq_psn; > + attr->dest_qp_num = cmd.dest_qp_num; > + attr->alt_pkey_index = cmd.alt_pkey_index; > + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; > + attr->max_rd_atomic = cmd.max_rd_atomic; > + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; > + attr->min_rnr_timer = cmd.min_rnr_timer; > + attr->port_num = cmd.port_num; > + attr->timeout = cmd.timeout; > + attr->retry_cnt = cmd.retry_cnt; > + attr->rnr_retry = cmd.rnr_retry; > + attr->alt_port_num = cmd.alt_port_num; > + attr->alt_timeout = cmd.alt_timeout; > + > + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); > + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; > + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; > + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; > + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; > + attr->ah_attr.dlid = cmd.dest.dlid; > + attr->ah_attr.sl = cmd.dest.sl; > + 
attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; > + attr->ah_attr.static_rate = cmd.dest.static_rate; > + attr->ah_attr.ah_flags = > cmd.dest.is_global ? IB_AH_GRH : 0; > + attr->ah_attr.port_num = cmd.dest.port_num; > + > + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); > + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; > + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; > + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; > + attr->alt_ah_attr.grh.traffic_class = > cmd.alt_dest.traffic_class; > + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; > + attr->alt_ah_attr.sl = cmd.alt_dest.sl; > + attr->alt_ah_attr.src_path_bits = > cmd.alt_dest.src_path_bits; > + attr->alt_ah_attr.static_rate = > cmd.alt_dest.static_rate; > + attr->alt_ah_attr.ah_flags = > cmd.alt_dest.is_global ? IB_AH_GRH : 0; > + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; > + > + err = xrcd->device->modify_xrc_rcv_qp(xrcd, > cmd.qp_num, attr, cmd.attr_mask); > + put_xrcd_read(xrcd_uobj); > + kfree(attr); > + return err ? 
err : in_len; > +} > + > +ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_query_xrc_rcv_qp cmd; > + struct ib_uverbs_query_qp_resp resp; > + struct ib_qp_attr *attr; > + struct ib_qp_init_attr *init_attr; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int ret; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + attr = kmalloc(sizeof *attr, GFP_KERNEL); > + init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); > + if (!attr || !init_attr) { > + ret = -ENOMEM; > + goto out; > + } > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + ret = -EINVAL; > + goto out; > + } > + > + ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr, > + cmd.attr_mask, > init_attr); > + > + put_xrcd_read(xrcd_uobj); > + > + if (ret) > + goto out; > + > + memset(&resp, 0, sizeof resp); > + resp.qp_state = attr->qp_state; > + resp.cur_qp_state = attr->cur_qp_state; > + resp.path_mtu = attr->path_mtu; > + resp.path_mig_state = attr->path_mig_state; > + resp.qkey = attr->qkey; > + resp.rq_psn = attr->rq_psn; > + resp.sq_psn = attr->sq_psn; > + resp.dest_qp_num = attr->dest_qp_num; > + resp.qp_access_flags = attr->qp_access_flags; > + resp.pkey_index = attr->pkey_index; > + resp.alt_pkey_index = attr->alt_pkey_index; > + resp.sq_draining = attr->sq_draining; > + resp.max_rd_atomic = attr->max_rd_atomic; > + resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; > + resp.min_rnr_timer = attr->min_rnr_timer; > + resp.port_num = attr->port_num; > + resp.timeout = attr->timeout; > + resp.retry_cnt = attr->retry_cnt; > + resp.rnr_retry = attr->rnr_retry; > + resp.alt_port_num = attr->alt_port_num; > + resp.alt_timeout = attr->alt_timeout; > + > + memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); > + resp.dest.flow_label = attr->ah_attr.grh.flow_label; > + resp.dest.sgid_index = 
attr->ah_attr.grh.sgid_index; > + resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; > + resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; > + resp.dest.dlid = attr->ah_attr.dlid; > + resp.dest.sl = attr->ah_attr.sl; > + resp.dest.src_path_bits = attr->ah_attr.src_path_bits; > + resp.dest.static_rate = attr->ah_attr.static_rate; > + resp.dest.is_global = > !!(attr->ah_attr.ah_flags & IB_AH_GRH); > + resp.dest.port_num = attr->ah_attr.port_num; > + > + memcpy(resp.alt_dest.dgid, > attr->alt_ah_attr.grh.dgid.raw, 16); > + resp.alt_dest.flow_label = > attr->alt_ah_attr.grh.flow_label; > + resp.alt_dest.sgid_index = > attr->alt_ah_attr.grh.sgid_index; > + resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; > + resp.alt_dest.traffic_class = > attr->alt_ah_attr.grh.traffic_class; > + resp.alt_dest.dlid = attr->alt_ah_attr.dlid; > + resp.alt_dest.sl = attr->alt_ah_attr.sl; > + resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; > + resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; > + resp.alt_dest.is_global = > !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); > + resp.alt_dest.port_num = attr->alt_ah_attr.port_num; > + > + resp.max_send_wr = init_attr->cap.max_send_wr; > + resp.max_recv_wr = init_attr->cap.max_recv_wr; > + resp.max_send_sge = init_attr->cap.max_send_sge; > + resp.max_recv_sge = init_attr->cap.max_recv_sge; > + resp.max_inline_data = init_attr->cap.max_inline_data; > + resp.sq_sig_all = init_attr->sq_sig_type > == IB_SIGNAL_ALL_WR; > + > + if (copy_to_user((void __user *) (unsigned long) cmd.response, > + &resp, sizeof resp)) > + ret = -EFAULT; > + > +out: > + kfree(attr); > + kfree(init_attr); > + > + return ret ? 
ret : in_len; > +} > + > +ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_reg_xrc_rcv_qp cmd; > + struct ib_uxrc_rcv_object *qp_obj, *tmp; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int ret; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL); > + if (!qp_obj) > + return -ENOMEM; > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + ret = -EINVAL; > + goto err_out; > + } > + > + ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num); > + if (ret) > + goto err_put; > + > + atomic_inc(&xrcd->usecnt); > + put_xrcd_read(xrcd_uobj); > + mutex_lock(&file->mutex); > + list_for_each_entry(tmp, > &file->ucontext->xrc_reg_qp_list, list) > + if (cmd.qp_num == tmp->qp_num) { > + kfree(qp_obj); > + mutex_unlock(&file->mutex); > + put_xrcd_read(xrcd_uobj); > + return 0; > + } > + qp_obj->qp_num = cmd.qp_num; > + qp_obj->domain_handle = cmd.xrc_domain_handle; > + list_add_tail(&qp_obj->list, > &file->ucontext->xrc_reg_qp_list); > + mutex_unlock(&file->mutex); > + return in_len; > + > +err_put: > + put_xrcd_read(xrcd_uobj); > +err_out: > + > + kfree(qp_obj); > + return ret; > +} > + > +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, > + u32 domain_handle, u32 qp_num) { > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int err; > + > + xrcd = idr_read_xrcd(domain_handle, file->ucontext, > &xrcd_uobj); > + if (!xrcd) > + return -EINVAL; > > + err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); > + > + if (!err) > + atomic_dec(&xrcd->usecnt); > + put_xrcd_read(xrcd_uobj); > + return err; > +} > + > +ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_unreg_xrc_rcv_qp cmd; > + struct ib_uxrc_rcv_object *qp_obj, *tmp; > + int 
ret; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + ret = ib_uverbs_cleanup_xrc_rcv_qp(file, > cmd.xrc_domain_handle, cmd.qp_num); > + if (ret) > + return ret; > + > + mutex_lock(&file->mutex); > + list_for_each_entry_safe(qp_obj, tmp, > &file->ucontext->xrc_reg_qp_list, list) > + if (cmd.qp_num == qp_obj->qp_num) { > + list_del(&qp_obj->list); > + kfree(qp_obj); > + break; > + } > + mutex_unlock(&file->mutex); > + return in_len; > + > +} > Index: infiniband/include/rdma/ib_user_verbs.h > =================================================================== > --- infiniband.orig/include/rdma/ib_user_verbs.h > 2008-01-28 12:20:54.000000000 +0200 > +++ infiniband/include/rdma/ib_user_verbs.h 2008-01-28 > 12:20:56.000000000 +0200 > @@ -86,7 +86,12 @@ enum { > IB_USER_VERBS_CMD_POST_SRQ_RECV, > IB_USER_VERBS_CMD_CREATE_XRC_SRQ, > IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN, > - IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN > + IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN, > + IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP, > + IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP, > + IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP, > + IB_USER_VERBS_CMD_REG_XRC_RCV_QP, > + IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP, > }; > > /* > @@ -714,6 +719,76 @@ struct ib_uverbs_close_xrc_domain { > __u64 driver_data[0]; > }; > > +struct ib_uverbs_create_xrc_rcv_qp { > + __u64 response; > + __u64 user_handle; > + __u32 xrc_domain_handle; > + __u32 max_send_wr; > + __u32 max_recv_wr; > + __u32 max_send_sge; > + __u32 max_recv_sge; > + __u32 max_inline_data; > + __u8 sq_sig_all; > + __u8 qp_type; > + __u8 reserved[2]; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_create_xrc_rcv_qp_resp { > + __u32 qpn; > + __u32 reserved; > +}; > + > +struct ib_uverbs_modify_xrc_rcv_qp { > + __u32 xrc_domain_handle; > + __u32 qp_num; > + struct ib_uverbs_qp_dest dest; > + struct ib_uverbs_qp_dest alt_dest; > + __u32 attr_mask; > + __u32 qkey; > + __u32 rq_psn; > + __u32 sq_psn; > + __u32 dest_qp_num; > + __u32 qp_access_flags; > + __u16 
pkey_index; > + __u16 alt_pkey_index; > + __u8 qp_state; > + __u8 cur_qp_state; > + __u8 path_mtu; > + __u8 path_mig_state; > + __u8 en_sqd_async_notify; > + __u8 max_rd_atomic; > + __u8 max_dest_rd_atomic; > + __u8 min_rnr_timer; > + __u8 port_num; > + __u8 timeout; > + __u8 retry_cnt; > + __u8 rnr_retry; > + __u8 alt_port_num; > + __u8 alt_timeout; > + __u8 reserved[2]; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_query_xrc_rcv_qp { > + __u64 response; > + __u32 xrc_domain_handle; > + __u32 qp_num; > + __u32 attr_mask; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_reg_xrc_rcv_qp { > + __u32 xrc_domain_handle; > + __u32 qp_num; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_unreg_xrc_rcv_qp { > + __u32 xrc_domain_handle; > + __u32 qp_num; > + __u64 driver_data[0]; > +}; > > > #endif /* IB_USER_VERBS_H */ > Index: infiniband/drivers/infiniband/core/uverbs.h > =================================================================== > --- infiniband.orig/drivers/infiniband/core/uverbs.h > 2008-01-28 12:20:55.000000000 +0200 > +++ infiniband/drivers/infiniband/core/uverbs.h 2008-01-28 > +++ 12:20:56.000000000 +0200 > @@ -163,8 +163,12 @@ void ib_uverbs_qp_event_handler(struct i > void ib_uverbs_srq_event_handler(struct ib_event *event, > void *context_ptr); void ib_uverbs_event_handler(struct > ib_event_handler *handler, > struct ib_event *event); > +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, > + void *context_ptr); > void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, > struct ib_xrcd *xrcd); > +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, > + u32 domain_handle, u32 qp_num); > > #define IB_UVERBS_DECLARE_CMD(name) > \ > ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, > \ > @@ -202,6 +206,11 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); > IB_UVERBS_DECLARE_CMD(create_xrc_srq); > IB_UVERBS_DECLARE_CMD(open_xrc_domain); > IB_UVERBS_DECLARE_CMD(close_xrc_domain); > +IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp); 
> +IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp); > +IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp); > +IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp); > +IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp); > > > #endif /* UVERBS_H */ > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From divy at chelsio.com Wed Feb 6 10:27:34 2008 From: divy at chelsio.com (Divy Le Ray) Date: Wed, 06 Feb 2008 10:27:34 -0800 Subject: [ofa-general] Re: [PATCH 2.6.25] cxgb3: Handle ARP completions that mark neighbors stale. In-Reply-To: <20080206180519.4680.34741.stgit@dell3.ogc.int> References: <20080206180519.4680.34741.stgit@dell3.ogc.int> Message-ID: <47A9FC16.3070803@chelsio.com> Steve Wise wrote: > cxgb3: Handle ARP completions that mark neighbors stale. > > When ARP completes due to a request rather than a reply the neighbor is > marked NUD_STALE instead of reachable (see arp_process()). The handler > for the resulting netevent needs to check also for NUD_STALE. > > Failure to use the arp entry can cause RDMA connection failures. > > Signed-off-by: Steve Wise > Acked-by: Divy Le Ray Divy From pradeeps at linux.vnet.ibm.com Wed Feb 6 10:36:12 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 06 Feb 2008 10:36:12 -0800 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <47A9E432.7070508@mellanox.co.il> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> Message-ID: <47A9FE1C.9010709@linux.vnet.ibm.com> Tziporet Koren wrote: > Shirley Ma wrote: >> >> Thanks Tziporet. We will test it right after it's out. >> >> > You can start use the lates build - > http://www.openfabrics.org/builds/ofed-1.3/OFED-1.3-20080206-0751.tgz > > Tziporet > I have downloaded the todays build mentioned above. 
I am still seeing the issue of failing ib_destroy_cq() for the rcq mentioned yesterday. Here are the steps that I follow: 1. On a freshly booted system configure ib0 2. Switch to connected mode (on an HCA that supports SRQ) 3. ping remote interface 4. modprobe -r ib_ehca 5. I see the failures about ib_destroy_cq() failing and the cascading failures following that (srq and pd cannot be destroyed) 6. If I try a modprobe ib_ehca I get an error "Cannot allocate memory" This also means someone is chewing tons of memory. I realize that the qp and associated pd were not freed, so some memory is "lost". However, this system has 8 GB of memory. Pradeep From xma at us.ibm.com Wed Feb 6 10:33:03 2008 From: xma at us.ibm.com (Shirley Ma) Date: Wed, 6 Feb 2008 11:33:03 -0700 Subject: [ofa-general] ***SPAM*** Fw: [Final][PATCH] IPoIB-4K MTU patch Message-ID: Hello Eli, FYI. In case you didn't receive these emails on time. You are welcome to create a patch on top of it, such as using __skb_put to replace +size in ipoib_ud_sg_put_frags(). ------------------------------------------------------------------------------------------- Nam and Stefan have helped out with the backporting while I concentrate on stress testing against the 2.6.24 kernel (20 duplex streams over one port, testing against mthca for 2K mtu; it has been running for over 8 hours). What we have validated this patch on (build, sniff test, flood ping) are 2.6.16 - 2.6.24 kernel, RHEL4.5, RHEL4.6, RHEL5.1 and SLES10SP1 & the derivative version of SLES 10 SP1. The attachment below is the backport patch. I reattach the patch file here for your convenience.
The backport patch file ipoib_0100_to_2.6.21.patch needs to be copied into below dir: ./kernel_patches/attic/backport/2.6.9_U2/ipoib_0100_to_2.6.21.patch ./kernel_patches/attic/backport/2.6.9_U3/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.18-EL5.1/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.16_sles10/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.9_U4/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.9_U5/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.9_U6/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.18_suse10_2/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.16_sles10_sp1/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.11/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.12/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.13/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.14/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.15/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.16/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.17/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.18/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.19/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.20/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.21/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.13_suse10_0_u/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.15_ubuntu606/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.11_FC4/ipoib_0100_to_2.6.21.patch ./kernel_patches/backport/2.6.18_FC6/ipoib_0100_to_2.6.21.patch Shirley ----- Forwarded by Shirley Ma/Beaverton/IBM on 02/06/08 12:28 AM ----- Shirley Ma/Beavert on/IBM To tziporet at dev.mellanox.co.il, "Vladimir 02/05/08 Sokolovsky (Mellanox)" 01:33 PM cc eli at mellanox.co.il Subject [Final][PATCH] IPoIB-4K MTU patch(Document link: Shirley Ma) Hello, below is the final patch based on Eli's review comments. Thanks Eli for all of your work. 
This patch has been validated on 2.6.24 kernel, SLES10 on both intel/mthca and ppc/mthca. I am working on RHEL5 testing. The backport patch will be provided tonight. Hopefully Nam could help me on this. I will continue to let the stress test going on different of subnets. I hopefully these is nothing changed in ofed-1.3bit today. So the patch can be applied cleanly. If not, let me know. Please use attachment for applying patch since my notes has problem. Thanks Shirley ------------ This patch is enabling IPoIB 4K MTU support. When PAGE_SIZE is greater than IB MTU size + GRH + IPoIB head, there is no need for RX S/G. When it's smaller two buffers are allocated, one buffer is GRH+IPoIB header, one buffer is for IPoIB payload. Signed-off-by: Shirley Ma --- diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib.h ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib.h --- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-04 20:09:18.000000000 -0800 +++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-05 12:20:46.000000000 -0800 @@ -56,11 +56,11 @@ /* constants */ enum { - IPOIB_PACKET_SIZE = 2048, - IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES, - IPOIB_ENCAP_LEN = 4, + IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN, + IPOIB_UD_RX_SG = 2, /* for 4K MTU */ + IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */ IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN, IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE, @@ -141,9 +141,9 @@ struct ipoib_mcast { struct net_device *dev; }; -struct ipoib_rx_buf { +struct ipoib_sg_rx_buf { struct sk_buff *skb; - u64 mapping; + u64 mapping[IPOIB_UD_RX_SG]; }; struct ipoib_tx_buf { @@ -337,7 +337,7 @@ struct ipoib_dev_priv { struct net_device *dev; struct ib_recv_wr rx_wr_draft[UD_POST_RCV_COUNT]; - struct ib_sge sglist_draft[UD_POST_RCV_COUNT]; + struct ib_sge sglist_draft[UD_POST_RCV_COUNT][IPOIB_UD_RX_SG]; unsigned int rx_outst; struct napi_struct napi; @@ -378,7 +378,7 @@ struct 
ipoib_dev_priv { unsigned int admin_mtu; unsigned int mcast_mtu; - struct ipoib_rx_buf *rx_ring; + struct ipoib_sg_rx_buf *rx_ring; spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; @@ -412,6 +412,7 @@ struct ipoib_dev_priv { struct ipoib_ethtool_st etool; struct timer_list poll_timer; struct ib_ah *own_ah; + int max_ib_mtu; }; struct ipoib_ah { @@ -452,6 +453,22 @@ struct ipoib_neigh { struct list_head list; }; +#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) +#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES) +static inline int ipoib_ud_need_sg(int ib_mtu) +{ + return (IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE) ? 1 : 0; +} +static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv *priv, + u64 mapping[IPOIB_UD_RX_SG]) +{ + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE); + } else + ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE); +} + /* * We stash a pointer to our private neighbour information after our * hardware address in neigh->ha. 
The ALIGN() expression here makes diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_ib.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_ib.c --- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-04 20:09:18.000000000 -0800 +++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-05 12:20:40.000000000 -0800 @@ -96,14 +96,37 @@ static void clean_pending_receives(struc for (i = 0; i < priv->rx_outst; ++i) { id = priv->rx_wr_draft[i].wr_id & ~IPOIB_OP_RECV; - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } priv->rx_outst = 0; } +static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv, struct sk_buff *skb, + unsigned int length) +{ + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + unsigned int size; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; + + /* put header into skb */ + size = min(length, (unsigned)IPOIB_UD_HEAD_SIZE); + skb->tail += size; + skb->len += size; + length -= size; + + size = min(length, (unsigned) PAGE_SIZE); + frag->size = size; + skb->data_len += size; + skb->truesize += size; + skb->len += size; + length -= size; + } else + skb_put(skb, length); +} + static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -111,8 +134,11 @@ static int ipoib_ib_post_receive(struct int ret = 0; int i = priv->rx_outst; - priv->sglist_draft[i].addr = priv->rx_ring[id].mapping; + priv->sglist_draft[i][0].addr = priv->rx_ring[id].mapping[0]; + priv->sglist_draft[i][1].addr = priv->rx_ring[id].mapping[1]; + priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV; + if (++priv->rx_outst == UD_POST_RCV_COUNT) { ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); @@ -120,8 +146,8 @@ static int ipoib_ib_post_receive(struct ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); while (bad_wr) { id 
= bad_wr->wr_id & ~IPOIB_OP_RECV; - ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; } @@ -132,16 +158,23 @@ static int ipoib_ib_post_receive(struct return ret; } -static int ipoib_alloc_rx_skb(struct net_device *dev, int id) +static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id, + u64 mapping[IPOIB_UD_RX_SG]) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; - u64 addr; + int buf_size; - skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4); - if (!skb) - return -ENOMEM; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) + buf_size = IPOIB_UD_HEAD_SIZE; + else + buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + skb = dev_alloc_skb(buf_size + 4); + + if (unlikely(!skb)) + return NULL; + /* * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte * header. So we need 4 more bytes to get to 48 and align the @@ -149,17 +182,32 @@ static int ipoib_alloc_rx_skb(struct net */ skb_reserve(skb, 4); - addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - if (unlikely(ib_dma_mapping_error(priv->ca, addr))) { - dev_kfree_skb_any(skb); - return -EIO; - } - - priv->rx_ring[id].skb = skb; - priv->rx_ring[id].mapping = addr; - - return 0; + mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size, + DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { + dev_kfree_skb_any(skb); + return NULL; + } + + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + struct page *page = alloc_page(GFP_ATOMIC); + if (!page) + goto partial_error; + + skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE); + mapping[1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page, + 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1]))) + goto partial_error; + } + + priv->rx_ring[id].skb = skb; + return skb; + +partial_error: 
+ ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE); + dev_kfree_skb_any(skb); + return NULL; } static int ipoib_ib_post_receives(struct net_device *dev) @@ -168,7 +216,7 @@ static int ipoib_ib_post_receives(struct int i; for (i = 0; i < ipoib_recvq_size; ++i) { - if (ipoib_alloc_rx_skb(dev, i)) { + if (!ipoib_alloc_rx_skb(dev, i, priv->rx_ring[i].mapping)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); return -ENOMEM; } @@ -186,7 +234,7 @@ static void ipoib_ib_handle_rx_wc(struct struct ipoib_dev_priv *priv = netdev_priv(dev); unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; struct sk_buff *skb; - u64 addr; + u64 mapping[IPOIB_UD_RX_SG]; ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", wr_id, wc->status); @@ -198,42 +246,38 @@ static void ipoib_ib_handle_rx_wc(struct } skb = priv->rx_ring[wr_id].skb; - addr = priv->rx_ring[wr_id].mapping; + /* duplicate the code here, to omit fast path if need-sg condition check */ if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - ib_dma_unmap_single(priv->ca, addr, - IPOIB_BUF_SIZE, DMA_FROM_DEVICE); + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); dev_kfree_skb_any(skb); priv->rx_ring[wr_id].skb = NULL; return; } - /* * Drop packets that this interface sent, ie multicast packets * that the HCA has replicated. */ - if (unlikely(wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)) + if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) goto repost; - /* * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. 
*/ - if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) { + if (unlikely(!ipoib_alloc_rx_skb(dev, wr_id, mapping))) { ++dev->stats.rx_dropped; goto repost; } - ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); - - ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE); - - skb_put(skb, wc->byte_len); + ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping); + ipoib_ud_skb_put_frags(priv, skb, wc->byte_len); + memcpy(priv->rx_ring[wr_id].mapping, mapping, + IPOIB_UD_RX_SG * sizeof *mapping); skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; @@ -827,18 +871,15 @@ int ipoib_ib_dev_stop(struct net_device * all our pending work requests. */ for (i = 0; i < ipoib_recvq_size; ++i) { - struct ipoib_rx_buf *rx_req; + struct ipoib_sg_rx_buf *rx_req; rx_req = &priv->rx_ring[i]; - - if (rx_req->skb) { - ib_dma_unmap_single(priv->ca, - rx_req->mapping, - IPOIB_BUF_SIZE, - DMA_FROM_DEVICE); - dev_kfree_skb_any(rx_req->skb); - rx_req->skb = NULL; - } + if (!rx_req->skb) + continue; + ipoib_sg_dma_unmap_rx(priv, + priv->rx_ring[i].mapping); + dev_kfree_skb_any(rx_req->skb); + rx_req->skb = NULL; } goto timeout; diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_main.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_main.c --- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-02-04 20:09:18.000000000 -0800 +++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-02-05 12:20:40.000000000 -0800 @@ -193,7 +193,7 @@ static int ipoib_change_mtu(struct net_d return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) + if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; @@ -1007,10 +1007,6 @@ static void ipoib_setup(struct net_devic dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; - /* MTU will be reset when mcast join happens */ - dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; - 
priv->mcast_mtu = priv->admin_mtu = dev->mtu; - memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); netif_carrier_off(dev); @@ -1156,6 +1152,7 @@ static struct net_device *ipoib_add_port struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; + struct ib_port_attr attr; int result = -ENOMEM; priv = ipoib_intf_alloc(format); @@ -1166,6 +1163,18 @@ static struct net_device *ipoib_add_port priv->dev->features |= NETIF_F_HIGHDMA; + if (!ib_query_port(hca, port, &attr)) + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + else { + printk(KERN_WARNING "%s: ib_query_port %d failed\n", + hca->name, port); + goto device_init_failed; + } + + /* MTU will be reset when mcast join happens */ + priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c --- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-02-04 15:31:14.000000000 -0800 +++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-02-05 12:20:40.000000000 -0800 @@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_s return; } - priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - - IPOIB_ENCAP_LEN; + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); if (!ipoib_cm_admin_enabled(dev)) dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c --- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-04 20:09:18.000000000 -0800 +++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-05 12:20:40.000000000 -0800 @@ -151,7 +151,7 @@ int ipoib_transport_dev_init(struct net_ 
.max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, .max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1, - .max_recv_sge = 1 + .max_recv_sge = IPOIB_UD_RX_SG }, .sq_sig_type = IB_SIGNAL_REQ_WR, .qp_type = IB_QPT_UD, @@ -225,18 +225,29 @@ int ipoib_transport_dev_init(struct net_ priv->tx_wr.opcode = IB_WR_SEND; priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; - + for (i = 0; i < UD_POST_RCV_COUNT; ++i) { - priv->sglist_draft[i].length = IPOIB_BUF_SIZE; - priv->sglist_draft[i].lkey = priv->mr->lkey; - - priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i]; - priv->rx_wr_draft[i].num_sge = 1; + priv->sglist_draft[i][0].lkey = priv->mr->lkey; + priv->sglist_draft[i][1].lkey = priv->mr->lkey; + priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i][0]; if (i < UD_POST_RCV_COUNT - 1) priv->rx_wr_draft[i].next = &priv->rx_wr_draft[i + 1]; } priv->rx_wr_draft[i].next = NULL; + if (ipoib_ud_need_sg(priv->max_ib_mtu)) { + for (i = 0; i < UD_POST_RCV_COUNT; ++i) { + priv->sglist_draft[i][0].length = IPOIB_UD_HEAD_SIZE; + priv->sglist_draft[i][1].length = PAGE_SIZE; + priv->rx_wr_draft[i].num_sge = IPOIB_UD_RX_SG; + } + } else { + for (i = 0; i < UD_POST_RCV_COUNT; ++i) { + priv->sglist_draft[i][0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); + priv->rx_wr_draft[i].num_sge = 1; + } + } + return 0; out_free_scq: (See attached file: ipoib-new-4kmtu.patch) Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: pic28698.gif Type: image/gif Size: 1972 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: doclink.gif Type: image/gif Size: 149 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ipoib-new-4kmtu.patch Type: application/octet-stream Size: 14679 bytes Desc: not available URL: From changquing.tang at hp.com Wed Feb 6 11:04:23 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Wed, 6 Feb 2008 19:04:23 +0000 Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC receive-only qp support In-Reply-To: <200802030949.44943.jackm@dev.mellanox.co.il> References: <200802030949.44943.jackm@dev.mellanox.co.il> Message-ID: Jack: Do you have the updated version of srq_pingpong.c.xrc using these new APIs? I always get the following error for a ping_pong HP-MPI code, two ranks, one on each node. pp.x.x: Rank 0:1: MPI_Init: ibv_poll_cq(): bad status 9 pp.x.x: Rank 0:1: MPI_Init: self mpiobl03 peer mpiobl04 (rank: 0) pp.x.x: Rank 0:1: MPI_Init: error message: remote invalid request error The returned xrc_srq_num on each rank is zero; is this a correct value? --CQ > -----Original Message----- > From: general-bounces at lists.openfabrics.org > [mailto:general-bounces at lists.openfabrics.org] On Behalf Of > Jack Morgenstein > Sent: Sunday, February 03, 2008 1:50 AM > To: Roland Dreier > Cc: general at lists.openfabrics.org > Subject: [ofa-general] [PATCH 7/8 V3] core: Add XRC > receive-only qp support > > ib/core: Implement XRC receive-only QPs for userspace apps. > > Added creation of XRC receive-only QPs for userspace, which > reside in kernel space (user cannot post-to or poll these QPs). > > Motivation: MPI community requires XRC receive QPs which > will not be destroyed when the creating process terminates. > > Solution: Userspace requests that a QP be created in kernel space. > Each userspace process using that QP (i.e. receiving packets > on an XRC SRQ via the qp), registers with that QP (-- the > creator is also registered, whether or not it is a user of > the QP).
When the last userspace user unregisters with the > QP, it is destroyed. Unregistration is also part of > userspace process cleanup, so there is no leakage. > > This patch implements the kernel procedures to implement the following > (new) libibverbs API: > ibv_create_xrc_rcv_qp > ibv_modify_xrc_rcv_qp > ibv_query_xrc_rcv_qp > ibv_reg_xrc_rcv_qp > ibv_unreg_xrc_rcv_qp > > In addition, the patch implements the foundation for > distributing XRC-receive-only QP events to userspace > processes registered with that QP. > > Finally, the patch modifies ib_uverbs_close_xrc_domain() to > return BUSY if any resources are still in use by the process, > so that the XRC rcv-only QP cleanup can operate properly. > > V2: > Fixed bug in ib_uverbs_close_xrc_domain. > We need to allow the process to successfully close its copy > of the domain, even if it still has undestroyed XRC QPs -- > these will continue to operate, although it will not be > possible to create new ones (there will be no Oops). > > However, we need to check that there are no outstanding > xrc-qp-registrations: > the cleanup procedure for this depends on the xrc domain > still being accessible in this process in order to perform > all needed un-registrations (and thus prevent resource leakage). > > V3: > Fix thinko in ib_uverbs_reg_xrc_rcv_qp, > ib_uverbs_unreg_xrc_rcv_qp, and ib_uverbs_modify_xrc_rcv_qp: > on success, incorrectly returned 0 instead of input length. 
> > Signed-off-by: Jack Morgenstein > > Index: infiniband/include/rdma/ib_verbs.h > =================================================================== > --- infiniband.orig/include/rdma/ib_verbs.h 2008-01-28 > 12:20:55.000000000 +0200 > +++ infiniband/include/rdma/ib_verbs.h 2008-01-28 12:22:09.000000000 > +++ +0200 > @@ -285,6 +285,10 @@ enum ib_event_type { > IB_EVENT_CLIENT_REREGISTER > }; > > +enum ib_event_flags { > + IB_XRC_QP_EVENT_FLAG = 0x80000000, }; > + > struct ib_event { > struct ib_device *device; > union { > @@ -292,6 +296,7 @@ struct ib_event { > struct ib_qp *qp; > struct ib_srq *srq; > u8 port_num; > + u32 xrc_qp_num; > } element; > enum ib_event_type event; > }; > @@ -492,6 +497,7 @@ enum ib_qp_type { > > enum qp_create_flags { > QP_CREATE_LSO = 1 << 0, > + XRC_RCV_QP = 1 << 1, > }; > > struct ib_qp_init_attr { > @@ -723,6 +729,7 @@ struct ib_ucontext { > struct list_head srq_list; > struct list_head ah_list; > struct list_head xrc_domain_list; > + struct list_head xrc_reg_qp_list; > int closing; > }; > > @@ -744,6 +751,12 @@ struct ib_udata { > size_t outlen; > }; > > +struct ib_uxrc_rcv_object { > + struct list_head list; /* link to > context's list */ > + u32 qp_num; > + u32 domain_handle; > +}; > + > struct ib_pd { > struct ib_device *device; > struct ib_uobject *uobject; > @@ -1053,6 +1066,23 @@ struct ib_device { > struct > ib_ucontext *context, > struct > ib_udata *udata); > int (*dealloc_xrcd)(struct > ib_xrcd *xrcd); > + int > (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr, > + u32* qp_num); > + int > (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd, > + u32 qp_num, > + > struct ib_qp_attr *attr, > + int > attr_mask); > + int (*query_xrc_rcv_qp)(struct > ib_xrcd *xrcd, > + u32 qp_num, > + struct > ib_qp_attr *attr, > + int attr_mask, > + struct > ib_qp_init_attr *init_attr); > + int (*reg_xrc_rcv_qp)(struct > ib_xrcd *xrcd, > + void *context, > + u32 qp_num); > + int (*unreg_xrc_rcv_qp)(struct > ib_xrcd *xrcd, > + void *context, > + 
u32 qp_num); > > struct ib_dma_mapping_ops *dma_ops; > > Index: infiniband/drivers/infiniband/core/uverbs_main.c > =================================================================== > --- infiniband.orig/drivers/infiniband/core/uverbs_main.c > 2008-01-28 12:20:55.000000000 +0200 > +++ infiniband/drivers/infiniband/core/uverbs_main.c > 2008-01-28 12:20:56.000000000 +0200 > @@ -114,6 +114,11 @@ static ssize_t (*uverbs_cmd_table[])(str > [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = > ib_uverbs_create_xrc_srq, > [IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = > ib_uverbs_open_xrc_domain, > [IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = > ib_uverbs_close_xrc_domain, > + [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = > ib_uverbs_create_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = > ib_uverbs_modify_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = > ib_uverbs_query_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = > ib_uverbs_reg_xrc_rcv_qp, > + [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = > ib_uverbs_unreg_xrc_rcv_qp, > }; > > static struct vfsmount *uverbs_event_mnt; @@ -191,6 +196,7 > @@ static int ib_uverbs_cleanup_ucontext(st > struct ib_ucontext *context) { > struct ib_uobject *uobj, *tmp; > + struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1; > > if (!context) > return 0; > @@ -251,6 +257,13 @@ static int ib_uverbs_cleanup_ucontext(st > kfree(uobj); > } > > + list_for_each_entry_safe(xrc_qp_obj, tmp1, > &context->xrc_reg_qp_list, list) { > + list_del(&xrc_qp_obj->list); > + ib_uverbs_cleanup_xrc_rcv_qp(file, > xrc_qp_obj->domain_handle, > + xrc_qp_obj->qp_num); > + kfree(xrc_qp_obj); > + } > + > mutex_lock(&file->device->ib_dev->xrcd_table_mutex); > list_for_each_entry_safe(uobj, tmp, > &context->xrc_domain_list, list) { > struct ib_xrcd *xrcd = uobj->object; @@ > -506,6 +519,12 @@ void ib_uverbs_event_handler(struct ib_e > NULL, NULL); } > > +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, void > +*context_ptr) { > + ib_uverbs_async_handler(context_ptr, > 
event->element.xrc_qp_num, > + event->event, NULL, NULL); } > + > struct file *ib_uverbs_alloc_event_file(struct > ib_uverbs_file *uverbs_file, > int is_async, int *fd) { > Index: infiniband/drivers/infiniband/core/uverbs_cmd.c > =================================================================== > --- infiniband.orig/drivers/infiniband/core/uverbs_cmd.c > 2008-01-28 12:20:55.000000000 +0200 > +++ infiniband/drivers/infiniband/core/uverbs_cmd.c > 2008-01-28 12:20:56.000000000 +0200 > @@ -315,6 +315,7 @@ ssize_t ib_uverbs_get_context(struct ib_ > INIT_LIST_HEAD(&ucontext->srq_list); > INIT_LIST_HEAD(&ucontext->ah_list); > INIT_LIST_HEAD(&ucontext->xrc_domain_list); > + INIT_LIST_HEAD(&ucontext->xrc_reg_qp_list); > ucontext->closing = 0; > > resp.num_comp_vectors = > file->device->num_comp_vectors; @@ -1080,6 +1081,7 @@ ssize_t > ib_uverbs_create_qp(struct ib_uv > goto err_put; > } > > + attr.create_flags = 0; > attr.event_handler = ib_uverbs_qp_event_handler; > attr.qp_context = file; > attr.send_cq = scq; > @@ -2561,6 +2563,7 @@ ssize_t ib_uverbs_close_xrc_domain(struc > int out_len) { > struct ib_uverbs_close_xrc_domain cmd; > + struct ib_uxrc_rcv_object *tmp; > struct ib_uobject *uobj; > struct ib_xrcd *xrcd = NULL; > struct inode *inode = NULL; > @@ -2576,6 +2579,18 @@ ssize_t ib_uverbs_close_xrc_domain(struc > goto err_unlock_mutex; > } > > + mutex_lock(&file->mutex); > + list_for_each_entry(tmp, > &file->ucontext->xrc_reg_qp_list, list) > + if (cmd.xrcd_handle == tmp->domain_handle) { > + ret = -EBUSY; > + break; > + } > + mutex_unlock(&file->mutex); > + if (ret) { > + put_uobj_write(uobj); > + goto err_unlock_mutex; > + } > + > xrcd = (struct ib_xrcd *) (uobj->object); > inode = xrcd->inode; > > @@ -2611,7 +2626,7 @@ err_unlock_mutex: > } > > void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, > - struct ib_xrcd *xrcd) > + struct ib_xrcd *xrcd) > { > struct inode *inode = NULL; > int ret = 0; > @@ -2625,4 +2640,353 @@ void ib_uverbs_dealloc_xrcd(struct 
ib_de > xrcd_table_delete(ib_dev, inode); } > > +ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_create_xrc_rcv_qp cmd; > + struct ib_uverbs_create_xrc_rcv_qp_resp resp; > + struct ib_uxrc_rcv_object *obj; > + struct ib_qp_init_attr init_attr; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + u32 qp_num; > + int err; > + > + if (out_len < sizeof resp) > + return -ENOSPC; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + obj = kmalloc(sizeof *obj, GFP_KERNEL); > + if (!obj) > + return -ENOMEM; > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + err = -EINVAL; > + goto err_out; > + } > + > + memset(&init_attr, 0, sizeof init_attr); > + init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler; > + init_attr.qp_context = file; > + init_attr.srq = NULL; > + init_attr.sq_sig_type = cmd.sq_sig_all ? > IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; > + init_attr.qp_type = IB_QPT_XRC; > + init_attr.xrc_domain = xrcd; > + init_attr.create_flags = XRC_RCV_QP; > + > + init_attr.cap.max_send_wr = 1; > + init_attr.cap.max_recv_wr = 0; > + init_attr.cap.max_send_sge = 1; > + init_attr.cap.max_recv_sge = 0; > + init_attr.cap.max_inline_data = 0; > + > + err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num); > + if (err) > + goto err_put; > + > + memset(&resp, 0, sizeof resp); > + resp.qpn = qp_num; > + > + if (copy_to_user((void __user *) (unsigned long) cmd.response, > + &resp, sizeof resp)) { > + err = -EFAULT; > + goto err_destroy; > + } > + > + atomic_inc(&xrcd->usecnt); > + put_xrcd_read(xrcd_uobj); > + obj->qp_num = qp_num; > + obj->domain_handle = cmd.xrc_domain_handle; > + mutex_lock(&file->mutex); > + list_add_tail(&obj->list, &file->ucontext->xrc_reg_qp_list); > + mutex_unlock(&file->mutex); > + > + return in_len; > + > +err_destroy: > + xrcd->device->unreg_xrc_rcv_qp(xrcd, file, 
qp_num); > +err_put: > + put_xrcd_read(xrcd_uobj); > +err_out: > + kfree(obj); > + return err; > +} > + > +ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_modify_xrc_rcv_qp cmd; > + struct ib_qp_attr *attr; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int err; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + attr = kmalloc(sizeof *attr, GFP_KERNEL); > + if (!attr) > + return -ENOMEM; > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + kfree(attr); > + return -EINVAL; > + } > + > + memset(attr, 0, sizeof *attr); > + attr->qp_state = cmd.qp_state; > + attr->cur_qp_state = cmd.cur_qp_state; > + attr->qp_access_flags = cmd.qp_access_flags; > + attr->pkey_index = cmd.pkey_index; > + attr->port_num = cmd.port_num; > + attr->path_mtu = cmd.path_mtu; > + attr->path_mig_state = cmd.path_mig_state; > + attr->qkey = cmd.qkey; > + attr->rq_psn = cmd.rq_psn; > + attr->sq_psn = cmd.sq_psn; > + attr->dest_qp_num = cmd.dest_qp_num; > + attr->alt_pkey_index = cmd.alt_pkey_index; > + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; > + attr->max_rd_atomic = cmd.max_rd_atomic; > + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; > + attr->min_rnr_timer = cmd.min_rnr_timer; > + attr->port_num = cmd.port_num; > + attr->timeout = cmd.timeout; > + attr->retry_cnt = cmd.retry_cnt; > + attr->rnr_retry = cmd.rnr_retry; > + attr->alt_port_num = cmd.alt_port_num; > + attr->alt_timeout = cmd.alt_timeout; > + > + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); > + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; > + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; > + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; > + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; > + attr->ah_attr.dlid = cmd.dest.dlid; > + attr->ah_attr.sl = cmd.dest.sl; > + 
attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; > + attr->ah_attr.static_rate = cmd.dest.static_rate; > + attr->ah_attr.ah_flags = > cmd.dest.is_global ? IB_AH_GRH : 0; > + attr->ah_attr.port_num = cmd.dest.port_num; > + > + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); > + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; > + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; > + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; > + attr->alt_ah_attr.grh.traffic_class = > cmd.alt_dest.traffic_class; > + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; > + attr->alt_ah_attr.sl = cmd.alt_dest.sl; > + attr->alt_ah_attr.src_path_bits = > cmd.alt_dest.src_path_bits; > + attr->alt_ah_attr.static_rate = > cmd.alt_dest.static_rate; > + attr->alt_ah_attr.ah_flags = > cmd.alt_dest.is_global ? IB_AH_GRH : 0; > + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; > + > + err = xrcd->device->modify_xrc_rcv_qp(xrcd, > cmd.qp_num, attr, cmd.attr_mask); > + put_xrcd_read(xrcd_uobj); > + kfree(attr); > + return err ? 
err : in_len; > +} > + > +ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_query_xrc_rcv_qp cmd; > + struct ib_uverbs_query_qp_resp resp; > + struct ib_qp_attr *attr; > + struct ib_qp_init_attr *init_attr; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int ret; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + attr = kmalloc(sizeof *attr, GFP_KERNEL); > + init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); > + if (!attr || !init_attr) { > + ret = -ENOMEM; > + goto out; > + } > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + ret = -EINVAL; > + goto out; > + } > + > + ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr, > + cmd.attr_mask, > init_attr); > + > + put_xrcd_read(xrcd_uobj); > + > + if (ret) > + goto out; > + > + memset(&resp, 0, sizeof resp); > + resp.qp_state = attr->qp_state; > + resp.cur_qp_state = attr->cur_qp_state; > + resp.path_mtu = attr->path_mtu; > + resp.path_mig_state = attr->path_mig_state; > + resp.qkey = attr->qkey; > + resp.rq_psn = attr->rq_psn; > + resp.sq_psn = attr->sq_psn; > + resp.dest_qp_num = attr->dest_qp_num; > + resp.qp_access_flags = attr->qp_access_flags; > + resp.pkey_index = attr->pkey_index; > + resp.alt_pkey_index = attr->alt_pkey_index; > + resp.sq_draining = attr->sq_draining; > + resp.max_rd_atomic = attr->max_rd_atomic; > + resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; > + resp.min_rnr_timer = attr->min_rnr_timer; > + resp.port_num = attr->port_num; > + resp.timeout = attr->timeout; > + resp.retry_cnt = attr->retry_cnt; > + resp.rnr_retry = attr->rnr_retry; > + resp.alt_port_num = attr->alt_port_num; > + resp.alt_timeout = attr->alt_timeout; > + > + memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); > + resp.dest.flow_label = attr->ah_attr.grh.flow_label; > + resp.dest.sgid_index = 
attr->ah_attr.grh.sgid_index; > + resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; > + resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; > + resp.dest.dlid = attr->ah_attr.dlid; > + resp.dest.sl = attr->ah_attr.sl; > + resp.dest.src_path_bits = attr->ah_attr.src_path_bits; > + resp.dest.static_rate = attr->ah_attr.static_rate; > + resp.dest.is_global = > !!(attr->ah_attr.ah_flags & IB_AH_GRH); > + resp.dest.port_num = attr->ah_attr.port_num; > + > + memcpy(resp.alt_dest.dgid, > attr->alt_ah_attr.grh.dgid.raw, 16); > + resp.alt_dest.flow_label = > attr->alt_ah_attr.grh.flow_label; > + resp.alt_dest.sgid_index = > attr->alt_ah_attr.grh.sgid_index; > + resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; > + resp.alt_dest.traffic_class = > attr->alt_ah_attr.grh.traffic_class; > + resp.alt_dest.dlid = attr->alt_ah_attr.dlid; > + resp.alt_dest.sl = attr->alt_ah_attr.sl; > + resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; > + resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; > + resp.alt_dest.is_global = > !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); > + resp.alt_dest.port_num = attr->alt_ah_attr.port_num; > + > + resp.max_send_wr = init_attr->cap.max_send_wr; > + resp.max_recv_wr = init_attr->cap.max_recv_wr; > + resp.max_send_sge = init_attr->cap.max_send_sge; > + resp.max_recv_sge = init_attr->cap.max_recv_sge; > + resp.max_inline_data = init_attr->cap.max_inline_data; > + resp.sq_sig_all = init_attr->sq_sig_type > == IB_SIGNAL_ALL_WR; > + > + if (copy_to_user((void __user *) (unsigned long) cmd.response, > + &resp, sizeof resp)) > + ret = -EFAULT; > + > +out: > + kfree(attr); > + kfree(init_attr); > + > + return ret ? 
ret : in_len; > +} > + > +ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_reg_xrc_rcv_qp cmd; > + struct ib_uxrc_rcv_object *qp_obj, *tmp; > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int ret; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL); > + if (!qp_obj) > + return -ENOMEM; > + > + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, > file->ucontext, &xrcd_uobj); > + if (!xrcd) { > + ret = -EINVAL; > + goto err_out; > + } > + > + ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num); > + if (ret) > + goto err_put; > + > + atomic_inc(&xrcd->usecnt); > + put_xrcd_read(xrcd_uobj); > + mutex_lock(&file->mutex); > + list_for_each_entry(tmp, > &file->ucontext->xrc_reg_qp_list, list) > + if (cmd.qp_num == tmp->qp_num) { > + kfree(qp_obj); > + mutex_unlock(&file->mutex); > + put_xrcd_read(xrcd_uobj); > + return 0; > + } > + qp_obj->qp_num = cmd.qp_num; > + qp_obj->domain_handle = cmd.xrc_domain_handle; > + list_add_tail(&qp_obj->list, > &file->ucontext->xrc_reg_qp_list); > + mutex_unlock(&file->mutex); > + return in_len; > + > +err_put: > + put_xrcd_read(xrcd_uobj); > +err_out: > + > + kfree(qp_obj); > + return ret; > +} > + > +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, > + u32 domain_handle, u32 qp_num) { > + struct ib_xrcd *xrcd; > + struct ib_uobject *xrcd_uobj; > + int err; > + > + xrcd = idr_read_xrcd(domain_handle, file->ucontext, > &xrcd_uobj); > + if (!xrcd) > + return -EINVAL; > > + err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); > + > + if (!err) > + atomic_dec(&xrcd->usecnt); > + put_xrcd_read(xrcd_uobj); > + return err; > +} > + > +ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file, > + const char __user *buf, int in_len, > + int out_len) { > + struct ib_uverbs_unreg_xrc_rcv_qp cmd; > + struct ib_uxrc_rcv_object *qp_obj, *tmp; > + int 
ret; > + > + if (copy_from_user(&cmd, buf, sizeof cmd)) > + return -EFAULT; > + > + ret = ib_uverbs_cleanup_xrc_rcv_qp(file, > cmd.xrc_domain_handle, cmd.qp_num); > + if (ret) > + return ret; > + > + mutex_lock(&file->mutex); > + list_for_each_entry_safe(qp_obj, tmp, > &file->ucontext->xrc_reg_qp_list, list) > + if (cmd.qp_num == qp_obj->qp_num) { > + list_del(&qp_obj->list); > + kfree(qp_obj); > + break; > + } > + mutex_unlock(&file->mutex); > + return in_len; > + > +} > Index: infiniband/include/rdma/ib_user_verbs.h > =================================================================== > --- infiniband.orig/include/rdma/ib_user_verbs.h > 2008-01-28 12:20:54.000000000 +0200 > +++ infiniband/include/rdma/ib_user_verbs.h 2008-01-28 > 12:20:56.000000000 +0200 > @@ -86,7 +86,12 @@ enum { > IB_USER_VERBS_CMD_POST_SRQ_RECV, > IB_USER_VERBS_CMD_CREATE_XRC_SRQ, > IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN, > - IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN > + IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN, > + IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP, > + IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP, > + IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP, > + IB_USER_VERBS_CMD_REG_XRC_RCV_QP, > + IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP, > }; > > /* > @@ -714,6 +719,76 @@ struct ib_uverbs_close_xrc_domain { > __u64 driver_data[0]; > }; > > +struct ib_uverbs_create_xrc_rcv_qp { > + __u64 response; > + __u64 user_handle; > + __u32 xrc_domain_handle; > + __u32 max_send_wr; > + __u32 max_recv_wr; > + __u32 max_send_sge; > + __u32 max_recv_sge; > + __u32 max_inline_data; > + __u8 sq_sig_all; > + __u8 qp_type; > + __u8 reserved[2]; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_create_xrc_rcv_qp_resp { > + __u32 qpn; > + __u32 reserved; > +}; > + > +struct ib_uverbs_modify_xrc_rcv_qp { > + __u32 xrc_domain_handle; > + __u32 qp_num; > + struct ib_uverbs_qp_dest dest; > + struct ib_uverbs_qp_dest alt_dest; > + __u32 attr_mask; > + __u32 qkey; > + __u32 rq_psn; > + __u32 sq_psn; > + __u32 dest_qp_num; > + __u32 qp_access_flags; > + __u16 
pkey_index; > + __u16 alt_pkey_index; > + __u8 qp_state; > + __u8 cur_qp_state; > + __u8 path_mtu; > + __u8 path_mig_state; > + __u8 en_sqd_async_notify; > + __u8 max_rd_atomic; > + __u8 max_dest_rd_atomic; > + __u8 min_rnr_timer; > + __u8 port_num; > + __u8 timeout; > + __u8 retry_cnt; > + __u8 rnr_retry; > + __u8 alt_port_num; > + __u8 alt_timeout; > + __u8 reserved[2]; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_query_xrc_rcv_qp { > + __u64 response; > + __u32 xrc_domain_handle; > + __u32 qp_num; > + __u32 attr_mask; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_reg_xrc_rcv_qp { > + __u32 xrc_domain_handle; > + __u32 qp_num; > + __u64 driver_data[0]; > +}; > + > +struct ib_uverbs_unreg_xrc_rcv_qp { > + __u32 xrc_domain_handle; > + __u32 qp_num; > + __u64 driver_data[0]; > +}; > > > #endif /* IB_USER_VERBS_H */ > Index: infiniband/drivers/infiniband/core/uverbs.h > =================================================================== > --- infiniband.orig/drivers/infiniband/core/uverbs.h > 2008-01-28 12:20:55.000000000 +0200 > +++ infiniband/drivers/infiniband/core/uverbs.h 2008-01-28 > +++ 12:20:56.000000000 +0200 > @@ -163,8 +163,12 @@ void ib_uverbs_qp_event_handler(struct i > void ib_uverbs_srq_event_handler(struct ib_event *event, > void *context_ptr); void ib_uverbs_event_handler(struct > ib_event_handler *handler, > struct ib_event *event); > +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, > + void *context_ptr); > void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, > struct ib_xrcd *xrcd); > +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, > + u32 domain_handle, u32 qp_num); > > #define IB_UVERBS_DECLARE_CMD(name) > \ > ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, > \ > @@ -202,6 +206,11 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); > IB_UVERBS_DECLARE_CMD(create_xrc_srq); > IB_UVERBS_DECLARE_CMD(open_xrc_domain); > IB_UVERBS_DECLARE_CMD(close_xrc_domain); > +IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp); 
> +IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp); > +IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp); > +IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp); > +IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp); > > > #endif /* UVERBS_H */ > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From jlentini at netapp.com Wed Feb 6 12:38:02 2008 From: jlentini at netapp.com (James Lentini) Date: Wed, 6 Feb 2008 15:38:02 -0500 (EST) Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? In-Reply-To: <20080206154701.GA11384@cefeid.wcss.wroc.pl> References: <20080204152858.GA25343@cefeid.wcss.wroc.pl> <20080204211908.GB15115@cefeid.wcss.wroc.pl> <20080205144002.GB4754@cefeid.wcss.wroc.pl> <20080205154116.GA19804@cefeid.wcss.wroc.pl> <20080206115542.GC396@cefeid.wcss.wroc.pl> <20080206154701.GA11384@cefeid.wcss.wroc.pl> Message-ID: On Wed, 6 Feb 2008, Pawel Dziekonski wrote: > On Wed, 06 Feb 2008 at 10:17:24AM -0500, James Lentini wrote: > > > By the way, do these machines have ethernet interfaces? Are the > > Ethernet and IPoIB IPs on different subnets? > > # ip a > 1: lo: mtu 16436 qdisc noqueue > link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 > inet 127.0.0.1/8 scope host lo > 2: eth0: mtu 1500 qdisc pfifo_fast qlen 1000 > link/ether 00:30:48:7a:42:24 brd ff:ff:ff:ff:ff:ff > inet 10.255.255.221/8 brd 10.255.255.255 scope global eth0 > 3: eth1: mtu 1500 qdisc pfifo_fast qlen 1000 > link/ether 00:30:48:7a:42:25 brd ff:ff:ff:ff:ff:ff > 4: ib0: mtu 2044 qdisc pfifo_fast qlen 128 > link/[32] 80:00:04:04:fe:80:00:00:00:00:00:00:00:30:48:7a:42:24:00:01 brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff > inet 10.2.2.1/24 brd 10.2.2.255 scope global ib0 > > eth0 has to be in /8 network but I can change IPs on IB network if > there is such a need. 
The subnetting scheme above looks like a potential problem. It looks like all packets to 10.x.y.z will be routed through the eth0 interface. I'd suggest moving the IPoIB interface to a different private network, say 192.168.0.0. > > The client's connection request is being refused. This looks like a > > server problem. > > > > On the server, what is the output of: > > > > cat /proc/fs/nfsd/portlist > > # cat /proc/fs/nfsd/portlist > tcp 0.0.0.0, port=2049 > udp 0.0.0.0, port=2049 > > # echo rdma 2050 > /proc/fs/nfsd/portlist > # cat /proc/fs/nfsd/portlist > tcp 0.0.0.0, port=2049 > udp 0.0.0.0, port=2049 > > :o You're right. This is a problem. You should have just received some instructions from Tom Tucker on how to fix this. > > cat /proc/sys/sunrpc/transports > > # cat /proc/sys/sunrpc/transports > tcp 1048576 > udp 32768 > rdma 1048576 > > > ps x | grep nfsd > > # ps x | grep nfsd > 5155 ? S 0:00 [nfsd] > 5727 pts/0 R+ 0:00 grep nfsd This should work, but you will want more nfsd threads for performance. I'd recommend going back to using the nfs server startup scripts from your distro. That should take care of this and any other setup details for you. From highpointesdanceacademy at yahoo.ca Wed Feb 6 12:49:41 2008 From: highpointesdanceacademy at yahoo.ca (Archie Lacy) Date: Wed, 6 Feb 2008 21:49:41 +0100 Subject: [ofa-general] Only high quality medications Message-ID: <247564654.65972779715228@yahoo.ca> Get hard now and stay hard till she's exhausted! Boost your performance with Erect-Pack. Safe, approved, guaranteed, absolutely trouble-free!Thanks for being our customer. http://geocities.com/davismorales69/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From dwsouthernfoodsincm at southernfoodsinc.com Wed Feb 6 13:15:52 2008 From: dwsouthernfoodsincm at southernfoodsinc.com (Angeline Stroud) Date: Wed, 6 Feb 2008 22:15:52 +0100 Subject: [ofa-general] Looking for the best payday loan service online? 
Message-ID: <01c8690d$d5aa0580$9bdc415b@dwsouthernfoodsincm> Quote Advisor has helped me out to no end, when I needed a quick advance. Some years ago I joined and I know that they are there when I need them. Thanks guys for your professionalism and assistance as always. http://geocities.com/richiehancock99 Angeline Stroud From pawel.dziekonski at pwr.wroc.pl Wed Feb 6 13:35:43 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Wed, 6 Feb 2008 22:35:43 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? (fwd) In-Reply-To: <1202320491.14810.29.camel@trinity.ogc.int> References: <20080206154701.GA11384@cefeid.wcss.wroc.pl> <1202320491.14810.29.camel@trinity.ogc.int> Message-ID: <20080206213543.GA21176@cefeid.wcss.wroc.pl> On Wed, 06 Feb 2008 at 11:54:51AM -0600, Tom Tucker wrote: > Pawel: > > > On Wed, 2008-02-06 at 12:19 -0500, James Lentini wrote: > > > > ---------- Forwarded message ---------- > > > cat /proc/fs/nfsd/portlist > > > > # cat /proc/fs/nfsd/portlist > > tcp 0.0.0.0, port=2049 > > udp 0.0.0.0, port=2049 > > > > >From the output of the portlist file, I can tell that you have a patch > that I have since removed from the tree. The syntax for creating a > listener with this patch is different from James' README. The syntax > with that patch is as follows: > > echo "rdma 2 0.0.0.0 2050" > /proc/fs/nfsd/portlist and it works!!! server: # echo "rdma 2 0.0.0.0 2050" > /proc/fs/nfsd/portlist # cat /proc/fs/nfsd/portlist rdma 0.0.0.0, port=2050 tcp 0.0.0.0, port=2049 udp 0.0.0.0, port=2049 client: # mount.nfs 10.2.2.1:/scratch /mnt -i -o rdma,port=2050 -v mount.nfs: timeout set for Wed Feb 6 19:30:18 2008 mount.nfs: text-based options: 'rdma,port=2050,addr=10.2.2.1' 10.2.2.1:/scratch on /mnt type nfs (rdma,port=2050) # ls -la /mnt total 28 drwxr-xr-x 3 root root 4096 Feb 6 12:39 ./ drwxr-xr-x 24 root root 4096 Feb 6 13:33 ../ drwx------ 2 root root 16384 Jan 25 16:29 lost+found/ -rw-r--r-- 1 root root 0 Feb 6 12:39 qqq thanks!! 
I'm going to start performance tests now - I will report results. cheers, P -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From landman at scalableinformatics.com Wed Feb 6 13:38:11 2008 From: landman at scalableinformatics.com (Joe Landman) Date: Wed, 06 Feb 2008 16:38:11 -0500 Subject: [ofa-general] [ANNOUNCE] open iSCSI over iSER target RPM is available In-Reply-To: <47A87586.6010904@Voltaire.COM> References: <47A87586.6010904@Voltaire.COM> Message-ID: <47AA28C3.7090003@scalableinformatics.com> Hi Erez Erez Zilber wrote: > stgt (SCSI target) is an open-source framework for storage target > drivers. It supports iSCSI over iSER among other storage target drivers. > > Voltaire added a git tree for stgt that will be added to OFED 1.4: > http://www2.openfabrics.org/git/?p=~dorons/tgt.git;a=summary > > Until OFED 1.4 gets released, it is possible to install the stgt RPM on > top of OFED 1.3. For more details about how to install and use stgt, > please refer to https://wiki.openfabrics.org/tiki-index.php?page=ISER-target > > Some performance numbers that were measured by OSC (using SDR cards): Is there a 2TB limit on this target? It turns our 6TB partition into a 2TB lun. > * READ: 920 MB/sec > * WRITE: 850 MB/sec Not getting anything even remotely close to this. Are there more details on configuration somewhere? I followed the web page as indicated. Joe > > We hope to have DDR measurements numbers soon. 
> -- Joseph Landman, Ph.D Founder and CEO Scalable Informatics LLC, email: landman at scalableinformatics.com web : http://www.scalableinformatics.com http://jackrabbit.scalableinformatics.com phone: +1 734 786 8423 fax : +1 866 888 3112 cell : +1 734 612 4615 From rdreier at cisco.com Wed Feb 6 14:37:06 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 06 Feb 2008 14:37:06 -0800 Subject: [ofa-general] Dubious use of barrier() in ipath In-Reply-To: (Roland Dreier's message of "Fri, 11 Jan 2008 11:40:53 -0800") References: Message-ID: In ipath_rc.c, there are a couple of places that do: qp->r_msn++; qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; barrier(); qp->r_head_ack_queue = next; This looks pretty suspicious to me -- I haven't really tried to understand the code, but it has the flavor of protecting against another CPU seeing the r_head_ack_queue update before the other updates; and barrier() doesn't actually do that. If this code is correct, I think a comment explaining the barrier() would be good. But I have the feeling that the barrier() should really be wmb(), with rmb()s added on the reader side. - R. From xma at us.ibm.com Wed Feb 6 15:13:05 2008 From: xma at us.ibm.com (Shirley Ma) Date: Wed, 6 Feb 2008 15:13:05 -0800 Subject: [ofa-general] 4K MTU patch review In-Reply-To: Message-ID: Hello Eli, To optimize ipoib_ud_skb_put_frags(), the patch could be like this since we know the first buf only has IPOIB_UD_HEAD_SIZE, how do you think? static void ipoib_ud_skb_put_frags(struct sk_buff *skb, unsigned int length) { skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; if (skb_shinfo(skb)->nr_frags) { /* * we know only two buffers here, first buf size is * IPOIB_UD_HEAD_SIZE */ skb->tail += IPOIB_UD_HEAD_SIZE; frag->size = length - IPOIB_UD_HEAD_SIZE; skb->data_len += frag->size; skb->truesize += frag->size; skb->len += length; } else skb_put(skb, length); } Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From dwresolecasam at resolecasa.it Wed Feb 6 16:56:23 2008 From: dwresolecasam at resolecasa.it (Susanna Smith) Date: Thu, 7 Feb 2008 00:56:23 +0000 Subject: [ofa-general] Medications that you need. Message-ID: <01c86924$422c5580$65ad954e@dwresolecasam> Buy Must Have medications at Canada based pharmacy. No prescription at all! Save your money, buy pills immediately. Same quality! http://geocities.com/benitotucker517/ We provide confidential and secure purchase! From fujita.tomonori at lab.ntt.co.jp Wed Feb 6 17:06:18 2008 From: fujita.tomonori at lab.ntt.co.jp (FUJITA Tomonori) Date: Thu, 07 Feb 2008 10:06:18 +0900 Subject: [Stgt-devel] [ofa-general] [ANNOUNCE] open iSCSI over iSER target RPM is available In-Reply-To: <47AA28C3.7090003@scalableinformatics.com> References: <47A87586.6010904@Voltaire.COM> <47AA28C3.7090003@scalableinformatics.com> Message-ID: <20080207100618G.fujita.tomonori@lab.ntt.co.jp> On Wed, 06 Feb 2008 16:38:11 -0500 Joe Landman wrote: > Hi Erez > > Erez Zilber wrote: > > stgt (SCSI target) is an open-source framework for storage target > > drivers. It supports iSCSI over iSER among other storage target drivers. > > > > Voltaire added a git tree for stgt that will be added to OFED 1.4: > > http://www2.openfabrics.org/git/?p=~dorons/tgt.git;a=summary > > > > Until OFED 1.4 gets released, it is possible to install the stgt RPM on > > top of OFED 1.3. For more details about how to install and use stgt, > > please refer to https://wiki.openfabrics.org/tiki-index.php?page=ISER-target > > > > Some performance numbers that were measured by OSC (using SDR cards): > > Is there a 2TB limit on this target? It turns our 6TB partition into a > 2TB lun. No, there isn't. From arlin.r.davis at intel.com Wed Feb 6 17:17:40 2008 From: arlin.r.davis at intel.com (Arlin Davis) Date: Wed, 6 Feb 2008 17:17:40 -0800 Subject: [ofa-general] Problem with latest OFED 1.3 build... 
IPoIB and iPATH Message-ID: <000a01c86927$3b9460c0$9f97070a@amr.corp.intel.com> I cannot ifconfig ib0 on ipath using the latest build (ofed20080206). ifup ib0 SIOCSIFFLAGS: Invalid argument Failed to bring up ib0. >>> ib0: failed to create own ah CA 'ipath0' CA type: InfiniPath_QLE7140 Number of ports: 1 Firmware version: Hardware version: 2 Node GUID: 0x0011750000ffd75b System image GUID: 0x0011750000ffd75b Port 1: State: Active Physical state: LinkUp Rate: 10 Base lid: 14 LMC: 0 SM lid: 1 Capability mask: 0x02010800 Port GUID: 0x0011750000ffd75b It works fine on mthca adapters. Anyone else see this problem? -arlin -------------- next part -------------- An HTML attachment was scrubbed... URL: From ralph.campbell at qlogic.com Wed Feb 6 17:22:30 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Wed, 06 Feb 2008 17:22:30 -0800 Subject: [ofa-general] Dubious use of barrier() in ipath In-Reply-To: References: Message-ID: <1202347350.3638.15.camel@brick.pathscale.com> On Wed, 2008-02-06 at 14:37 -0800, Roland Dreier wrote: > In ipath_rc.c, there are a couple of places that do: > > qp->r_msn++; > qp->r_psn++; > qp->r_state = opcode; > qp->r_nak_state = 0; > barrier(); > qp->r_head_ack_queue = next; > > This looks pretty suspicious to me -- I haven't really tried to > understand the code, but it has the flavor of protecting against > another CPU seeing the r_head_ack_queue update before the other > updates; and barrier() doesn't actually do that. If this code is > correct, I think a comment explaining the barrier() would be good. > > But I have the feeling that the barrier() should really be wmb(), with > rmb()s added on the reader side. Probably the safer thing to do is to use the qp->s_lock, since the receive interrupt handler is the producer and the send tasklet, which sends the RDMA read or ATOMIC response, is the consumer. I will create a patch and send it out... 
From arthur.jones at qlogic.com Wed Feb 6 17:25:00 2008 From: arthur.jones at qlogic.com (Arthur Jones) Date: Wed, 6 Feb 2008 17:25:00 -0800 Subject: [ofa-general] Re: Dubious use of barrier() in ipath In-Reply-To: References: Message-ID: <20080207012500.GB8779@bauxite.pathscale.com> hi roland, thanks, i've forwarded this to ralph (the author), i couldn't follow at first glance, but i bet he'll remember... arthur On Wed, Feb 06, 2008 at 02:37:06PM -0800, Roland Dreier wrote: > In ipath_rc.c, there are a couple of places that do: > > qp->r_msn++; > qp->r_psn++; > qp->r_state = opcode; > qp->r_nak_state = 0; > barrier(); > qp->r_head_ack_queue = next; > > This looks pretty suspicious to me -- I haven't really tried to > understand the code, but it has the flavor of protecting against > another CPU seeing the r_head_ack_queue update before the other > updates; and barrier() doesn't actually do that. If this code is > correct, I think a comment explaining the barrier() would be good. > > But I have the feeling that the barrier() should really be wmb(), with > rmb()s added on the reader side. > > - R. From ardavis at ichips.intel.com Wed Feb 6 18:41:13 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Wed, 06 Feb 2008 18:41:13 -0800 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release In-Reply-To: <125bf18c14.18c14125bf@osu.edu> References: <125bf18c14.18c14125bf@osu.edu> Message-ID: <47AA6FC9.8050209@ichips.intel.com> LEI CHAI wrote: > Hi Arlin, > > When I ran programs with dapl 2.0.6 libraries I got this error by setting DAPL_DBG_TYPE=0xffff and DAT_DBG_TYPE=0xffff: > > libdaplofa.so.2: undefined symbol: dapl_extensions Can you give me a little more information? What programs? What is the program building and linking against? If v2, you should be linking against libdat2.so and using /usr/include/dat2. 
-arlin From pradeeps at linux.vnet.ibm.com Wed Feb 6 20:28:29 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 06 Feb 2008 20:28:29 -0800 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <47A9FE1C.9010709@linux.vnet.ibm.com> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> Message-ID: <47AA88ED.4000400@linux.vnet.ibm.com> Pradeep Satyanarayana wrote: > Tziporet Koren wrote: >> Shirley Ma wrote: >>> Thanks Tziporet. We will test it right after it's out. >>> >>> >> You can start using the latest build - >> http://www.openfabrics.org/builds/ofed-1.3/OFED-1.3-20080206-0751.tgz >> >> Tziporet >> > > I have downloaded today's build mentioned above. I am still seeing the issue > of failing ib_destroy_cq() for the rcq mentioned yesterday. > > Here are the steps that I follow: > > 1. On a freshly booted system configure ib0 > 2. Switch to connected mode (on an HCA that supports SRQ) > 3. ping remote interface > 4. modprobe -r ib_ehca > 5. I see the failures about ib_destroy_cq() failing and the > cascading failures following that (srq and pd cannot be destroyed) The ib_destroy_qp() fails because the refcnt is not zero. On my system it was set to 2. Pradeep From rdreier at cisco.com Wed Feb 6 21:18:36 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 06 Feb 2008 21:18:36 -0800 Subject: [ofa-general] Re: [PATCH 1 of 2] IB/mlx4: For 64-bit systems, use large virtually contiguous queue buffers (vmap) In-Reply-To: <200802030852.45683.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Sun, 3 Feb 2008 08:52:45 +0200") References: <200801281040.52138.jackm@dev.mellanox.co.il> <200802030852.45683.jackm@dev.mellanox.co.il> Message-ID: OK, I applied this along with a couple of cleanup patches of my own. 
I decided to use the vmap() access for CQ and SRQ buffers too, since I think that the CPU's MMU should be faster than walking our own page tables in software. Here's what I applied (I still need to test tomorrow so there may be silly bugs): commit b57aacfa7a95328f469d0360e49289b023c47e9e Author: Roland Dreier Date: Wed Feb 6 21:17:59 2008 -0800 mlx4_core: Clean up struct mlx4_buf Now that struct mlx4_buf.u is a struct instead of a union because of the vmap() changes, there's no point in having a struct at all. So move .direct and .page_list directly into struct mlx4_buf and get rid of a bunch of unnecessary ".u"s. Signed-off-by: Roland Dreier --- drivers/net/mlx4/alloc.c | 40 ++++++++++++++++++++-------------------- drivers/net/mlx4/mr.c | 4 ++-- include/linux/mlx4/device.h | 10 ++++------ 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index 2da2c2e..521dc03 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -116,40 +116,40 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->u.direct.buf = dma_alloc_coherent(&dev->pdev->dev, + buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, size, &t, GFP_KERNEL); - if (!buf->u.direct.buf) + if (!buf->direct.buf) return -ENOMEM; - buf->u.direct.map = t; + buf->direct.map = t; while (t & ((1 << buf->page_shift) - 1)) { --buf->page_shift; buf->npages *= 2; } - memset(buf->u.direct.buf, 0, size); + memset(buf->direct.buf, 0, size); } else { int i; buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; - buf->u.page_list = kzalloc(buf->nbufs * sizeof *buf->u.page_list, + buf->page_list = kzalloc(buf->nbufs * sizeof *buf->page_list, GFP_KERNEL); - if (!buf->u.page_list) + if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { - buf->u.page_list[i].buf = + 
buf->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, &t, GFP_KERNEL); - if (!buf->u.page_list[i].buf) + if (!buf->page_list[i].buf) goto err_free; - buf->u.page_list[i].map = t; + buf->page_list[i].map = t; - memset(buf->u.page_list[i].buf, 0, PAGE_SIZE); + memset(buf->page_list[i].buf, 0, PAGE_SIZE); } if (BITS_PER_LONG == 64) { @@ -158,10 +158,10 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, if (!pages) goto err_free; for (i = 0; i < buf->nbufs; ++i) - pages[i] = virt_to_page(buf->u.page_list[i].buf); - buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); + pages[i] = virt_to_page(buf->page_list[i].buf); + buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); kfree(pages); - if (!buf->u.direct.buf) + if (!buf->direct.buf) goto err_free; } } @@ -180,18 +180,18 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) int i; if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf, - buf->u.direct.map); + dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + buf->direct.map); else { if (BITS_PER_LONG == 64) - vunmap(buf->u.direct.buf); + vunmap(buf->direct.buf); for (i = 0; i < buf->nbufs; ++i) - if (buf->u.page_list[i].buf) + if (buf->page_list[i].buf) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->u.page_list[i].buf, - buf->u.page_list[i].map); - kfree(buf->u.page_list); + buf->page_list[i].buf, + buf->page_list[i].map); + kfree(buf->page_list); } } EXPORT_SYMBOL_GPL(mlx4_buf_free); diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 9c9e308..679dfdb 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -419,9 +419,9 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, for (i = 0; i < buf->npages; ++i) if (buf->nbufs == 1) - page_list[i] = buf->u.direct.map + (i << buf->page_shift); + page_list[i] = buf->direct.map + (i << buf->page_shift); else - page_list[i] = buf->u.page_list[i].map; + page_list[i] = 
buf->page_list[i].map; err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6316077..4210ac4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -189,10 +189,8 @@ struct mlx4_buf_list { }; struct mlx4_buf { - struct { - struct mlx4_buf_list direct; - struct mlx4_buf_list *page_list; - } u; + struct mlx4_buf_list direct; + struct mlx4_buf_list *page_list; int nbufs; int npages; int page_shift; @@ -311,9 +309,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { if (BITS_PER_LONG == 64 || buf->nbufs == 1) - return buf->u.direct.buf + offset; + return buf->direct.buf + offset; else - return buf->u.page_list[offset >> PAGE_SHIFT].buf + + return buf->page_list[offset >> PAGE_SHIFT].buf + (offset & (PAGE_SIZE - 1)); } commit 313abe55a87bc10e55d00f337d609e17ad5f8c9a Author: Jack Morgenstein Date: Mon Jan 28 10:40:51 2008 +0200 mlx4_core: For 64-bit systems, vmap() kernel queue buffers Since kernel virtual memory is not a problem on 64-bit systems, there is no reason to use our own 2-layer page mapping scheme for large kernel queue buffers on such systems. Instead, map the page list to a single virtually contiguous buffer with vmap(), so that can we access buffer memory via direct indexing. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/mlx4/alloc.c | 16 ++++++++++++++++ include/linux/mlx4/device.h | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index b226e01..2da2c2e 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -151,6 +151,19 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, memset(buf->u.page_list[i].buf, 0, PAGE_SIZE); } + + if (BITS_PER_LONG == 64) { + struct page **pages; + pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); + if (!pages) + goto err_free; + for (i = 0; i < buf->nbufs; ++i) + pages[i] = virt_to_page(buf->u.page_list[i].buf); + buf->u.direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); + kfree(pages); + if (!buf->u.direct.buf) + goto err_free; + } } return 0; @@ -170,6 +183,9 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) dma_free_coherent(&dev->pdev->dev, size, buf->u.direct.buf, buf->u.direct.map); else { + if (BITS_PER_LONG == 64) + vunmap(buf->u.direct.buf); + for (i = 0; i < buf->nbufs; ++i) if (buf->u.page_list[i].buf) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a0afa75..6316077 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -189,7 +189,7 @@ struct mlx4_buf_list { }; struct mlx4_buf { - union { + struct { struct mlx4_buf_list direct; struct mlx4_buf_list *page_list; } u; @@ -310,7 +310,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { - if (buf->nbufs == 1) + if (BITS_PER_LONG == 64 || buf->nbufs == 1) return buf->u.direct.buf + offset; else return buf->u.page_list[offset >> PAGE_SHIFT].buf + commit 1c69fc2a9012e160c8d459f63df74a6b01db8322 Author: Roland 
Dreier Date: Wed Feb 6 21:07:54 2008 -0800 IB/mlx4: Consolidate code to get an entry from a struct mlx4_buf We use struct mlx4_buf for kernel QP, CQ and SRQ buffers, and the code to look up an entry is duplicated in get_cqe_from_buf() and the QP and SRQ versions of get_wqe(). Factor this out into mlx4_buf_offset(). This will also make it easier to switch over to using vmap() for buffers. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 8 +------- drivers/infiniband/hw/mlx4/qp.c | 6 +----- drivers/infiniband/hw/mlx4/srq.c | 8 +------- include/linux/mlx4/device.h | 8 ++++++++ 4 files changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 7950aa6..8ac7b97 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -64,13 +64,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) { - int offset = n * sizeof (struct mlx4_cqe); - - if (buf->buf.nbufs == 1) - return buf->buf.u.direct.buf + offset; - else - return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf + - (offset & (PAGE_SIZE - 1)); + return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe)); } static void *get_cqe(struct mlx4_ib_cq *cq, int n) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8cba9c5..376db73 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -96,11 +96,7 @@ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) static void *get_wqe(struct mlx4_ib_qp *qp, int offset) { - if (qp->buf.nbufs == 1) - return qp->buf.u.direct.buf + offset; - else - return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf + - (offset & (PAGE_SIZE - 1)); + return mlx4_buf_offset(&qp->buf, offset); } static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n) diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 
e7e9a3d..beaa3b0 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -38,13 +38,7 @@ static void *get_wqe(struct mlx4_ib_srq *srq, int n) { - int offset = n << srq->msrq.wqe_shift; - - if (srq->buf.nbufs == 1) - return srq->buf.u.direct.buf + offset; - else - return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf + - (offset & (PAGE_SIZE - 1)); + return mlx4_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); } static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 222815d..a0afa75 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -308,6 +308,14 @@ struct mlx4_init_port_param { int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); +static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) +{ + if (buf->nbufs == 1) + return buf->u.direct.buf + offset; + else + return buf->u.page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); From rdreier at cisco.com Wed Feb 6 21:26:34 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 06 Feb 2008 21:26:34 -0800 Subject: [ofa-general] oops in cxgb3:t3_l2t_get Message-ID: Hi guys, With a fairly recent vanilla Linus tree (git head is 551e4fb2), I get the following oops when I run a program that creates a loopback connection with the RDMA CM over an iw_cxgb3 device. It seems to be totally reproducible. I can give more info tomorrow, I just figured I'd send this out now in case it's obvious what the problem is... 
BUG: unable to handle kernel paging request at 00007efebbb8ad5f IP: [] :cxgb3:t3_l2t_get+0x9e/0x314 PGD 0 Oops: 0000 [1] SMP CPU 0 Modules linked in: rdma_ucm rdma_cm ib_cm iw_cm ib_sa ib_mad ib_addr ib_uv] Pid: 3482, comm: order Not tainted 2.6.24 #6 RIP: 0010:[] [] :cxgb3:t3_l2t_get+0x94 RSP: 0018:ffff81007ecb1c48 EFLAGS: 00010202 RAX: 00007efebbb8ad3f RBX: ffff81007e806e00 RCX: 000000006476a5c4 RDX: ffff810144488000 RSI: 000000006da48cd2 RDI: 000000003a5e9ba6 RBP: ffff8101c3280000 R08: 0000000042843f96 R09: ffffc20000d631c0 R10: 0000000000000000 R11: 0000000000000002 R12: ffff81007e806c00 R13: 000000000000051f R14: 0000000000000001 R15: 0000000000000000 FS: 00007fcc6f3f66e0(0000) GS:ffffffff8052e000(0000) knlGS:000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007efebbb8ad5f CR3: 000000014418e000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process order (pid: 3482, threadinfo ffff81007ecb0000, task ffff81007e4e16) Stack: 0000000000000000 0000000000000000 a89ca99c00000006 ffff81007e806e00 ffff81007e8a0000 ffff81007ecb1d18 ffff8101c403f000 ffff81007ecb1d18 00007fff77400cd8 ffffffff8822c3e8 ffff81007e806e00 0000000000000000 Call Trace: [] ? :iw_cxgb3:iwch_connect+0x162/0x40e [] ? :iw_cm:iw_cm_connect+0xd1/0x144 [] ? :rdma_cm:rdma_connect+0x3a6/0x3ed [] ? :rdma_ucm:ucma_connect+0x6d/0x89 [] ? :rdma_ucm:ucma_write+0x73/0x91 [] ? vfs_write+0xad/0x136 [] ? sys_write+0x45/0x6e [] ? 
system_call_after_swapgs+0x7b/0x80 Code: c1 ee 03 31 c6 89 f0 29 f7 29 f1 c1 e0 0a 31 f8 29 c1 41 89 c5 8b 45 RIP [] :cxgb3:t3_l2t_get+0x9e/0x314 RSP CR2: 00007efebbb8ad5f ---[ end trace dae655a0e2ac8f4c ]--- From rdreier at cisco.com Wed Feb 6 21:37:55 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 06 Feb 2008 21:37:55 -0800 Subject: [ofa-general] oops in cxgb3:t3_l2t_get In-Reply-To: (Roland Dreier's message of "Wed, 06 Feb 2008 21:26:34 -0800") References: Message-ID: > I can give more info tomorrow, I just figured I'd send this out now in > case it's obvious what the problem is... I guess it is sort of obvious what the problem is... the oops is in t3_l2t_get(), which looks like struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct neighbour *neigh, struct net_device *dev) and a quick debugging print shows that in my case, this function is being called with dev->name and neigh->dev->name equal to "lo", which I guess is not that surprising considering I'm trying to make a loopback connection to INADDR_LOOPBACK. Given this, trying to look up cxgb3-specific stuff like struct port_info *p = netdev_priv(dev); int smt_idx = p->port_id; is probably not going to work out too well. The fix is a little beyond my knowledge, though... - R. From dwroyalscottishacademym at royalscottishacademy.org Wed Feb 6 21:58:16 2008 From: dwroyalscottishacademym at royalscottishacademy.org (Brain Mcdaniel) Date: Thu, 7 Feb 2008 13:58:16 +0800 Subject: [ofa-general] Buy cheap Canadian drugs and start saving now with CanadianPharmacy. Message-ID: <01c86991$7c800c00$50a205dd@dwroyalscottishacademym> We are glad to offer you the possibility to save on your medications and to receive top quality pharmaceutical products. It becomes possible with «CanadianPharmacy»! «CanadianPharmacy» has an excellent level of service, helpful and cooperating customer care team. Purchase with «CanadianPharmacy» and your medications will come right on time well packed and in perfect condition. 
Privacy and confidentiality are guaranteed! Great selection of medications! http://geocities.com/gerardalvarez564/ Enjoy new saving options with «CanadianPharmacy»! From erezz at Voltaire.COM Thu Feb 7 00:24:39 2008 From: erezz at Voltaire.COM (Erez Zilber) Date: Thu, 07 Feb 2008 10:24:39 +0200 Subject: [ewg] Re: [ofa-general] [ANNOUNCE] open iSCSI over iSER target RPMis available In-Reply-To: <47AA28C3.7090003@scalableinformatics.com> References: <47A87586.6010904@Voltaire.COM> <47AA28C3.7090003@scalableinformatics.com> Message-ID: <47AAC047.4000306@Voltaire.COM> > > * READ: 920 MB/sec > > * WRITE: 850 MB/sec > > Not getting anything even remotely close to this. Are there more > details on configuration somewhere? I followed the web page as indicated. > Are you running iSCSI over TCP or iSCSI over iSER (over InfiniBand)? Our results are with iSER. Erez From eli at dev.mellanox.co.il Thu Feb 7 00:36:02 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 07 Feb 2008 10:36:02 +0200 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <47AA88ED.4000400@linux.vnet.ibm.com> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> <47AA88ED.4000400@linux.vnet.ibm.com> Message-ID: <1202373362.13132.84.camel@mtls03> > > I have downloaded the todays build mentioned above. I am still seeing the issue > > of failing ib_destroy_cq() for the rcq mentioned yesterday. > > > > Here are the steps that I follow: > > > > 1. On a freshly booted system configure ib0 > > 2. Switch to connected mode ( on HCA that supports SRQ) > > 3. ping remote interface > > 4. modprobe -r ib_ehca > > 5. I see the failures about ib_destroy_cq() failing and the > > cascading failures following that (srq and pd cannot be destroyed) > > The ib_destroy_qp() fails because of refcnt is not zero. On my > system it was set to 2. 
> > Pradeep > I have tried to reproduce this but when using ib_mthca and mlx4_ib and could not see this problem. Could you try to dig more into this and provide more details. From eli at mellanox.co.il Thu Feb 7 00:54:29 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Thu, 07 Feb 2008 10:54:29 +0200 Subject: [ofa-general] 4K MTU patch review In-Reply-To: References: Message-ID: <1202374469.13132.86.camel@mtls03> On Wed, 2008-02-06 at 15:13 -0800, Shirley Ma wrote: > Hello Eli, > > To optimize ipoib_ud_skb_put_frags(), the patch could be like this > since we know the first buf only has IPOIB_UD_HEAD_SIZE, how do you > think? > > static void ipoib_ud_skb_put_frags(struct sk_buff *skb, unsigned int > length) > { > skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; > if (skb_shinfo(skb)->nr_frags) { I would take the above condition instead of this: if (ipoib_ud_need_sg(priv->max_ib_mtu)) { > /* > * we know only two buffers here, first buf size is > * IPOIB_UD_HEAD_SIZE > */ > skb->tail += IPOIB_UD_HEAD_SIZE; > frag->size = length - IPOIB_UD_HEAD_SIZE; > skb->data_len += frag->size; > skb->truesize += frag->size; > skb->len += length; > } else > skb_put(skb, length); > } > > Thanks > Shirley > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From tziporet at dev.mellanox.co.il Thu Feb 7 01:57:34 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Thu, 07 Feb 2008 11:57:34 +0200 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <1202373362.13132.84.camel@mtls03> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> <47AA88ED.4000400@linux.vnet.ibm.com> <1202373362.13132.84.camel@mtls03> Message-ID: <47AAD60E.5000108@mellanox.co.il> 
Eli Cohen wrote: > I have tried to reproduce this but when using ib_mthca and mlx4_ib and > could not see this problem. Could you try to dig more into this and > provide more details. > > > Please reproduce the issue on our HCAs since we do not have any ehca Note that Eli tried the code when using the non-SRQ path Tziporet From ogerlitz at voltaire.com Thu Feb 7 02:14:21 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 07 Feb 2008 12:14:21 +0200 Subject: [ofa-general] Re: Standard RDS port number In-Reply-To: <200802061756.12092.olaf.kirch@oracle.com> References: <479C5A45.2010701@dev.mellanox.co.il> <200802061756.12092.olaf.kirch@oracle.com> Message-ID: <47AAD9FD.6050206@voltaire.com> Olaf Kirch wrote: > On Sunday 27 January 2008 11:17, Yevgeny Kliteynik wrote: >> I'm using this port number to recognize RDS >> connection in QoS manager (OpenSM). > There are no plans to change the port number right now. In fact, the > TCP support is currently rather instable, so that we disabled it for now. Hi Olaf, The IBTA approach was to derive the "QoS level" from a bunch of params, one of which is the Service ID (aka SID). For ULPs that use the RDMA CM (as RDS does), the SID is derived from the destination port number provided as part of the sockaddr structure you call rdma_connect() with. Does this explain the question? Or From dwsitodelgiornom at sitodelgiorno.com Thu Feb 7 02:52:20 2008 From: dwsitodelgiornom at sitodelgiorno.com (Lea Barnes) Date: Thu, 7 Feb 2008 18:52:20 +0800 Subject: [ofa-general] Software in many languages! Message-ID: <01c869ba$91448700$a0c03c3b@dwsitodelgiornom> Purchase perfectly working software available in all European languages! Also for Macintosh! Fast to download, only original versions are offered at very cheap prices. Special offers and discounts allow you to save! Free of charge professional installation consultations could be of great help. Prompt reply on all your requests. 
Money back guarantee ensures the quality of product. http://geocities.com/danerobbins250/ Purchase perfectly functioning software. From vlad at lists.openfabrics.org Thu Feb 7 03:03:14 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 7 Feb 2008 03:03:14 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080207-0200 daily build status Message-ID: <20080207110314.5486DE60055@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on 
ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.19 Failed: From congiuses at outarex.net Thu Feb 7 20:13:35 2008 From: congiuses at outarex.net (Noam Collins) Date: Thu, 07 Feb 2008 20:13:35 -0800 Subject: [ofa-general] /\dobe FontFolio 11 for MAC:XP:Vis+a 189. Retail 2599 'save 2412' Message-ID: <000a01c86980$19d24480$0100007f@gtcnbbf> autodesk 3ds max 9.0 - 149 paragon partition manager 9 Pro - 39 ms xp professional with sp2 - 49 microsoft exchange server enterprise 2003 - 59 luxology modo 301 for mac - 129 nero 7 premium - 39 virtualdj 4.3 for mac - 39 microsoft sql server developer edition 2005 - 69 adobe after effects 7.0 standard - 59 alias studiotools 11.02 - 49 place 'mysoft4less .com' !n !nterne+ Explorer Take out ' before you place !n !nterne+ Explorer microsoft expression studio 1.0 - 79 adobe photoshop cs3 extended - 89 avid newscutter xp 6.7.2 - 69 From dwshruthimusicalsm at shruthimusicals.com Thu Feb 7 04:21:03 2008 From: dwshruthimusicalsm at shruthimusicals.com (Winnie Carrier) Date: Thu, 7 Feb 2008 14:21:03 +0200 Subject: [ofa-general] What is Generic Medication? Message-ID: <01c86994$ab4bb180$8325eb58@dwshruthimusicalsm> What is Generic Medication? 
A generic drug is identical, or bioequivalent to a brand name drug in dosage form, safety, strength, route of administration, quality, performance characteristics and intended use. Although generic drugs are chemically identical to their branded counterparts, they are typically sold at substantial discounts from the branded price. Generic drugs save consumers an estimated $8 to $10 billion a year at retail pharmacies. http://geocities.com/xaviergraves695/ From landman at scalableinformatics.com Thu Feb 7 05:50:33 2008 From: landman at scalableinformatics.com (Joe Landman) Date: Thu, 07 Feb 2008 08:50:33 -0500 Subject: [ewg] Re: [ofa-general] [ANNOUNCE] open iSCSI over iSER target RPMis available In-Reply-To: <47AAC047.4000306@Voltaire.COM> References: <47A87586.6010904@Voltaire.COM> <47AA28C3.7090003@scalableinformatics.com> <47AAC047.4000306@Voltaire.COM> Message-ID: <47AB0CA9.4020904@scalableinformatics.com> Erez Zilber wrote: >>> * READ: 920 MB/sec >>> * WRITE: 850 MB/sec >> Not getting anything even remotely close to this. Are there more >> details on configuration somewhere? I followed the web page as indicated. >> > > Are you running iSCSI over TCP or iSCSI over iSER (over InfiniBand)? Our > results are with iSER. I followed the instructions on the web pages that were pointed to for iSER. Are there updated pages? Is there a way to tell whether or not the RDMA path is being used? Thanks. 
Joe -- Joseph Landman, Ph.D Founder and CEO Scalable Informatics LLC, email: landman at scalableinformatics.com web : http://www.scalableinformatics.com http://jackrabbit.scalableinformatics.com phone: +1 734 786 8423 fax : +1 866 888 3112 cell : +1 734 612 4615 From swise at opengridcomputing.com Thu Feb 7 06:31:34 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 07 Feb 2008 08:31:34 -0600 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: References: Message-ID: <47AB1646.8030006@opengridcomputing.com> Roland Dreier wrote: > Hi guys, > > With a fairly recent vanilla Linus tree (git head is 551e4fb2), I get > the following oops when I run a program that creates a loopback > connection with the RDMA CM over an iw_cxgb3 device. It seems to be > totally reproducible. The Chelsio RNIC doesn't support loopback rdma connections. I guess we'll have to fail addr/route resolution for local addresses. I believe other rdma devices do support loopback, so maybe this needs to be some sort of ib_device attribute or something so the rdma-cma can fail the resolution based on what the device supports... Steve From jape77 at mindspring.com Thu Feb 7 06:49:17 2008 From: jape77 at mindspring.com (Annamae Shapiro) Date: Thu, 7 Feb 2008 16:49:17 +0200 Subject: [ofa-general] Read this and choose your New Reality Message-ID: <550395488.12541331876862@mindspring.com> Dear openib-general at openib.orgMany men, unsatisfied with their cock size, are starting to take interest in male medical products. The selection of products is so great that they often don’t know which product to choose. Choose the safest product called VPXL. It is known as the most effective male enhancements too. Buy our VPXL now and be happy about your size.http://geocities.com/clintononeil73/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From pradeeps at linux.vnet.ibm.com Thu Feb 7 06:50:22 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Thu, 07 Feb 2008 06:50:22 -0800 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <47AAD60E.5000108@mellanox.co.il> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> <47AA88ED.4000400@linux.vnet.ibm.com> <1202373362.13132.84.camel@mtls03> <47AAD60E.5000108@mellanox.co.il> Message-ID: <47AB1AAE.5000409@linux.vnet.ibm.com> Tziporet Koren wrote: > Eli Cohen wrote: >> I have tried to reproduce this but when using ib_mthca and mlx4_ib and >> could not see this problem. Could you try to dig more into this and >> provide more details. >> >> >> > Please reproduce the issue on our HCAs since we do not have any ehca > Note that Eli tried the code when using the non-SRQ path This problem was seen on a ehca that supports SRQ. Pradeep From eli at dev.mellanox.co.il Thu Feb 7 07:04:52 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 7 Feb 2008 17:04:52 +0200 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <47AB1AAE.5000409@linux.vnet.ibm.com> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> <47AA88ED.4000400@linux.vnet.ibm.com> <1202373362.13132.84.camel@mtls03> <47AAD60E.5000108@mellanox.co.il> <47AB1AAE.5000409@linux.vnet.ibm.com> Message-ID: <4e6a6b3c0802070704s7141cf6o914012147398c4ab@mail.gmail.com> > > This problem was seen on a ehca that supports SRQ. > Please reply with how many scatter entries ehca supports when working in SRQ mode, and with any other information I might need to try to mimic ehca behaviour on Mellanox devices. I would appreciate it if you could repeat the exact sequence of actions you take to reproduce this. Thanks. 
From swise at opengridcomputing.com Thu Feb 7 07:14:12 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 07 Feb 2008 09:14:12 -0600 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: <47AB1646.8030006@opengridcomputing.com> References: <47AB1646.8030006@opengridcomputing.com> Message-ID: <47AB2044.3060902@opengridcomputing.com> Steve Wise wrote: > Roland Dreier wrote: >> Hi guys, >> >> With a fairly recent vanilla Linus tree (git head is 551e4fb2), I get >> the following oops when I run a program that creates a loopback >> connection with the RDMA CM over an iw_cxgb3 device. It seems to be >> totally reproducible. > > The Chelsio RNIC doesn't support loopback rdma connections. I guess > we'll have to fail addr/route resolution for local addresses. I believe > other rdma devices do support loopback, so maybe this needs to be some > sort of ib_device attribute or something so the rdma-cma can fail the > resolution based on what the device supports... > Or I could fail it down in iwch_connect(). Thoughts? From hrosenstock at xsigo.com Thu Feb 7 07:18:14 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 07 Feb 2008 07:18:14 -0800 Subject: [ofa-general] [PATCH 1/2 V2] ib/sa: Add InformInfo/Notice support In-Reply-To: <200802060923.19034.jackm@dev.mellanox.co.il> References: <200802060923.19034.jackm@dev.mellanox.co.il> Message-ID: <1202397494.11210.651.camel@hrosenstock-ws.xsigo.com> On Wed, 2008-02-06 at 09:23 +0200, Jack Morgenstein wrote: > Add SA client support for notice/trap registration using InformInfo. > Clients can use the ib_sa interface to register for SA events based > on trap numbers, and receive SA event notification. This allows > clients to receive notification, such as GID in/out of service. Now that IBA 1.2.1 is out, should support for unpath/repath (traps 68/69) be added to this ? 
-- Hal > Signed-off-by: Sean Hefty From landman at scalableinformatics.com Thu Feb 7 08:05:03 2008 From: landman at scalableinformatics.com (Joe Landman) Date: Thu, 07 Feb 2008 11:05:03 -0500 Subject: [ofa-general] Update (Re: open iSCSI over iSER target RPM ...) In-Reply-To: <47AA28C3.7090003@scalableinformatics.com> References: <47A87586.6010904@Voltaire.COM> <47AA28C3.7090003@scalableinformatics.com> Message-ID: <47AB2C2F.2090707@scalableinformatics.com> Update: [root at woody etc]# dd if=/dev/zero of=/big/local.file bs=256k count=100000 100000+0 records in 100000+0 records out 26214400000 bytes (26 GB) copied, 58.7484 seconds, 446 MB/s Better. I rebuilt OFED 1.2.5.5. Are there specific recommended tuning guides for iSER? Backing store in this case are real disks, and we can sink/source >750 MB/s on them, so I am not worried about disk IO bottlenecks, more worried about bad config of iSCSI/iSER. BTW: the 2TB LUN limit I asked about is still here in this code. Same machines (initiator and target) used for SRP reported correct LUN sizes. Here we are using the -868 open-iscsi initiator, and the tgt RPM announced. I would like to dig into this. 
This is what I am getting in dmesg for this iSER target: iscsi: registered transport (tcp) iscsi: registered transport (iser) iser: iser_connect:connecting to: 10.2.1.2, port 0xbc0c iser: iser_cma_handler:event 0 conn ffff81024b9f69c0 id ffff810209748c00 iser: iser_cma_handler:event 2 conn ffff81024b9f69c0 id ffff810209748c00 iser: iser_create_ib_conn_res:setting conn ffff81024b9f69c0 cma_id ffff810209748c00: fmr_pool ffff81024bfb32c0 qp ffff8101cb16d600 iser: iser_cma_handler:event 9 conn ffff81024b9f69c0 id ffff810209748c00 iser: iscsi_iser_ep_poll:ib conn ffff81024b9f69c0 rc = 1 scsi13 : iSCSI Initiator over iSER, v.0.1 iser: iscsi_iser_conn_bind:binding iscsi conn ffff81021b65fa90 to iser_conn ffff81024b9f69c0 Vendor: IET Model: Controller Rev: 0001 Type: RAID ANSI SCSI revision: 05 scsi 13:0:0:0: Attached scsi generic sg2 type 12 Vendor: IET Model: VIRTUAL-DISK Rev: 0001 Type: Direct-Access ANSI SCSI revision: 05 sdc : very big device. try to use READ CAPACITY(16). sdc : READ CAPACITY(16) failed. sdc : status=1, message=00, host=0, driver=08 sdc : use 0xffffffff as device size SCSI device sdc: 4294967296 512-byte hdwr sectors (2199023 MB) sdc: Write Protect is off sdc: Mode Sense: 79 00 00 08 SCSI device sdc: drive cache: write back sdc : very big device. try to use READ CAPACITY(16). sdc : READ CAPACITY(16) failed. sdc : status=1, message=00, host=0, driver=08 sdc : use 0xffffffff as device size SCSI device sdc: 4294967296 512-byte hdwr sectors (2199023 MB) sdc: Write Protect is off sdc: Mode Sense: 79 00 00 08 SCSI device sdc: drive cache: write back sdc: unknown partition table sd 13:0:0:1: Attached scsi disk sdc sd 13:0:0:1: Attached scsi generic sg3 type 0 and this is what we get in SRP scsi6 : SRP.T10:0008F104039862A4 Vendor: SCST_BIO Model: vdisk0 Rev: 096 Type: Direct-Access ANSI SCSI revision: 04 sdc : very big device. try to use READ CAPACITY(16). 
SCSI device sdc: 12693355130 512-byte hdwr sectors (6498998 MB) sdc: Write Protect is off sdc: Mode Sense: 6b 00 10 08 SCSI device sdc: drive cache: write back w/ FUA This looks suspiciously like a 2^32 limit somewhere. Our exported device is [root at jr1 ~]# parted /dev/sdb print Model: Areca jrvs1 (scsi) Disk /dev/sdb: 6500GB Sector size (logical/physical): 512B/512B Partition Table: loop Number Start End Size File system Flags 1 0.00kB 6500GB 6500GB xfs and this is what tgtadm reports [root at jr1 ~]# tgtadm --lld iscsi --op show --mode target Target 1: iqn.2001-04.com.jr1-jackrabbit.small System information: Driver: iscsi Status: running I_T nexus information: I_T nexus: 4 Initiator: iqn.1996-04.voltaire.com:01:dfa8888a3fd Connection: 0 RDMA IP Address: 10.2.1.1 LUN information: LUN: 0 Type: controller SCSI ID: deadbeaf1:0 SCSI SN: beaf10 Size: 0 Online: No Poweron/Reset: Yes Removable media: No Backing store: No backing store LUN: 1 Type: disk SCSI ID: deadbeaf1:1 SCSI SN: beaf11 Size: 5T Online: Yes Poweron/Reset: No Removable media: No Backing store: /dev/sdb Account information: ACL information: 10.2.1.1 So it looks like the LUN 1 is approximately correct (5T ???) on the target, and incorrect when the initiator asks for it. Please note that I have successfully used the full 6+TB as an iSCSI target using the SCST-iscsi code, so I do know that the initiator works correctly. Is there a source RPM/tree for this target? Joe Landman wrote: > Hi Erez > > Erez Zilber wrote: >> stgt (SCSI target) is an open-source framework for storage target >> drivers. It supports iSCSI over iSER among other storage target drivers. >> >> Voltaire added a git tree for stgt that will be added to OFED 1.4: >> http://www2.openfabrics.org/git/?p=~dorons/tgt.git;a=summary >> >> Until OFED 1.4 gets released, it is possible to install the stgt RPM on >> top of OFED 1.3. 
For more details about how to install and use stgt, >> please refer to >> https://wiki.openfabrics.org/tiki-index.php?page=ISER-target >> >> Some performance numbers that were measured by OSC (using SDR cards): > > Is there a 2TB limit on this target? It turns our 6TB partition into a > 2TB lun. > >> * READ: 920 MB/sec >> * WRITE: 850 MB/sec > > Not getting anything even remotely close to this. Are there more > details on configuration somewhere? I followed the web page as indicated. > > Joe > >> >> We hope to have DDR measurements numbers soon. >> > > -- Joseph Landman, Ph.D Founder and CEO Scalable Informatics LLC, email: landman at scalableinformatics.com web : http://www.scalableinformatics.com http://jackrabbit.scalableinformatics.com phone: +1 734 786 8423 fax : +1 866 888 3112 cell : +1 734 612 4615 From jackm at dev.mellanox.co.il Thu Feb 7 07:59:49 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 7 Feb 2008 17:59:49 +0200 Subject: [ofa-general] [PATCH 0/ 8] XRC patch series (including xrc receive-only QPs) In-Reply-To: References: <200801231159.30989.jackm@dev.mellanox.co.il> Message-ID: <200802071759.49476.jackm@dev.mellanox.co.il> On Friday 01 February 2008 00:50, Tang, Changqing wrote: > Jack: >         In order to open a new XRC domain, all processes on a node open a file descriptor using the > same pathname, and pass the fd to ibv_open_xrc_domain(). > >         When can I close the fd ? when can I remove the temp file ?  Can I close the fd and unlink the > temp file right after ibv_open_xrc_domain() returns ? > >         Does ibv_open_xrc_domain() increase the fd reference count and ibv_close_xrc_domain() decrease the > fd reference count ? > I don't know what you mean by "unlink the temp file". However, the following is true: 1. The first time ibv_open_xrc_domain() is used with a temp file, in kernel space the temp file descriptor is used to access the file's inode entry. a. 
An xrc_domain "object" is created in kernel space, with a reference count of 1. b. A reference on the inode is taken (just once, at xrc_domain object creation time, for all ibv_open_xrc_domain() calls which arrive at that same inode). c. Once you have an xrc_domain handle, you may close the temp file -- you've already got the xrc handle you need, and no longer need to go through the inode. d. All subsequent calls to ibv_open_xrc_domain() for that inode will not increment the inode's reference count. However, each such call WILL increment the xrc_domain object's reference count. (a 2-layer reference counting system) -- and each such call must have a corresponding ibv_close_xrc_domain() call to decrement the xrc_domain object's ref count. 2. ibv_close_xrc_domain() a. decrements the xrc_domain object's ref count. If that count is then zero, the inode reference count is also decremented (see step 1.b above), and the xrc_domain object is destroyed. Note: NO extra reference counts are taken on the fd -- only a single extra ref count is taken on the inode itself. (Thus, even if the file is removed, the inode associated with the file will still be kept, until all xrc_domain users have closed the xrc domain). Note 2: Even if you "remove" the file and create a new file with the same filename before ALL users of the previous domain have released (i.e., closed) the xrc domain, the new file will get a different inode entry, so there should be no xrc domain collisions. 
- Jack From changquing.tang at hp.com Thu Feb 7 08:13:20 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 7 Feb 2008 16:13:20 +0000 Subject: [ofa-general] [PATCH 0/ 8] XRC patch series (including xrc receive-only QPs) In-Reply-To: <200802071759.49476.jackm@dev.mellanox.co.il> References: <200801231159.30989.jackm@dev.mellanox.co.il> <200802071759.49476.jackm@dev.mellanox.co.il> Message-ID: > Note: NO extra reference counts are taken on the fd -- only > a single extra ref count is taken > on the inode itself. (Thus, even if the file is > removed, the inode associated with the > file will still be kept, until all xrc_domain users > have closed the xrc domain). > > Note 2: Even if you "remove" the file and create a new file > with the same filename before ALL > users of the previous domain have released (i.e., > closed) the xrc domain, the new file > will get a different inode entry, so there should be > no xrc domain collisions. Thanks, These are the info I need. --CQ > > - Jack > From mashirle at us.ibm.com Wed Feb 6 22:21:08 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Wed, 06 Feb 2008 22:21:08 -0800 Subject: [ofa-general] 4K MTU patch review In-Reply-To: <1202374469.13132.86.camel@mtls03> References: <1202374469.13132.86.camel@mtls03> Message-ID: <1202365269.27381.2.camel@localhost.localdomain> > > if (skb_shinfo(skb)->nr_frags) { > > I would take the above condition instead of this: > > if (ipoib_ud_need_sg(priv->max_ib_mtu)) { Thanks Eli. I will create a patch for the code optimization. This patch might not be picked up since it doesn't belong to critical bug. 
Shirley From rdreier at cisco.com Thu Feb 7 08:29:29 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 07 Feb 2008 08:29:29 -0800 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: <47AB1646.8030006@opengridcomputing.com> (Steve Wise's message of "Thu, 07 Feb 2008 08:31:34 -0600") References: <47AB1646.8030006@opengridcomputing.com> Message-ID: > The Chelsio RNIC doesn't support loopback rdma connections. I guess > we'll have to fail addr/route resolution for local addresses. I > believe other rdma devices do support loopback, so maybe this needs to > be some sort of ib_device attribute or something so the rdma-cma can > fail the resolution based on what the device supports... That's unfortunate. Setting up loopback connections does work at least with IB adapters (not sure about nes, don't have HW yet), so yes this needs to be something that fails specifically for cxgb3. - R. From swise at opengridcomputing.com Thu Feb 7 08:34:05 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 07 Feb 2008 10:34:05 -0600 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: References: <47AB1646.8030006@opengridcomputing.com> Message-ID: <47AB32FD.6050700@opengridcomputing.com> Roland Dreier wrote: > > The Chelsio RNIC doesn't support loopback rdma connections. I guess > > we'll have to fail addr/route resolution for local addresses. I > > believe other rdma devices do support loopback, so maybe this needs to > > be some sort of ib_device attribute or something so the rdma-cma can > > fail the resolution based on what the device supports... > > That's unfortunate. Setting up loopback connections does work at > least with IB adapters (not sure about nes, don't have HW yet), so yes > this needs to be something that fails specifically for cxgb3. > > - R. Have you ever thought about a SW rdma loopback device? 
(call me crazy :) I mean, can linux map memory from one process to another to do direct copies of data for rdma read/write operations? From beehive at desertdesignstudio.com Thu Feb 7 08:47:53 2008 From: beehive at desertdesignstudio.com (Kenton Lopez) Date: Fri, 08 Feb 2008 00:47:53 +0800 Subject: [ofa-general] /\utodesk 3D Studlo Max 9 for XP 149. Retail 6720 "save 2979" Message-ID: <000901c869a7$ab08e180$0100007f@kmddptm> roxio toast titanium 8 - 39 quarkxpress passport 7.3 - 79 luxology modo 301 for mac - 129 virtualdj 4.3 for mac - 39 acronis true image enterprise server 9.1.3666 - 79 roxio digitalmedia studio deluxe suite 7.0 - 49 softimage alienbrain 8 - 169 microsoft frontpage 2003 - 29 adobe creative suite 3 master collection for win - 299 propellerhead reason 3 - 69 p1ace "mysoft4less .com" |n Interne+ Explorer Remove " before you p1ace |n Interne+ Explorer quarkxpress passport 7.3 - 79 google sketchup pro 6 for mac - 59 cakewalk sonar 6 producer edition - 69 From pradeeps at linux.vnet.ibm.com Thu Feb 7 08:52:34 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Thu, 07 Feb 2008 08:52:34 -0800 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <4e6a6b3c0802070704s7141cf6o914012147398c4ab@mail.gmail.com> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> <47AA88ED.4000400@linux.vnet.ibm.com> <1202373362.13132.84.camel@mtls03> <47AAD60E.5000108@mellanox.co.il> <47AB1AAE.5000409@linux.vnet.ibm.com> <4e6a6b3c0802070704s7141cf6o914012147398c4ab@mail.gmail.com> Message-ID: <47AB3752.5030703@linux.vnet.ibm.com> Eli Cohen wrote: >> This problem was seen on a ehca that supports SRQ. >> > > Please reply how many scatter entries does ehca support when working > in SRQ mode? Also any piece of info I might need to try and mimic ehca > behaviour on Mellanox devices. 
I will appreciate if you can repeat the > exact sequence of actions you do to reproduce this. Hello Eli, Ehca supports fewer than 16 s/g entries- hence the srq patch addresses that issue. The sequence of steps that I followed for the touch test: 1. On a freshly booted system, configure ib0 and assign an IP addresss 2. Switch to connected mode and change mtu 3. ping remote ib interface (already in CM mode) 4. modprobe -r ib_ehca I see a series of cascading failures in /var/log/messages, starting with the issue of not being able to destroy the cq (specifically rcq) Pradeep From rdreier at cisco.com Thu Feb 7 08:57:11 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 07 Feb 2008 08:57:11 -0800 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: <47AB32FD.6050700@opengridcomputing.com> (Steve Wise's message of "Thu, 07 Feb 2008 10:34:05 -0600") References: <47AB1646.8030006@opengridcomputing.com> <47AB32FD.6050700@opengridcomputing.com> Message-ID: > Have you ever thought about a SW rdma loopback device? (call me crazy :) > > I mean, can linux map memory from one process to another to do direct > copies of data for rdma read/write operations? You could do that (and of course there is the OSC software iwarp implementation), but I guess the complications in this case come from stuff like someone sharing a CQ between a loopback and a non-loopback QP on the same device, etc. - R. From rdreier at cisco.com Thu Feb 7 09:32:28 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 07 Feb 2008 09:32:28 -0800 Subject: [ofa-general] Re: [PATCH 2 of 2] IB/mlx4: shrinking WQE In-Reply-To: <200801281040.59398.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Mon, 28 Jan 2008 10:40:59 +0200") References: <200801281040.59398.jackm@dev.mellanox.co.il> Message-ID: > /* > * Stamp a SQ WQE so that it is invalid if prefetched by marking the > - * first four bytes of every 64 byte chunk with 0xffffffff, except for > - * the very first chunk of the WQE. 
> + * first four bytes of every 64 byte chunk with > + * 0x7FFFFFF | (invalid_ownership_value << 31). > + * > + * When max WR is than or equal to the WQE size, "less than or equal"? > + * as an optimization, we can stamp WQE with 0xffffffff, > + * and skip the very first chunk of the WQE. > */ From ssufficool at rov.sbcounty.gov Thu Feb 7 11:43:17 2008 From: ssufficool at rov.sbcounty.gov (Sufficool, Stanley) Date: Thu, 7 Feb 2008 11:43:17 -0800 Subject: [ofa-general] FW: possible rkey byteswap Message-ID: The OFED group might want to be included in this discussion. ;) -----Original Message----- From: ofw-bounces at lists.openfabrics.org [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Fab Tillier Sent: Thursday, February 07, 2008 11:25 AM To: Tzachi Dar; frank zago Cc: ofw at lists.openfabrics.org Subject: RE: possible rkey byteswap (was Re: [ofw] what's up withtheOFAWindows project?) As long as the rkey is exchanged on the wire in network order interop is not an issue. However, this requires the client to swap the rkey before putting it on the wire, as well as after getting it from the wire. So both the Linux and Windows driver models work, they just work differently which introduces confusion when people port their applications. It might be helpful to have a 'porting how-to guide' that highlights such differences if the Linux stack can't be changed. -Fab -----Original Message----- From: ofw-bounces at lists.openfabrics.org [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Tzachi Dar Sent: Thursday, February 07, 2008 1:22 AM To: Fab Tillier; frank zago Cc: ofw at lists.openfabrics.org Subject: RE: possible rkey byteswap (was Re: [ofw] what's up with theOFAWindows project?) I'm just trying to verify what Fab says: Is interoperation between Linux big- and little-endian broken? If so, who can push the fix to the Linux code? Once that will be done we can start looking at the windows to Linux interoperation. 
Thanks Tzachi > -----Original Message----- > From: ofw-bounces at lists.openfabrics.org > [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Fab Tillier > Sent: Thursday, February 07, 2008 1:20 AM > To: frank zago > Cc: ofw at lists.openfabrics.org > Subject: RE: possible rkey byteswap (was Re: [ofw] what's up with > theOFAWindows project?) > > Ok, let me rephrase - the RKey should be opaque, and should always be > in network order (i.e. never ever swapped in any code except the > lowest level HW driver if necessary). > > In your example, your application would also be broken between the > big- and little-endian Linux machines, independent of Windows. For > the big- and little-endian machines to interoperate, they must agree > on a wire format for exchanging the RKey. The application in this > case would swap the RKey into network order (and the swap would be a > noop on the big-endian machine) when it puts it on the wire, > and swap to host order (again a noop for the big-endian > machine) when it receives it form the wire. The Windows > version of the application would never swap the RKey as it is > always treated as an opaque network order value. > > -Fab > > -----Original Message----- > From: frank zago [mailto:fzago at systemfabricworks.com] > Sent: Wednesday, February 06, 2008 2:22 PM > To: Fab Tillier > Cc: Robert Pearson; 'Tom Tucker'; ofw at lists.openfabrics.org > Subject: Re: possible rkey byteswap (was Re: [ofw] what's up with the > OFAWindows project?) > > Fab Tillier wrote: > > The RKey is always an opaque value - it only has meaning to > the HCA hardware, and is used in combination with the PD of the QP on > which the RETH is received to do the address translation. It's a > token, and should be treated as such. It cannot be interpreted by > anything other than the HW/driver that generated it. The value really > should never be manipulated outside of the HCA hardware domain (which > includes the HW and the HW-specific driver). 
> > > > -Fab > The rkey cannot be opaque. You register a memory region on windows, > get a rkey, send it to a big endian linux and a little endian linux > host. Both try to use it, and one of them will fail. > > I think the application must know what format is this rkey, so they > can pass it along while keeping its byte order property. If all IB > stacks return the rkey in network order, and accept rkey also in > network order, then there is no interop problem anymore. > > Frank. > > _______________________________________________ > ofw mailing list > ofw at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw > _______________________________________________ ofw mailing list ofw at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw _______________________________________________ ofw mailing list ofw at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw From eli at dev.mellanox.co.il Thu Feb 7 11:51:49 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 7 Feb 2008 21:51:49 +0200 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <47AB3752.5030703@linux.vnet.ibm.com> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> <47AA88ED.4000400@linux.vnet.ibm.com> <1202373362.13132.84.camel@mtls03> <47AAD60E.5000108@mellanox.co.il> <47AB1AAE.5000409@linux.vnet.ibm.com> <4e6a6b3c0802070704s7141cf6o914012147398c4ab@mail.gmail.com> <47AB3752.5030703@linux.vnet.ibm.com> Message-ID: <4e6a6b3c0802071151i3efda6e0g65a24cddf27ac7f3@mail.gmail.com> > Ehca supports fewer than 16 s/g entries- hence the srq patch addresses that issue. > The sequence of steps that I followed for the touch test: > 1. On a freshly booted system, configure ib0 and assign an IP addresss > 2. Switch to connected mode and change mtu > 3. ping remote ib interface (already in CM mode) > 4. 
modprobe -r ib_ehca > > I see a series of cascading failures in /var/log/messages, starting with > the issue of not being able to destroy the cq (specifically rcq) > I followed the procedure you describe with Arbel device. I changed the code such that it will publish 12 scatter entries for the SRQ. I did not see this problem however so I don't know how to debug this. Could it be a problem in the ehca driver? From changquing.tang at hp.com Thu Feb 7 12:42:20 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 7 Feb 2008 20:42:20 +0000 Subject: [ofa-general] uDAPL libdat2.so version # problem for today's OFED code Message-ID: HI, I downloaded today's tarball and installed. But both libdat.so and libdat2.so report version 1.2 below is the code I used to test: #include #include #include // if linking with libdat.so int main() { int i; DAT_RETURN err; DAT_COUNT nif; DAT_PROVIDER_INFO *list[10]; DAT_PROVIDER_INFO interface[10]; nif = 10; for (i = 0; i < nif; i++) { list[i] = &(interface[i]); } err = dat_registry_list_providers(nif, &nif, list); if (err != DAT_SUCCESS) { fprintf(stderr, "dat_registry_list_providers() failed\n"); return (-1); } if (nif < 1) { fprintf(stderr, "no interface found\n"); return (-1); } fprintf(stderr, "version: %d.%d\n", interface[0].dapl_version_major, interface[0].dapl_version_minor); } From ardavis at ichips.intel.com Thu Feb 7 13:33:06 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 07 Feb 2008 13:33:06 -0800 Subject: [ofa-general] uDAPL libdat2.so version # problem for today's OFED code In-Reply-To: References: Message-ID: <47AB7912.5040700@ichips.intel.com> Tang, Changqing wrote: > HI, > I downloaded today's tarball and installed. But both libdat.so and libdat2.so report version 1.2 > This is not the DAT version, it is the provider configured in your /etc/dat.conf. The OFED configuration supplies OFA providers for both 1.2 and 2.0 versions. Your application picks accordingly.
For example, if you change your code to list more than one and include the name you will see the list: for (i=0;i<10;i++) { fprintf(stderr, "version: %s %d.%d\n", interface[i].ia_name, interface[i].dapl_version_major, interface[i].dapl_version_minor); } ./test version: OpenIB-cma 1.2 version: OpenIB-cma-1 1.2 version: OpenIB-cma-2 1.2 version: OpenIB-cma-3 1.2 version: OpenIB-bond 1.2 version: ofa-v2-ib0 2.0 version: ofa-v2-ib1 2.0 version: ofa-v2-ib2 2.0 version: ofa-v2-ib3 2.0 version: ofa-v2-bond 2.0 The dat_ia_open will validate the build version against the provider version. -arlin From changquing.tang at hp.com Thu Feb 7 14:15:28 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 7 Feb 2008 22:15:28 +0000 Subject: [ofa-general] uDAPL libdat2.so version # problem for today's OFED code In-Reply-To: <47AB7912.5040700@ichips.intel.com> References: <47AB7912.5040700@ichips.intel.com> Message-ID: Arlin: Thank you for pointing this out, I almost forgot this mismatch issue. Here is the error we got when we port v1 code to v2, can you give some hint ? setup_listener Permission denied setup_listener Permission denied setup_listener Permission denied setup_listener Permission denied setup_listener Permission denied setup_listener Permission denied .... I verified that rdma-cm works using 'rping' utility. --CQ > -----Original Message----- > From: Arlin Davis [mailto:ardavis at ichips.intel.com] > Sent: Thursday, February 07, 2008 3:33 PM > To: Tang, Changqing > Cc: OpenFabrics General > Subject: Re: [ofa-general] uDAPL libdat2.so version # problem > for today's OFED code > > Tang, Changqing wrote: > > HI, > > I downloaded today's tarball and installed. But > both libdat.so > > and libdat2.so report version 1.2 > > > > This is not the DAT version, it is the provider configured in > your /etc/dat.conf. The OFED configuration supplies OFA > providers for both > 1.2 and 2.0 versions. Your application picks accordingly.
> > For example, if you change your code to list more then one > and include the name you will see the list: > > for (i=0;i<10;i++) { > fprintf(stderr, "version: %s %d.%d\n", > interface[i].ia_name, > interface[i].dapl_version_major, > interface[i].dapl_version_minor); > } > > ./test > version: OpenIB-cma 1.2 > version: OpenIB-cma-1 1.2 > version: OpenIB-cma-2 1.2 > version: OpenIB-cma-3 1.2 > version: OpenIB-bond 1.2 > version: ofa-v2-ib0 2.0 > version: ofa-v2-ib1 2.0 > version: ofa-v2-ib2 2.0 > version: ofa-v2-ib3 2.0 > version: ofa-v2-bond 2.0 > > The dat_ia_open will validate the build version against the > provider version. > > -arlin > From ardavis at ichips.intel.com Thu Feb 7 14:46:38 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 07 Feb 2008 14:46:38 -0800 Subject: [ofa-general] uDAPL libdat2.so version # problem for today's OFED code In-Reply-To: References: <47AB7912.5040700@ichips.intel.com> Message-ID: <47AB8A4E.5080409@ichips.intel.com> Tang, Changqing wrote: > Arlin: > Thank you for pointing this out, I almost forgot this mismatch issue. > > Here is the error we got when we port v1 code to v2, can you give some hint ? > > setup_listener Permission denied > setup_listener Permission denied > setup_listener Permission denied > setup_listener Permission denied > setup_listener Permission denied > setup_listener Permission denied > .... > The conn_qual (port) is less then 1024 and rdma_cma returns EPERM. If you are calling dat_psp_create() with a conn_qual less then 1024 it will fail. If calling dat_psp_create_any(), the seed value is 1000 so you will get some warning messages until it hits a valid port. This is not an error just a warning. The call should actually return DAT_SUCCESS along with a valid conn_qual. I will change the seed value to 1024 in the next release. 
-arlin From ardavis at ichips.intel.com Thu Feb 7 15:05:26 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 07 Feb 2008 15:05:26 -0800 Subject: [ofa-general] uDAPL libdat2.so version [PATCH] udapl v1 and v2 - dat_create_psp_any() seed value wrong In-Reply-To: <47AB8A4E.5080409@ichips.intel.com> References: <47AB7912.5040700@ichips.intel.com> <47AB8A4E.5080409@ichips.intel.com> Message-ID: <47AB8EB6.1040600@ichips.intel.com> Arlin Davis wrote: > Tang, Changqing wrote: >> Arlin: >> Thank you for pointing this out, I almost forgot this mismatch > > If calling dat_psp_create_any(), the seed value is 1000 so you will get > some warning messages until it hits a valid port. This is not an error > just a warning. Actually this will return an ERR. Here is a patch to fix both v1 and v2 providers. Change PSP seed value to start with non-privileged port mappings. Signed-off by: Arlin Davis diff --git a/dapl/common/dapl_psp_create_any.c b/dapl/common/dapl_psp_create_any.c index a2768fb..e2faa4a 100644 --- a/dapl/common/dapl_psp_create_any.c +++ b/dapl/common/dapl_psp_create_any.c @@ -82,7 +82,7 @@ dapl_psp_create_any ( DAPL_SP *sp_ptr; DAPL_EVD *evd_ptr; DAT_RETURN dat_status; - static DAT_CONN_QUAL hint_conn_qual = 1000; /* seed value */ + static DAT_CONN_QUAL hint_conn_qual = 1024; /* seed value */ DAT_CONN_QUAL lcl_conn_qual; DAT_CONN_QUAL limit_conn_qual; From changquing.tang at hp.com Thu Feb 7 15:18:29 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 7 Feb 2008 23:18:29 +0000 Subject: [ofa-general] uDAPL libdat2.so version [PATCH] udapl v1 and v2 - dat_create_psp_any() seed value wrong In-Reply-To: <47AB8EB6.1040600@ichips.intel.com> References: <47AB7912.5040700@ichips.intel.com> <47AB8A4E.5080409@ichips.intel.com> <47AB8EB6.1040600@ichips.intel.com> Message-ID: Yes, the error is from dat_psp_create_any(). After changing seed value to 1024, do I still get any warning message ? Can I get this fix from tomorow's tarball ? 
--CQ > -----Original Message----- > From: Arlin Davis [mailto:ardavis at ichips.intel.com] > Sent: Thursday, February 07, 2008 5:05 PM > To: Tang, Changqing > Cc: OpenFabrics General > Subject: Re: [ofa-general] uDAPL libdat2.so version [PATCH] > udapl v1 and v2 - dat_create_psp_any() seed value wrong > > Arlin Davis wrote: > > Tang, Changqing wrote: > >> Arlin: > >> Thank you for pointing this out, I almost forgot this > >> mismatch > > > > If calling dat_psp_create_any(), the seed value is 1000 so you will > > get some warning messages until it hits a valid port. This > is not an > > error just a warning. > > Actually this will return an ERR. Here is a patch to fix both > v1 and v2 providers. > > Change PSP seed value to start with non-privileged port mappings. > > Signed-off by: Arlin Davis > > diff --git a/dapl/common/dapl_psp_create_any.c > b/dapl/common/dapl_psp_create_any.c > index a2768fb..e2faa4a 100644 > --- a/dapl/common/dapl_psp_create_any.c > +++ b/dapl/common/dapl_psp_create_any.c > @@ -82,7 +82,7 @@ dapl_psp_create_any ( > DAPL_SP *sp_ptr; > DAPL_EVD *evd_ptr; > DAT_RETURN dat_status; > - static DAT_CONN_QUAL hint_conn_qual = 1000; /* > seed value */ > + static DAT_CONN_QUAL hint_conn_qual = 1024; /* > seed value */ > DAT_CONN_QUAL lcl_conn_qual; > DAT_CONN_QUAL limit_conn_qual; > > > From pradeeps at linux.vnet.ibm.com Thu Feb 7 15:34:04 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Thu, 07 Feb 2008 15:34:04 -0800 Subject: [ofa-general] Re: [ewg] OFED 1.3 rc4 update In-Reply-To: <4e6a6b3c0802071151i3efda6e0g65a24cddf27ac7f3@mail.gmail.com> References: <47A9DF62.8090803@mellanox.co.il> <1202279622.6286.6.camel@localhost.localdomain> <47A9E432.7070508@mellanox.co.il> <47A9FE1C.9010709@linux.vnet.ibm.com> <47AA88ED.4000400@linux.vnet.ibm.com> <1202373362.13132.84.camel@mtls03> <47AAD60E.5000108@mellanox.co.il> <47AB1AAE.5000409@linux.vnet.ibm.com> <4e6a6b3c0802070704s7141cf6o914012147398c4ab@mail.gmail.com> 
<47AB3752.5030703@linux.vnet.ibm.com> <4e6a6b3c0802071151i3efda6e0g65a24cddf27ac7f3@mail.gmail.com> Message-ID: <47AB956C.6080808@linux.vnet.ibm.com> Eli Cohen wrote: >> Ehca supports fewer than 16 s/g entries- hence the srq patch addresses that issue. >> The sequence of steps that I followed for the touch test: >> 1. On a freshly booted system, configure ib0 and assign an IP addresss >> 2. Switch to connected mode and change mtu >> 3. ping remote ib interface (already in CM mode) >> 4. modprobe -r ib_ehca >> >> I see a series of cascading failures in /var/log/messages, starting with >> the issue of not being able to destroy the cq (specifically rcq) >> > I followed the procedure you describe with Arbel device. I changed the > code such that it will publish 12 scatter entires for the SRQ. I did > not see this problem however so I don't how to debug this. Could it be > a problem in the ehca driver? > Hello Eli, Thanks for the update. We are continuing to investigate this issue. Pradeep From sashak at voltaire.com Thu Feb 7 15:51:17 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 7 Feb 2008 23:51:17 +0000 Subject: [ofa-general] Re: [PATCH] opensm: separated current loop in main into its own function In-Reply-To: <47A79938.8020306@llnl.gov> References: <47A79938.8020306@llnl.gov> Message-ID: <20080207235117.GL11526@sashak.voltaire.com> On 15:01 Mon 04 Feb , Timothy A. Meier wrote: > Sasha, > > I am finally getting back to adding the OpenSSL option to the console. > > This patch, as well as the next one, are just a little cleanup to prepare > for that effort. > > From e1cd363fe9a24e7d88b0b4354b0467a191627073 Mon Sep 17 00:00:00 2001 > From: Tim Meier > Date: Fri, 1 Feb 2008 16:43:47 -0800 > Subject: [PATCH] opensm: separated current loop in main into its own > function > > Put the endless while loop in "main" into its own function to > support decoupling the osm_console from opensm. > > Signed-off-by: Tim Meier Applied. Thanks. 
Note that your mailer still mangles whitespace - I used the attached version. Sasha From sashak at voltaire.com Thu Feb 7 15:53:44 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 7 Feb 2008 23:53:44 +0000 Subject: [ofa-general] Re: [PATCH] opensm: osm_console - cleanup in preparation for adding OpenSSL option In-Reply-To: <47A799C3.7000207@llnl.gov> References: <47A799C3.7000207@llnl.gov> Message-ID: <20080207235344.GM11526@sashak.voltaire.com> On 15:03 Mon 04 Feb , Timothy A. Meier wrote: > Sasha, the second one. > > From 7a82f221e5e02ddc660aa917dc95256774fdc508 Mon Sep 17 00:00:00 2001 > From: Tim Meier > Date: Mon, 4 Feb 2008 14:49:34 -0800 > Subject: [PATCH] opensm: osm_console - cleanup in preparation for adding > OpenSSL option > > Trivial reorganization and cleanup, no new functionality. This is to > help minimize the impact (on existing code) of adding new features > to the Console (such as OpenSSL). > > Signed-off-by: Tim Meier Applied (attached version). Thanks. Sasha From ardavis at ichips.intel.com Thu Feb 7 16:37:55 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 07 Feb 2008 16:37:55 -0800 Subject: [ofa-general] uDAPL libdat2.so version [PATCH] udapl v1 and v2 - dat_create_psp_any() seed value wrong In-Reply-To: References: <47AB7912.5040700@ichips.intel.com> <47AB8A4E.5080409@ichips.intel.com> <47AB8EB6.1040600@ichips.intel.com> Message-ID: <47ABA463.6020707@ichips.intel.com> Tang, Changqing wrote: > Yes, the error is from dat_psp_create_any(). > > After changing seed value to 1024, do I still get any warning message ? You may see some in-use warning messages. I noticed rdma_cm changed the return code from EBUSY to EADDRINUSE so the warning message is not suppressed like it should be. > > Can I get this fix from tomorow's tarball ? > OFED pulls from DAPL package releases so I would have to roll-up another package.
In the meantime, you could install the dapl src rpm that comes with OFED, apply the patch, and rebuild the libraries to test the fix. -arlin From ralph.campbell at qlogic.com Thu Feb 7 17:44:52 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Thu, 07 Feb 2008 17:44:52 -0800 Subject: [ofa-general] Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: <000a01c86927$3b9460c0$9f97070a@amr.corp.intel.com> References: <000a01c86927$3b9460c0$9f97070a@amr.corp.intel.com> Message-ID: <1202435092.3638.63.camel@brick.pathscale.com> We can reproduce the problem here. We haven't made any ib_ipath driver changes between RC3 and RC4 so some recent patch has broken us. I'm in the process of looking at it. On Wed, 2008-02-06 at 17:17 -0800, Arlin Davis wrote: > I cannot ifconfig ib0 on ipath with using the latest build > (ofed20080206). > > ifup ib0 > SIOCSIFFLAGS: Invalid argument > Failed to bring up ib0. > > >>> ib0: failed to create own ah > > CA 'ipath0' > CA type: InfiniPath_QLE7140 > Number of ports: 1 > Firmware version: > Hardware version: 2 > Node GUID: 0x0011750000ffd75b > System image GUID: 0x0011750000ffd75b > Port 1: > State: Active > Physical state: LinkUp > Rate: 10 > Base lid: 14 > LMC: 0 > SM lid: 1 > Capability mask: 0x02010800 > Port GUID: 0x0011750000ffd75b > > It works fine on mthca adapters. Anyone else see this problem? > > > -arlin > > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From fujita.tomonori at lab.ntt.co.jp Thu Feb 7 18:09:44 2008 From: fujita.tomonori at lab.ntt.co.jp (FUJITA Tomonori) Date: Fri, 08 Feb 2008 11:09:44 +0900 Subject: [ofa-general] Re: [Stgt-devel] Update (Re: open iSCSI over iSER target RPM ...) 
In-Reply-To: <47AB2C2F.2090707@scalableinformatics.com> References: <47A87586.6010904@Voltaire.COM> <47AA28C3.7090003@scalableinformatics.com> <47AB2C2F.2090707@scalableinformatics.com> Message-ID: <20080208110944W.fujita.tomonori@lab.ntt.co.jp> On Thu, 07 Feb 2008 11:05:03 -0500 Joe Landman wrote: > Update: > > [root at woody etc]# dd if=/dev/zero of=/big/local.file bs=256k count=100000 > 100000+0 records in > 100000+0 records out > 26214400000 bytes (26 GB) copied, 58.7484 seconds, 446 MB/s > > Better. I rebuilt OFED 1.2.5.5. Are there specific recommended tuning > guides for iSER? Backing store in this case are real disks, and we can > sink/source >750 MB/s on them, so I am not worried about disk IO > bottlenecks, more worried about bad config of iSCSI/iSER. > > BTW: the 2TB LUN limit I asked about is still here in this code. Same > machines (initiator and target) used for SRP reported correct LUN sizes. > Here we are using the -868 open-iscsi initiator, and the tgt RPM > announced. I would like to dig into this. Thanks a lot, I thought that I tested tgt with >2TB devices but it seems that I didn't. I'll try to fix the problem shortly. From xma at us.ibm.com Thu Feb 7 18:32:23 2008 From: xma at us.ibm.com (Shirley Ma) Date: Thu, 7 Feb 2008 18:32:23 -0800 Subject: [ofa-general] Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: <1202435092.3638.63.camel@brick.pathscale.com> Message-ID: I am reposting my email here since the original was blocked. On Thu, 2008-02-07 at 18:16 -0800, Ralph Campbell wrote: > # cat /etc/*release > Red Hat Enterprise Linux Server release 5 (Tikanga) > # uname -r > 2.6.18-8.el5 > > 4K PAGE_SIZE I don't have the ipath driver here; otherwise I could try this out myself. Here are a couple of suggestions; could you please try them out? 1. Try this with a 64K page size, like RHEL5U1, to see whether you have the same issue. 2. Can you put a debug message in ipath_create_ah() to see whether this is a memory allocation failure? 3. How many IB cards are in your system?
If you have several, leave just one ipath card installed to see whether you can still hit this problem. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From xma at us.ibm.com Thu Feb 7 18:44:28 2008 From: xma at us.ibm.com (Shirley Ma) Date: Thu, 7 Feb 2008 18:44:28 -0800 Subject: [ofa-general] Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: <1202435092.3638.63.camel@brick.pathscale.com> Message-ID: Hello Ralph, What are the send_queue_size and recv_queue_size values for the ib_ipoib module? Can you reload the ib_ipoib module with send_queue_size=2, recv_queue_size=2 to see whether it makes any difference? thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From chai.15 at osu.edu Thu Feb 7 19:04:41 2008 From: chai.15 at osu.edu (LEI CHAI) Date: Thu, 07 Feb 2008 22:04:41 -0500 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release Message-ID: <48ddf471ab.471ab48ddf@osu.edu> I modified mvapich2 to compile with udapl v2 and used mvapich2 to compile and run an MPI latency program (osu_latency.c in osu_benchmarks). I set LD_LIBRARY_PATH=dapl-2.0.6/lib, but it didn't run (cannot open IA) and by setting the debug information I saw the error below. I've made sure mvapich2 was compiled with header files in include/dat2/ and linked with libdat2.so. And when I set LD_LIBRARY_PATH=dapl-2.0.5/lib I was able to run the program successfully. Lei ----- Original Message ----- From: Arlin Davis Date: Wednesday, February 6, 2008 9:41 pm Subject: Re: [ofa-general] [ANNOUCE] dapl 2.0.6 release > LEI CHAI wrote: > > Hi Arlin, > > > > When I ran programs with dapl 2.0.6 libraries I got this error > by setting DAPL_DBG_TYPE=0xffff and DAT_DBG_TYPE=0xffff: > > > > libdaplofa.so.2: undefined symbol: dapl_extensions > > Can you give me a little more information? What programs? What is > the > program building and linking against? If v2, you should be linking > against libdat2.so and using /usr/include/dat2.
> > -arlin > > > From sashak at voltaire.com Thu Feb 7 19:57:05 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 8 Feb 2008 03:57:05 +0000 Subject: [ofa-general] [PATCH] opensm/osm_mcast_mgr: OSM_SIGNAL_NONE is not in use anymore Message-ID: <20080208035705.GN11526@sashak.voltaire.com> Nobody refers OSM_SIGNAL_NONE, osm_mcast_mgr_process_mgroups() is not running in a loop - no special need to track empty queues. Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_mcast_mgr.c | 5 ----- 1 files changed, 0 insertions(+), 5 deletions(-) diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c index 1178522..ca42a9f 100644 --- a/opensm/opensm/osm_mcast_mgr.c +++ b/opensm/opensm/osm_mcast_mgr.c @@ -1376,11 +1376,6 @@ osm_signal_t osm_mcast_mgr_process_mgroups(osm_mcast_mgr_t * p_mgr) /* we need a lock to make sure the p_mgrp is not change other ways */ CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); - if (cl_is_qlist_empty(p_list)) { - CL_PLOCK_RELEASE(p_mgr->p_lock); - return OSM_SIGNAL_NONE; - } - while (!cl_is_qlist_empty(p_list)) { ctx = (osm_mcast_mgr_ctxt_t *) cl_qlist_remove_head(p_list); req_type = ctx->req_type; -- 1.5.4.rc5 From sashak at voltaire.com Thu Feb 7 20:02:14 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 8 Feb 2008 04:02:14 +0000 Subject: [ofa-general] [PATCH] opensm: kill osm_state_mgr object Message-ID: <20080208040214.GO11526@sashak.voltaire.com> Remove not used anymore sm's osm_state_mgr sub-object. 
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_sm.h | 46 +++- opensm/include/opensm/osm_state_mgr.h | 51 --- opensm/opensm/osm_console.c | 2 +- opensm/opensm/osm_perfmgr.c | 8 +- opensm/opensm/osm_sm.c | 12 +- opensm/opensm/osm_state_mgr.c | 567 ++++++++++++++------------------- 6 files changed, 298 insertions(+), 388 deletions(-) diff --git a/opensm/include/opensm/osm_sm.h b/opensm/include/opensm/osm_sm.h index 2cdbdd0..83bd4da 100644 --- a/opensm/include/opensm/osm_sm.h +++ b/opensm/include/opensm/osm_sm.h @@ -70,7 +70,6 @@ #include #include #include -#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -113,6 +112,7 @@ BEGIN_C_DECLS */ typedef struct osm_sm { osm_thread_state_t thread_state; + osm_sm_state_t state; unsigned signal_mask; cl_spinlock_t signal_lock; cl_event_t signal_event; @@ -136,7 +136,6 @@ typedef struct osm_sm { osm_lid_mgr_t lid_mgr; osm_ucast_mgr_t ucast_mgr; osm_link_mgr_t link_mgr; - osm_state_mgr_t state_mgr; osm_drop_mgr_t drop_mgr; osm_sweep_fail_ctrl_t sweep_fail_ctrl; osm_sm_state_mgr_t sm_state_mgr; @@ -655,5 +654,48 @@ osm_sm_wait_for_subnet_up(IN osm_sm_t * const p_sm, * SEE ALSO *********/ +/****f* OpenSM: State Manager/osm_sm_is_greater_than +* NAME +* osm_sm_is_greater_than +* +* DESCRIPTION +* Compares two SM's (14.4.1.2) +* +* SYNOPSIS +*/ +static inline boolean_t +osm_sm_is_greater_than(IN const uint8_t l_priority, + IN const ib_net64_t l_guid, + IN const uint8_t r_priority, IN const ib_net64_t r_guid) +{ + return (l_priority > r_priority + || (l_priority == r_priority + && cl_ntoh64(l_guid) < cl_ntoh64(r_guid))); +} + +/* +* PARAMETERS +* l_priority +* [in] Priority of the SM on the "left" +* +* l_guid +* [in] GUID of the SM on the "left" +* +* r_priority +* [in] Priority of the SM on the "right" +* +* r_guid +* [in] GUID of the SM on the "right" +* +* RETURN VALUES +* Return TRUE if an sm with l_priority and l_guid is higher than an sm +* with r_priority and r_guid, return FALSE otherwise. 
+* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + END_C_DECLS #endif /* _OSM_SM_H_ */ diff --git a/opensm/include/opensm/osm_state_mgr.h b/opensm/include/opensm/osm_state_mgr.h index f3886ec..1173981 100644 --- a/opensm/include/opensm/osm_state_mgr.h +++ b/opensm/include/opensm/osm_state_mgr.h @@ -254,57 +254,6 @@ osm_state_mgr_init(IN osm_state_mgr_t * const p_mgr, struct osm_sm * sm); * osm_state_mgr_destroy *********/ -/****f* OpenSM: State Manager/osm_sm_is_greater_than -* NAME -* osm_sm_is_greater_than -* -* DESCRIPTION -* Compares two SM's (14.4.1.2) -* -* SYNOPSIS -*/ -static inline boolean_t -osm_sm_is_greater_than(IN const uint8_t l_priority, - IN const ib_net64_t l_guid, - IN const uint8_t r_priority, IN const ib_net64_t r_guid) -{ - if (l_priority > r_priority) { - return (TRUE); - } else { - if (l_priority == r_priority) { - if (cl_ntoh64(l_guid) < cl_ntoh64(r_guid)) { - return (TRUE); - } - } - } - return (FALSE); -} - -/* -* PARAMETERS -* l_priority -* [in] Priority of the SM on the "left" -* -* l_guid -* [in] GUID of the SM on the "left" -* -* r_priority -* [in] Priority of the SM on the "right" -* -* r_guid -* [in] GUID of the SM on the "right" -* -* RETURN VALUES -* Return TRUE if an sm with l_priority and l_guid is higher than an sm -* with r_priority and r_guid, -* return FALSE otherwise. 
-* -* NOTES -* -* SEE ALSO -* State Manager -*********/ - /****f* OpenSM: State Manager/osm_state_mgr_process * NAME * osm_state_mgr_process diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c index 8b6642e..1a6208f 100644 --- a/opensm/opensm/osm_console.c +++ b/opensm/opensm/osm_console.c @@ -314,7 +314,7 @@ static void print_status(osm_opensm_t * p_osm, FILE * out) fprintf(out, " OpenSM Version : %s\n", OSM_VERSION); fprintf(out, " SM State/Mgr State : %s/%s\n", sm_state_str(p_osm->subn.sm_state), - sm_state_mgr_str(p_osm->sm.state_mgr.state)); + sm_state_mgr_str(p_osm->sm.state)); fprintf(out, " SA State : %s\n", sa_state_str(p_osm->sa.state)); fprintf(out, " Routing Engine : %s\n", diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index 9480ad7..1099445 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -790,12 +790,12 @@ void osm_perfmgr_process(osm_perfmgr_t * pm) if (pm->state != PERFMGR_STATE_ENABLED) return; - if (pm->sm->state_mgr.state != OSM_SM_STATE_IDLE && - pm->sm->state_mgr.state != OSM_SM_STATE_STANDBY) + if (pm->sm->state != OSM_SM_STATE_IDLE && + pm->sm->state != OSM_SM_STATE_STANDBY) return; - if (pm->sm->state_mgr.state == OSM_SM_STATE_STANDBY || - (pm->sm->state_mgr.state == OSM_SM_STATE_IDLE && + if (pm->sm->state == OSM_SM_STATE_STANDBY || + (pm->sm->state == OSM_SM_STATE_IDLE && pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)) perfmgr_discovery(pm->subn->p_osm); diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c index 019fa51..37dae43 100644 --- a/opensm/opensm/osm_sm.c +++ b/opensm/opensm/osm_sm.c @@ -80,6 +80,8 @@ extern void osm_si_rcv_process(IN void *context, IN void *data); extern void osm_trap_rcv_process(IN void *context, IN void *data); extern void osm_vla_rcv_process(IN void *context, IN void *data); +extern void osm_state_mgr_process(IN osm_sm_t *sm, IN osm_signal_t signal); + /********************************************************************** 
**********************************************************************/ static void osm_sm_process(osm_sm_t * sm, osm_signal_t signal) @@ -89,7 +91,7 @@ static void osm_sm_process(osm_sm_t * sm, osm_signal_t signal) osm_perfmgr_process(&sm->p_subn->p_osm->perfmgr); else #endif - osm_state_mgr_process(&sm->state_mgr, signal); + osm_state_mgr_process(sm, signal); } static void __osm_sm_sweeper(IN void *p_ptr) @@ -154,6 +156,7 @@ void osm_sm_construct(IN osm_sm_t * const p_sm) { memset(p_sm, 0, sizeof(*p_sm)); p_sm->thread_state = OSM_THREAD_STATE_NONE; + p_sm->state = OSM_SM_STATE_INIT; p_sm->sm_trans_id = OSM_SM_INITIAL_TID_VALUE; cl_spinlock_construct(&p_sm->signal_lock); cl_event_construct(&p_sm->signal_event); @@ -165,7 +168,6 @@ void osm_sm_construct(IN osm_sm_t * const p_sm) osm_lid_mgr_construct(&p_sm->lid_mgr); osm_ucast_mgr_construct(&p_sm->ucast_mgr); osm_link_mgr_construct(&p_sm->link_mgr); - osm_state_mgr_construct(&p_sm->state_mgr); osm_drop_mgr_construct(&p_sm->drop_mgr); osm_sweep_fail_ctrl_construct(&p_sm->sweep_fail_ctrl); osm_sm_state_mgr_construct(&p_sm->sm_state_mgr); @@ -229,7 +231,6 @@ void osm_sm_destroy(IN osm_sm_t * const p_sm) osm_ucast_mgr_destroy(&p_sm->ucast_mgr); osm_link_mgr_destroy(&p_sm->link_mgr); osm_drop_mgr_destroy(&p_sm->drop_mgr); - osm_state_mgr_destroy(&p_sm->state_mgr); osm_sm_state_mgr_destroy(&p_sm->sm_state_mgr); osm_mcast_mgr_destroy(&p_sm->mcast_mgr); cl_event_wheel_destroy(&p_sm->trap_aging_tracker); @@ -316,10 +317,6 @@ osm_sm_init(IN osm_sm_t * const p_sm, if (status != IB_SUCCESS) goto Exit; - status = osm_state_mgr_init(&p_sm->state_mgr, p_sm); - if (status != IB_SUCCESS) - goto Exit; - status = osm_drop_mgr_init(&p_sm->drop_mgr, p_sm); if (status != IB_SUCCESS) goto Exit; @@ -396,6 +393,7 @@ osm_sm_init(IN osm_sm_t * const p_sm, * the sweeper thread if the user wants sweeping. 
*/ p_sm->thread_state = OSM_THREAD_STATE_RUN; + p_sm->state = OSM_SM_STATE_IDLE; status = cl_thread_init(&p_sm->sweeper, __osm_sm_sweeper, p_sm, "opensm sweeper"); if (status != IB_SUCCESS) diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 4dcb584..674ccf7 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include @@ -74,54 +73,7 @@ osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm); /********************************************************************** **********************************************************************/ -void osm_state_mgr_construct(IN osm_state_mgr_t * const p_mgr) -{ - memset(p_mgr, 0, sizeof(*p_mgr)); - p_mgr->state = OSM_SM_STATE_INIT; -} - -/********************************************************************** - **********************************************************************/ -void osm_state_mgr_destroy(IN osm_state_mgr_t * const p_mgr) -{ - CL_ASSERT(p_mgr); - - OSM_LOG_ENTER(p_mgr->p_log, osm_state_mgr_destroy); - - OSM_LOG_EXIT(p_mgr->p_log); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_state_mgr_init(IN osm_state_mgr_t * const p_mgr, IN osm_sm_t * sm) -{ - OSM_LOG_ENTER(sm->p_log, osm_state_mgr_init); - - osm_state_mgr_construct(p_mgr); - - p_mgr->sm = sm; - p_mgr->p_log = sm->p_log; - p_mgr->p_subn = sm->p_subn; - p_mgr->p_lid_mgr = &sm->lid_mgr; - p_mgr->p_ucast_mgr = &sm->ucast_mgr; - p_mgr->p_mcast_mgr = &sm->mcast_mgr; - p_mgr->p_link_mgr = &sm->link_mgr; - p_mgr->p_drop_mgr = &sm->drop_mgr; - p_mgr->p_mad_ctrl = &sm->mad_ctrl; - p_mgr->p_stats = &sm->p_subn->p_osm->stats; - p_mgr->p_sm_state_mgr = &sm->sm_state_mgr; - p_mgr->state = OSM_SM_STATE_IDLE; - p_mgr->p_lock = sm->p_lock; - p_mgr->p_subnet_up_event = &sm->subnet_up_event; - - OSM_LOG_EXIT(p_mgr->p_log); 
- return IB_SUCCESS; -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_up_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_up_msg(IN const osm_sm_t *sm) { /* * This message should be written only once - when the @@ -129,16 +81,15 @@ static void __osm_state_mgr_up_msg(IN const osm_state_mgr_t * p_mgr) * the first time. The change of state is marked with * the subnet flag moved_to_master_state */ - if (p_mgr->p_subn->moved_to_master_state == TRUE) { - osm_log(p_mgr->p_log, OSM_LOG_SYS, "SUBNET UP\n"); /* Format Waived */ + if (sm->p_subn->moved_to_master_state == TRUE) { + osm_log(sm->p_log, OSM_LOG_SYS, "SUBNET UP\n"); /* Format Waived */ /* clear the signal */ - p_mgr->p_subn->moved_to_master_state = FALSE; - } else { - osm_log(p_mgr->p_log, OSM_LOG_INFO, "SUBNET UP\n"); /* Format Waived */ - } + sm->p_subn->moved_to_master_state = FALSE; + } else + osm_log(sm->p_log, OSM_LOG_INFO, "SUBNET UP\n"); /* Format Waived */ - if (p_mgr->p_subn->opt.sweep_interval) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (sm->p_subn->opt.sweep_interval) + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_state_mgr_up_msg: " "\n\n\n********************************" "**********************************\n" @@ -146,8 +97,8 @@ static void __osm_state_mgr_up_msg(IN const osm_state_mgr_t * p_mgr) "***************************\n" "**************************************" "****************************\n\n\n"); - } else { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + else + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_state_mgr_up_msg: " "\n\n\n********************************" "**********************************\n" @@ -155,16 +106,15 @@ static void __osm_state_mgr_up_msg(IN const osm_state_mgr_t * p_mgr) "(sweep disabled) *******************\n" "**************************************" "****************************\n\n\n"); - } } 
/********************************************************************** **********************************************************************/ -static void __osm_state_mgr_init_errors_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_init_errors_msg(IN osm_log_t *log) { - osm_log(p_mgr->p_log, OSM_LOG_SYS, "Errors during initialization\n"); /* Format Waived */ + osm_log(log, OSM_LOG_SYS, "Errors during initialization\n"); /* Format Waived */ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(log, OSM_LOG_ERROR, "__osm_state_mgr_init_errors_msg: " "\n\n\n********************************" "**********************************\n" @@ -176,11 +126,10 @@ static void __osm_state_mgr_init_errors_msg(IN const osm_state_mgr_t * p_mgr) /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_light_sweep_done_msg(IN const osm_state_mgr_t * - p_mgr) +static void __osm_state_mgr_light_sweep_done_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_light_sweep_done_msg: " "\n\n\n********************************" "**********************************\n" @@ -188,15 +137,14 @@ static void __osm_state_mgr_light_sweep_done_msg(IN const osm_state_mgr_t * "COMPLETE **********************\n" "**************************************" "****************************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_standby_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_standby_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + 
osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_standby_msg: " "\n\n\n********************************" "**********************************\n" @@ -204,17 +152,16 @@ static void __osm_state_mgr_standby_msg(IN const osm_state_mgr_t * p_mgr) " STATE **********************\n" "**************************************" "****************************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_sm_port_down_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_sm_port_down_msg(IN osm_log_t *log) { - osm_log(p_mgr->p_log, OSM_LOG_SYS, "SM port is down\n"); /* Format Waived */ + osm_log(log, OSM_LOG_SYS, "SM port is down\n"); /* Format Waived */ - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_sm_port_down_msg: " "\n\n\n********************************" "**********************************\n" @@ -222,15 +169,14 @@ static void __osm_state_mgr_sm_port_down_msg(IN const osm_state_mgr_t * p_mgr) "**************************\n" "**************************************" "****************************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_lid_assign_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_lid_assign_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_lid_assign_msg: " "\n\n\n**************************************" "****************************\n" @@ -238,16 +184,14 @@ static void __osm_state_mgr_lid_assign_msg(IN const osm_state_mgr_t * p_mgr) "H TABLE 
CONFIG *****\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_set_sm_lid_done_msg(IN const osm_state_mgr_t * - p_mgr) +static void __osm_state_mgr_set_sm_lid_done_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_set_sm_lid_done_msg: " "\n\n\n**************************************" "****************************\n" @@ -255,15 +199,14 @@ static void __osm_state_mgr_set_sm_lid_done_msg(IN const osm_state_mgr_t * "ET LID CONFIG *****\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_switch_config_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_switch_config_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_switch_config_msg: " "\n\n\n**************************************" "****************************\n" @@ -271,16 +214,14 @@ static void __osm_state_mgr_switch_config_msg(IN const osm_state_mgr_t * p_mgr) "****************\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_multicast_config_msg(IN const osm_state_mgr_t * - p_mgr) +static void __osm_state_mgr_multicast_config_msg(IN osm_log_t 
*log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_multicast_config_msg: " "\n\n\n**************************************" "****************************\n" @@ -288,15 +229,14 @@ static void __osm_state_mgr_multicast_config_msg(IN const osm_state_mgr_t * "***************\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_links_ports_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_links_ports_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_links_ports_msg: " "\n\n\n**************************************" "****************************\n" @@ -304,15 +244,14 @@ static void __osm_state_mgr_links_ports_msg(IN const osm_state_mgr_t * p_mgr) "STATE ********\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_links_armed_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_links_armed_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_links_armed_msg: " "\n\n\n**************************************" "****************************\n" @@ -320,15 +259,14 @@ static void __osm_state_mgr_links_armed_msg(IN const osm_state_mgr_t * p_mgr) "STATE 
************\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_sweep_heavy_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_sweep_heavy_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_sweep_heavy_msg: " "\n\n\n**************************************" "****************************\n" @@ -336,16 +274,14 @@ static void __osm_state_mgr_sweep_heavy_msg(IN const osm_state_mgr_t * p_mgr) "**********************\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_sweep_heavy_done_msg(IN const osm_state_mgr_t * - p_mgr) +static void __osm_state_mgr_sweep_heavy_done_msg(IN osm_log_t *log) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_sweep_heavy_done_msg: " "\n\n\n**************************************" "****************************\n" @@ -353,15 +289,14 @@ static void __osm_state_mgr_sweep_heavy_done_msg(IN const osm_state_mgr_t * "***********************\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_sweep_light_msg(IN const osm_state_mgr_t * p_mgr) +static void __osm_state_mgr_sweep_light_msg(IN osm_log_t *log) { 
- if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(log, OSM_LOG_VERBOSE)) + osm_log(log, OSM_LOG_VERBOSE, "__osm_state_mgr_sweep_light_msg: " "\n\n\n**************************************" "****************************\n" @@ -369,38 +304,37 @@ static void __osm_state_mgr_sweep_light_msg(IN const osm_state_mgr_t * p_mgr) "**********************\n" "*********************************************" "*********************\n\n\n"); - } } /********************************************************************** **********************************************************************/ static void -__osm_state_mgr_signal_warning(IN const osm_state_mgr_t * const p_mgr, +__osm_state_mgr_signal_warning(IN osm_sm_t *sm, IN const osm_signal_t signal) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_state_mgr_signal_warning: " "Invalid signal %s(%lu) in state %s\n", osm_get_sm_signal_str(signal), signal, - osm_get_sm_state_str(p_mgr->state)); + osm_get_sm_state_str(sm->state)); } /********************************************************************** **********************************************************************/ static void -__osm_state_mgr_signal_error(IN const osm_state_mgr_t * const p_mgr, +__osm_state_mgr_signal_error(IN osm_sm_t *sm, IN const osm_signal_t signal) { /* the Request for IDLE processing can come async to the state so it * really is just verbose ... 
*/ if (signal == OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST) - __osm_state_mgr_signal_warning(p_mgr, signal); + __osm_state_mgr_signal_warning(sm, signal); else - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_signal_error: ERR 3303: " "Invalid signal %s(%lu) in state %s\n", osm_get_sm_signal_str(signal), signal, - osm_get_sm_state_str(p_mgr->state)); + osm_get_sm_state_str(sm->state)); } /********************************************************************** @@ -409,10 +343,10 @@ static void __osm_state_mgr_reset_node_count(IN cl_map_item_t * const p_map_item, IN void *context) { osm_node_t *p_node = (osm_node_t *) p_map_item; - osm_state_mgr_t *const p_mgr = (osm_state_mgr_t *) context; + osm_sm_t *sm = context; - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_reset_node_count: " "Resetting discovery count for node 0x%" PRIx64 "(%s)\n", cl_ntoh64(osm_node_get_node_guid(p_node)), @@ -428,10 +362,10 @@ static void __osm_state_mgr_reset_port_count(IN cl_map_item_t * const p_map_item, IN void *context) { osm_port_t *p_port = (osm_port_t *) p_map_item; - osm_state_mgr_t *const p_mgr = (osm_state_mgr_t *) context; + osm_sm_t *sm = context; - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_reset_port_count: " "Resetting discovery count for port 0x%" PRIx64 "(node %s)\n", cl_ntoh64(osm_port_get_guid(p_port)), @@ -448,10 +382,10 @@ __osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item, IN void *context) { osm_switch_t *p_sw = (osm_switch_t *) p_map_item; - osm_state_mgr_t *const p_mgr = (osm_state_mgr_t *) context; + osm_sm_t *sm = context; - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, 
OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_reset_switch_count: " "Resetting discovery count for switch 0x%" PRIx64 " (%s)\n", cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), @@ -471,10 +405,10 @@ static void __osm_state_mgr_get_sw_info(IN cl_map_item_t * const p_object, osm_dr_path_t *p_dr_path; osm_madw_context_t mad_context; osm_switch_t *const p_sw = (osm_switch_t *) p_object; - osm_state_mgr_t *const p_mgr = (osm_state_mgr_t *) context; + osm_sm_t *sm = context; ib_api_status_t status; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_get_sw_info); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_get_sw_info); p_node = p_sw->p_node; p_dr_path = osm_node_get_any_dr_path_ptr(p_node); @@ -485,23 +419,23 @@ static void __osm_state_mgr_get_sw_info(IN cl_map_item_t * const p_object, mad_context.si_context.set_method = FALSE; mad_context.si_context.light_sweep = TRUE; - status = osm_req_get(p_mgr->sm, p_dr_path, IB_MAD_ATTR_SWITCH_INFO, 0, + status = osm_req_get(sm, p_dr_path, IB_MAD_ATTR_SWITCH_INFO, 0, OSM_MSG_LIGHT_SWEEP_FAIL, &mad_context); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_get_sw_info: ERR 3304: " "Request for SwitchInfo failed\n"); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** Initiate a remote port info request for the given physical port **********************************************************************/ static void -__osm_state_mgr_get_remote_port_info(IN osm_state_mgr_t * const p_mgr, +__osm_state_mgr_get_remote_port_info(IN osm_sm_t *sm, IN osm_physp_t * const p_physp) { osm_dr_path_t *p_dr_path; @@ -509,7 +443,7 @@ __osm_state_mgr_get_remote_port_info(IN osm_state_mgr_t * const p_mgr, osm_madw_context_t mad_context; ib_api_status_t status; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_get_remote_port_info); + 
OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_get_remote_port_info); /* generate a dr path leaving on the physp to the remote node */ p_dr_path = osm_physp_get_dr_path_ptr(p_physp); @@ -529,56 +463,55 @@ __osm_state_mgr_get_remote_port_info(IN osm_state_mgr_t * const p_mgr, /* note that with some negative logic - if the query failed it means that * there is no point in going to heavy sweep */ - status = osm_req_get(p_mgr->sm, &rem_node_dr_path, + status = osm_req_get(sm, &rem_node_dr_path, IB_MAD_ATTR_PORT_INFO, 0, CL_DISP_MSGID_NONE, &mad_context); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_get_remote_port_info: ERR 332E: " "Request for PortInfo failed\n"); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** Initiates a thorough sweep of the subnet. Used when there is suspicion that something on the subnet has changed. **********************************************************************/ -static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_state_mgr_t * - const p_mgr) +static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_sm_t *sm) { ib_api_status_t status; osm_dr_path_t dr_path; osm_bind_handle_t h_bind; uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_sweep_hop_0); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_sweep_hop_0); memset(path_array, 0, sizeof(path_array)); /* * First, get the bind handle. */ - h_bind = osm_sm_mad_ctrl_get_bind_handle(p_mgr->p_mad_ctrl); + h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); if (h_bind != OSM_BIND_INVALID_HANDLE) { - __osm_state_mgr_sweep_heavy_msg(p_mgr); + __osm_state_mgr_sweep_heavy_msg(sm->p_log); /* * Start the sweep by clearing the port counts, then * get our own NodeInfo at 0 hops. 
*/ - CL_PLOCK_ACQUIRE(p_mgr->p_lock); + CL_PLOCK_ACQUIRE(sm->p_lock); - cl_qmap_apply_func(&p_mgr->p_subn->node_guid_tbl, - __osm_state_mgr_reset_node_count, p_mgr); + cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, + __osm_state_mgr_reset_node_count, sm); - cl_qmap_apply_func(&p_mgr->p_subn->port_guid_tbl, - __osm_state_mgr_reset_port_count, p_mgr); + cl_qmap_apply_func(&sm->p_subn->port_guid_tbl, + __osm_state_mgr_reset_port_count, sm); - cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl, - __osm_state_mgr_reset_switch_count, p_mgr); + cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, + __osm_state_mgr_reset_switch_count, sm); /* Set the in_sweep_hop_0 flag in subn to be TRUE. * This will indicate the sweeping not to continue beyond the @@ -586,52 +519,51 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_state_mgr_t * * This is relevant for the case of SM on switch, since in the * switch info we need to signal somehow not to continue * the sweeping. */ - p_mgr->p_subn->in_sweep_hop_0 = TRUE; + sm->p_subn->in_sweep_hop_0 = TRUE; - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); osm_dr_path_init(&dr_path, h_bind, 0, path_array); - status = osm_req_get(p_mgr->sm, + status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0, CL_DISP_MSGID_NONE, NULL); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_sweep_hop_0: ERR 3305: " "Request for NodeInfo failed\n"); } } else { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_sweep_hop_0: " "No bound ports. 
Deferring sweep...\n"); status = IB_INVALID_STATE; } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } /********************************************************************** Clear out all existing port lid assignments **********************************************************************/ -static ib_api_status_t __osm_state_mgr_clean_known_lids(IN osm_state_mgr_t * - const p_mgr) +static ib_api_status_t __osm_state_mgr_clean_known_lids(IN osm_sm_t *sm) { ib_api_status_t status = IB_SUCCESS; - cl_ptr_vector_t *p_vec = &(p_mgr->p_subn->port_lid_tbl); + cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl); uint32_t i; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_clean_known_lids); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_clean_known_lids); /* we need a lock here! */ - CL_PLOCK_ACQUIRE(p_mgr->p_lock); + CL_PLOCK_ACQUIRE(sm->p_lock); for (i = 0; i < cl_ptr_vector_get_size(p_vec); i++) cl_ptr_vector_set(p_vec, i, NULL); - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } @@ -639,20 +571,19 @@ static ib_api_status_t __osm_state_mgr_clean_known_lids(IN osm_state_mgr_t * Notifies the transport layer that the local LID has changed, which give it a chance to update address vectors, etc.. **********************************************************************/ -static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_state_mgr_t * - const p_mgr) +static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_sm_t *sm) { ib_api_status_t status; osm_bind_handle_t h_bind; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_notify_lid_change); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_notify_lid_change); /* * First, get the bind handle. 
*/ - h_bind = osm_sm_mad_ctrl_get_bind_handle(p_mgr->p_mad_ctrl); + h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); if (h_bind == OSM_BIND_INVALID_HANDLE) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_notify_lid_change: ERR 3306: " "No bound ports\n"); status = IB_ERROR; @@ -664,14 +595,14 @@ static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_state_mgr_t * */ status = osm_vendor_local_lid_change(h_bind); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_notify_lid_change: ERR 3307: " "Vendor LID update failed (%s)\n", ib_get_err_str(status)); } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } @@ -679,23 +610,22 @@ static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_state_mgr_t * Returns true if the SM port is down. The SM's port object must exist in the port_guid table. **********************************************************************/ -static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_state_mgr_t * - const p_mgr) +static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_sm_t *sm) { ib_net64_t port_guid; osm_port_t *p_port; osm_physp_t *p_physp; uint8_t state; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_is_sm_port_down); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_is_sm_port_down); - port_guid = p_mgr->p_subn->sm_port_guid; + port_guid = sm->p_subn->sm_port_guid; /* * If we don't know our own port guid yet, assume the port is down. 
*/ if (port_guid == 0) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_is_sm_port_down: ERR 3308: " "SM port GUID unknown\n"); state = IB_LINK_DOWN; @@ -704,16 +634,16 @@ static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_state_mgr_t * CL_ASSERT(port_guid); - CL_PLOCK_ACQUIRE(p_mgr->p_lock); - p_port = osm_get_port_by_guid(p_mgr->p_subn, port_guid); + CL_PLOCK_ACQUIRE(sm->p_lock); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_is_sm_port_down: ERR 3309: " "SM port with GUID:%016" PRIx64 " (%s) is unknown\n", cl_ntoh64(port_guid), p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN"); state = IB_LINK_DOWN; - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); goto Exit; } @@ -722,10 +652,10 @@ static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_state_mgr_t * CL_ASSERT(p_physp); state = osm_physp_get_port_state(p_physp); - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (state == IB_LINK_DOWN); } @@ -734,8 +664,7 @@ static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_state_mgr_t * This sets off a "chain reaction" that causes discovery of the subnet. Used when there is suspicion that something on the subnet has changed. 
**********************************************************************/ -static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * - const p_mgr) +static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t *sm) { ib_api_status_t status = IB_SUCCESS; osm_bind_handle_t h_bind; @@ -751,12 +680,12 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * uint8_t num_ports; osm_physp_t *p_ext_physp; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_sweep_hop_1); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_sweep_hop_1); /* * First, get our own port and node objects. */ - port_guid = p_mgr->p_subn->sm_port_guid; + port_guid = sm->p_subn->sm_port_guid; CL_ASSERT(port_guid); @@ -766,11 +695,11 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * * This is relevant for the case of SM on switch, since in the * switch info we need to signal that the sweeping should * continue through the switch. */ - p_mgr->p_subn->in_sweep_hop_0 = FALSE; + sm->p_subn->in_sweep_hop_0 = FALSE; - p_port = osm_get_port_by_guid(p_mgr->p_subn, port_guid); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_sweep_hop_1: ERR 3310: " "No SM port object\n"); status = IB_ERROR; @@ -782,7 +711,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * port_num = ib_node_info_get_local_port_num(&p_node->node_info); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_sweep_hop_1: " "Probing hop 1 on local port %u\n", port_num); @@ -808,12 +737,12 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * path_array[1] = port_num; osm_dr_path_init(&hop_1_path, h_bind, 1, path_array); - status = osm_req_get(p_mgr->sm, &hop_1_path, + status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - 
osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_sweep_hop_1: ERR 3311: " "Request for NodeInfo failed\n"); } @@ -843,12 +772,12 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * osm_dr_path_init(&hop_1_path, h_bind, 1, path_array); status = - osm_req_get(p_mgr->sm, &hop_1_path, + osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_sweep_hop_1: ERR 3312: " "Request for NodeInfo failed\n"); } @@ -857,14 +786,14 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * break; default: - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_sweep_hop_1: ERR 3313: Unknown node type %d (%s)\n", osm_node_get_type(p_node), p_node->print_desc); } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } @@ -872,8 +801,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * Initiates a lightweight sweep of the subnet. Used during normal sweeps after the subnet is up. **********************************************************************/ -static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_state_mgr_t * - const p_mgr) +static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t *sm) { ib_api_status_t status = IB_SUCCESS; osm_bind_handle_t h_bind; @@ -883,25 +811,24 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_state_mgr_t * osm_physp_t *p_physp; uint8_t port_num; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_light_sweep_start); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_light_sweep_start); - p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; + p_sw_tbl = &sm->p_subn->sw_guid_tbl; /* * First, get the bind handle. 
*/ - h_bind = osm_sm_mad_ctrl_get_bind_handle(p_mgr->p_mad_ctrl); + h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); if (h_bind != OSM_BIND_INVALID_HANDLE) { - __osm_state_mgr_sweep_light_msg(p_mgr); - CL_PLOCK_ACQUIRE(p_mgr->p_lock); - cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, - p_mgr); - CL_PLOCK_RELEASE(p_mgr->p_lock); + __osm_state_mgr_sweep_light_msg(sm->p_log); + CL_PLOCK_ACQUIRE(sm->p_lock); + cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, sm); + CL_PLOCK_RELEASE(sm->p_lock); /* now scan the list of physical ports that were not down but have no remote port */ - CL_PLOCK_ACQUIRE(p_mgr->p_lock); - p_next = cl_qmap_head(&p_mgr->p_subn->node_guid_tbl); - while (p_next != cl_qmap_end(&p_mgr->p_subn->node_guid_tbl)) { + CL_PLOCK_ACQUIRE(sm->p_lock); + p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl); + while (p_next != cl_qmap_end(&sm->p_subn->node_guid_tbl)) { p_node = (osm_node_t *) p_next; p_next = cl_qmap_next(p_next); @@ -914,7 +841,7 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_state_mgr_t * && (osm_physp_get_port_state(p_physp) != IB_LINK_DOWN) && !osm_physp_get_remote(p_physp)) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_light_sweep_start: ERR 0108: " "Unknown remote side for node 0x%016" PRIx64 @@ -922,25 +849,25 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_state_mgr_t * cl_ntoh64(osm_node_get_node_guid (p_node)), p_node->print_desc, port_num); - osm_dump_dr_path(p_mgr->p_log, + osm_dump_dr_path(sm->p_log, osm_physp_get_dr_path_ptr (p_physp), OSM_LOG_ERROR); __osm_state_mgr_get_remote_port_info - (p_mgr, p_physp); + (sm, p_physp); } } } - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); } else { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_light_sweep_start: " "No bound ports. 
Deferring sweep...\n"); status = IB_INVALID_STATE; } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } @@ -950,17 +877,15 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_state_mgr_t * * If there is a remote master SM - return a pointer to it, * else - return NULL. **********************************************************************/ -static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN - osm_state_mgr_t * - const p_mgr) +static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN osm_sm_t *sm) { cl_qmap_t *p_sm_tbl; osm_remote_sm_t *p_sm; osm_remote_sm_t *p_sm_res = NULL; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_exists_other_master_sm); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_exists_other_master_sm); - p_sm_tbl = &p_mgr->p_subn->sm_guid_tbl; + p_sm_tbl = &sm->p_subn->sm_guid_tbl; /* go over all the remote SMs */ for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl); @@ -968,7 +893,7 @@ static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) { /* If the sm is in MASTER state - return a pointer to it */ if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_state_mgr_exists_other_master_sm: " "Found remote master SM with guid:0x%016" PRIx64 " (node %s)\n", cl_ntoh64(p_sm->smi.guid), @@ -979,7 +904,7 @@ static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (p_sm_res); } @@ -989,8 +914,7 @@ static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN * Compare this SM to the local SM. If the local SM is higher - * return NULL, if the remote SM is higher - return a pointer to it. 
**********************************************************************/ -static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_state_mgr_t * - const p_mgr) +static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_sm_t *sm) { cl_qmap_t *p_sm_tbl; osm_remote_sm_t *p_sm = NULL; @@ -998,14 +922,14 @@ static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_state_mgr_t * uint8_t highest_sm_priority; ib_net64_t highest_sm_guid; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_get_highest_sm); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_get_highest_sm); - p_sm_tbl = &p_mgr->p_subn->sm_guid_tbl; + p_sm_tbl = &sm->p_subn->sm_guid_tbl; /* Start with the local sm as the standard */ p_highest_sm = NULL; - highest_sm_priority = p_mgr->p_subn->opt.sm_priority; - highest_sm_guid = p_mgr->p_subn->sm_port_guid; + highest_sm_priority = sm->p_subn->opt.sm_priority; + highest_sm_guid = sm->p_subn->sm_port_guid; /* go over all the remote SMs */ for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl); @@ -1030,7 +954,7 @@ static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_state_mgr_t * } if (p_highest_sm != NULL) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_get_highest_sm: " "Found higher SM with guid: %016" PRIx64 " (node %s)\n", cl_ntoh64(p_highest_sm->smi.guid), @@ -1038,7 +962,7 @@ static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_state_mgr_t * p_highest_sm->p_port->p_node->print_desc : "UNKNOWN"); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (p_highest_sm); } @@ -1047,7 +971,7 @@ static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_state_mgr_t * * remote_sm indicated. 
**********************************************************************/ static void -__osm_state_mgr_send_handover(IN osm_state_mgr_t * const p_mgr, +__osm_state_mgr_send_handover(IN osm_sm_t * const sm, IN osm_remote_sm_t * const p_sm) { uint8_t payload[IB_SMP_DATA_SIZE]; @@ -1056,7 +980,7 @@ __osm_state_mgr_send_handover(IN osm_state_mgr_t * const p_mgr, const osm_port_t *p_port; ib_api_status_t status; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_send_handover); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_send_handover); /* * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given. @@ -1065,66 +989,66 @@ __osm_state_mgr_send_handover(IN osm_state_mgr_t * const p_mgr, memset(&context, 0, sizeof(context)); p_port = p_sm->p_port; if (p_port == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_send_handover: ERR 3316: " "No port object on given remote_sm object\n"); goto Exit; } - /* update the master_guid in the p_sm_state_mgr object according to */ + /* update the master_guid in the sm_state_mgr object according to */ /* the guid of the port where the new Master SM should reside. */ - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_state_mgr_send_handover: " "Handing over mastership. Updating sm_state_mgr master_guid: %016" PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid), p_port->p_node ? 
p_port->p_node->print_desc : "UNKNOWN"); - p_mgr->p_sm_state_mgr->master_guid = p_port->guid; + sm->sm_state_mgr.master_guid = p_port->guid; context.smi_context.port_guid = p_port->guid; context.smi_context.set_method = TRUE; - p_smi->guid = p_mgr->p_subn->sm_port_guid; - p_smi->act_count = cl_hton32(p_mgr->p_stats->qp0_mads_sent); - p_smi->pri_state = (uint8_t) (p_mgr->p_subn->sm_state | - p_mgr->p_subn->opt.sm_priority << 4); + p_smi->guid = sm->p_subn->sm_port_guid; + p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent); + p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state | + sm->p_subn->opt.sm_priority << 4); /* * Return 0 for the SM key unless we authenticate the requester * as the master SM. */ if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_send_handover: " "Responding to master SM with real sm_key\n"); - p_smi->sm_key = p_mgr->p_subn->opt.sm_key; + p_smi->sm_key = sm->p_subn->opt.sm_key; } else { /* The requester is not authenticated as master - set sm_key to zero */ - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_send_handover: " "Responding to SM not master with zero sm_key\n"); p_smi->sm_key = 0; } - status = osm_req_set(p_mgr->sm, + status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_port->p_physp), payload, sizeof(payload), IB_MAD_ATTR_SM_INFO, IB_SMINFO_ATTR_MOD_HANDOVER, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_send_handover: ERR 3317: " "Failure requesting SMInfo (%s)\n", ib_get_err_str(status)); } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** * Send Trap 64 on all new ports. 
**********************************************************************/ -static void __osm_state_mgr_report_new_ports(IN osm_state_mgr_t * const p_mgr) +static void __osm_state_mgr_report_new_ports(IN osm_sm_t *sm) { ib_gid_t port_gid; ib_mad_notice_attr_t notice; @@ -1135,11 +1059,11 @@ static void __osm_state_mgr_report_new_ports(IN osm_state_mgr_t * const p_mgr) uint16_t min_lid_ho; uint16_t max_lid_ho; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_report_new_ports); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_report_new_ports); - CL_PLOCK_ACQUIRE(p_mgr->p_lock); - p_next = cl_qmap_head(&p_mgr->p_subn->port_guid_tbl); - while (p_next != cl_qmap_end(&p_mgr->p_subn->port_guid_tbl)) { + CL_PLOCK_ACQUIRE(sm->p_lock); + p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl); + while (p_next != cl_qmap_end(&sm->p_subn->port_guid_tbl)) { p_port = (osm_port_t *) p_next; p_next = cl_qmap_next(p_next); @@ -1155,10 +1079,10 @@ static void __osm_state_mgr_report_new_ports(IN osm_state_mgr_t * const p_mgr) /* endport becomes to be reachable */ notice.g_or_v.generic.trap_num = CL_HTON16(64); /* The sm_base_lid is saved in network order already. */ - notice.issuer_lid = p_mgr->p_subn->sm_base_lid; + notice.issuer_lid = sm->p_subn->sm_base_lid; /* following C14-72.1.1 and table 119 p739 */ /* we need to provide the GID */ - port_gid.unicast.prefix = p_mgr->p_subn->opt.subnet_prefix; + port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; port_gid.unicast.interface_id = port_guid; memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid), sizeof(ib_gid_t)); @@ -1166,21 +1090,20 @@ static void __osm_state_mgr_report_new_ports(IN osm_state_mgr_t * const p_mgr) /* According to page 653 - the issuer gid in this case of trap * is the SM gid, since the SM is the initiator of this trap. 
*/ notice.issuer_gid.unicast.prefix = - p_mgr->p_subn->opt.subnet_prefix; + sm->p_subn->opt.subnet_prefix; notice.issuer_gid.unicast.interface_id = - p_mgr->p_subn->sm_port_guid; + sm->p_subn->sm_port_guid; - status = - osm_report_notice(p_mgr->p_log, p_mgr->p_subn, &notice); + status = osm_report_notice(sm->p_log, sm->p_subn, &notice); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_report_new_ports: ERR 3318: " "Error sending trap reports on GUID:0x%016" PRIx64 " (%s)\n", port_gid.unicast.interface_id, ib_get_err_str(status)); } osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); - osm_log(p_mgr->p_log, OSM_LOG_INFO, + osm_log(sm->p_log, OSM_LOG_INFO, "__osm_state_mgr_report_new_ports: " "Discovered new port with GUID:0x%016" PRIx64 " LID range [0x%X,0x%X] of node:%s\n", @@ -1191,9 +1114,9 @@ static void __osm_state_mgr_report_new_ports(IN osm_state_mgr_t * const p_mgr) p_port->is_new = 0; } - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** @@ -1206,8 +1129,7 @@ static void __osm_state_mgr_report_new_ports(IN osm_state_mgr_t * const p_mgr) * initialization), but here we'll clean the database from incorrect * information. 
**********************************************************************/ -static void __osm_state_mgr_check_tbl_consistency(IN osm_state_mgr_t * - const p_mgr) +static void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t *sm) { cl_qmap_t *p_port_guid_tbl; osm_port_t *p_port; @@ -1220,14 +1142,14 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_state_mgr_t * uint16_t max_lid_ho; uint16_t lid_ho; - OSM_LOG_ENTER(p_mgr->p_log, __osm_state_mgr_check_tbl_consistency); + OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_check_tbl_consistency); cl_ptr_vector_construct(&ref_port_lid_tbl); cl_ptr_vector_init(&ref_port_lid_tbl, - cl_ptr_vector_get_size(&p_mgr->p_subn->port_lid_tbl), + cl_ptr_vector_get_size(&sm->p_subn->port_lid_tbl), OSM_SUBNET_VECTOR_GROW_SIZE); - p_port_guid_tbl = &p_mgr->p_subn->port_guid_tbl; + p_port_guid_tbl = &sm->p_subn->port_guid_tbl; /* Let's go over all the ports according to port_guid_tbl, * and add the port to a reference port_lid_tbl. */ @@ -1242,7 +1164,7 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_state_mgr_t * cl_ptr_vector_set(&ref_port_lid_tbl, lid_ho, p_port); } - p_port_lid_tbl = &p_mgr->p_subn->port_lid_tbl; + p_port_lid_tbl = &sm->p_subn->port_lid_tbl; ref_size = cl_ptr_vector_get_size(&ref_port_lid_tbl); curr_size = cl_ptr_vector_get_size(p_port_lid_tbl); @@ -1268,7 +1190,7 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_state_mgr_t * * didn't get the PortInfo Set request. Due to this, the port * is updated with its original lid in our database, but with the * new lid we wanted to give it in our port_lid_tbl. */ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_check_tbl_consistency: ERR 3322: " "lid 0x%zX is wrongly assigned to port 0x%016" PRIx64 " in port_lid_tbl\n", lid, @@ -1278,7 +1200,7 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_state_mgr_t * /* There is an object in the new database, but no object in our subnet * database. 
This is the matching case of the prior check - the port * still has its original lid. */ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_check_tbl_consistency: ERR 3323: " "port 0x%016" PRIx64 " exists in new port_lid_tbl under " @@ -1290,7 +1212,7 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_state_mgr_t * /* if we reached here then p_port_stored != p_port_ref. * We were trying to set a lid to p_port_stored, but it didn't reach it, * and p_port_ref also didn't get the lid update. */ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_state_mgr_check_tbl_consistency: ERR 3324: " "lid 0x%zX has port 0x%016" PRIx64 " in new port_lid_tbl db, " @@ -1305,11 +1227,11 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_state_mgr_t * /* In any of these cases we want to set NULL in the port_lid_tbl, since this * entry is invalid. Also, make sure we'll do another heavy sweep. */ cl_ptr_vector_set(p_port_lid_tbl, lid, NULL); - p_mgr->p_subn->subnet_initialization_error = TRUE; + sm->p_subn->subnet_initialization_error = TRUE; } cl_ptr_vector_destroy(&ref_port_lid_tbl); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** @@ -1355,11 +1277,11 @@ static void do_sweep(osm_sm_t * sm) && sm->p_subn->opt.force_heavy_sweep == FALSE && sm->p_subn->force_heavy_sweep == FALSE && sm->p_subn->subnet_initialization_error == FALSE - && (__osm_state_mgr_light_sweep_start(&sm->state_mgr) == IB_SUCCESS)) { + && (__osm_state_mgr_light_sweep_start(sm) == IB_SUCCESS)) { if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; if (!sm->p_subn->force_heavy_sweep) { - __osm_state_mgr_light_sweep_done_msg(&sm->state_mgr); + __osm_state_mgr_light_sweep_done_msg(sm->p_log); return; } } @@ -1375,19 +1297,19 @@ _repeat_discovery: status = osm_subn_rescan_conf_files(sm->p_subn); if (status != IB_SUCCESS) 
osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_state_mgr_process: ERR 331A: " + "do_sweep: ERR 331A: " "osm_subn_rescan_conf_file failed\n"); if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER) sm->p_subn->need_update = 1; - status = __osm_state_mgr_sweep_hop_0(&sm->state_mgr); + status = __osm_state_mgr_sweep_hop_0(sm); if (status != IB_SUCCESS || wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - if (__osm_state_mgr_is_sm_port_down(&sm->state_mgr) == TRUE) { - __osm_state_mgr_sm_port_down_msg(&sm->state_mgr); + if (__osm_state_mgr_is_sm_port_down(sm) == TRUE) { + __osm_state_mgr_sm_port_down_msg(sm->p_log); /* Run the drop manager - we want to clear all records */ osm_drop_mgr_process(&sm->drop_mgr); @@ -1398,21 +1320,21 @@ _repeat_discovery: return; } - status = __osm_state_mgr_sweep_hop_1(&sm->state_mgr); + status = __osm_state_mgr_sweep_hop_1(sm); if (status != IB_SUCCESS || wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; /* discovery completed - check other sm presense */ if (sm->master_sm_found) { - sm->state_mgr.state = OSM_SM_STATE_STANDBY; + sm->state = OSM_SM_STATE_STANDBY; /* * Call the sm_state_mgr with signal * MASTER_OR_HIGHER_SM_DETECTED_DONE */ osm_sm_state_mgr_process(&sm->sm_state_mgr, OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED_DONE); - __osm_state_mgr_standby_msg(&sm->state_mgr); + __osm_state_mgr_standby_msg(sm->p_log); return; } @@ -1420,29 +1342,29 @@ _repeat_discovery: if (sm->p_subn->force_heavy_sweep) goto _repeat_discovery; - __osm_state_mgr_sweep_heavy_done_msg(&sm->state_mgr); + __osm_state_mgr_sweep_heavy_done_msg(sm->p_log); /* If we are MASTER - get the highest remote_sm, and * see if it is higher than our local sm. 
*/ if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) { - p_remote_sm = __osm_state_mgr_get_highest_sm(&sm->state_mgr); + p_remote_sm = __osm_state_mgr_get_highest_sm(sm); if (p_remote_sm != NULL) { /* report new ports (trap 64) before leaving MASTER */ - __osm_state_mgr_report_new_ports(&sm->state_mgr); + __osm_state_mgr_report_new_ports(sm); /* need to handover the mastership * to the remote sm, and move to standby */ - __osm_state_mgr_send_handover(&sm->state_mgr, p_remote_sm); + __osm_state_mgr_send_handover(sm, p_remote_sm); osm_sm_state_mgr_process(&sm->sm_state_mgr, OSM_SM_SIGNAL_HANDOVER_SENT); - sm->state_mgr.state = OSM_SM_STATE_STANDBY; + sm->state = OSM_SM_STATE_STANDBY; return; } else { /* We are the highest sm - check to see if there is * a remote SM that is in master state. */ p_remote_sm = - __osm_state_mgr_exists_other_master_sm(&sm->state_mgr); + __osm_state_mgr_exists_other_master_sm(sm); if (p_remote_sm != NULL) { /* There is a remote SM that is master. * need to wait for that SM to relinquish control @@ -1484,8 +1406,8 @@ _repeat_discovery: if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_set_sm_lid_done_msg(&sm->state_mgr); - __osm_state_mgr_notify_lid_change(&sm->state_mgr); + __osm_state_mgr_set_sm_lid_done_msg(sm->p_log); + __osm_state_mgr_notify_lid_change(sm); osm_lid_mgr_process_subnet(&sm->lid_mgr); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) @@ -1495,15 +1417,15 @@ _repeat_discovery: * the port_lid_tbl under the subnet. There might be * errors in it if PortInfo Set reqeusts didn't reach * their destination. */ - __osm_state_mgr_check_tbl_consistency(&sm->state_mgr); + __osm_state_mgr_check_tbl_consistency(sm); - __osm_state_mgr_lid_assign_msg(&sm->state_mgr); + __osm_state_mgr_lid_assign_msg(sm->p_log); /* * Proceed with unicast forwarding table configuration. 
* First - send trap 64 on newly discovered endports */ - __osm_state_mgr_report_new_ports(&sm->state_mgr); + __osm_state_mgr_report_new_ports(sm); osm_ucast_mgr_process(&sm->ucast_mgr); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) @@ -1514,13 +1436,13 @@ _repeat_discovery: * take into account these lfts. */ sm->p_subn->ignore_existing_lfts = FALSE; - __osm_state_mgr_switch_config_msg(&sm->state_mgr); + __osm_state_mgr_switch_config_msg(sm->p_log); if (!sm->p_subn->opt.disable_multicast) { osm_mcast_mgr_process(&sm->mcast_mgr); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_multicast_config_msg(&sm->state_mgr); + __osm_state_mgr_multicast_config_msg(sm->p_log); } /* @@ -1535,13 +1457,13 @@ _repeat_discovery: if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_links_ports_msg(&sm->state_mgr); + __osm_state_mgr_links_ports_msg(sm->p_log); osm_link_mgr_process(&sm->link_mgr, IB_LINK_ARMED); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_links_armed_msg(&sm->state_mgr); + __osm_state_mgr_links_armed_msg(sm->p_log); osm_link_mgr_process(&sm->link_mgr, IB_LINK_ACTIVE); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) @@ -1556,7 +1478,7 @@ _repeat_discovery: /* If there were errors - then the subnet is not really up */ if (sm->p_subn->subnet_initialization_error == TRUE) - __osm_state_mgr_init_errors_msg(&sm->state_mgr); + __osm_state_mgr_init_errors_msg(sm->p_log); else { /* The subnet is up correctly - set the first_time_master_sweep * flag (if it is on) to FALSE. 
*/ @@ -1565,7 +1487,7 @@ _repeat_discovery: sm->p_subn->need_update = 0; osm_dump_all(sm->p_subn->p_osm); - __osm_state_mgr_up_msg(&sm->state_mgr); + __osm_state_mgr_up_msg(sm); if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) osm_sa_db_file_dump(sm->p_subn->p_osm); @@ -1589,21 +1511,20 @@ static void do_process_mgrp_queue(osm_sm_t * sm) wait_for_pending_transactions(&sm->p_subn->p_osm->stats); } -void osm_state_mgr_process(IN osm_state_mgr_t * const p_mgr, - IN osm_signal_t signal) +void osm_state_mgr_process(IN osm_sm_t *sm, IN osm_signal_t signal) { - CL_ASSERT(p_mgr); + CL_ASSERT(sm); - OSM_LOG_ENTER(p_mgr->p_log, osm_state_mgr_process); + OSM_LOG_ENTER(sm->p_log, osm_state_mgr_process); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_state_mgr_process: " "Received signal %s in state %s\n", osm_get_sm_signal_str(signal), - osm_get_sm_state_str(p_mgr->state)); + osm_get_sm_state_str(sm->state)); - switch (p_mgr->state) { + switch (sm->state) { case OSM_SM_STATE_IDLE: switch (signal) { case OSM_SIGNAL_SWEEP: @@ -1611,8 +1532,8 @@ void osm_state_mgr_process(IN osm_state_mgr_t * const p_mgr, * If the osm_sm_state_mgr is in NOT-ACTIVE state - * stay in IDLE */ - if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) { - osm_vendor_set_sm(p_mgr->p_mad_ctrl->h_bind, FALSE); + if (sm->p_subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) { + osm_vendor_set_sm(sm->mad_ctrl.h_bind, FALSE); break; } @@ -1620,19 +1541,19 @@ void osm_state_mgr_process(IN osm_state_mgr_t * const p_mgr, * If the osm_sm_state_mgr is in INIT state - signal * it with a INIT signal to move it to DISCOVERY state. 
*/ - if (p_mgr->p_subn->sm_state == IB_SMINFO_STATE_INIT) - osm_sm_state_mgr_process(p_mgr->p_sm_state_mgr, + if (sm->p_subn->sm_state == IB_SMINFO_STATE_INIT) + osm_sm_state_mgr_process(&sm->sm_state_mgr, OSM_SM_SIGNAL_INIT); - do_sweep(p_mgr->sm); + do_sweep(sm); break; case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST: - do_process_mgrp_queue(p_mgr->sm); + do_process_mgrp_queue(sm); break; default: - __osm_state_mgr_signal_error(p_mgr, signal); + __osm_state_mgr_signal_error(sm, signal); break; } break; @@ -1645,12 +1566,12 @@ void osm_state_mgr_process(IN osm_state_mgr_t * const p_mgr, * to do that we want all the ports to be considered * foriegn */ - __osm_state_mgr_clean_known_lids(p_mgr); - p_mgr->state = OSM_SM_STATE_IDLE; - osm_sm_signal(p_mgr->sm, OSM_SIGNAL_SWEEP); + __osm_state_mgr_clean_known_lids(sm); + sm->state = OSM_SM_STATE_IDLE; + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); break; default: - __osm_state_mgr_signal_error(p_mgr, signal); + __osm_state_mgr_signal_error(sm, signal); break; } /* stay with the same signal - so we can start the sweep */ @@ -1658,11 +1579,11 @@ void osm_state_mgr_process(IN osm_state_mgr_t * const p_mgr, default: CL_ASSERT(FALSE); - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_state_mgr_process: ERR 3320: " - "Invalid SM state %u\n", p_mgr->state); + "Invalid SM state %u\n", sm->state); break; } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } -- 1.5.4.rc5 From sashak at voltaire.com Thu Feb 7 20:05:08 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 8 Feb 2008 04:05:08 +0000 Subject: [ofa-general] [PATCH] opensm: consolidate message "box" logging code In-Reply-To: <20080208040214.GO11526@sashak.voltaire.com> References: <20080208040214.GO11526@sashak.voltaire.com> Message-ID: <20080208040508.GP11526@sashak.voltaire.com> Consolidate log message box (which looks like: ****************************************************************** ************************* Hello World! 
*************************** ****************************************************************** ) printing code. Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_log.h | 8 +- opensm/opensm/libopensm.map | 1 + opensm/opensm/osm_log.c | 28 ++++++- opensm/opensm/osm_sm_state_mgr.c | 50 ++---------- opensm/opensm/osm_state_mgr.c | 164 +++++++------------------------------- 5 files changed, 70 insertions(+), 181 deletions(-) diff --git a/opensm/include/opensm/osm_log.h b/opensm/include/opensm/osm_log.h index 1aad786..97d0556 100644 --- a/opensm/include/opensm/osm_log.h +++ b/opensm/include/opensm/osm_log.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * @@ -391,9 +391,9 @@ osm_log_is_active(IN const osm_log_t * const p_log, extern int osm_log_printf(osm_log_t * p_log, osm_log_level_t level, const char *fmt, ...); - -void -osm_log_raw(IN osm_log_t * const p_log, +extern void osm_log_msg_box(osm_log_t *log, osm_log_level_t level, + const char *func_name, const char *msg); +extern void osm_log_raw(IN osm_log_t * const p_log, IN const osm_log_level_t verbosity, IN const char *p_buf); #define DBG_CL_LOCK 0 diff --git a/opensm/opensm/libopensm.map b/opensm/opensm/libopensm.map index b3d4fe0..1d574bc 100644 --- a/opensm/opensm/libopensm.map +++ b/opensm/opensm/libopensm.map @@ -2,6 +2,7 @@ OPENSM_1.5 { global: osm_log; osm_log_printf; + osm_log_msg_box; osm_is_debug; osm_log_init; osm_log_init_v2; diff --git a/opensm/opensm/osm_log.c b/opensm/opensm/osm_log.c index 97f8920..b5b4bd9 100644 --- a/opensm/opensm/osm_log.c +++ b/opensm/opensm/osm_log.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 
* Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * @@ -243,6 +243,32 @@ osm_log_raw(IN osm_log_t * const p_log, } } +void osm_log_msg_box(IN osm_log_t *log, osm_log_level_t level, + const char *func_name, const char *msg) +{ +#define MSG_BOX_LENGTH 66 + char buf[MSG_BOX_LENGTH + 1]; + int i, n; + + if (!osm_log_is_active(log, level)) + return; + + n = (MSG_BOX_LENGTH - strlen(msg))/2 - 1; + if (n < 0) + n = 0; + for (i = 0 ; i < n; i++) + sprintf(buf + i, "*"); + n += snprintf(buf + n, sizeof(buf) - n, " %s ", msg); + for (i = n; i < MSG_BOX_LENGTH; i++) + sprintf(buf + i, "*"); + + osm_log(log, level, "%s:\n\n\n" + "*********************************************" + "*********************\n%s\n" + "*********************************************" + "*********************\n\n\n", func_name, buf); +} + boolean_t osm_is_debug(void) { #if defined( _DEBUG_ ) diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index 4d0b026..27d7536 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
* @@ -74,16 +74,8 @@ __osm_sm_state_mgr_standby_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) { osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering STANDBY state\n"); /* Format Waived */ - if (osm_log_is_active(p_sm_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_sm_mgr->p_log, OSM_LOG_VERBOSE, - "__osm_sm_state_mgr_standby_msg: " - "\n\n\n********************************" - "**********************************\n" - "******************** ENTERING SM STANDBY" - " STATE *******************\n" - "**************************************" - "****************************\n\n\n"); - } + osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "ENTERING SM STANDBY STATE"); } /********************************************************************** @@ -93,16 +85,8 @@ __osm_sm_state_mgr_master_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) { osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering MASTER state\n"); /* Format Waived */ - if (osm_log_is_active(p_sm_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_sm_mgr->p_log, OSM_LOG_VERBOSE, - "__osm_sm_state_mgr_master_msg: " - "\n\n\n********************************" - "**********************************\n" - "******************** ENTERING SM MASTER" - " STATE ********************\n" - "**************************************" - "****************************\n\n\n"); - } + osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "ENTERING SM MASTER STATE"); } /********************************************************************** @@ -110,16 +94,8 @@ __osm_sm_state_mgr_master_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) static void __osm_sm_state_mgr_discovering_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) { - if (osm_log_is_active(p_sm_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_sm_mgr->p_log, OSM_LOG_VERBOSE, - "__osm_sm_state_mgr_discovering_msg: " - "\n\n\n********************************" - "**********************************\n" - "******************** ENTERING SM DISCOVERING" - " STATE ***************\n" - 
"**************************************" - "****************************\n\n\n"); - } + osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "ENTERING SM DISCOVERING STATE"); } /********************************************************************** @@ -129,16 +105,8 @@ __osm_sm_state_mgr_notactive_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) { osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering NOT-ACTIVE state\n"); /* Format Waived */ - if (osm_log_is_active(p_sm_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_sm_mgr->p_log, OSM_LOG_VERBOSE, - "__osm_sm_state_mgr_notactive_msg: " - "\n\n\n********************************" - "**********************************\n" - "***************** ENTERING SM NOT-ACTIVE" - " STATE *******************\n" - "**************************************" - "****************************\n\n\n"); - } + osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "ENTERING SM NOT-ACTIVE STATE"); } #if 0 diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 674ccf7..a2b0725 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 
* @@ -88,24 +88,9 @@ static void __osm_state_mgr_up_msg(IN const osm_sm_t *sm) } else osm_log(sm->p_log, OSM_LOG_INFO, "SUBNET UP\n"); /* Format Waived */ - if (sm->p_subn->opt.sweep_interval) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_state_mgr_up_msg: " - "\n\n\n********************************" - "**********************************\n" - "**************************** SUBNET UP " - "***************************\n" - "**************************************" - "****************************\n\n\n"); - else - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_state_mgr_up_msg: " - "\n\n\n********************************" - "**********************************\n" - "******************* SUBNET UP " - "(sweep disabled) *******************\n" - "**************************************" - "****************************\n\n\n"); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + sm->p_subn->opt.sweep_interval ? + "SUBNET UP" : "SUBNET UP (sweep disabled)"); } /********************************************************************** @@ -114,44 +99,24 @@ static void __osm_state_mgr_init_errors_msg(IN osm_log_t *log) { osm_log(log, OSM_LOG_SYS, "Errors during initialization\n"); /* Format Waived */ - osm_log(log, OSM_LOG_ERROR, - "__osm_state_mgr_init_errors_msg: " - "\n\n\n********************************" - "**********************************\n" - "****************** ERRORS DURING INITI" - "ALIZATION ******************\n" - "**************************************" - "****************************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_ERROR, __FUNCTION__, + "ERRORS DURING INITIALIZATION"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_light_sweep_done_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_light_sweep_done_msg: " - "\n\n\n********************************" - 
"**********************************\n" - "********************** LIGHT SWEEP " - "COMPLETE **********************\n" - "**************************************" - "****************************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "LIGHT SWEEP COMPLETE"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_standby_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_standby_msg: " - "\n\n\n********************************" - "**********************************\n" - "******************** ENTERING STANDBY" - " STATE **********************\n" - "**************************************" - "****************************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "ENTERING STANDBY STATE"); } /********************************************************************** @@ -160,150 +125,79 @@ static void __osm_state_mgr_sm_port_down_msg(IN osm_log_t *log) { osm_log(log, OSM_LOG_SYS, "SM port is down\n"); /* Format Waived */ - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_sm_port_down_msg: " - "\n\n\n********************************" - "**********************************\n" - "************************** SM PORT DOWN " - "**************************\n" - "**************************************" - "****************************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, "SM PORT DOWN"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_lid_assign_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_lid_assign_msg: " - "\n\n\n**************************************" - 
"****************************\n" - "***** LID ASSIGNMENT COMPLETE - STARTING SWITC" - "H TABLE CONFIG *****\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_set_sm_lid_done_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_set_sm_lid_done_msg: " - "\n\n\n**************************************" - "****************************\n" - "**** SM LID ASSIGNMENT COMPLETE - STARTING SUBN" - "ET LID CONFIG *****\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_switch_config_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_switch_config_msg: " - "\n\n\n**************************************" - "****************************\n" - "***************** SWITCHES CONFIGURED FOR UNICAST " - "****************\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "SWITCHES CONFIGURED FOR UNICAST"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_multicast_config_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - 
"__osm_state_mgr_multicast_config_msg: " - "\n\n\n**************************************" - "****************************\n" - "**************** SWITCHES CONFIGURED FOR MULTICAST " - "***************\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "SWITCHES CONFIGURED FOR MULTICAST"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_links_ports_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_links_ports_msg: " - "\n\n\n**************************************" - "****************************\n" - "******* LINKS PORTS CONFIGURED - SET LINKS TO ARMED " - "STATE ********\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_links_armed_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_links_armed_msg: " - "\n\n\n**************************************" - "****************************\n" - "************* LINKS ARMED - SET LINKS TO ACTIVE " - "STATE ************\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "LINKS ARMED - SET LINKS TO ACTIVE STATE"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_sweep_heavy_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - 
osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_sweep_heavy_msg: " - "\n\n\n**************************************" - "****************************\n" - "******************** INITIATING HEAVY SWEEP " - "**********************\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "INITIATING HEAVY SWEEP"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_sweep_heavy_done_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_sweep_heavy_done_msg: " - "\n\n\n**************************************" - "****************************\n" - "********************* HEAVY SWEEP COMPLETE " - "***********************\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "HEAVY SWEEP COMPLETE"); } /********************************************************************** **********************************************************************/ static void __osm_state_mgr_sweep_light_msg(IN osm_log_t *log) { - if (osm_log_is_active(log, OSM_LOG_VERBOSE)) - osm_log(log, OSM_LOG_VERBOSE, - "__osm_state_mgr_sweep_light_msg: " - "\n\n\n**************************************" - "****************************\n" - "******************** INITIATING LIGHT SWEEP " - "**********************\n" - "*********************************************" - "*********************\n\n\n"); + osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, + "INITIATING LIGHT SWEEP"); } /********************************************************************** -- 1.5.4.rc5 From sashak at voltaire.com Thu Feb 7 20:06:23 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 8 Feb 2008 04:06:23 +0000 Subject: [ofa-general] [PATCH] opensm/osm_state_mgr: use 
osm_log_msg_box() In-Reply-To: <20080208040214.GO11526@sashak.voltaire.com> References: <20080208040214.GO11526@sashak.voltaire.com> Message-ID: <20080208040623.GQ11526@sashak.voltaire.com> Use osm_log_msg_box() for printing verbose log messages. Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_state_mgr.c | 153 ++++++++--------------------------------- 1 files changed, 30 insertions(+), 123 deletions(-) diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index a2b0725..54f56c0 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -95,113 +95,6 @@ static void __osm_state_mgr_up_msg(IN const osm_sm_t *sm) /********************************************************************** **********************************************************************/ -static void __osm_state_mgr_init_errors_msg(IN osm_log_t *log) -{ - osm_log(log, OSM_LOG_SYS, "Errors during initialization\n"); /* Format Waived */ - - osm_log_msg_box(log, OSM_LOG_ERROR, __FUNCTION__, - "ERRORS DURING INITIALIZATION"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_light_sweep_done_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "LIGHT SWEEP COMPLETE"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_standby_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "ENTERING STANDBY STATE"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_sm_port_down_msg(IN osm_log_t *log) -{ - osm_log(log, OSM_LOG_SYS, "SM port is down\n"); /* Format Waived */ - - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, 
"SM PORT DOWN"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_lid_assign_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_set_sm_lid_done_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_switch_config_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "SWITCHES CONFIGURED FOR UNICAST"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_multicast_config_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "SWITCHES CONFIGURED FOR MULTICAST"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_links_ports_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_links_armed_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "LINKS ARMED - SET LINKS TO ACTIVE STATE"); -} - 
-/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_sweep_heavy_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "INITIATING HEAVY SWEEP"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_sweep_heavy_done_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "HEAVY SWEEP COMPLETE"); -} - -/********************************************************************** - **********************************************************************/ -static void __osm_state_mgr_sweep_light_msg(IN osm_log_t *log) -{ - osm_log_msg_box(log, OSM_LOG_VERBOSE, __FUNCTION__, - "INITIATING LIGHT SWEEP"); -} - -/********************************************************************** - **********************************************************************/ static void __osm_state_mgr_signal_warning(IN osm_sm_t *sm, IN const osm_signal_t signal) @@ -390,8 +283,8 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_sm_t *sm) */ h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); if (h_bind != OSM_BIND_INVALID_HANDLE) { - __osm_state_mgr_sweep_heavy_msg(sm->p_log); - + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "INITIATING HEAVY SWEEP"); /* * Start the sweep by clearing the port counts, then * get our own NodeInfo at 0 hops. 
@@ -714,7 +607,8 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t *sm) */ h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); if (h_bind != OSM_BIND_INVALID_HANDLE) { - __osm_state_mgr_sweep_light_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "INITIATING LIGHT SWEEP"); CL_PLOCK_ACQUIRE(sm->p_lock); cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, sm); CL_PLOCK_RELEASE(sm->p_lock); @@ -1175,7 +1069,8 @@ static void do_sweep(osm_sm_t * sm) if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; if (!sm->p_subn->force_heavy_sweep) { - __osm_state_mgr_light_sweep_done_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "LIGHT SWEEP COMPLETE"); return; } } @@ -1203,7 +1098,9 @@ _repeat_discovery: return; if (__osm_state_mgr_is_sm_port_down(sm) == TRUE) { - __osm_state_mgr_sm_port_down_msg(sm->p_log); + osm_log(sm->p_log, OSM_LOG_SYS, "SM port is down\n"); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "SM PORT DOWN"); /* Run the drop manager - we want to clear all records */ osm_drop_mgr_process(&sm->drop_mgr); @@ -1228,7 +1125,8 @@ _repeat_discovery: */ osm_sm_state_mgr_process(&sm->sm_state_mgr, OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED_DONE); - __osm_state_mgr_standby_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "ENTERING STANDBY STATE"); return; } @@ -1236,7 +1134,8 @@ _repeat_discovery: if (sm->p_subn->force_heavy_sweep) goto _repeat_discovery; - __osm_state_mgr_sweep_heavy_done_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "HEAVY SWEEP COMPLETE"); /* If we are MASTER - get the highest remote_sm, and * see if it is higher than our local sm. 
@@ -1300,7 +1199,8 @@ _repeat_discovery: if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_set_sm_lid_done_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG"); __osm_state_mgr_notify_lid_change(sm); osm_lid_mgr_process_subnet(&sm->lid_mgr); @@ -1313,7 +1213,8 @@ _repeat_discovery: * their destination. */ __osm_state_mgr_check_tbl_consistency(sm); - __osm_state_mgr_lid_assign_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG"); /* * Proceed with unicast forwarding table configuration. @@ -1330,13 +1231,15 @@ _repeat_discovery: * take into account these lfts. */ sm->p_subn->ignore_existing_lfts = FALSE; - __osm_state_mgr_switch_config_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "SWITCHES CONFIGURED FOR UNICAST"); if (!sm->p_subn->opt.disable_multicast) { osm_mcast_mgr_process(&sm->mcast_mgr); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_multicast_config_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "SWITCHES CONFIGURED FOR MULTICAST"); } /* @@ -1351,13 +1254,15 @@ _repeat_discovery: if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_links_ports_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE"); osm_link_mgr_process(&sm->link_mgr, IB_LINK_ARMED); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; - __osm_state_mgr_links_armed_msg(sm->p_log); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, + "LINKS ARMED - SET LINKS TO ACTIVE STATE"); osm_link_mgr_process(&sm->link_mgr, IB_LINK_ACTIVE); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) @@ -1371,9 +1276,11 @@ _repeat_discovery: 
sm->p_subn->coming_out_of_standby = FALSE; /* If there were errors - then the subnet is not really up */ - if (sm->p_subn->subnet_initialization_error == TRUE) - __osm_state_mgr_init_errors_msg(sm->p_log); - else { + if (sm->p_subn->subnet_initialization_error == TRUE) { + osm_log(sm->p_log, OSM_LOG_SYS, "Errors during initialization\n"); + osm_log_msg_box(sm->p_log, OSM_LOG_ERROR, __FUNCTION__, + "ERRORS DURING INITIALIZATION"); + } else { /* The subnet is up correctly - set the first_time_master_sweep * flag (if it is on) to FALSE. */ if (sm->p_subn->first_time_master_sweep == TRUE) -- 1.5.4.rc5 From sashak at voltaire.com Thu Feb 7 20:11:21 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 8 Feb 2008 04:11:21 +0000 Subject: [ofa-general] [PATCH] opensm: cleanup IB_SMINFO_STATE_INIT In-Reply-To: <20080208040214.GO11526@sashak.voltaire.com> References: <20080208040214.GO11526@sashak.voltaire.com> Message-ID: <20080208041121.GR11526@sashak.voltaire.com> This state is not defined by IBTA and really not useful in OpenSM. Signed-off-by: Sasha Khapyorsky --- opensm/include/iba/ib_types.h | 12 ------------ opensm/include/opensm/osm_base.h | 2 +- opensm/opensm/osm_console.c | 2 -- opensm/opensm/osm_helper.c | 13 +++++++------ opensm/opensm/osm_sm_state_mgr.c | 35 +++-------------------------------- opensm/opensm/osm_state_mgr.c | 8 -------- 6 files changed, 11 insertions(+), 61 deletions(-) diff --git a/opensm/include/iba/ib_types.h b/opensm/include/iba/ib_types.h index 672184b..649ef1c 100644 --- a/opensm/include/iba/ib_types.h +++ b/opensm/include/iba/ib_types.h @@ -10588,18 +10588,6 @@ typedef uint32_t ib_mr_mod_t; * The access rights the memory region are being modified. *****/ -/****d* IBA Base: Constants/IB_SMINFO_STATE_INIT -* NAME -* IB_SMINFO_STATE_INIT -* -* DESCRIPTION -* Encoded state value used in the SMInfo attribute. 
-* -* SOURCE -*/ -#define IB_SMINFO_STATE_INIT 4 -/**********/ - /****d* IBA Base: Constants/IB_SMINFO_ATTR_MOD_HANDOVER * NAME * IB_SMINFO_ATTR_MOD_HANDOVER diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h index 6f784ca..d5e3c27 100644 --- a/opensm/include/opensm/osm_base.h +++ b/opensm/include/opensm/osm_base.h @@ -794,7 +794,7 @@ typedef uintn_t osm_signal_t; * SYNOPSIS */ typedef enum _osm_sm_signal { - OSM_SM_SIGNAL_INIT = 0, + OSM_SM_SIGNAL_NONE = 0, OSM_SM_SIGNAL_DISCOVERY_COMPLETED, OSM_SM_SIGNAL_POLLING_TIMEOUT, OSM_SM_SIGNAL_DISCOVER, diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c index 1a6208f..9459b03 100644 --- a/opensm/opensm/osm_console.c +++ b/opensm/opensm/osm_console.c @@ -266,8 +266,6 @@ static void priority_parse(char **p_last, osm_opensm_t * p_osm, FILE * out) static char *sm_state_str(int state) { switch (state) { - case IB_SMINFO_STATE_INIT: - return ("Init"); case IB_SMINFO_STATE_DISCOVERING: return ("Discovering"); case IB_SMINFO_STATE_STANDBY: diff --git a/opensm/opensm/osm_helper.c b/opensm/opensm/osm_helper.c index bd345bc..de4ba33 100644 --- a/opensm/opensm/osm_helper.c +++ b/opensm/opensm/osm_helper.c @@ -57,6 +57,8 @@ #define LINE_LENGTH 256 +#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0])) + /* we use two tables - one for queries and one for responses */ const char *const __ib_sa_method_str[] = { "RESERVED", /* 0 */ @@ -2320,7 +2322,7 @@ const char *osm_get_lsa_str(IN uint8_t const lsa) **********************************************************************/ const char *const __osm_sm_mgr_signal_str[] = { - "OSM_SM_SIGNAL_INIT", /* 0 */ + "OSM_SM_SIGNAL_NONE", /* 0 */ "OSM_SM_SIGNAL_DISCOVERY_COMPLETED", /* 2 */ "OSM_SM_SIGNAL_POLLING_TIMEOUT", /* 3 */ "OSM_SM_SIGNAL_DISCOVER", /* 4 */ @@ -2348,13 +2350,12 @@ const char *const __osm_sm_mgr_state_str[] = { "IB_SMINFO_STATE_DISCOVERING", /* 1 */ "IB_SMINFO_STATE_STANDBY", /* 2 */ "IB_SMINFO_STATE_MASTER", /* 3 */ - 
"IB_SMINFO_STATE_INIT", /* 4 */ - "UNKNOWN STATE!!" /* 5 */ + "UNKNOWN STATE!!" /* 4 */ }; const char *osm_get_sm_mgr_state_str(IN uint16_t state) { - if (state > IB_SMINFO_STATE_INIT) - state = IB_SMINFO_STATE_INIT + 1; - return (__osm_sm_mgr_state_str[state]); + return state < ARR_SIZE(__osm_sm_mgr_state_str) ? + __osm_sm_mgr_state_str[state] : + __osm_sm_mgr_state_str[ARR_SIZE(__osm_sm_mgr_state_str) - 1]; } diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index 27d7536..73d39fa 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -378,8 +378,9 @@ osm_sm_state_mgr_init(IN osm_sm_state_mgr_t * const p_sm_mgr, IN osm_sm_t * sm) p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_NOTACTIVE; __osm_sm_state_mgr_notactive_msg(p_sm_mgr); } else { - /* init the state of the SM to init */ - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_INIT; + /* init the state of the SM to discovering */ + p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; + __osm_sm_state_mgr_discovering_msg(p_sm_mgr); } status = cl_spinlock_init(&p_sm_mgr->state_lock); @@ -452,24 +453,6 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, } switch (p_sm_mgr->p_subn->sm_state) { - case IB_SMINFO_STATE_INIT: - switch (signal) { - case OSM_SM_SIGNAL_INIT: - /* - * Update the state of the SM to DISCOVERING - */ - __osm_sm_state_mgr_discovering_msg(p_sm_mgr); - p_sm_mgr->p_subn->sm_state = - IB_SMINFO_STATE_DISCOVERING; - break; - - default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); - status = IB_INVALID_PARAMETER; - break; - } - break; - case IB_SMINFO_STATE_DISCOVERING: switch (signal) { case OSM_SM_SIGNAL_DISCOVERY_COMPLETED: @@ -706,18 +689,6 @@ osm_sm_state_mgr_check_legality(IN osm_sm_state_mgr_t * const p_sm_mgr, } switch (p_sm_mgr->p_subn->sm_state) { - case IB_SMINFO_STATE_INIT: - switch (signal) { - case OSM_SM_SIGNAL_INIT: - status = IB_SUCCESS; - break; - default: - 
__osm_sm_state_mgr_signal_error(p_sm_mgr, signal); - status = IB_INVALID_PARAMETER; - break; - } - break; - case IB_SMINFO_STATE_DISCOVERING: switch (signal) { case OSM_SM_SIGNAL_DISCOVERY_COMPLETED: diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 54f56c0..20883e4 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -1338,14 +1338,6 @@ void osm_state_mgr_process(IN osm_sm_t *sm, IN osm_signal_t signal) break; } - /* - * If the osm_sm_state_mgr is in INIT state - signal - * it with a INIT signal to move it to DISCOVERY state. - */ - if (sm->p_subn->sm_state == IB_SMINFO_STATE_INIT) - osm_sm_state_mgr_process(&sm->sm_state_mgr, - OSM_SM_SIGNAL_INIT); - do_sweep(sm); break; -- 1.5.4.rc5 From rdreier at cisco.com Thu Feb 7 22:32:11 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 07 Feb 2008 22:32:11 -0800 Subject: [ofa-general] CM sysfs-related oops on device driver reload Message-ID: It seems there's something wrong with the new CM sysfs stuff for exporting statistics. If I load ib_cm and ib_mthca, and then rmmod ib_mthca and then reload it, I get the trace below. I'm partly to blame here, since I rejiggered the CM code as part of the 2.6.25, but I think I did make things a little better ;) -- mostly I got the code to build, but I also tried to fix the object lifetime a bit. It still is suspicious that cm_counter_obj_type has no .release method. Sean, I'm just letting you know about this in case you have a chance to look at it -- I probably won't have time to work on it until next week. - R. 
WARNING: at /scratch/Ksrc/linux-git/lib/kref.c:43 kref_get+0x1a/0x1f() Modules linked in: ib_mthca(+) rdma_ucm rdma_cm iw_cm ib_addr ib_uverbs ib_ipoib ib_cm ib] Pid: 2713, comm: modprobe Not tainted 2.6.24-dbg #8 Call Trace: [] warn_on_slowpath+0x51/0x63 [] kvasprintf+0x44/0x6b [] poison_obj+0x26/0x2f [] vsnprintf+0x30f/0x571 [] cache_alloc_debugcheck_after+0xe5/0x11e [] kvasprintf+0x5d/0x6b [] kref_get+0x1a/0x1f [] kobject_get+0x12/0x17 [] kobject_add_internal+0x4c/0x177 [] kobject_add_varg+0x54/0x61 [] trace_hardirqs_on+0xef/0x113 [] kobject_init+0x42/0x82 [] kobject_init_and_add+0x5b/0x68 [] trace_hardirqs_on+0xef/0x113 [] :ib_mthca:mthca_query_device+0x24e/0x25e [] :ib_cm:cm_add_one+0xd7/0x335 [] trace_hardirqs_on+0xef/0x113 [] :ib_core:ib_register_device+0x3b8/0x3f1 [] static_obj+0x5d/0x74 [] lockdep_init_map+0x81/0x3d2 [] :ib_mthca:mthca_register_device+0x3f9/0x44b [] :ib_mthca:__mthca_init_one+0x629/0x714 [] mutex_lock_nested+0x230/0x23f [] :ib_mthca:mthca_init_one+0x7a/0x8e [] pci_device_probe+0xb3/0xfb [] driver_probe_device+0xb5/0x132 [] __driver_attach+0x86/0xc3 [] __driver_attach+0x0/0xc3 [] __driver_attach+0x0/0xc3 [] bus_for_each_dev+0x47/0x72 [] bus_add_driver+0xb1/0x1fa [] driver_register+0x59/0xce [] __pci_register_driver+0x5a/0x8d [] :ib_mthca:mthca_init+0x141/0x155 [] sys_init_module+0x18db/0x19e4 [] alloc_pages_current+0x0/0x78 [] trace_hardirqs_on+0xef/0x113 [] trace_hardirqs_on_thunk+0x35/0x3a [] system_call_after_swapgs+0x7b/0x80 ---[ end trace 39d6f4ee281e2f49 ]--- BUG: unable to handle kernel NULL pointer dereference at 0000000000000038 IP: [] sysfs_addrm_start+0x2f/0x9f PGD 22f130067 PUD 227cc5067 PMD 0 Oops: 0000 [1] SMP CPU 2 Modules linked in: ib_mthca(+) rdma_ucm rdma_cm iw_cm ib_addr ib_uverbs ib_ipoib ib_cm ib] Pid: 2713, comm: modprobe Not tainted 2.6.24-dbg #8 RIP: 0010:[] [] sysfs_addrm_start+0x2f/0x9f RSP: 0018:ffff81022b1a37b8 EFLAGS: 00010292 RAX: ffff81022b1a3748 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 
0000000000000000 RSI: 0000000000000022 RDI: ffff81022f03a248 RBP: ffff81022b1a37d8 R08: 0000000000000000 R09: ffff81022b1a3748 R10: 0000000000000000 R11: ffffffff802490c5 R12: 00000000fffffff4 R13: 0000000000000000 R14: ffff81022b1a3830 R15: ffff810227d83000 FS: 00007f511e1c16e0(0000) GS:ffff81022f07c300(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000038 CR3: 0000000229540000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 2713, threadinfo ffff81022b1a2000, task ffff8102260ac080) Stack: 0000000000000000 ffff810228dd20f0 ffff810229ce07b8 ffffffff802c6ee8 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ffff810228dd20f0 ffff810228dd20f0 00000000fffffffe ffffffff881e9200 Call Trace: [] ? create_dir+0x44/0x87 [] ? sysfs_create_dir+0x35/0x4c [] ? kobject_get+0x12/0x17 [] ? kobject_add_internal+0xbc/0x177 [] ? kobject_add_varg+0x54/0x61 [] ? trace_hardirqs_on+0xef/0x113 [] ? kobject_init+0x42/0x82 [] ? kobject_init_and_add+0x5b/0x68 [] ? trace_hardirqs_on+0xef/0x113 [] ? :ib_mthca:mthca_query_device+0x24e/0x25e [] ? :ib_cm:cm_add_one+0xd7/0x335 [] ? trace_hardirqs_on+0xef/0x113 [] ? :ib_core:ib_register_device+0x3b8/0x3f1 [] ? static_obj+0x5d/0x74 [] ? lockdep_init_map+0x81/0x3d2 [] ? :ib_mthca:mthca_register_device+0x3f9/0x44b [] ? :ib_mthca:__mthca_init_one+0x629/0x714 [] ? mutex_lock_nested+0x230/0x23f [] ? :ib_mthca:mthca_init_one+0x7a/0x8e [] ? pci_device_probe+0xb3/0xfb [] ? driver_probe_device+0xb5/0x132 [] ? __driver_attach+0x86/0xc3 [] ? __driver_attach+0x0/0xc3 [] ? __driver_attach+0x0/0xc3 [] ? bus_for_each_dev+0x47/0x72 [] ? bus_add_driver+0xb1/0x1fa [] ? driver_register+0x59/0xce [] ? __pci_register_driver+0x5a/0x8d [] ? :ib_mthca:mthca_init+0x141/0x155 [] ? sys_init_module+0x18db/0x19e4 [] ? alloc_pages_current+0x0/0x78 [] ? trace_hardirqs_on+0xef/0x113 [] ? 
trace_hardirqs_on_thunk+0x35/0x3a [] ? system_call_after_swapgs+0x7b/0x80 Code: 08 00 00 00 fc 53 48 89 fd 48 89 f3 48 83 ec 08 f3 ab 48 89 75 00 48 c7 c7 90 8a 57 RIP [] sysfs_addrm_start+0x2f/0x9f RSP CR2: 0000000000000038 From sean.hefty at intel.com Thu Feb 7 22:59:24 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 7 Feb 2008 22:59:24 -0800 Subject: [ofa-general] CM sysfs-related oops on device driver reload In-Reply-To: References: Message-ID: <000001c86a20$23e1fdd0$8be0180a@amr.corp.intel.com> >Sean, I'm just letting you know about this in case you have a chance >to look at it -- I probably won't have time to work on it until next week. Thanks - I likely won't get to this until next Monday either, but I'll take a look at it then. - Sean From dwqualysoftm at qualysoft.de Fri Feb 8 02:09:34 2008 From: dwqualysoftm at qualysoft.de (Denise Hastings) Date: Fri, 8 Feb 2008 18:09:34 +0800 Subject: [ofa-general] What is Generic Medication? Message-ID: <01c86a7d$c21a0300$61d8a83d@dwqualysoftm> What is Generic Medication? A generic drug is identical, or bioequivalent to a brand name drug in dosage form, safety, strength, route of administration, quality, performance characteristics and intended use. Although generic drugs are chemically identical to their branded counterparts, they are typically sold at substantial discounts from the branded price. Generic drugs save consumers an estimated $8 to $10 billion a year at retail pharmacies. 
http://geocities.com/terrymason844/ From vlad at lists.openfabrics.org Fri Feb 8 03:00:25 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 8 Feb 2008 03:00:25 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080208-0200 daily build status Message-ID: <20080208110025.82456E60042@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed 
on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.19 Failed: From swise at opengridcomputing.com Fri Feb 8 06:43:21 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 08 Feb 2008 08:43:21 -0600 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: References: <47AB1646.8030006@opengridcomputing.com> <47AB32FD.6050700@opengridcomputing.com> Message-ID: <47AC6A89.3010408@opengridcomputing.com> Roland Dreier wrote: > > Have you ever thought about a SW rdma loopback device? (call me crazy :) > > > > I mean, can linux map memory from one process to another to do direct > > copies of data for rdma read/write operations? > > You could do that (and of course there is the OSC software iwarp > implementation), but I guess the complications in this case come from > stuff like someone sharing a CQ between a loopback and a non-loopback > QP on the same device, etc. > Good point. The chelsio driver already has a sw-cq implementation that could handle mixing loopback + non loopback connections. Perhaps I should just implement loopback support for chelsio. Any applications out there that require loopback? Steve. 
From herbbird at juno.com Fri Feb 8 07:12:40 2008 From: herbbird at juno.com (Harlan Felix) Date: Fri, 8 Feb 2008 16:12:40 +0100 Subject: [ofa-general] Righteous software - righteous price Message-ID: <307969075.14822515805276@juno.com> Righteous software - righteous priceOur main goal is to supply our customers with legal and cheap software for PC and Mac. We can help to find necessary software products or computer solutions whether you are a corporate buyer, small company owner or looking for some software products for your own PC.View what we got to propose http://geocities.com/summerdaniels618/Most popular software in sight are:*Microsoft Office 2007 Enterprise: Retail price today - $899.00; Our only for today - $79.95 *Microsoft Windows XP Professional with SP2: Retail price now - $269.99; Our now just - $49.95 *Corel Procreate KPT Effects: Retail price this day - $199.00; Our only - $19.95 *Microsoft Money Home & Business 7: Retail price today - $89.90; Our only for today - $39.95 *Microsoft Office XP Professional: Retail price for now - $499.00; Our only for today - $49.95 *Macromedia Flash Professional 8: Retail price for this time - $699.00; Our now just - $49.95 *Macromedia Fontographer 4: Retail price for this time - $105.95; Our just - $19.95 *Crystal Reports 10: Retail price this day - $450.00; Our only today - $39.95Check what we have to propose http://geocities.com/summerdaniels618/ Can bring homeI quickly were. A bondWhereof the world takes note. Heaven delights to hearAnd loves. Admiringly and mourningly. My poor body madam requires. I am no great Nebuchadnezzar. Give away myself which is. All already unless thou canst. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From hnguyen at linux.vnet.ibm.com Fri Feb 8 07:10:23 2008 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Fri, 8 Feb 2008 16:10:23 +0100 Subject: [ofa-general] IB/ipoib: ipoib_ib_post_receive: infinite loop in error path Message-ID: <200802081610.23545.hnguyen@linux.vnet.ibm.com> Hello Eli! I looked at the ipoib code from ofed-1.3-rc4 and saw the following code snippet in ipoib_ib_post_receive(): if (++priv->rx_outst == UD_POST_RCV_COUNT) { ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); while (bad_wr) { id = bad_wr->wr_id & ~IPOIB_OP_RECV; ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[i].mapping); #1/ipoib_0240_4kmtu.patch: should be priv->rx_ring[id].mapping dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; #2/ipoib_0220_ud_post_list.patch: missing iterator forwarding, i.e. bad_wr = bad_wr->next; } } priv->rx_outst = 0; } #1: I've talked with Shirley about this. #2: I thought I had seen you fix it, but I still see it in rc4 after running the configure script. Nam From xma at us.ibm.com Fri Feb 8 07:52:51 2008 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 8 Feb 2008 07:52:51 -0800 Subject: [ofa-general] Re: IB/ipoib: ipoib_ib_post_receive: infinite loop in error path In-Reply-To: <200802081610.23545.hnguyen@linux.vnet.ibm.com> Message-ID: Thanks Nam. I will fix it along with ipoib_sg_skb_put_frags() optimization. Thanks Shirley Hoang-Nam Nguyen cc ewg at lists.openfabrics.org, 02/08/08 general at lists.openfabrics.org 07:10 AM Subject IB/ipoib: ipoib_ib_post_receive: infinite loop in error path Hello Eli!
Looked at ipoib code from ofed-1.3-rc4 and the saw the following code snippet in ipoib_ib_post_receive(): if (++priv->rx_outst == UD_POST_RCV_COUNT) { ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); while (bad_wr) { id = bad_wr->wr_id & ~IPOIB_OP_RECV; ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[i].mapping); #1/ipoib_0240_4kmtu.patch: should be priv->rx_ring[id].mapping dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; #2/ipoib_0220_ud_post_list.patch: missing iterator forwarding, ie bad_wr = bad_wr->next; } } priv->rx_outst = 0; } #1: I've talked with Shirley about this. #2: I thought to have seen you fixed it, but still see it in rc4 after called configure script. Nam -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: graycol.gif Type: image/gif Size: 105 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: pic06271.gif Type: image/gif Size: 1255 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: From dwrremm at rrem.com Fri Feb 8 08:03:50 2008 From: dwrremm at rrem.com (Tamika Goss) Date: Fri, 8 Feb 2008 17:03:50 +0100 Subject: [ofa-general] Work hard, play harder. Message-ID: <01c86a74$934b5f00$3a9a7e4f@dwrremm> There is no more convenient way to win real money than joining our Golden Gate Casino members. All the most popular casino games! Easy to download, install and use free software! One of the industry's best welcome bonus $2400! Really fair play for the players guaranteed. The highest degree of security! 24/7 customer support! Quickest payouts! http://geocities.com/carolnguyen901/ Start downloading free software now! 
From cofelvca at agenziaterritorio.it Fri Feb 8 09:22:25 2008 From: cofelvca at agenziaterritorio.it (cofelvca at agenziaterritorio.it) Date: Fri, 8 Feb 2008 11:22:25 -0600 Subject: [ofa-general] Get bigger size day by day! Message-ID: <003101c86a77$2bef88b0$cc892ae7@ifwu> Best prices for blue-pill! http://uv.beautybegan.com From ardavis at ichips.intel.com Fri Feb 8 09:25:00 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 08 Feb 2008 09:25:00 -0800 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release In-Reply-To: <48ddf471ab.471ab48ddf@osu.edu> References: <48ddf471ab.471ab48ddf@osu.edu> Message-ID: <47AC906C.5060702@ichips.intel.com> LEI CHAI wrote: > I modified mvapich2 to compile with udapl v2 and used mvapich2 to compile and run an MPI latency program (osu_latency.c in osu_benchmarks). I set LD_LIBRARY_PATH=dapl-2.0.6/lib, but it didn't run (cannot open IA) and by setting the debug information I saw the error below. I've made sure mvapich2 was compiled with header files in include/dat2/ and linked with libdat2.so. And when I set LD_LIBRARY_PATH=dapl-2.0.5/lib I was able to run the program successfully. > > Lei > > Can you check the 2.0.6 libraries for extensions? nm /usr/lib64/libdat2.so | grep extensions 00000000001093a4 b g_dat_extensions nm /usr/lib64/libdaplofa.so.2.0.0 | grep extensions 00000000000169a0 T dapl_extensions Also, can you set DAT_DBG_TYPE=0xffff, retry, and send the output. Thanks, -arlin From ralph.campbell at qlogic.com Fri Feb 8 10:55:59 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Fri, 08 Feb 2008 10:55:59 -0800 Subject: [ofa-general] Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: References: Message-ID: <1202496960.3638.77.camel@brick.pathscale.com> On Thu, 2008-02-07 at 18:44 -0800, Shirley Ma wrote: > Hello Ralph, > > What's the send_queue_size, recv_queue_size for ib_ipoib module? Can > you reload ib_ipoib module with send_queue_size=2, recv_queue_size=2 > to see any difference? 
> > thanks > Shirley The queue sizes are not the problem. The problem is that priv->local_lid is not initialized (0) when create_own_ah() is called. Since zero is not a valid LID, ib_ipath ib_create_ah() returns EINVAL. From ralph.campbell at qlogic.com Fri Feb 8 11:41:14 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Fri, 08 Feb 2008 11:41:14 -0800 Subject: [ofa-general] [PATCH] IB/ipoib - Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: <1202496960.3638.77.camel@brick.pathscale.com> References: <1202496960.3638.77.camel@brick.pathscale.com> Message-ID: <1202499674.3638.86.camel@brick.pathscale.com> Here is a suggested patch for ib_ipoib for fixing the OFED-1.3 RC4 problem with ib_ipath. Note that I'm not completely familiar with all the ib_ipoib changes. In particular, I haven't checked that IB_EVENT_LID_CHANGE correctly updates priv->own_ah. --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-07 17:51:06.000000000 -0800 +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-08 11:33:06.000000000 -0800 @@ -664,8 +664,8 @@ void ipoib_reap_ah(struct work_struct *w static int create_own_ah(struct ipoib_dev_priv *priv) { + struct ib_port_attr pattr; struct ib_ah_attr attr = { - .dlid = priv->local_lid, .port_num = priv->port, }; @@ -673,6 +673,9 @@ static int create_own_ah(struct ipoib_de ipoib_dbg(priv, "own ah already exists\n"); return -EINVAL; } + if (ib_query_port(priv->ca, priv->port, &pattr)) + return -EINVAL; + attr.dlid = pattr.lid; priv->own_ah = ib_create_ah(priv->pd, &attr); return IS_ERR(priv->own_ah); } From xma at us.ibm.com Fri Feb 8 11:53:29 2008 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 8 Feb 2008 11:53:29 -0800 Subject: [ofa-general] Problem with latest OFED 1.3 build... 
IPoIB and iPATH In-Reply-To: <1202496960.3638.77.camel@brick.pathscale.com> Message-ID: Hello Ralph, In ipoib, priv->local_lid was set here: { struct ib_port_attr attr; if (!ib_query_port(priv->ca, priv->port, &attr)) priv->local_lid = attr.lid; } If you look at ipath_query_port(), static int ipath_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct ipath_ibdev *dev = to_idev(ibdev); enum ib_mtu mtu; u16 lid = dev->dd->ipath_lid; u64 ibcstat; memset(props, 0, sizeof(*props)); >> props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE); attr.lid is set here. Do you have any clue why it was set to 0? I don't see any code changes between RC3 and RC4 in IPoIB in this piece of code. I also found the code below in ipath. Is there any issue here in ipath_query_port(), since we do enable 4K MTU support? /* * Note: the chips support a maximum MTU of 4096, but the driver * hasn't implemented this feature yet, so set the maximum value * to 2048. */ props->max_mtu = IB_MTU_2048; switch (dev->dd->ipath_ibmtu) { case 4096: mtu = IB_MTU_4096; break; ... Shirley Ma IBM Linux Technology Center 15300 SW Koll Parkway Beaverton, OR 97006-6063 Phone(Fax): (503) 578-7638 -------------- next part -------------- An HTML attachment was scrubbed... URL: From xma at us.ibm.com Fri Feb 8 12:08:13 2008 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 8 Feb 2008 12:08:13 -0800 Subject: [ofa-general] Re: [PATCH] IB/ipoib - Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: <1202499674.3638.86.camel@brick.pathscale.com> Message-ID: Hello Ralph, I looked at ehca and mthca: in create_ah(), neither driver checks the dlid condition the way ipath does here. In the port initialization, priv->local_lid is set to 0, which was introduced by ipoib_0190_unsig_udqp.patch in RC4. I will let Eli look at this problem.
static struct ib_ah *ipath_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct ipath_ah *ah; struct ib_ah *ret; struct ipath_ibdev *dev = to_idev(pd->device); unsigned long flags; /* A multicast address requires a GRH (see ch. 8.4.1). */ if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && ah_attr->dlid != IPATH_PERMISSIVE_LID && !(ah_attr->ah_flags & IB_AH_GRH)) { ret = ERR_PTR(-EINVAL); goto bail; } if (ah_attr->dlid == 0) { ret = ERR_PTR(-EINVAL); goto bail; } Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From hartlch14 at gmail.com Fri Feb 8 12:21:36 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 8 Feb 2008 15:21:36 -0500 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release In-Reply-To: <47AC906C.5060702@ichips.intel.com> References: <48ddf471ab.471ab48ddf@osu.edu> <47AC906C.5060702@ichips.intel.com> Message-ID: I just built the 2.0.6 libraries and am getting the same thing. # nm /usr/lib64/libdat2.so | grep extensions 000000000001deac b g_dat_extensions # nm /usr/lib64/libdaplofa.so.2.0.0 | grep extensions U dapl_extensions DAT Registry: dat_ia_openv (ofa-v2-ib0,2:0,0) called DAT Registry: IA ofa-v2-ib0, trying to load library libdaplofa.so.2 DAT: library load failure: /usr/lib64/libdaplofa.so.2: undefined symbol: dapl_extensions DAT Registry: static registry unable to load library libdaplofa.so.2 DAT Registry: dat_ia_open () provider information for IA name ofa-v2-ib0 not found in dynamic registry Error opening Interface Adapter ofa-v2-ib0: DAT_PROVIDER_NOT_FOUND DAT_NAME_NOT_REGISTERED On Feb 8, 2008 12:25 PM, Arlin Davis wrote: > LEI CHAI wrote: > > I modified mvapich2 to compile with udapl v2 and used mvapich2 to > compile and run an MPI latency program (osu_latency.c in osu_benchmarks). I > set LD_LIBRARY_PATH=dapl-2.0.6/lib, but it didn't run (cannot open IA) and > by setting the debug information I saw the error below. 
I've made sure > mvapich2 was compiled with header files in include/dat2/ and linked with > libdat2.so. And when I set LD_LIBRARY_PATH=dapl-2.0.5/lib I was able to > run the program successfully. > > > > Lei > > > > > > Can you check the 2.0.6 libraries for extensions? > > nm /usr/lib64/libdat2.so | grep extensions > 00000000001093a4 b g_dat_extensions > > nm /usr/lib64/libdaplofa.so.2.0.0 | grep extensions > 00000000000169a0 T dapl_extensions > > Also, can you set DAT_DBG_TYPE=0xffff, retry, and send the output. > > Thanks, > > -arlin > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > -------------- next part -------------- An HTML attachment was scrubbed... URL: From xma at us.ibm.com Fri Feb 8 12:17:55 2008 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 8 Feb 2008 12:17:55 -0800 Subject: [ofa-general] ***SPAM*** Re: [PATCH] IB/ipoib - Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: <1202499674.3638.86.camel@brick.pathscale.com> Message-ID: Hello Ralph, This patch looks OK to me. Let's wait for Eli's response. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From hartlch14 at gmail.com Fri Feb 8 12:52:34 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 8 Feb 2008 15:52:34 -0500 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release In-Reply-To: <47AC906C.5060702@ichips.intel.com> References: <48ddf471ab.471ab48ddf@osu.edu> <47AC906C.5060702@ichips.intel.com> Message-ID: It looks like the dapl/openib_cma/dapl_ib_extensions.c file is not being included in the library. 
# nm -u /usr/lib64/libdaplofa.so.2.0.0 | grep dap U dapl_extensions U dapls_cqe_to_event_extension In my Makefile, I have this: ~snip~ dapl/openib_cma/dapl_ib_extensions.c #am__objects_1 = dapl_udapl_libdaplofa_la-dapl_ib_extensions.lo am_dapl_udapl_libdaplofa_la_OBJECTS = \ dapl_udapl_libdaplofa_la-dapl_init.lo \ ~snip~ dapl_udapl_libdaplofa_la-dapl_ib_mem.lo $(am__objects_1) I don't understand the build philosophy, but for some reason it looks like file is getting excluded on purpose. BTW, I'm building on PPC. Chuck On Feb 8, 2008 12:25 PM, Arlin Davis wrote: > LEI CHAI wrote: > > I modified mvapich2 to compile with udapl v2 and used mvapich2 to > compile and run an MPI latency program (osu_latency.c in osu_benchmarks). I > set LD_LIBRARY_PATH=dapl-2.0.6/lib, but it didn't run (cannot open IA) and > by setting the debug information I saw the error below. I've made sure > mvapich2 was compiled with header files in include/dat2/ and linked with > libdat2.so. And when I set LD_LIBRARY_PATH=dapl-2.0.5/lib I was able to > run the program successfully. > > > > Lei > > > > > > Can you check the 2.0.6 libraries for extensions? > > nm /usr/lib64/libdat2.so | grep extensions > 00000000001093a4 b g_dat_extensions > > nm /usr/lib64/libdaplofa.so.2.0.0 | grep extensions > 00000000000169a0 T dapl_extensions > > Also, can you set DAT_DBG_TYPE=0xffff, retry, and send the output. > > Thanks, > > -arlin > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ardavis at ichips.intel.com Fri Feb 8 13:20:38 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 08 Feb 2008 13:20:38 -0800 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release In-Reply-To: References: <48ddf471ab.471ab48ddf@osu.edu> <47AC906C.5060702@ichips.intel.com> Message-ID: <47ACC7A6.6050304@ichips.intel.com> Chuck Hartley wrote: > It looks like the dapl/openib_cma/dapl_ib_extensions.c file is not being > included in the library. > > # nm -u /usr/lib64/libdaplofa.so.2.0.0 | grep dap > U dapl_extensions > U dapls_cqe_to_event_extension > > In my Makefile, I have this: > > ~snip~ > dapl/openib_cma/dapl_ib_extensions.c > #am__objects_1 = dapl_udapl_libdaplofa_la-dapl_ib_extensions.lo > am_dapl_udapl_libdaplofa_la_OBJECTS = \ > dapl_udapl_libdaplofa_la-dapl_init.lo \ > ~snip~ > dapl_udapl_libdaplofa_la-dapl_ib_mem.lo $(am__objects_1) > > I don't understand the build philosophy, but for some reason it looks > like file is getting excluded on purpose. > > BTW, I'm building on PPC. > Are you using the RPM to install or ./configure && make install? The dapl_ib_extension.c is built when --enable-ext-type=ib is set. -arlin From rdreier at cisco.com Fri Feb 8 13:30:17 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 13:30:17 -0800 Subject: [ofa-general] Re: [PATCH 2 of 2] IB/mlx4: shrinking WQE In-Reply-To: <200801281040.59398.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Mon, 28 Jan 2008 10:40:59 +0200") References: <200801281040.59398.jackm@dev.mellanox.co.il> Message-ID: Thanks, applied -- we might as well use this feature I guess. Although I'm not convinced this is really that useful; the only use I see for it would be speeding up IPoIB with S/G and checksum offload, when we have to size send WQEs for the worst case but most packets are smaller. But Eli's latest work seems to use selective signaling for the send queue, so this change doesn't actually help. 
I also made a few small changes to places like this: > + ind += DIV_ROUND_UP(size * 16, 1 << qp->sq.wqe_shift); For this case, the compiler can generate better code if we use "1U <<" to get an unsigned divisor; in that case it can use a shift instead of needing to use a divide operation to keep the sign correct. - R. From ralph.campbell at qlogic.com Fri Feb 8 13:32:10 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Fri, 08 Feb 2008 13:32:10 -0800 Subject: [ofa-general] Problem with latest OFED 1.3 build... IPoIB and iPATH In-Reply-To: References: Message-ID: <1202506330.3638.99.camel@brick.pathscale.com> On Fri, 2008-02-08 at 11:53 -0800, Shirley Ma wrote: > Hello Ralph, > > In ipoib, priv->local_lid was set here: > { > struct ib_port_attr attr; > > if (!ib_query_port(priv->ca, priv->port, &attr)) > priv->local_lid = attr.lid; > } Yes, but this is called via a workqueue thread after ipoib_ib_dev_open() is called. The recent change to call create_own_ah() in ipoib_ib_dev_open() is why ib_create_ah() is now failing and why local_lid is zero. > If you look at ipath_query_port(), > > static int ipath_query_port(struct ib_device *ibdev, > u8 port, struct ib_port_attr *props) > { > struct ipath_ibdev *dev = to_idev(ibdev); > enum ib_mtu mtu; > u16 lid = dev->dd->ipath_lid; > u64 ibcstat; > > memset(props, 0, sizeof(*props)); > >> props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE); > > attr.lid is set here. Do you have any clue it was set to 0? I don't > see any code changes between RC3 and RC4 in IPoIB in this piece of > code. > > I also found below code in ipath. Is there any issue here in > ipath_query_port()? Since we do enable 4K MTU support. > > /* > * Note: the chips support a maximum MTU of 4096, but the driver > * hasn't implemented this feature yet, so set the maximum value > * to 2048. > */ > props->max_mtu = IB_MTU_2048; > switch (dev->dd->ipath_ibmtu) { > case 4096: > mtu = IB_MTU_4096; > break; > ... 
At the time this was written, we didn't have 4K MTU capable switches for testing. Now that we do, we have newer ib_ipath code supporting 4K MTUs. We are in the process of updating the OFED code to match our local changes. From weiny2 at llnl.gov Fri Feb 8 13:44:13 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Fri, 8 Feb 2008 13:44:13 -0800 Subject: [ofa-general] [PATCH] opensm/opensm/osm_console.c: fix seg fault when running "portstatus ca" in the console Message-ID: <20080208134413.0781b0aa.weiny2@llnl.gov> The osm_node_get_physp_ptr now returns NULL for invalid ports. Check for this before using the pointer. Ira >From 33dba7f427c38a4bc71bebaca82567c8857e901a Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Fri, 8 Feb 2008 13:39:29 -0800 Subject: [PATCH] opensm/opensm/osm_console.c: fix seg fault when running "portstatus ca" in the console Signed-off-by: Ira K. Weiny --- opensm/opensm/osm_console.c | 22 +++++++++++++++------- 1 files changed, 15 insertions(+), 7 deletions(-) diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c index 8b6642e..44209af 100644 --- a/opensm/opensm/osm_console.c +++ b/opensm/opensm/osm_console.c @@ -555,17 +555,25 @@ static void __get_stats(cl_map_item_t * const p_map_item, void *context) for (port = 1; port < num_ports; port++) { osm_physp_t *phys = osm_node_get_physp_ptr(node, port); - ib_port_info_t *pi = &(phys->port_info); - uint8_t active_speed = ib_port_info_get_link_speed_active(pi); - uint8_t enabled_speed = ib_port_info_get_link_speed_enabled(pi); - uint8_t active_width = pi->link_width_active; - uint8_t enabled_width = pi->link_width_enabled; - uint8_t port_state = ib_port_info_get_port_state(pi); - uint8_t port_phys_state = ib_port_info_get_port_phys_state(pi); + ib_port_info_t *pi = NULL; + uint8_t active_speed = 0; + uint8_t enabled_speed = 0; + uint8_t active_width = 0; + uint8_t enabled_width = 0; + uint8_t port_state = 0; + uint8_t port_phys_state = 0; if (!phys) continue; + pi = &(phys->port_info); 
+ active_speed = ib_port_info_get_link_speed_active(pi); + enabled_speed = ib_port_info_get_link_speed_enabled(pi); + active_width = pi->link_width_active; + enabled_width = pi->link_width_enabled; + port_state = ib_port_info_get_port_state(pi); + port_phys_state = ib_port_info_get_port_phys_state(pi); + if ((enabled_width ^ active_width) > active_width) { __tag_port_report(&(fs->reduced_width_ports), cl_ntoh64(node->node_info.node_guid), -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-opensm-opensm-osm_console.c-fix-seg-fault-when-runn.patch Type: application/octet-stream Size: 1926 bytes Desc: not available URL: From hartlch14 at gmail.com Fri Feb 8 13:32:45 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 8 Feb 2008 16:32:45 -0500 Subject: ***SPAM*** Re: [ofa-general] [ANNOUCE] dapl 2.0.6 release In-Reply-To: <47ACC7A6.6050304@ichips.intel.com> References: <48ddf471ab.471ab48ddf@osu.edu> <47AC906C.5060702@ichips.intel.com> <47ACC7A6.6050304@ichips.intel.com> Message-ID: I'm doing ./configure && make install. I found the --enable-ext-type=ib rebuilt with it successfully. -------------- next part -------------- An HTML attachment was scrubbed... URL: From ardavis at ichips.intel.com Fri Feb 8 14:11:57 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 08 Feb 2008 14:11:57 -0800 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release In-Reply-To: References: <48ddf471ab.471ab48ddf@osu.edu> <47AC906C.5060702@ichips.intel.com> <47ACC7A6.6050304@ichips.intel.com> Message-ID: <47ACD3AD.4030403@ichips.intel.com> Chuck Hartley wrote: > I'm doing ./configure && make install. I found the --enable-ext-type=ib > rebuilt with it successfully. > > sorry for the confusion. I will change the v2 defaults in the configure to match the RPM spec file. 
-arlin From akpm at linux-foundation.org Fri Feb 8 14:23:15 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Fri, 8 Feb 2008 14:23:15 -0800 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080208220616.089936205@sgi.com> References: <20080208220616.089936205@sgi.com> Message-ID: <20080208142315.7fe4b95e.akpm@linux-foundation.org> On Fri, 08 Feb 2008 14:06:16 -0800 Christoph Lameter wrote: > This is a patchset implementing MMU notifier callbacks based on Andrea's > earlier work. These are needed if Linux pages are referenced from something > else than tracked by the rmaps of the kernel (an external MMU). MMU > notifiers allow us to get rid of the page pinning for RDMA and various > other purposes. It gets rid of the broken use of mlock for page pinning. > (mlock really does *not* pin pages....) > > More information on the rationale and the technical details can be found in > the first patch and the README provided by that patch in > Documentation/mmu_notifiers. > > The known immediate users are > > KVM > - Establishes a refcount to the page via get_user_pages(). > - External references are called spte. > - Has page tables to track pages whose refcount was elevated but > no reverse maps. > > GRU > - Simple additional hardware TLB (possibly covering multiple instances of > Linux) > - Needs TLB shootdown when the VM unmaps pages. > - Determines page address via follow_page (from interrupt context) but can > fall back to get_user_pages(). > - No page reference possible since no page status is kept.. > > XPmem > - Allows use of a processes memory by remote instances of Linux. > - Provides its own reverse mappings to track remote pte. > - Established refcounts on the exported pages. > - Must sleep in order to wait for remote acks of ptes that are being > cleared. > What about ib_umem_get()? 
From _nenahseo at abefuyumi.com Fri Feb 8 14:30:26 2008 From: _nenahseo at abefuyumi.com (laurenne Lipjankic) Date: Fri, 8 Feb 2008 17:30:26 -0500 Subject: [ofa-general] Be a new 5tud in weeks Message-ID: <000f01c86aa2$33768270$5893f747@DJ14NR61> Life will never be the same again with your new huge dck -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Fri Feb 8 14:32:52 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 14:32:52 -0800 Subject: [ofa-general] Re: [PATCH 2/16 v4] IB/ipoib: Add s/g support In-Reply-To: <1201710653.28794.168.camel@mtls03> (Eli Cohen's message of "Wed, 30 Jan 2008 18:30:53 +0200") References: <1201710653.28794.168.camel@mtls03> Message-ID: Thanks, applied... > --- a/drivers/infiniband/ulp/ipoib/ipoib.h > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > +static inline int ipoib_dma_map_tx(struct ib_device *ca, > + struct ipoib_tx_buf *tx_req) I didn't see why this needed to be in a header-- I just moved it to ipoib_ib.c. Also > + int frags; > + int i; > + > + mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb), > + DMA_TO_DEVICE); > + if (unlikely(ib_dma_mapping_error(ca, mapping[0]))) > + return -EIO; > + > + frags = skb_shinfo(skb)->nr_frags; Not sure what the advantage of having a local variable that is only used once to hold the value of nr_frags, so I got rid of it. - R. 
From changquing.tang at hp.com Fri Feb 8 14:37:00 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Fri, 8 Feb 2008 22:37:00 +0000 Subject: [ofa-general] uDAPL libdat2.so version [PATCH] udapl v1 and v2 - dat_create_psp_any() seed value wrong In-Reply-To: <47ABA463.6020707@ichips.intel.com> References: <47AB7912.5040700@ichips.intel.com> <47AB8A4E.5080409@ichips.intel.com> <47AB8EB6.1040600@ichips.intel.com> <47ABA463.6020707@ichips.intel.com> Message-ID: Arlin: I am running today's OFED tarball uDAPL v1 version, pure RDMA works, but if I switch to SRQ mode, I got segfault in dat_srq_create(), I checked the parameters to dat_srq_create(), I don't see anything wrong: Core was generated by `/mpiscratch/ctang/test/pp.x'. Program terminated with signal 11, Segmentation fault. #0 0x00002aaaabda5c3b in dat_srq_create () from /usr/lib64/libdat.so (gdb) print hpmp_udapl->ia_handle $7 = (DAT_IA_HANDLE) 0x1 (gdb) print hpmp_udapl->pz_handle $8 = (DAT_PZ_HANDLE) 0xc4540e0 (gdb) print srq_attr $9 = {max_recv_dtos = 16, max_recv_iov = 1, low_watermark = 0} (gdb) print &srq_attr $10 = (DAT_SRQ_ATTR *) 0x7fffe64fb760 (gdb) print &hpmp_udapl->srq_handle $11 = (DAT_SRQ_HANDLE *) 0xc448bb8 Do you have any idea ? Thanks. --CQ > -----Original Message----- > From: Arlin Davis [mailto:ardavis at ichips.intel.com] > Sent: Thursday, February 07, 2008 6:38 PM > To: Tang, Changqing > Cc: OpenFabrics General > Subject: Re: [ofa-general] uDAPL libdat2.so version [PATCH] > udapl v1 and v2 - dat_create_psp_any() seed value wrong > > Tang, Changqing wrote: > > Yes, the error is from dat_psp_create_any(). > > > > After changing seed value to 1024, do I still get any > warning message ? > > You may see some in-use warning messages. I noticed rdma_cm > changed the return code from EBUSY to EADDRINUSE so the > warning message is not suppressed like it should be. > > > > > Can I get this fix from tomorow's tarball ? 
> > > > OFED pulls from DAPL package releases so I would have to > roll-up another package. In the meantime, you could install > the dapl src rpm that comes with OFED, apply the patch, and > rebuild the libraries to test the fix. > > -arlin > From rdreier at cisco.com Fri Feb 8 14:39:34 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 14:39:34 -0800 Subject: [ofa-general] Re: [PATCH 3/16 v4] IB/core: Add checksum offload support In-Reply-To: <1201710657.28794.169.camel@mtls03> (Eli Cohen's message of "Wed, 30 Jan 2008 18:30:57 +0200") References: <1201710657.28794.169.camel@mtls03> Message-ID: thanks, applied. I changed the description and name of the device capability flag to make it clear that this is only for UD messages (since future devices may handle this for IPoIB CM with RC QPs). Also it seems we could use an anonymous union to make the csum_ok field share the same location as the port_num field in struct ib_wc and save a few bytes of space, but I'm not sure it's worth it. commit e0605d9199b462454f2f2e5ca01810255a6d5cfa Author: Eli Cohen Date: Wed Jan 30 18:30:57 2008 +0200 IB/core: Add IP checksum offload support Add a device capability to show when it can handle checksum offload. Also add a send flag for inserting checksums and a csum_ok field to the completion record. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cfbd38f..a5a7f96 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -95,7 +95,15 @@ enum ib_device_cap_flags { IB_DEVICE_N_NOTIFY_CQ = (1<<14), IB_DEVICE_ZERO_STAG = (1<<15), IB_DEVICE_SEND_W_INV = (1<<16), - IB_DEVICE_MEM_WINDOW = (1<<17) + IB_DEVICE_MEM_WINDOW = (1<<17), + /* + * Devices should set IB_DEVICE_UD_IP_CSUM if they support + * insertion of UDP and TCP checksum on outgoing UD IPoIB + * messages and can verify the validity of checksum for + * incoming messages. 
Setting this flag implies that the + * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. + */ + IB_DEVICE_UD_IP_CSUM = (1<<18), }; enum ib_atomic_cap { @@ -431,6 +439,7 @@ struct ib_wc { u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ + int csum_ok; }; enum ib_cq_notify_flags { @@ -615,7 +624,8 @@ enum ib_send_flags { IB_SEND_FENCE = 1, IB_SEND_SIGNALED = (1<<1), IB_SEND_SOLICITED = (1<<2), - IB_SEND_INLINE = (1<<3) + IB_SEND_INLINE = (1<<3), + IB_SEND_IP_CSUM = (1<<4) }; struct ib_sge { From rdreier at cisco.com Fri Feb 8 14:45:16 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 14:45:16 -0800 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: <1201710660.28794.170.camel@mtls03> (Eli Cohen's message of "Wed, 30 Jan 2008 18:31:00 +0200") References: <1201710660.28794.170.camel@mtls03> Message-ID: I'm a little worried here: > + if (priv->ca->flags & IB_DEVICE_IP_CSUM) > + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; I don't see any place that ca->flags ever gets set; in fact if I delete the flags member of struct ib_device from my tree, it all still compiles fine. So have you actually tested any of these checksum offload code paths? - R. From rdreier at cisco.com Fri Feb 8 14:48:00 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 14:48:00 -0800 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: (Roland Dreier's message of "Fri, 08 Feb 2008 14:45:16 -0800") References: <1201710660.28794.170.camel@mtls03> Message-ID: In fact I think I'll queue up the patch below to avoid problems like this in the future: commit 5128bdc97a1018aacac2550cf73bda61041cc3b8 Author: Roland Dreier Date: Fri Feb 8 14:47:26 2008 -0800 IB/core: Remove unused struct ib_device.flags member Avoid confusion about what it might mean, since it's never initialized. 
Signed-off-by: Roland Dreier diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a5a7f96..701e7b4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -900,8 +900,6 @@ struct ib_device { int *pkey_tbl_len; int *gid_tbl_len; - u32 flags; - int num_comp_vectors; struct iw_cm_verbs *iwcm; From rdreier at cisco.com Fri Feb 8 15:16:53 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 15:16:53 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get the last batch of InfiniBand/RDMA merges for 2.6.25, mostly preparation for enabling stateless offloads for IP-over-InfiniBand. Eli Cohen (3): IPoIB: Add high DMA feature flag IPoIB: Add send gather support IB/core: Add IP checksum offload support Jack Morgenstein (2): mlx4_core: For 64-bit systems, vmap() kernel queue buffers IB/mlx4: Use multiple WQ blocks to post smaller send WQEs Roland Dreier (3): IB/mlx4: Consolidate code to get an entry from a struct mlx4_buf mlx4_core: Clean up struct mlx4_buf IB/core: Remove unused struct ib_device.flags member drivers/infiniband/hw/mlx4/cq.c | 20 ++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 + drivers/infiniband/hw/mlx4/qp.c | 216 +++++++++++++++++++++++----- drivers/infiniband/hw/mlx4/srq.c | 8 +- drivers/infiniband/ulp/ipoib/ipoib.h | 4 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 10 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 89 +++++++++--- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 10 +- drivers/net/mlx4/alloc.c | 48 ++++-- drivers/net/mlx4/mr.c | 4 +- include/linux/mlx4/device.h | 19 ++- include/linux/mlx4/qp.h | 4 + include/rdma/ib_verbs.h | 16 ++- 14 files changed, 342 insertions(+), 112 deletions(-) From 
ardavis at ichips.intel.com Fri Feb 8 15:19:14 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 08 Feb 2008 15:19:14 -0800 Subject: [ofa-general] uDAPL libdat2.so version [PATCH] udapl v1 and v2 - dat_create_psp_any() seed value wrong In-Reply-To: References: <47AB7912.5040700@ichips.intel.com> <47AB8A4E.5080409@ichips.intel.com> <47AB8EB6.1040600@ichips.intel.com> <47ABA463.6020707@ichips.intel.com> Message-ID: <47ACE372.5020904@ichips.intel.com> Tang, Changqing wrote: > Arlin: > I am running today's OFED tarball uDAPL v1 version, pure RDMA works, > but if I switch to SRQ mode, I got segfault in dat_srq_create(), I checked > the parameters to dat_srq_create(), I don't see anything wrong: > > Core was generated by `/mpiscratch/ctang/test/pp.x'. > Program terminated with signal 11, Segmentation fault. > #0 0x00002aaaabda5c3b in dat_srq_create () from /usr/lib64/libdat.so > > (gdb) print hpmp_udapl->ia_handle > $7 = (DAT_IA_HANDLE) 0x1 > (gdb) print hpmp_udapl->pz_handle > $8 = (DAT_PZ_HANDLE) 0xc4540e0 > (gdb) print srq_attr > $9 = {max_recv_dtos = 16, max_recv_iov = 1, low_watermark = 0} > (gdb) print &srq_attr > $10 = (DAT_SRQ_ATTR *) 0x7fffe64fb760 > (gdb) print &hpmp_udapl->srq_handle > $11 = (DAT_SRQ_HANDLE *) 0xc448bb8 > > > Do you have any idea ? Did you have SRQ working on previous versions? I am not certain that the v1.2 SRQ implementation has ever been fully tested. James, can you shed some light on SRQ DAPL code status? -arlin From clameter at sgi.com Fri Feb 8 15:32:19 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 15:32:19 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080208142315.7fe4b95e.akpm@linux-foundation.org> References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> Message-ID: On Fri, 8 Feb 2008, Andrew Morton wrote: > What about ib_umem_get()? Ok. It pins using an elevated refcount. Same as XPmem right now. 
With that we effectively pin a page (page migration will fail) but we will continually be reclaiming the page and may repeatedly try to move it. We have issues with XPmem causing too many pages to be pinned and thus the OOM getting into weird behavior modes (OOM or stop lru scanning due to all_reclaimable set). An elevated refcount will also not be noticed by any of the schemes under consideration to improve LRU scanning performance. From holt at sgi.com Fri Feb 8 15:36:37 2008 From: holt at sgi.com (Robin Holt) Date: Fri, 8 Feb 2008 17:36:37 -0600 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> Message-ID: <20080208233636.GG26564@sgi.com> On Fri, Feb 08, 2008 at 03:32:19PM -0800, Christoph Lameter wrote: > On Fri, 8 Feb 2008, Andrew Morton wrote: > > > What about ib_umem_get()? > > Ok. It pins using an elevated refcount. Same as XPmem right now. With that > we effectively pin a page (page migration will fail) but we will > continually be reclaiming the page and may repeatedly try to move it. We > have issues with XPmem causing too many pages to be pinned and thus the > OOM getting into weird behavior modes (OOM or stop lru scanning due to > all_reclaimable set). > > An elevated refcount will also not be noticed by any of the schemes under > consideration to improve LRU scanning performance. Christoph, I am not sure what you are saying here. With v4 and later, I thought we were able to use the rmap invalidation to remove the ref count that XPMEM was holding and therefore be able to swapout. Did I miss something? I agree the existing XPMEM does pin. I hope we are not saying the XPMEM based upon these patches will not be able to swap/migrate. 
Thanks, Robin From clameter at sgi.com Fri Feb 8 15:41:24 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 15:41:24 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080208233636.GG26564@sgi.com> References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> Message-ID: On Fri, 8 Feb 2008, Robin Holt wrote: > > > What about ib_umem_get()? > > > > Ok. It pins using an elevated refcount. Same as XPmem right now. With that > > we effectively pin a page (page migration will fail) but we will > > continually be reclaiming the page and may repeatedly try to move it. We > > have issues with XPmem causing too many pages to be pinned and thus the > > OOM getting into weird behavior modes (OOM or stop lru scanning due to > > all_reclaimable set). > > > > An elevated refcount will also not be noticed by any of the schemes under > > consideration to improve LRU scanning performance. > > Christoph, I am not sure what you are saying here. With v4 and later, > I thought we were able to use the rmap invalidation to remove the ref > count that XPMEM was holding and therefore be able to swapout. Did I miss > something? I agree the existing XPMEM does pin. I hope we are not saying > the XPMEM based upon these patches will not be able to swap/migrate. Correct. You missed the turn of the conversation to how ib_umem_get() works. Currently it seems to pin the same way that the SLES10 XPmem works. 
From holt at sgi.com Fri Feb 8 15:43:02 2008 From: holt at sgi.com (Robin Holt) Date: Fri, 8 Feb 2008 17:43:02 -0600 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> Message-ID: <20080208234302.GH26564@sgi.com> On Fri, Feb 08, 2008 at 03:41:24PM -0800, Christoph Lameter wrote: > On Fri, 8 Feb 2008, Robin Holt wrote: > > > > > What about ib_umem_get()? > > Correct. > > You missed the turn of the conversation to how ib_umem_get() works. > Currently it seems to pin the same way that the SLES10 XPmem works. Ah. I took Andrew's question as more of a probe about whether we had worked with the IB folks to ensure this fits the ib_umem_get needs as well. Thanks, Robin From akpm at linux-foundation.org Fri Feb 8 15:56:41 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Fri, 8 Feb 2008 15:56:41 -0800 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080208234302.GH26564@sgi.com> References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> Message-ID: <20080208155641.2258ad2c.akpm@linux-foundation.org> On Fri, 8 Feb 2008 17:43:02 -0600 Robin Holt wrote: > On Fri, Feb 08, 2008 at 03:41:24PM -0800, Christoph Lameter wrote: > > On Fri, 8 Feb 2008, Robin Holt wrote: > > > > > > > What about ib_umem_get()? > > > > Correct. > > > > You missed the turn of the conversation to how ib_umem_get() works. > > Currently it seems to pin the same way that the SLES10 XPmem works. > > Ah. I took Andrew's question as more of a probe about whether we had > worked with the IB folks to ensure this fits the ib_umem_get needs > as well. 
> You took it correctly, and I didn't understand the answer ;) From dillowda at ornl.gov Fri Feb 8 16:02:48 2008 From: dillowda at ornl.gov (David Dillow) Date: Fri, 08 Feb 2008 19:02:48 -0500 Subject: [ofa-general] [OFED-1.3rc PATCH 0/3] IB/srp: bring OFED SRP initiator up-to-date with 2.6.25rc Message-ID: <1202515368.5298.23.camel@lap75545.ornl.gov> This series of patches adds the fixes and enhancements that have been applied to the 2.6.25-to-be kernel. The first patch to respect the credit limits is a correctness issue, and will avoid performance cliffs on hardware in the field. The rest make sysadmins' lives easier, but can be held back if need be. These have been built and lightly tested against the ofed_kernel git repository as of this morning. -- Dave Dillow National Center for Computational Science Oak Ridge National Laboratory (865) 241-6602 office From dillowda at ornl.gov Fri Feb 8 16:02:57 2008 From: dillowda at ornl.gov (David Dillow) Date: Fri, 08 Feb 2008 19:02:57 -0500 Subject: [ofa-general] [OFED-1.3rc PATCH 1/3] IB/srp: Respect target credit limit Message-ID: <1202515377.5298.25.camel@lap75545.ornl.gov> The current SRP initiator will send requests even if it has no credits available. The results of sending extra requests are vendor specific, but on some devices, overrunning credits will cost 85% of peak performance -- e.g. 100 MB/s vs 720 MB/s. Other devices may just drop the requests. This patch will tell the SCSI midlayer to queue requests if there are fewer than two credits remaining, and will not issue a task management request if there are no credits remaining. The mid-layer will retry the queued command once an outstanding command completes. The patch also removes the unlikely() in __srp_get_tx_iu(), as it is not at all unlikely to hit this limit under heavy load. 
Signed-off-by: David Dillow --- drivers/infiniband/ulp/srp/ib_srp.c | 16 +++++++++------- drivers/infiniband/ulp/srp/ib_srp.h | 5 +++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1562e66..986a4ec 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -954,13 +954,18 @@ static int srp_post_recv(struct srp_target_port *target) * req_lim and tx_head. Lock cannot be dropped between call here and * call to __srp_post_send(). */ -static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target) +static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target, + enum srp_request_type req_type) { + s32 min = (req_type == SRP_REQ_TASK_MGMT) ? 1 : 2; + if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) return NULL; - if (unlikely(target->req_lim < 1)) + if (target->req_lim < min) { ++target->zero_req_lim; + return NULL; + } return target->tx_ring[target->tx_head & SRP_SQ_SIZE]; } @@ -1018,7 +1023,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, return 0; } - iu = __srp_get_tx_iu(target); + iu = __srp_get_tx_iu(target, SRP_REQ_NORMAL); if (!iu) goto err; @@ -1205,9 +1210,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len); target->req_lim = be32_to_cpu(rsp->req_lim_delta); - - target->scsi_host->can_queue = min(target->req_lim, - target->scsi_host->can_queue); } else { printk(KERN_WARNING PFX "Unhandled RSP opcode %#x\n", opcode); target->status = -ECONNRESET; @@ -1307,7 +1309,7 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, init_completion(&req->done); - iu = __srp_get_tx_iu(target); + iu = __srp_get_tx_iu(target, SRP_REQ_TASK_MGMT); if (!iu) goto out; diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 182f351..ada78b5 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ 
b/drivers/infiniband/ulp/srp/ib_srp.h @@ -79,6 +79,11 @@ enum srp_target_state { SRP_TARGET_REMOVED }; +enum srp_request_type { + SRP_REQ_NORMAL, + SRP_REQ_TASK_MGMT, +}; + struct srp_device { struct list_head dev_list; struct ib_device *dev; From dillowda at ornl.gov Fri Feb 8 16:03:00 2008 From: dillowda at ornl.gov (David Dillow) Date: Fri, 08 Feb 2008 19:03:00 -0500 Subject: [ofa-general] [OFED-1.3rc PATCH 2/3] IB/srp: Add identifying information to log messages Message-ID: <1202515380.5298.27.camel@lap75545.ornl.gov> When you have multiple targets, it gets really confusing when you try to track down who did a reset when there is no identifying information in the log message, especially when the same extension ID is mapped through two different local IB ports. So, add an identifier that can be used to track back to which local IB port/remote target pair is the one having problems. Signed-off-by: David Dillow --- drivers/infiniband/ulp/srp/ib_srp.c | 117 +++++++++++++++++++++-------------- 1 files changed, 71 insertions(+), 46 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 986a4ec..4db7f8e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -269,7 +269,8 @@ static void srp_path_rec_completion(int status, target->status = status; if (status) - printk(KERN_ERR PFX "Got failed path rec status %d\n", status); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Got failed path rec status %d\n", status); else target->path = *pathrec; complete(&target->done); @@ -300,7 +301,8 @@ static int srp_lookup_path(struct srp_target_port *target) wait_for_completion(&target->done); if (target->status < 0) - printk(KERN_WARNING PFX "Path record query failed\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Path record query failed\n"); return target->status; } @@ -376,9 +378,10 @@ static int srp_send_req(struct srp_target_port *target) * the second 8 bytes to the local 
node GUID. */ if (srp_target_is_topspin(target)) { - printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround " - "activated for target GUID %016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Topspin/Cisco initiator port ID workaround " + "activated for target GUID %016llx\n", + (unsigned long long) be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); memcpy(req->priv.initiator_port_id + 8, &target->srp_host->dev->dev->node_guid, 8); @@ -397,7 +400,8 @@ static void srp_disconnect_target(struct srp_target_port *target) init_completion(&target->done); if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { - printk(KERN_DEBUG PFX "Sending CM DREQ failed\n"); + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); return; } } @@ -563,7 +567,8 @@ static int srp_reconnect_target(struct srp_target_port *target) return ret; err: - printk(KERN_ERR PFX "reconnect failed (%d), removing target port.\n", ret); + shost_printk(KERN_ERR, target->scsi_host, + PFX "reconnect failed (%d), removing target port.\n", ret); /* * We couldn't reconnect, so kill our target port off. 
@@ -678,8 +683,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, if (scmnd->sc_data_direction != DMA_FROM_DEVICE && scmnd->sc_data_direction != DMA_TO_DEVICE) { - printk(KERN_WARNING PFX "Unhandled data direction %d\n", - scmnd->sc_data_direction); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled data direction %d\n", + scmnd->sc_data_direction); return -EINVAL; } @@ -781,8 +787,9 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) } else { scmnd = req->scmnd; if (!scmnd) - printk(KERN_ERR "Null scmnd for RSP w/tag %016llx\n", - (unsigned long long) rsp->tag); + shost_printk(KERN_ERR, target->scsi_host, + "Null scmnd for RSP w/tag %016llx\n", + (unsigned long long) rsp->tag); scmnd->result = rsp->status; if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { @@ -826,7 +833,8 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) if (0) { int i; - printk(KERN_ERR PFX "recv completion, opcode 0x%02x\n", opcode); + shost_printk(KERN_ERR, target->scsi_host, + PFX "recv completion, opcode 0x%02x\n", opcode); for (i = 0; i < wc->byte_len; ++i) { if (i % 8 == 0) @@ -847,11 +855,13 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) case SRP_T_LOGOUT: /* XXX Handle target logout */ - printk(KERN_WARNING PFX "Got target logout request\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Got target logout request\n"); break; default: - printk(KERN_WARNING PFX "Unhandled SRP opcode 0x%02x\n", opcode); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled SRP opcode 0x%02x\n", opcode); break; } @@ -887,9 +897,10 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr) ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(cq, 1, &wc) > 0) { if (wc.status) { - printk(KERN_ERR PFX "failed %s status %d\n", - wc.wr_id & SRP_OP_RECV ? 
"receive" : "send", - wc.status); + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %d\n", + wc.wr_id & SRP_OP_RECV ? "receive" : "send", + wc.status); if (!target->qp_in_error) { target->qp_in_error = 1; if (!timer_pending(&target->qp_err_timer)) { @@ -1052,12 +1063,13 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, len = srp_map_data(scmnd, target, req); if (len < 0) { - printk(KERN_ERR PFX "Failed to map data\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Failed to map data\n"); goto err; } if (__srp_post_recv(target)) { - printk(KERN_ERR PFX "Recv failed\n"); + shost_printk(KERN_ERR, target->scsi_host, PFX "Recv failed\n"); goto err_unmap; } @@ -1065,7 +1077,7 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd, DMA_TO_DEVICE); if (__srp_post_send(target, iu, len)) { - printk(KERN_ERR PFX "Send failed\n"); + shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); goto err_unmap; } @@ -1120,6 +1132,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event, struct srp_target_port *target) { + struct Scsi_Host *shost = target->scsi_host; struct ib_class_port_info *cpi; int opcode; @@ -1145,19 +1158,22 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, memcpy(target->path.dgid.raw, event->param.rej_rcvd.ari, 16); - printk(KERN_DEBUG PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", - (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), - (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); + shost_printk(KERN_DEBUG, shost, + PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", + (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), + (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); target->status = SRP_PORT_REDIRECT; } else { - printk(KERN_WARNING " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 
target->status = -ECONNRESET; } break; case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: - printk(KERN_WARNING " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); target->status = -ECONNRESET; break; @@ -1168,20 +1184,21 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, u32 reason = be32_to_cpu(rej->reason); if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) - printk(KERN_WARNING PFX - "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); + shost_printk(KERN_WARNING, shost, + PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); else - printk(KERN_WARNING PFX - "SRP LOGIN REJECTED, reason 0x%08x\n", reason); + shost_printk(KERN_WARNING, shost, + PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); } else - printk(KERN_WARNING " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," - " opcode 0x%02x\n", opcode); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," + " opcode 0x%02x\n", opcode); target->status = -ECONNRESET; break; default: - printk(KERN_WARNING " REJ reason 0x%x\n", - event->param.rej_rcvd.reason); + shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", + event->param.rej_rcvd.reason); target->status = -ECONNRESET; } } @@ -1196,7 +1213,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) switch (event->event) { case IB_CM_REQ_ERROR: - printk(KERN_DEBUG PFX "Sending CM REQ failed\n"); + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM REQ failed\n"); comp = 1; target->status = -ECONNRESET; break; @@ -1211,7 +1229,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len); target->req_lim = be32_to_cpu(rsp->req_lim_delta); } else { - printk(KERN_WARNING PFX "Unhandled RSP opcode %#x\n", opcode); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled RSP opcode %#x\n", opcode); target->status = -ECONNRESET; 
break; } @@ -1257,20 +1276,23 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) break; case IB_CM_REJ_RECEIVED: - printk(KERN_DEBUG PFX "REJ received\n"); + shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); comp = 1; srp_cm_rej_handler(cm_id, event, target); break; case IB_CM_DREQ_RECEIVED: - printk(KERN_WARNING PFX "DREQ received - connection closed\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "DREQ received - connection closed\n"); if (ib_send_cm_drep(cm_id, NULL, 0)) - printk(KERN_ERR PFX "Sending CM DREP failed\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Sending CM DREP failed\n"); break; case IB_CM_TIMEWAIT_EXIT: - printk(KERN_ERR PFX "connection closed\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "connection closed\n"); target->status = 0; break; @@ -1281,7 +1303,8 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) break; default: - printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled CM event %d\n", event->event); break; } @@ -1358,7 +1381,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) struct srp_request *req; int ret = SUCCESS; - printk(KERN_ERR "SRP abort called\n"); + shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); if (target->qp_in_error) return FAILED; @@ -1388,7 +1411,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_request *req, *tmp; - printk(KERN_ERR "SRP reset_device called\n"); + shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); if (target->qp_in_error) return FAILED; @@ -1415,7 +1438,7 @@ static int srp_reset_host(struct scsi_cmnd *scmnd) struct srp_target_port *target = host_to_target(scmnd->device->host); int ret = FAILED; - printk(KERN_ERR PFX "SRP reset_host called\n"); + shost_printk(KERN_ERR, target->scsi_host, PFX "SRP 
reset_host called\n"); if (!srp_reconnect_target(target)) ret = SUCCESS; @@ -1827,8 +1850,9 @@ static ssize_t srp_create_target(struct class_device *class_dev, ib_get_cached_gid(host->dev->dev, host->port, 0, &target->path.sgid); - printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " - "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + shost_printk(KERN_DEBUG, target->scsi_host, PFX + "new target: id_ext %016llx ioc_guid %016llx pkey %04x " + "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", (unsigned long long) be64_to_cpu(target->id_ext), (unsigned long long) be64_to_cpu(target->ioc_guid), be16_to_cpu(target->path.pkey), @@ -1855,7 +1879,8 @@ static ssize_t srp_create_target(struct class_device *class_dev, target->qp_in_error = 0; ret = srp_connect_target(target); if (ret) { - printk(KERN_ERR PFX "Connection failed\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection failed\n"); goto err_cm_id; } From dillowda at ornl.gov Fri Feb 8 16:03:04 2008 From: dillowda at ornl.gov (David Dillow) Date: Fri, 08 Feb 2008 19:03:04 -0500 Subject: [ofa-general] [OFED-1.3rc PATCH 3/3] IB/srp: Retry stale connections Message-ID: <1202515384.5298.29.camel@lap75545.ornl.gov> When a host just goes away (crash, power loss, etc.) without tearing down its IB connections, it can get stale connection errors when it tries to reconnect to targets upon rebooting. Retrying the connection a few times will prevent sysadmins from playing the "which disk(s) went missing?" game. This would have made things slightly quicker when tracking down some of the recent bugs, but it also helps quite a bit when you've got a large number of targets hanging off a wedged server. 
Signed-off-by: David Dillow --- drivers/infiniband/ulp/srp/ib_srp.c | 53 +++++++++++++++++++++++++++-------- drivers/infiniband/ulp/srp/ib_srp.h | 1 + 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 4db7f8e..7ede462 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -201,6 +201,22 @@ out: return ret; } +static int srp_new_cm_id(struct srp_target_port *target) +{ + struct ib_cm_id *new_cm_id; + + new_cm_id = ib_create_cm_id(target->srp_host->dev->dev, + srp_cm_handler, target); + if (IS_ERR(new_cm_id)) + return PTR_ERR(new_cm_id); + + if (target->cm_id) + ib_destroy_cm_id(target->cm_id); + target->cm_id = new_cm_id; + + return 0; +} + static int srp_create_target_ib(struct srp_target_port *target) { struct ib_qp_init_attr *init_attr; @@ -431,6 +447,7 @@ static void srp_remove_work(struct work_struct *work) static int srp_connect_target(struct srp_target_port *target) { + int retries = 3; int ret; ret = srp_lookup_path(target); @@ -463,6 +480,21 @@ static int srp_connect_target(struct srp_target_port *target) case SRP_DLID_REDIRECT: break; + case SRP_STALE_CONN: + /* Our current CM id was stale, and is now in timewait. + * Try to reconnect with a new one. 
+ */ + if (!retries-- || srp_new_cm_id(target)) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "giving up on stale connection\n"); + target->status = -ECONNRESET; + return target->status; + } + + shost_printk(KERN_ERR, target->scsi_host, PFX + "retrying stale connection\n"); + break; + default: return target->status; } @@ -502,7 +534,6 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re static int srp_reconnect_target(struct srp_target_port *target) { - struct ib_cm_id *new_cm_id; struct srp_request *req, *tmp; int ret; struct ib_cq *old_cq; @@ -521,14 +552,9 @@ static int srp_reconnect_target(struct srp_target_port *target) * Now get a new local CM ID so that we avoid confusing the * target in case things are really fouled up. */ - new_cm_id = ib_create_cm_id(target->srp_host->dev->dev, - srp_cm_handler, target); - if (IS_ERR(new_cm_id)) { - ret = PTR_ERR(new_cm_id); + ret = srp_new_cm_id(target); + if (ret) goto err; - } - ib_destroy_cm_id(target->cm_id); - target->cm_id = new_cm_id; old_qp = target->qp; old_cq = target->cq; @@ -1196,6 +1222,11 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, target->status = -ECONNRESET; break; + case IB_CM_REJ_STALE_CONN: + shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); + target->status = SRP_STALE_CONN; + break; + default: shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", event->param.rej_rcvd.reason); @@ -1870,11 +1901,9 @@ static ssize_t srp_create_target(struct class_device *class_dev, if (ret) goto err; - target->cm_id = ib_create_cm_id(host->dev->dev, srp_cm_handler, target); - if (IS_ERR(target->cm_id)) { - ret = PTR_ERR(target->cm_id); + ret = srp_new_cm_id(target); + if (ret) goto err_free; - } target->qp_in_error = 0; ret = srp_connect_target(target); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index ada78b5..4f51c5f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ 
b/drivers/infiniband/ulp/srp/ib_srp.h @@ -54,6 +54,7 @@ enum { SRP_PORT_REDIRECT = 1, SRP_DLID_REDIRECT = 2, + SRP_STALE_CONN = 3, SRP_MAX_LUN = 512, SRP_DEF_SG_TABLESIZE = 12, From clameter at sgi.com Fri Feb 8 16:05:00 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 16:05:00 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080208155641.2258ad2c.akpm@linux-foundation.org> References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: On Fri, 8 Feb 2008, Andrew Morton wrote: > You took it correctly, and I didn't understand the answer ;) We have done several rounds of discussion on linux-kernel about this so far and the IB folks have not shown up to join in. I have tried to make this as general as possible. From akpm at linux-foundation.org Fri Feb 8 16:12:48 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Fri, 8 Feb 2008 16:12:48 -0800 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: <20080208161248.506556b0.akpm@linux-foundation.org> On Fri, 8 Feb 2008 16:05:00 -0800 (PST) Christoph Lameter wrote: > On Fri, 8 Feb 2008, Andrew Morton wrote: > > > You took it correctly, and I didn't understand the answer ;) > > We have done several rounds of discussion on linux-kernel about this so > far and the IB folks have not shown up to join in. I have tried to make > this as general as possible. infiniband would appear to be the major present in-kernel client of this new interface. 
So as a part of proving its usefulness, correctness, etc we should surely work on converting infiniband to use it, and prove its goodness. Quite possibly none of the infiniband developers even know about it.. From rdreier at cisco.com Fri Feb 8 16:12:42 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 16:12:42 -0800 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: (Christoph Lameter's message of "Fri, 8 Feb 2008 16:05:00 -0800 (PST)") References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: > We have done several rounds of discussion on linux-kernel about this so > far and the IB folks have not shown up to join in. I have tried to make > this as general as possible. Sorry, this has been on my "things to look at" list for a while, but I haven't gotten a chance to really understand where things are yet. In general, this MMU notifier stuff will only be useful to a subset of InfiniBand/RDMA hardware. Some adapters are smart enough to handle changing the IO virtual -> bus/physical mapping on the fly, but some aren't. For the dumb adapters, I think the current ib_umem_get() is pretty close to as good as we can get: we have to keep the physical pages pinned for as long as the adapter is allowed to DMA into the memory region. For the smart adapters, we just need a chance to change the adapter's page table when the kernel/CPU's mapping changes, and naively, this stuff looks like it would work. Andrew, does that help? - R. 
From clameter at sgi.com Fri Feb 8 16:16:34 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 16:16:34 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: On Fri, 8 Feb 2008, Roland Dreier wrote: > In general, this MMU notifier stuff will only be useful to a subset of > InfiniBand/RDMA hardware. Some adapters are smart enough to handle > changing the IO virtual -> bus/physical mapping on the fly, but some > aren't. For the dumb adapters, I think the current ib_umem_get() is > pretty close to as good as we can get: we have to keep the physical > pages pinned for as long as the adapter is allowed to DMA into the > memory region. I thought the adapter can always remove the mapping by renegotiating with the remote side? Even if it's dumb then a callback could notify the driver that it may be required to tear down the mapping. We then hold the pages until we get okay by the driver that the mapping has been removed. We could also let the unmapping fail if the driver indicates that the mapping must stay. From clameter at sgi.com Fri Feb 8 16:18:38 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 16:18:38 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080208161248.506556b0.akpm@linux-foundation.org> References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080208161248.506556b0.akpm@linux-foundation.org> Message-ID: On Fri, 8 Feb 2008, Andrew Morton wrote: > Quite possibly none of the infiniband developers even know about it..
Well Andrea's initial approach was even featured on LWN a couple of weeks back. From srssum1ne at hotmail.com Fri Feb 8 16:21:37 2008 From: srssum1ne at hotmail.com (R S) Date: Fri, 8 Feb 2008 16:21:37 -0800 Subject: [ofa-general] trying to get of all lists In-Reply-To: References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: unsubscribe > Date: Fri, 8 Feb 2008 16:16:34 -0800> From: clameter at sgi.com> To: rdreier at cisco.com> CC: akpm at linux-foundation.org; andrea at qumranet.com; a.p.zijlstra at chello.nl; linux-mm at kvack.org; izike at qumranet.com; steiner at sgi.com; linux-kernel at vger.kernel.org; avi at qumranet.com; kvm-devel at lists.sourceforge.net; daniel.blueman at quadrics.com; holt at sgi.com; general at lists.openfabrics.org> Subject: Re: [ofa-general] Re: [patch 0/6] MMU Notifiers V6> > On Fri, 8 Feb 2008, Roland Dreier wrote:> > > In general, this MMU notifier stuff will only be useful to a subset of> > InfiniBand/RDMA hardware. Some adapters are smart enough to handle> > changing the IO virtual -> bus/physical mapping on the fly, but some> > aren't. For the dumb adapters, I think the current ib_umem_get() is> > pretty close to as good as we can get: we have to keep the physical> > pages pinned for as long as the adapter is allowed to DMA into the> > memory region.> > I thought the adaptor can always remove the mapping by renegotiating > with the remote side? Even if its dumb then a callback could notify the > driver that it may be required to tear down the mapping. 
We then hold the > pages until we get okay by the driver that the mapping has been removed.> > We could also let the unmapping fail if the driver indicates that the > mapping must stay.> --> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in> the body of a message to majordomo at vger.kernel.org> More majordomo info at http://vger.kernel.org/majordomo-info.html> Please read the FAQ at http://www.tux.org/lkml/ _________________________________________________________________ Shed those extra pounds with MSN and The Biggest Loser! http://biggestloser.msn.com/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From jim at mellanox.com Fri Feb 8 16:21:39 2008 From: jim at mellanox.com (Jim Mott) Date: Fri, 8 Feb 2008 16:21:39 -0800 Subject: [ofa-general] [PATCH 1/1] LIBSDP - accept() fails on 'both' after signal Message-ID: There are some tests that use signal-driven IO on TCP sockets through the fcntl option FASYNC. The second accept() would fall through to the TCP socket and return EINTR. The fix is to act like we do on nonblocking IO and use the SDP socket first again. While this fixes the problem in the test, it is not a good idea to use signal-driven IO on SDP sockets marked 'both'. If the application uses FASYNC sockets, it would be better to use libsdp.conf to direct the socket to either SDP or TCP. While this fix makes things work in the barely useful case where a new connection is established, bad things are likely to happen if you unleash all the TCP signal conditions on a system with both an SDP and a TCP socket pretending to be a single socket. 
Signed-off-by: Jim Mott --- Index: ofa_1_3_dev_user/src/userspace/libsdp/src/port.c =================================================================== --- ofa_1_3_dev_user.orig/src/userspace/libsdp/src/port.c 2008-02-04 00:32:17.000000000 -0600 +++ ofa_1_3_dev_user/src/userspace/libsdp/src/port.c 2008-02-08 19:07:01.000000000 -0600 @@ -2117,8 +2117,8 @@ shadow_fd, fopts); - /* we need different behavior for NONBLOCK and BLOCK */ - if ((fopts > 0) && (fopts & O_NONBLOCK)) { + /* we need different behavior for NONBLOCK or signal IO and BLOCK */ + if ((fopts > 0) && (fopts & (O_NONBLOCK | FASYNC))) { __sdp_log( 1, "ACCEPT: accepting (nonblock) on SDP fd:<%d>\n", shadow_fd ); ret = _socket_funcs.accept( shadow_fd, addr, addrlen ); From rdreier at cisco.com Fri Feb 8 16:22:41 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 16:22:41 -0800 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: (Christoph Lameter's message of "Fri, 8 Feb 2008 16:16:34 -0800 (PST)") References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: > I thought the adaptor can always remove the mapping by renegotiating > with the remote side? Even if its dumb then a callback could notify the > driver that it may be required to tear down the mapping. We then hold the > pages until we get okay by the driver that the mapping has been removed. Of course we can always destroy the memory region but that would break the semantics that applications expect. Basically an application can register some chunk of its memory and get a key that it can pass to a remote peer to let the remote peer operate on its memory via RDMA. And that memory region/key is expected to stay valid until there is an application-level operation to destroy it (or until the app crashes or gets killed, etc). 
> We could also let the unmapping fail if the driver indicates that the > mapping must stay. That would of course work -- dumb adapters would just always fail, which might be inefficient. From clameter at sgi.com Fri Feb 8 16:36:16 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 16:36:16 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208220616.089936205@sgi.com> <20080208142315.7fe4b95e.akpm@linux-foundation.org> <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: On Fri, 8 Feb 2008, Roland Dreier wrote: > That would of course work -- dumb adapters would just always fail, > which might be inefficient. Hmmmm.. that means we need something that actually pins pages for good so that the VM can avoid reclaiming it and so that page migration can avoid trying to migrate them. Something like yet another page flag. Ccing Rik. From andrea at qumranet.com Fri Feb 8 17:24:46 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Sat, 9 Feb 2008 02:24:46 +0100 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> Message-ID: <20080209012446.GB7051@v2.random> On Fri, Feb 08, 2008 at 04:36:16PM -0800, Christoph Lameter wrote: > On Fri, 8 Feb 2008, Roland Dreier wrote: > > > That would of course work -- dumb adapters would just always fail, > > which might be inefficient. > > Hmmmm.. that means we need something that actually pins pages for good so > that the VM can avoid reclaiming it and so that page migration can avoid > trying to migrate them. Something like yet another page flag. What's wrong with pinning with the page count like now? Dumb adapters would simply not register themself in the mmu notifier list no? > > Ccing Rik. 
From clameter at sgi.com Fri Feb 8 17:27:03 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 17:27:03 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080209012446.GB7051@v2.random> References: <20080208233636.GG26564@sgi.com> <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> Message-ID: On Sat, 9 Feb 2008, Andrea Arcangeli wrote: > > Hmmmm.. that means we need something that actually pins pages for good so > > that the VM can avoid reclaiming it and so that page migration can avoid > > trying to migrate them. Something like yet another page flag. > > What's wrong with pinning with the page count like now? Dumb adapters > would simply not register themself in the mmu notifier list no? Pages will still be on the LRU and cycle through rmap again and again. If page migration is used on those pages then the code may make repeated attempts to migrate the page thinking that the page count must at some point drop. I do not think that the page count was intended to be used to pin pages permanently. If we had a marker on such pages then we could take them off the LRU and not try to migrate them. From andrea at qumranet.com Fri Feb 8 17:56:59 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Sat, 9 Feb 2008 02:56:59 +0100 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> Message-ID: <20080209015659.GC7051@v2.random> On Fri, Feb 08, 2008 at 05:27:03PM -0800, Christoph Lameter wrote: > Pages will still be on the LRU and cycle through rmap again and again. > If page migration is used on those pages then the code may make repeated > attempts to migrate the page thinking that the page count must at some > point drop.
> > I do not think that the page count was intended to be used to pin pages > permanently. If we had a marker on such pages then we could take them off > the LRU and not try to migrate them. The VM shouldn't break if try_to_unmap doesn't actually make the page freeable for whatever reason. Permanent pins shouldn't happen anyway, so defining an ad-hoc API for that doesn't sound too appealing. Not sure if old hardware deserves those special lru-size-reduction optimizations but it's not my call (certainly swapoff/mlock would get higher priority in that lru-size-reduction area). From clameter at sgi.com Fri Feb 8 18:16:16 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 8 Feb 2008 18:16:16 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080209015659.GC7051@v2.random> References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> Message-ID: On Sat, 9 Feb 2008, Andrea Arcangeli wrote: > The VM shouldn't break if try_to_unmap doesn't actually make the page > freeable for whatever reason. Permanent pins shouldn't happen anyway, VM is livelocking if too many pages are pinned that way right now. The higher the processors per node the higher the risk of livelock because more processors are in the process of cycling through pages that have an elevated refcount. > so defining an ad-hoc API for that doesn't sound too appealing. Not > sure if old hardware deserves those special lru-size-reduction > optimizations but it's not my call (certainly swapoff/mlock would get > higher priority in that lru-size-reduction area). Rik has a patchset under development that addresses issues like this. The elevated refcount pin problem is not really relevant to the patchset we are discussing here.
From dwrabbiweinm at rabbiwein.com Fri Feb 8 18:22:10 2008 From: dwrabbiweinm at rabbiwein.com (Nichole Mccarty) Date: Sat, 9 Feb 2008 10:22:10 +0800 Subject: [ofa-general] Want to be a hero in bed? Message-ID: <01c86b05$a0fcf500$ca31227d@dwrabbiweinm> Are U Tired with erectile dysfunction? Enhance your sexual life now! Want to be ready for sex in few minutes? Reproductive and ED problems solution http://geocities.com/nicknelson457/ We are verified by VISA. Confidential purchase. From rdreier at cisco.com Fri Feb 8 20:48:13 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 08 Feb 2008 20:48:13 -0800 Subject: [ofa-general] CM sysfs-related oops on device driver reload In-Reply-To: <000001c86a20$23e1fdd0$8be0180a@amr.corp.intel.com> (Sean Hefty's message of "Thu, 7 Feb 2008 22:59:24 -0800") References: <000001c86a20$23e1fdd0$8be0180a@amr.corp.intel.com> Message-ID: OK, I think the following should make things work better: diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 638b727..017fdcf 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -159,7 +159,7 @@ struct cm_port { struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; - struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; + struct cm_counter_group *counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { @@ -1363,7 +1363,7 @@ static void cm_dup_req_handler(struct cm_work *work, struct ib_mad_send_buf *msg = NULL; int ret; - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_REQ_COUNTER]); /* Quick state check to discard duplicate REQs. */ @@ -1741,7 +1741,7 @@ static void cm_dup_rep_handler(struct cm_work *work) if (!cm_id_priv) return; - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) @@ -1908,7 +1908,7 @@ static int cm_rtu_handler(struct cm_work *work) if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_RTU_COUNTER]); goto out; } @@ -2087,7 +2087,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); if (!cm_id_priv) { - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; @@ -2108,7 +2108,7 @@ static int cm_dreq_handler(struct cm_work *work) case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_DREQ_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -2122,7 +2122,7 @@ static int cm_dreq_handler(struct cm_work *work) cm_free_msg(msg); goto deref; case IB_CM_DREQ_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_DREQ_COUNTER]); goto unlock; default: @@ -2479,7 +2479,7 @@ static int cm_mra_handler(struct cm_work *work) cm_id_priv->msg, timeout)) { if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) atomic_long_inc(&work->port-> - counter_group[CM_RECV_DUPLICATES]. 
+ counter_group[CM_RECV_DUPLICATES]-> counter[CM_MRA_COUNTER]); goto out; } @@ -2487,7 +2487,7 @@ static int cm_mra_handler(struct cm_work *work) break; case IB_CM_MRA_REQ_RCVD: case IB_CM_MRA_REP_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_MRA_COUNTER]); /* fall through */ default: @@ -2649,7 +2649,7 @@ static int cm_lap_handler(struct cm_work *work) case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_LAP_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -2665,7 +2665,7 @@ static int cm_lap_handler(struct cm_work *work) cm_free_msg(msg); goto deref; case IB_CM_LAP_RCVD: - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_LAP_COUNTER]); goto unlock; default: @@ -2949,7 +2949,7 @@ static int cm_sidr_req_handler(struct cm_work *work) cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { spin_unlock_irq(&cm.lock); - atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]-> counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } @@ -3161,10 +3161,10 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, msg->retries = 1; atomic_long_add(1 + msg->retries, - &port->counter_group[CM_XMIT].counter[attr_index]); + &port->counter_group[CM_XMIT]->counter[attr_index]); if (msg->retries) atomic_long_add(msg->retries, - &port->counter_group[CM_XMIT_RETRIES]. 
+ &port->counter_group[CM_XMIT_RETRIES]-> counter[attr_index]); switch (mad_send_wc->status) { @@ -3373,7 +3373,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, } attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); - atomic_long_inc(&port->counter_group[CM_RECV]. + atomic_long_inc(&port->counter_group[CM_RECV]-> counter[attr_id - CM_ATTR_ID_OFFSET]); work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, @@ -3561,6 +3561,14 @@ static void cm_get_ack_delay(struct cm_device *cm_dev) cm_dev->ack_delay = attr.local_ca_ack_delay; } +static void cm_release_counter_obj(struct kobject *obj) +{ + struct cm_counter_group *counter_group; + + counter_group = container_of(obj, struct cm_counter_group, obj); + kfree(counter_group); +} + static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, char *buf) { @@ -3579,6 +3587,7 @@ static struct sysfs_ops cm_counter_ops = { }; static struct kobj_type cm_counter_obj_type = { + .release = cm_release_counter_obj, .sysfs_ops = &cm_counter_ops, .default_attrs = cm_counter_default_attrs }; @@ -3587,8 +3596,6 @@ static void cm_release_port_obj(struct kobject *obj) { struct cm_port *cm_port; - printk(KERN_ERR "free cm port\n"); - cm_port = container_of(obj, struct cm_port, port_obj); kfree(cm_port); } @@ -3601,8 +3608,6 @@ static void cm_release_dev_obj(struct kobject *obj) { struct cm_device *cm_dev; - printk(KERN_ERR "free cm dev\n"); - cm_dev = container_of(obj, struct cm_device, dev_obj); kfree(cm_dev); } @@ -3616,18 +3621,12 @@ struct class cm_class = { }; EXPORT_SYMBOL(cm_class); -static void cm_remove_fs_obj(struct kobject *obj) -{ - kobject_put(obj->parent); - kobject_put(obj); -} - static int cm_create_port_fs(struct cm_port *port) { int i, ret; ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, - kobject_get(&port->cm_dev->dev_obj), + &port->cm_dev->dev_obj, "%d", port->port_num); if (ret) { kfree(port); @@ -3635,20 +3634,30 @@ static int 
cm_create_port_fs(struct cm_port *port) } for (i = 0; i < CM_COUNTER_GROUPS; i++) { - ret = kobject_init_and_add(&port->counter_group[i].obj, + port->counter_group[i] = kzalloc(sizeof *port->counter_group[i], + GFP_KERNEL); + if (!port->counter_group[i]) { + ret = -ENOMEM; + goto error; + } + + ret = kobject_init_and_add(&port->counter_group[i]->obj, &cm_counter_obj_type, - kobject_get(&port->port_obj), + &port->port_obj, "%s", counter_group_names[i]); - if (ret) + if (ret) { + kfree(port->counter_group[i]); goto error; + } } return 0; error: while (i--) - cm_remove_fs_obj(&port->counter_group[i].obj); - cm_remove_fs_obj(&port->port_obj); + kobject_put(&port->counter_group[i]->obj); + + kobject_put(&port->port_obj); return ret; } @@ -3658,9 +3667,9 @@ static void cm_remove_port_fs(struct cm_port *port) int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) - cm_remove_fs_obj(&port->counter_group[i].obj); + kobject_put(&port->counter_group[i]->obj); - cm_remove_fs_obj(&port->port_obj); + kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *device) @@ -3744,7 +3753,7 @@ error1: ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } - cm_remove_fs_obj(&cm_dev->dev_obj); + kobject_put(&cm_dev->dev_obj); } static void cm_remove_one(struct ib_device *device) @@ -3771,7 +3780,7 @@ static void cm_remove_one(struct ib_device *device) ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } - cm_remove_fs_obj(&cm_dev->dev_obj); + kobject_put(&cm_dev->dev_obj); } static int __init ib_cm_init(void) From DrewdisciplineKlein at naja.com Fri Feb 8 20:57:08 2008 From: DrewdisciplineKlein at naja.com (Dewey Briggs) Date: Fri, 8 Feb 2008 20:57:08 -0800 (PST) Subject: [ofa-general] We have it all! Message-ID: <20080209045709.437EBE60284@openfabrics.org> Play your favorite games from the comfort of your home, USA players ARE included! Our safe, secure games will get you smiling when you start seeing dollars pouring in. 
Travel no further than your screen and get your free $2400 Visit and start seeing the dollars coming. http://fhayfg.com.cn/ From 9lollipop30 at msn.com Fri Feb 8 21:59:58 2008 From: 9lollipop30 at msn.com (Arthur Barnes) Date: Sat, 9 Feb 2008 13:59:58 +0800 Subject: [ofa-general] What are you up to? Message-ID: <01c86b24$0e1f8300$35cc717d@9lollipop30> Hello! I am bored tonight. I am nice girl that would like to chat with you. Email me at Linnea at EHealThies.info only, because I am using my friend's email to write this. To see my pics From dwriogirlm at riogirl.com Fri Feb 8 23:02:38 2008 From: dwriogirlm at riogirl.com (Nettie Sprague) Date: Sat, 9 Feb 2008 16:02:38 +0900 Subject: [ofa-general] What is Generic Medication? Message-ID: <01c86b35$3139a800$af785879@dwriogirlm> What is Generic Medication? A generic drug is identical, or bioequivalent to a brand name drug in dosage form, safety, strength, route of administration, quality, performance characteristics and intended use. Although generic drugs are chemically identical to their branded counterparts, they are typically sold at substantial discounts from the branded price. Generic drugs save consumers an estimated $8 to $10 billion a year at retail pharmacies. http://geocities.com/reinaldodorsey295/ From sean.hefty at intel.com Fri Feb 8 23:22:56 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 8 Feb 2008 23:22:56 -0800 Subject: [ofa-general] CM sysfs-related oops on device driver reload In-Reply-To: References: <000001c86a20$23e1fdd0$8be0180a@amr.corp.intel.com> Message-ID: <000001c86aec$97eda860$7ae1180a@amr.corp.intel.com> The changes themselves look fine to me, but I'm not sure that I fully understand the issue. >-static void cm_remove_fs_obj(struct kobject *obj) >-{ >- kobject_put(obj->parent); >- kobject_put(obj); >-} Was this function the cause of the problem? Was the cm port object freed while references were still on one of the cm counter objects? 
- Sean From vlad at lists.openfabrics.org Sat Feb 9 02:59:40 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 9 Feb 2008 02:59:40 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080209-0200 daily build status Message-ID: <20080209105940.BDB9BE60240@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with 
linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.21.1 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.19 Failed: From dwportavellam at portavella.com Sat Feb 9 03:06:40 2008 From: dwportavellam at portavella.com (Arlene Hawkins) Date: Sat, 9 Feb 2008 12:06:40 +0100 Subject: [ofa-general] Get your free 2400$ welcome bonus and win much more! Message-ID: <01c86b14$3abf6980$810b314e@dwportavellam> Where to gamble online? Check the list of the games in Golden Gate Casino! Just download free software and play from the comfort of your home! Get started and receive $2400 welcome bonus! Great online casino Golden Gate is one of the leading casinos known for fair playing, excellent customer service available to contact 24 hour a day, 7 days a week and prompt payouts. http://geocities.com/ronnieeaton695/ Choose Golden Gate Casino! 
From riel at redhat.com Sat Feb 9 04:55:56 2008 From: riel at redhat.com (Rik van Riel) Date: Sat, 9 Feb 2008 07:55:56 -0500 Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> Message-ID: <20080209075556.63062452@bree.surriel.com> On Fri, 8 Feb 2008 18:16:16 -0800 (PST) Christoph Lameter wrote: > On Sat, 9 Feb 2008, Andrea Arcangeli wrote: > > > The VM shouldn't break if try_to_unmap doesn't actually make the page > > freeable for whatever reason. Permanent pins shouldn't happen anyway, > > VM is livelocking if too many page are pinned that way right now. > Rik has a patchset under development that addresses issues like this PG_mlock is on the way and can easily be reused for this, too. -- All rights reversed. From dcy at bostonsci.com.br Sat Feb 9 07:19:09 2008 From: dcy at bostonsci.com.br (Emery London) Date: Sat, 9 Feb 2008 23:19:09 +0800 Subject: [ofa-general] Or maybe just to reward yourself with a gift for once? Message-ID: <291761928.23838291560557@bostonsci.com.br> An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: Price Type: application/pdf Size: 17988 bytes Desc: not available URL: From changquing.tang at hp.com Sat Feb 9 08:38:28 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Sat, 9 Feb 2008 16:38:28 +0000 Subject: [ofa-general] uDAPL libdat2.so version [PATCH] udapl v1 and v2 - dat_create_psp_any() seed value wrong In-Reply-To: <47ACE372.5020904@ichips.intel.com> References: <47AB7912.5040700@ichips.intel.com> <47AB8A4E.5080409@ichips.intel.com> <47AB8EB6.1040600@ichips.intel.com> <47ABA463.6020707@ichips.intel.com> <47ACE372.5020904@ichips.intel.com> Message-ID: I am testing OFED 1.3 udapl v1, I have three nodes, n1, n2, and n3, if I run two ranks between n1 and n2, it works, n2 and n3, it works again, but if I run between n1 and n3, it fails with: dat_cr_accept() failed: DAT_INTERNAL_ERROR What could be the reason ? I did not change anything else except the node to run. Thanks for help. --CQ > -----Original Message----- > From: Arlin Davis [mailto:ardavis at ichips.intel.com] > Sent: Friday, February 08, 2008 5:19 PM > To: Tang, Changqing > Cc: OpenFabrics General; James Lentini > Subject: Re: [ofa-general] uDAPL libdat2.so version [PATCH] > udapl v1 and v2 - dat_create_psp_any() seed value wrong > > Tang, Changqing wrote: > > Arlin: > > I am running today's OFED tarball uDAPL v1 version, > pure RDMA > > works, but if I switch to SRQ mode, I got segfault in > > dat_srq_create(), I checked the parameters to > dat_srq_create(), I don't see anything wrong: > > > > Core was generated by `/mpiscratch/ctang/test/pp.x'. > > Program terminated with signal 11, Segmentation fault. 
> > #0 0x00002aaaabda5c3b in dat_srq_create () from > /usr/lib64/libdat.so > > > > (gdb) print hpmp_udapl->ia_handle > > $7 = (DAT_IA_HANDLE) 0x1 > > (gdb) print hpmp_udapl->pz_handle > > $8 = (DAT_PZ_HANDLE) 0xc4540e0 > > (gdb) print srq_attr > > $9 = {max_recv_dtos = 16, max_recv_iov = 1, low_watermark = 0} > > (gdb) print &srq_attr > > $10 = (DAT_SRQ_ATTR *) 0x7fffe64fb760 > > (gdb) print &hpmp_udapl->srq_handle > > $11 = (DAT_SRQ_HANDLE *) 0xc448bb8 > > > > > > Do you have any idea ? > > Did you have SRQ working on previous versions? > > I am not certain that the v1.2 SRQ implementation has ever > been fully tested. > > James, can you shed some light on SRQ DAPL code status? > > -arlin > From tomof at acm.org Sat Feb 9 08:41:36 2008 From: tomof at acm.org (FUJITA Tomonori) Date: Sun, 10 Feb 2008 01:41:36 +0900 Subject: [ofa-general] Re: [Stgt-devel] Update (Re: open iSCSI over iSER target RPM ...) In-Reply-To: <47AB2C2F.2090707@scalableinformatics.com> References: <47A87586.6010904@Voltaire.COM> <47AA28C3.7090003@scalableinformatics.com> <47AB2C2F.2090707@scalableinformatics.com> Message-ID: <200802091641.m19GffI5008280@mbox.iij4u.or.jp> From: Joe Landman Subject: [Stgt-devel] Update (Re: open iSCSI over iSER target RPM ...) Date: Thu, 07 Feb 2008 11:05:03 -0500 > Update: > > [root at woody etc]# dd if=/dev/zero of=/big/local.file bs=256k count=100000 > 100000+0 records in > 100000+0 records out > 26214400000 bytes (26 GB) copied, 58.7484 seconds, 446 MB/s > > Better. I rebuilt OFED 1.2.5.5. Are there specific recommended tuning > guides for iSER? Backing store in this case are real disks, and we can > sink/source >750 MB/s on them, so I am not worried about disk IO > bottlenecks, more worried about bad config of iSCSI/iSER. > > BTW: the 2TB LUN limit I asked about is still here in this code. Same > machines (initiator and target) used for SRP reported correct LUN sizes. > Here we are using the -868 open-iscsi initiator, and the tgt RPM > announced. 
I would like to dig into this. > > This is what I am getting in dmesg for this iSER target: > > iscsi: registered transport (tcp) > iscsi: registered transport (iser) > iser: iser_connect:connecting to: 10.2.1.2, port 0xbc0c > iser: iser_cma_handler:event 0 conn ffff81024b9f69c0 id ffff810209748c00 > iser: iser_cma_handler:event 2 conn ffff81024b9f69c0 id ffff810209748c00 > iser: iser_create_ib_conn_res:setting conn ffff81024b9f69c0 cma_id > ffff810209748c00: fmr_pool ffff81024bfb32c0 qp ffff8101cb16d600 > iser: iser_cma_handler:event 9 conn ffff81024b9f69c0 id ffff810209748c00 > iser: iscsi_iser_ep_poll:ib conn ffff81024b9f69c0 rc = 1 > scsi13 : iSCSI Initiator over iSER, v.0.1 > iser: iscsi_iser_conn_bind:binding iscsi conn ffff81021b65fa90 to > iser_conn ffff81024b9f69c0 > Vendor: IET Model: Controller Rev: 0001 > Type: RAID ANSI SCSI revision: 05 > scsi 13:0:0:0: Attached scsi generic sg2 type 12 > Vendor: IET Model: VIRTUAL-DISK Rev: 0001 > Type: Direct-Access ANSI SCSI revision: 05 > sdc : very big device. try to use READ CAPACITY(16). > sdc : READ CAPACITY(16) failed. > sdc : status=1, message=00, host=0, driver=08 > sdc : use 0xffffffff as device size > SCSI device sdc: 4294967296 512-byte hdwr sectors (2199023 MB) > sdc: Write Protect is off > sdc: Mode Sense: 79 00 00 08 > SCSI device sdc: drive cache: write back > sdc : very big device. try to use READ CAPACITY(16). > sdc : READ CAPACITY(16) failed. > sdc : status=1, message=00, host=0, driver=08 > sdc : use 0xffffffff as device size > SCSI device sdc: 4294967296 512-byte hdwr sectors (2199023 MB) > sdc: Write Protect is off > sdc: Mode Sense: 79 00 00 08 > SCSI device sdc: drive cache: write back > sdc: unknown partition table > sd 13:0:0:1: Attached scsi disk sdc > sd 13:0:0:1: Attached scsi generic sg3 type 0 > > > and this is what we get in SRP > > scsi6 : SRP.T10:0008F104039862A4 > Vendor: SCST_BIO Model: vdisk0 Rev: 096 > Type: Direct-Access ANSI SCSI revision: 04 > sdc : very big device. 
try to use READ CAPACITY(16). > SCSI device sdc: 12693355130 512-byte hdwr sectors (6498998 MB) > sdc: Write Protect is off > sdc: Mode Sense: 6b 00 10 08 > SCSI device sdc: drive cache: write back w/ FUA > > > This looks suspiciously like a 2^32 limit somewhere. Can you try the latest git tree (65a3f8b0c14305aaee5bcaade569b40882e8dd88)? It works for me: scsi3 : iSCSI Initiator over TCP/IP scsi 3:0:0:0: RAID IET Controller 0001 PQ: 0 ANSI: 5 scsi 3:0:0:1: Direct-Access IET VIRTUAL-DISK 0001 PQ: 0 ANSI: 5 sd 3:0:0:1: [sdb] Very big device. Trying to use READ CAPACITY(16). sd 3:0:0:1: [sdb] 12884901888 512-byte hardware sectors (6597070 MB) sd 3:0:0:1: [sdb] Write Protect is off sd 3:0:0:1: [sdb] Mode Sense: 79 00 00 08 sd 3:0:0:1: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA Somehow I forgot to add READ_CAPACITY_16 support. I would appreciate it if you could read and write on over 2TB position properly. > Our exported device is > > [root at jr1 ~]# parted /dev/sdb print > > Model: Areca jrvs1 (scsi) > Disk /dev/sdb: 6500GB > Sector size (logical/physical): 512B/512B > Partition Table: loop > > Number Start End Size File system Flags > 1 0.00kB 6500GB 6500GB xfs > > > and this is what tgtadm reports > > [root at jr1 ~]# tgtadm --lld iscsi --op show --mode target > Target 1: iqn.2001-04.com.jr1-jackrabbit.small > System information: > Driver: iscsi > Status: running > I_T nexus information: > I_T nexus: 4 > Initiator: iqn.1996-04.voltaire.com:01:dfa8888a3fd > Connection: 0 > RDMA IP Address: 10.2.1.1 > LUN information: > LUN: 0 > Type: controller > SCSI ID: deadbeaf1:0 > SCSI SN: beaf10 > Size: 0 > Online: No > Poweron/Reset: Yes > Removable media: No > Backing store: No backing store > LUN: 1 > Type: disk > SCSI ID: deadbeaf1:1 > SCSI SN: beaf11 > Size: 5T > Online: Yes > Poweron/Reset: No > Removable media: No > Backing store: /dev/sdb > Account information: > ACL information: > 10.2.1.1 > > So it looks like the LUN 1 is approximately 
correct (5T ???) on the > target, and incorrect when the initiator asks for it. I changed tgt to show the capacity like Linux does: Target 1: iqn.2007-03:marks-vtl-tgt:tulip System information: Driver: iscsi Status: running I_T nexus information: I_T nexus: 1 Initiator: iqn.2005-03.org.open-iscsi:d38a581f3318 Connection: 0 IP Address: 192.168.11.15 LUN information: LUN: 0 Type: controller SCSI ID: deadbeaf1:0 SCSI SN: beaf10 Size: 0 MB Online: Yes Poweron/Reset: Yes Removable media: No Backing store: No backing store LUN: 1 Type: disk SCSI ID: deadbeaf1:1 SCSI SN: beaf11 Size: 6597070 MB Online: Yes Poweron/Reset: No Removable media: No Backing store: /dev/sde Account information: ACL information: ALL > Please note that I have successfully used the full 6+TB as an iSCSI > target using the SCST-iscsi code, so I do know that the initiator works > correctly. > > Is there a source RPM/tree for this target? I guess that RedHat, SUSE, and OFED have tgt RPMs now so I think that you can find something. From landman at scalableinformatics.com Sat Feb 9 08:43:41 2008 From: landman at scalableinformatics.com (Joe Landman) Date: Sat, 09 Feb 2008 11:43:41 -0500 Subject: [ofa-general] Re: [Stgt-devel] Update (Re: open iSCSI over iSER target RPM ...) In-Reply-To: <200802091641.m19GffI5008280@mbox.iij4u.or.jp> References: <47A87586.6010904@Voltaire.COM> <47AA28C3.7090003@scalableinformatics.com> <47AB2C2F.2090707@scalableinformatics.com> <200802091641.m19GffI5008280@mbox.iij4u.or.jp> Message-ID: <47ADD83D.9080109@scalableinformatics.com> FUJITA Tomonori wrote: > Can you try the latest git tree > (65a3f8b0c14305aaee5bcaade569b40882e8dd88)? It works for me: > > scsi3 : iSCSI Initiator over TCP/IP > scsi 3:0:0:0: RAID IET Controller 0001 PQ: 0 ANSI: 5 > scsi 3:0:0:1: Direct-Access IET VIRTUAL-DISK 0001 PQ: 0 ANSI: 5 > sd 3:0:0:1: [sdb] Very big device. Trying to use READ CAPACITY(16). 
> sd 3:0:0:1: [sdb] 12884901888 512-byte hardware sectors (6597070 MB) > sd 3:0:0:1: [sdb] Write Protect is off > sd 3:0:0:1: [sdb] Mode Sense: 79 00 00 08 > sd 3:0:0:1: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA > > > Somehow I forgot to add READ_CAPACITY_16 support. I would appreciate > it if you could read and write on over 2TB position properly. Ok, I will pull it a little later today, build and let you know. -- Joseph Landman, Ph.D Founder and CEO Scalable Informatics LLC, email: landman at scalableinformatics.com web : http://www.scalableinformatics.com http://jackrabbit.scalableinformatics.com phone: +1 734 786 8423 fax : +1 866 888 3112 cell : +1 734 612 4615 From eli at dev.mellanox.co.il Sat Feb 9 08:57:36 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Sat, 9 Feb 2008 18:57:36 +0200 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: References: <1201710660.28794.170.camel@mtls03> Message-ID: <4e6a6b3c0802090857l5fa3935bq782df0a138e10129@mail.gmail.com> On 2/9/08, Roland Dreier wrote: > I'm a little worried here: > > > + if (priv->ca->flags & IB_DEVICE_IP_CSUM) > > + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; > > I don't see any place that ca->flags ever gets set; in fact if I > delete the flags member of struct ib_device from my tree, it all still > compiles fine. I set these flags for mlx4 and mthca in patches 5/16 and 6/16 respectively. > > So have you actually tested any of these checksum offload code paths? 
> On my machines I can see the flags set by inspecting /sys/class/net/ib*/features From sashak at voltaire.com Sat Feb 9 09:13:18 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 9 Feb 2008 17:13:18 +0000 Subject: [ofa-general] Re: [PATCH] opensm/opensm/osm_console.c: fix seg fault when running "portstatus ca" in the console In-Reply-To: <20080208134413.0781b0aa.weiny2@llnl.gov> References: <20080208134413.0781b0aa.weiny2@llnl.gov> Message-ID: <20080209171318.GS11526@sashak.voltaire.com> On 13:44 Fri 08 Feb , Ira Weiny wrote: > The osm_node_get_physp_ptr now returns NULL for invalid ports. Check for this before using the pointer. > > Ira > > > From 33dba7f427c38a4bc71bebaca82567c8857e901a Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Fri, 8 Feb 2008 13:39:29 -0800 > Subject: [PATCH] opensm/opensm/osm_console.c: fix seg fault when running "portstatus ca" in the > > console > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From eli at dev.mellanox.co.il Sat Feb 9 09:03:05 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Sat, 9 Feb 2008 19:03:05 +0200 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: References: <1201710660.28794.170.camel@mtls03> Message-ID: <4e6a6b3c0802090903x1bf9d39ci435b1e5be88e1934@mail.gmail.com> On 2/9/08, Roland Dreier wrote: > In fact I think I'll queue up the patch below to avoid problems like > this in the future: Doing so will cause a compilation error if the checksum offload patches are applied. > > commit 5128bdc97a1018aacac2550cf73bda61041cc3b8 > Author: Roland Dreier > Date: Fri Feb 8 14:47:26 2008 -0800 > > IB/core: Remove unused struct ib_device.flags member > > Avoid confusion about what it might mean, since it's never initialized. 
> > Signed-off-by: Roland Dreier > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index a5a7f96..701e7b4 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -900,8 +900,6 @@ struct ib_device { > int *pkey_tbl_len; > int *gid_tbl_len; > > - u32 flags; > - > int num_comp_vectors; > > struct iw_cm_verbs *iwcm; > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From dwspiderwebm at spiderweb.com Sat Feb 9 10:08:56 2008 From: dwspiderwebm at spiderweb.com (Danny Cannon) Date: Sat, 9 Feb 2008 23:08:56 +0500 Subject: [ofa-general] Medications that you need. Message-ID: <01c86b70$befdea80$018c4f4d@dwspiderwebm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Save your money, buy pills immediately. Same quality! http://geocities.com/lazarocrawford923/ We provide confidential and secure purchase! From eli at mellanox.co.il Sat Feb 9 13:22:53 2008 From: eli at mellanox.co.il (Eli Cohen) Date: Sat, 9 Feb 2008 23:22:53 +0200 Subject: [ofa-general] RE: IB/ipoib: ipoib_ib_post_receive: infinite loop in error path In-Reply-To: <200802081610.23545.hnguyen@linux.vnet.ibm.com> References: <200802081610.23545.hnguyen@linux.vnet.ibm.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C90345F464@mtlexch01.mtl.com> Hi Nam, Thanks for catching this - I will fix that for the next rc. -----Original Message----- From: Hoang-Nam Nguyen [mailto:hnguyen at linux.vnet.ibm.com] Sent: ו 08 פברואר 2008 17:10 To: Eli Cohen; xma at us.ibm.com Cc: ewg at lists.openfabrics.org; general at lists.openfabrics.org Subject: IB/ipoib: ipoib_ib_post_receive: infinite loop in error path Hello Eli! 
Looked at ipoib code from ofed-1.3-rc4 and saw the following code snippet in ipoib_ib_post_receive(): if (++priv->rx_outst == UD_POST_RCV_COUNT) { ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); while (bad_wr) { id = bad_wr->wr_id & ~IPOIB_OP_RECV; ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[i].mapping); #1/ipoib_0240_4kmtu.patch: should be priv->rx_ring[id].mapping dev_kfree_skb_any(priv->rx_ring[id].skb); priv->rx_ring[id].skb = NULL; #2/ipoib_0220_ud_post_list.patch: missing iterator forwarding, i.e. bad_wr = bad_wr->next; } } priv->rx_outst = 0; } #1: I've talked with Shirley about this. #2: I thought I had seen you fix it, but I still see it in rc4 after calling the configure script. Nam From clameter at sgi.com Sat Feb 9 13:46:34 2008 From: clameter at sgi.com (Christoph Lameter) Date: Sat, 9 Feb 2008 13:46:34 -0800 (PST) Subject: [ofa-general] Re: [patch 0/6] MMU Notifiers V6 In-Reply-To: <20080209075556.63062452@bree.surriel.com> References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> Message-ID: On Sat, 9 Feb 2008, Rik van Riel wrote: > PG_mlock is on the way and can easily be reused for this, too. Note that a pinned page is different from an mlocked page. A mlocked page can be moved through page migration and/or memory hotplug. A pinned page must make both fail. From pawel.dziekonski at pwr.wroc.pl Sat Feb 9 14:17:11 2008 From: pawel.dziekonski at pwr.wroc.pl (Pawel Dziekonski) Date: Sat, 9 Feb 2008 23:17:11 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ?
(fwd) In-Reply-To: <20080206213543.GA21176@cefeid.wcss.wroc.pl> References: <20080206154701.GA11384@cefeid.wcss.wroc.pl> <1202320491.14810.29.camel@trinity.ogc.int> <20080206213543.GA21176@cefeid.wcss.wroc.pl> Message-ID: <20080209221711.GB20332@cefeid.wcss.wroc.pl> hi, the saga continues. ;) very basic benchmarks and surprising (at least for me) results - it look's like reading is much slower than writing and NFS/RDMA is twice slower in reading than classic NFS. :o results below - comments appreciated! regards, Pawel both nfs server and client have 8-cores, 16 GB RAM, Mellanox DDR HCAs (MT25204) connected port-port (no switch). local_hdd - 2 sata2 disks in soft-raid0, nfs_ipoeth - classic nfs over ethernet, nfs_ipoib - classic nfs over IPoIB, nfs_rdma - NFS/RDMA. simple write of 36GB file with dd (both machines have 16GB RAM): /usr/bin/time -p dd if=/dev/zero of=/mnt/qqq bs=1M count=36000 local_hdd sys 54.52 user 0.04 real 254.59 nfs_ipoib sys 36.35 user 0.00 real 266.63 nfs_rdma sys 39.03 user 0.02 real 323.77 nfs_ipoeth sys 34.21 user 0.01 real 375.24 remount /mnt to clear cache and read a file from nfs share and write it to /dev/: /usr/bin/time -p dd if=/mnt/qqq of=/scratch/qqq bs=1M nfs_ipoib sys 59.04 user 0.02 real 571.57 nfs_ipoeth sys 58.92 user 0.02 real 606.61 nfs_rdma sys 62.57 user 0.03 real 1296.36 results from bonnie++: Version 1.03c ------Sequential Write ------ --Sequential Read -- --Random- -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks-- Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP local_hdd 35G:128k 93353 12 58329 6 143293 7 243.6 1 local_hdd 35G:256k 92283 11 58189 6 144202 8 172.2 2 local_hdd 35G:512k 93879 12 57715 6 144167 8 128.2 4 local_hdd 35G:1024k 93075 12 58637 6 144172 8 95.3 7 nfs_ipoeth 35G:128k 91325 7 31848 4 64299 4 170.2 1 nfs_ipoeth 35G:256k 90668 7 32036 5 64542 4 163.2 2 nfs_ipoeth 35G:512k 93348 7 31757 5 64454 4 85.7 3 nfs_ipoet 35G:1024k 91283 7 31869 5 64241 5 51.7 4 nfs_ipoib 
35G:128k 91733 7 36641 5 65839 4 178.4 2 nfs_ipoib 35G:256k 92453 7 36567 6 66682 4 166.9 3 nfs_ipoib 35G:512k 91157 7 37660 6 66318 4 86.8 3 nfs_ipoib 35G:1024k 92111 7 35786 6 66277 5 53.3 4 nfs_rdma 35G:128k 91152 8 29942 5 32147 2 187.0 1 nfs_rdma 35G:256k 89772 7 30560 5 34587 2 158.4 3 nfs_rdma 35G:512k 91290 7 29698 5 34277 2 60.9 2 nfs_rdma 35G:1024k 91336 8 29052 5 31742 2 41.5 3 ------Sequential Create------ --------Random Create-------- -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete-- files:max:min /sec %CP /sec %CP /sec %CP /sec %CP /sec %CP /sec %CP local_hdd 16 10587 36 +++++ +++ 8674 29 10727 35 +++++ +++ 7015 28 local_hdd 16 11372 41 +++++ +++ 8490 29 11192 43 +++++ +++ 6881 27 local_hdd 16 10789 35 +++++ +++ 8520 29 11468 46 +++++ +++ 6651 24 local_hdd 16 10841 40 +++++ +++ 8443 28 11162 41 +++++ +++ 6441 22 nfs_ipoeth 16 3753 7 13390 12 3795 7 3773 8 22181 16 3635 7 nfs_ipoeth 16 3762 8 12358 7 3713 8 3753 7 20448 13 3632 6 nfs_ipoeth 16 3834 7 12697 6 3729 8 3725 9 22807 11 3673 7 nfs_ipoeth 16 3729 8 14260 10 3774 7 3744 7 25285 14 3688 7 nfs_ipoib 16 6803 17 +++++ +++ 6843 15 6820 14 +++++ +++ 5834 11 nfs_ipoib 16 6587 16 +++++ +++ 4959 9 6832 14 +++++ +++ 5608 12 nfs_ipoib 16 6820 18 +++++ +++ 6636 15 6479 15 +++++ +++ 5679 13 nfs_ipoib 16 6475 14 +++++ +++ 6435 14 5543 11 +++++ +++ 5431 11 nfs_rdma 16 7014 15 +++++ +++ 6714 10 7001 14 +++++ +++ 5683 8 nfs_rdma 16 7038 13 +++++ +++ 6713 12 6956 11 +++++ +++ 5488 8 nfs_rdma 16 7058 12 +++++ +++ 6797 11 6989 14 +++++ +++ 5761 9 nfs_rdma 16 7201 13 +++++ +++ 6821 12 7072 15 +++++ +++ 5609 9 -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. 
D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From dwserbconm at serbcon.com Sat Feb 9 14:53:30 2008 From: dwserbconm at serbcon.com (Miriam Mueller) Date: Sat, 9 Feb 2008 19:53:30 -0300 Subject: [ofa-general] Software in many languages! Message-ID: <01c86b55$720bb060$bc99acbe@dwserbconm> Great site to purchase more than 270 programs! Even for Macintosh! Software in all European languages available! Cheap prices and original programs only! There are special offers and discounts for you to make even more significant savings. Purchasing software you can be sure you get perfectly working software, in case you are not satisfied, we offer money refund. Quick response and advice on how to install your software are guaranteed. http://geocities.com/jennahuff975/ Get software you need right now! From CarlocenterpieceDudley at emtech.net Sat Feb 9 16:02:54 2008 From: CarlocenterpieceDudley at emtech.net (Kirby Ashley) Date: Sat, 9 Feb 2008 16:02:54 -0800 (PST) Subject: [ofa-general] it's only fun and winning. Message-ID: <20080210000255.6B3FEE60079@openfabrics.org> Our safe, secure games will get you smiling when you start seeing dollars pouring in. We know how to treat our players - how about a $2400 welcome bonmus when you join? Our casino is for you and everyone else who likes to win! Get to know your new casino home! http://andnoc.cn/ From ogerlitz at voltaire.com Sat Feb 9 23:05:24 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 10 Feb 2008 09:05:24 +0200 Subject: [ofa-general] [OFED-1.3rc PATCH 0/3] IB/srp: bring OFED SRP initiator up-to-date with 2.6.25rc In-Reply-To: <1202515368.5298.23.camel@lap75545.ornl.gov> References: <1202515368.5298.23.camel@lap75545.ornl.gov> Message-ID: <47AEA234.8070601@voltaire.com> David Dillow wrote: > This series of patches adds the fixes and enhancements that have been > applied to the 2.6.25-to-be kernel. 
The first patch to respect the > credit limits is a correctness issue, and will avoid performance cliffs > on hardware in the field. The rest make sysadmin's lives easier, but can > be held back if need be. > > These have been built and lightly tested against the ofed_kernel git > repository as of this morning. Hi David, Please send such ofed related patches to the ewg mailing list (ewg at lists.openfabrics.org), this list is for mainline development, and its subscribers need not see each of your posting twice, thanks Or. From jackm at dev.mellanox.co.il Sat Feb 9 23:31:20 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Sun, 10 Feb 2008 09:31:20 +0200 Subject: [ofa-general] Re: [PATCH 2 of 2] IB/mlx4: shrinking WQE In-Reply-To: References: <200801281040.59398.jackm@dev.mellanox.co.il> Message-ID: <200802100931.20439.jackm@dev.mellanox.co.il> On Thursday 07 February 2008 19:32, Roland Dreier wrote: > > + * 0x7FFFFFF | (invalid_ownership_value << 31). >  > + * >  > + * When max WR is than or equal to the WQE size, > > "less than or equal"? > Clearly. From jackm at dev.mellanox.co.il Sat Feb 9 23:35:37 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Sun, 10 Feb 2008 09:35:37 +0200 Subject: [ofa-general] Re: [PATCH 2 of 2] IB/mlx4: shrinking WQE In-Reply-To: References: <200801281040.59398.jackm@dev.mellanox.co.il> Message-ID: <200802100935.37668.jackm@dev.mellanox.co.il> On Friday 08 February 2008 23:30, Roland Dreier wrote: > Although I'm not convinced this is really that useful; the only use I > see for it would be speeding up IPoIB with S/G and checksum offload, > when we have to size send WQEs for the worst case but most packets are > smaller.  But Eli's latest work seems to use selective signaling for > the send queue, so this change doesn't actually help. > You're right regarding Eli's change, but this is only for UD. The feature is useful for IPoIB-connected mode. Its also useful for SDP, and possibly for RDS. 
- Jack From ogerlitz at voltaire.com Sun Feb 10 00:07:27 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 10 Feb 2008 10:07:27 +0200 Subject: [ofa-general] Re: [PATCH 6/16 v4] IB/mthca: Add checksum offload support In-Reply-To: <1201710676.28794.172.camel@mtls03> References: <1201710676.28794.172.camel@mtls03> Message-ID: <47AEB0BF.1080504@voltaire.com> Eli Cohen wrote: > --- a/drivers/infiniband/hw/mthca/mthca_main.c > +++ b/drivers/infiniband/hw/mthca/mthca_main.c > @@ -267,6 +267,10 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) > if (dev_lim->flags & DEV_LIM_FLAG_SRQ) > mdev->mthca_flags |= MTHCA_FLAG_SRQ; > > + if (mthca_is_memfree(mdev)) > + if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM) > + mdev->device_cap_flags |= IB_DEVICE_IP_CSUM; > + ... > @@ -1109,6 +1113,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) > if (err) > goto err_cmd; > > + mdev->ib_dev.flags = mdev->device_cap_flags; > Roland Dreier wrote: > I don't see any place that ca->flags ever gets set; in fact if I > delete the flags member of struct ib_device from my tree, it all still > compiles fine. OK, Roland, correct, in my review I failed to catch the fact that the way to go by the HW driver is set the device_cap_flags and by the ULP to issue device query and not rely on the flags field. > So have you actually tested any of these checksum offload code paths? yes, although the vast majority of the testing here is under the ofed form of these patches (a point to fix!) where Eli updated the ofed ones after getting feedback/comments from me and others. To remove doubt "[PATCH 4/16 v4] IB/ipoib: Add checksum offload support" works well over Mellanox device b/c mthca and mlx4 sets the flags field, over other devices the behavior is not defined. So, Eli, this must be fixed for rc5. 
Also, and more important, let's not let missing 2.6.25 stop the merging of the stateless offloads; submitting the fixes early will ensure it goes into 2.6.26 Or. Or. From vlad at lists.openfabrics.org Sun Feb 10 03:07:12 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 10 Feb 2008 03:07:12 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080210-0200 daily build status Message-ID: <20080210110712.5E552E60953@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp
Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.24 Failed: Build failed on ia64 with linux-2.6.24 Log: from /home/vlad/tmp/ofa_1_3_kernel-20080210-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/sdp/sdp_main.c:74: include/net/tcp.h: In function 'tcp_v4_check': include/net/tcp.h:846: error: implicit declaration of function 'csum_tcpudp_magic' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080210-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/sdp/sdp_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080210-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/sdp] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080210-0200_linux-2.6.24_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080210-0200_linux-2.6.24_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From susancole at melileashop.com Sun Feb 10 03:24:06 2008 From: susancole at melileashop.com (Susan Cole) Date: Sun, 10 Feb 2008 05:24:06 -0600 Subject: 
[ofa-general] From Susan Cole Message-ID: <20080210052406.d8i8w2vta80skowo@www.melileashop.com> FROM; MRS SUSAN COLE, MAIL; susan_75 at myway.com My dear friend I am writing you with trust for a project I want you to undertake on my behalf due to my deterioating ill health (cancer ailment) which has totally weighed me down over years. When my late husband was alive he deposited the sum of 15.5 Million (Fifteen Million five hundred thousand Pounds Sterling) which were derived from his vast estates and investment in capital market with his bank here in europe. Recently, my Doctor told me that I have limited days to live due to the cancerous problems I am suffering from. Though what bothers me most is the stroke that I have in addition to the cancer. With this hard reality that has befallen my family, and me I have decided to donate this fund to you and want you to use this gift which comes from my husbands effort to fund the upkeep of widows, widowers, orphans, destitute,physically challenged children, barren- women and persons who prove to be genuinely handicapped financially. It is often said that blessed is the hand that giveth. I took this decision because I do not have any child that will inherit this money and my husband relatives are bourgeois and very wealthy and I do not want my husband hard earned money to be misused or invested into ill perceived ventures. I do not want a situation where this money will be used in an ungodly manner, hence the reason for taking this decision. With God all things are possible. I want you to stand as the new beneficiary to the funds. My happiness is that I lived a life worthy of emulation. Please always be prayerful for me. Please assure me that you will act just as I have stated herein. Hope to hear from you soon and God bless you and members of your family. Please do send response via my email I can access easily in the hospital here (susan_75 at myway.com) Yours sincerely, Susan Cole. 
From sashak at voltaire.com Sun Feb 10 05:31:33 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 10 Feb 2008 13:31:33 +0000 Subject: [ofa-general] [PATCH] opensm: consolidate SM state logging code Message-ID: <20080210133133.GT11526@sashak.voltaire.com> Consolidate SM state transition logging code with single __osm_report_sm_state() function. Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_helper.c | 8 ++-- opensm/opensm/osm_sm_state_mgr.c | 71 +++++++++---------------------------- opensm/opensm/osm_sminfo_rcv.c | 6 +-- 3 files changed, 24 insertions(+), 61 deletions(-) diff --git a/opensm/opensm/osm_helper.c b/opensm/opensm/osm_helper.c index de4ba33..0c11198 100644 --- a/opensm/opensm/osm_helper.c +++ b/opensm/opensm/osm_helper.c @@ -2346,10 +2346,10 @@ const char *osm_get_sm_mgr_signal_str(IN osm_sm_signal_t signal) } const char *const __osm_sm_mgr_state_str[] = { - "IB_SMINFO_STATE_NOTACTIVE", /* 0 */ - "IB_SMINFO_STATE_DISCOVERING", /* 1 */ - "IB_SMINFO_STATE_STANDBY", /* 2 */ - "IB_SMINFO_STATE_MASTER", /* 3 */ + "NOTACTIVE", /* 0 */ + "DISCOVERING", /* 1 */ + "STANDBY", /* 2 */ + "MASTER", /* 3 */ "UNKNOWN STATE!!" 
/* 4 */ }; diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index 73d39fa..4f0bf72 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -69,44 +69,14 @@ /********************************************************************** **********************************************************************/ -static void -__osm_sm_state_mgr_standby_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) -{ - osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering STANDBY state\n"); /* Format Waived */ - - osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, - "ENTERING SM STANDBY STATE"); -} - -/********************************************************************** - **********************************************************************/ -static void -__osm_sm_state_mgr_master_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) -{ - osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering MASTER state\n"); /* Format Waived */ - - osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, - "ENTERING SM MASTER STATE"); -} - -/********************************************************************** - **********************************************************************/ -static void -__osm_sm_state_mgr_discovering_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) -{ - osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, - "ENTERING SM DISCOVERING STATE"); -} - -/********************************************************************** - **********************************************************************/ -static void -__osm_sm_state_mgr_notactive_msg(IN const osm_sm_state_mgr_t * p_sm_mgr) +static void __osm_report_sm_state(IN const osm_sm_state_mgr_t * p_sm_mgr) { - osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering NOT-ACTIVE state\n"); /* Format Waived */ + char buf[64]; + const char *state_str = osm_get_sm_mgr_state_str(p_sm_mgr->p_subn->sm_state); - osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, - "ENTERING SM 
NOT-ACTIVE STATE"); + osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering %s state\n", state_str); + snprintf(buf, sizeof(buf), "ENTERING SM %s STATE", state_str); + osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, buf); } #if 0 @@ -373,15 +343,10 @@ osm_sm_state_mgr_init(IN osm_sm_state_mgr_t * const p_sm_mgr, IN osm_sm_t * sm) p_sm_mgr->p_log = sm->p_log; p_sm_mgr->p_subn = sm->p_subn; - if (p_sm_mgr->p_subn->opt.sm_inactive) { - /* init the state of the SM to not active */ - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_NOTACTIVE; - __osm_sm_state_mgr_notactive_msg(p_sm_mgr); - } else { - /* init the state of the SM to discovering */ - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; - __osm_sm_state_mgr_discovering_msg(p_sm_mgr); - } + p_sm_mgr->p_subn->sm_state = p_sm_mgr->p_subn->opt.sm_inactive ? + IB_SMINFO_STATE_NOTACTIVE : IB_SMINFO_STATE_DISCOVERING; + + __osm_report_sm_state(p_sm_mgr); status = cl_spinlock_init(&p_sm_mgr->state_lock); if (status != CL_SUCCESS) { @@ -459,7 +424,6 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, /* * Update the state of the SM to MASTER */ - __osm_sm_state_mgr_master_msg(p_sm_mgr); /* Turn on the moved_to_master_state flag */ p_sm_mgr->p_subn->moved_to_master_state = TRUE; /* Turn on the first_time_master_sweep flag */ @@ -467,6 +431,7 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, p_sm_mgr->p_subn->first_time_master_sweep = TRUE; p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_MASTER; + __osm_report_sm_state(p_sm_mgr); /* * Make sure to set the subnet master_sm_base_lid * to the sm_base_lid value @@ -479,8 +444,8 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * Finished all discovery actions - move to STANDBY * start the polling */ - __osm_sm_state_mgr_standby_msg(p_sm_mgr); p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + __osm_report_sm_state(p_sm_mgr); /* * Since another SM is doing the LFT config - we should not * ignore the results 
of it @@ -514,9 +479,9 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * case 2: Got a signal to move to DISCOVERING * Move to DISCOVERING state and start sweeping */ - __osm_sm_state_mgr_discovering_msg(p_sm_mgr); p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; + __osm_report_sm_state(p_sm_mgr); p_sm_mgr->p_subn->coming_out_of_standby = TRUE; osm_sm_signal(&p_sm_mgr->p_subn->p_osm->sm, OSM_SIGNAL_EXIT_STBY); @@ -525,15 +490,14 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, /* * Update the state to NOT_ACTIVE */ - __osm_sm_state_mgr_notactive_msg(p_sm_mgr); p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_NOTACTIVE; + __osm_report_sm_state(p_sm_mgr); break; case OSM_SM_SIGNAL_HANDOVER: /* * Update the state to MASTER, and start sweeping * OPTIONAL: send ACKNOWLEDGE */ - __osm_sm_state_mgr_master_msg(p_sm_mgr); /* Turn on the moved_to_master_state flag */ p_sm_mgr->p_subn->moved_to_master_state = TRUE; /* Turn on the first_time_master_sweep flag */ @@ -545,6 +509,7 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, p_sm_mgr->p_subn->force_heavy_sweep = TRUE; p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_MASTER; + __osm_report_sm_state(p_sm_mgr); /* * Make sure to set the subnet master_sm_base_lid * to the sm_base_lid value @@ -574,8 +539,8 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * Update the state to STANDBY * start the polling */ - __osm_sm_state_mgr_standby_msg(p_sm_mgr); p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + __osm_report_sm_state(p_sm_mgr); __osm_sm_state_mgr_start_polling(p_sm_mgr); break; default: @@ -622,8 +587,8 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * Just sent a HANDOVER signal - move to STANDBY * start the polling */ - __osm_sm_state_mgr_standby_msg(p_sm_mgr); p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + __osm_report_sm_state(p_sm_mgr); __osm_sm_state_mgr_start_polling(p_sm_mgr); break; case 
OSM_SM_SIGNAL_WAIT_FOR_HANDOVER: @@ -637,9 +602,9 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, __osm_sm_state_mgr_start_polling(p_sm_mgr); break; case OSM_SM_SIGNAL_DISCOVER: - __osm_sm_state_mgr_discovering_msg(p_sm_mgr); p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; + __osm_report_sm_state(p_sm_mgr); break; default: __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); diff --git a/opensm/opensm/osm_sminfo_rcv.c b/opensm/opensm/osm_sminfo_rcv.c index 0fe246a..1e9e5cf 100644 --- a/opensm/opensm/osm_sminfo_rcv.c +++ b/opensm/opensm/osm_sminfo_rcv.c @@ -238,8 +238,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, "__osm_sminfo_rcv_process_set_request: ERR 2F04: " "Check legality failed. AttributeModifier:0x%X RemoteState:%s\n", p_smp->attr_mod, - osm_get_sm_mgr_state_str(ib_sminfo_get_state - (sm_smi))); + osm_get_sm_mgr_state_str(ib_sminfo_get_state(sm_smi))); /* send a response with error code */ status = osm_resp_send(sm, p_madw, 7, payload); if (status != IB_SUCCESS) @@ -288,8 +287,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, "__osm_sminfo_rcv_process_set_request: ERR 2F07: " "Failed check of legality of needed SM transition. AttributeModifier:0x%X RemoteState:%s\n", p_smp->attr_mod, - osm_get_sm_mgr_state_str(ib_sminfo_get_state - (sm_smi))); + osm_get_sm_mgr_state_str(ib_sminfo_get_state(sm_smi))); /* send a response with error code */ status = osm_resp_send(sm, p_madw, 7, payload); if (status != IB_SUCCESS) -- 1.5.4.rc5 From sashak at voltaire.com Sun Feb 10 05:40:45 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 10 Feb 2008 13:40:45 +0000 Subject: [ofa-general] [PATCH] opensm: kill osm_sm_state_mgr sub-object In-Reply-To: <20080210133133.GT11526@sashak.voltaire.com> References: <20080210133133.GT11526@sashak.voltaire.com> Message-ID: <20080210134045.GU11526@sashak.voltaire.com> Remove redundant osm_sm_state_mgr SM's sub-object. 
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_sm.h | 92 +++++++- opensm/include/opensm/osm_sm_state_mgr.h | 108 --------- opensm/opensm/osm_sm.c | 25 ++- opensm/opensm/osm_sm_state_mgr.c | 385 +++++++++++------------------- opensm/opensm/osm_sminfo_rcv.c | 28 +-- opensm/opensm/osm_state_mgr.c | 17 +- 6 files changed, 271 insertions(+), 384 deletions(-) diff --git a/opensm/include/opensm/osm_sm.h b/opensm/include/opensm/osm_sm.h index 83bd4da..25d0983 100644 --- a/opensm/include/opensm/osm_sm.h +++ b/opensm/include/opensm/osm_sm.h @@ -66,10 +66,10 @@ #include #include #include -#include #include #include #include +#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -115,12 +115,17 @@ typedef struct osm_sm { osm_sm_state_t state; unsigned signal_mask; cl_spinlock_t signal_lock; + cl_spinlock_t state_lock; cl_event_t signal_event; cl_event_t subnet_up_event; cl_timer_t sweep_timer; + cl_timer_t polling_timer; cl_event_wheel_t trap_aging_tracker; cl_thread_t sweeper; unsigned master_sm_found; + uint32_t retry_number; + ib_net64_t master_sm_guid; + osm_remote_sm_t *p_polling_sm; osm_subn_t *p_subn; osm_db_t *p_db; osm_vendor_t *p_vendor; @@ -138,7 +143,6 @@ typedef struct osm_sm { osm_link_mgr_t link_mgr; osm_drop_mgr_t drop_mgr; osm_sweep_fail_ctrl_t sweep_fail_ctrl; - osm_sm_state_mgr_t sm_state_mgr; osm_mcast_mgr_t mcast_mgr; cl_disp_reg_handle_t ni_disp_h; cl_disp_reg_handle_t pi_disp_h; @@ -697,5 +701,89 @@ osm_sm_is_greater_than(IN const uint8_t l_priority, * State Manager *********/ +/****f* OpenSM: SM State Manager/osm_sm_state_mgr_process +* NAME +* osm_sm_state_mgr_process +* +* DESCRIPTION +* Processes and maintains the states of the SM. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_state_mgr_process(IN osm_sm_t *sm, + IN osm_sm_signal_t signal); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* signal +* [in] Signal to the state SM engine. +* +* RETURN VALUES +* None. 
+* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + +/****f* OpenSM: SM State Manager/osm_sm_state_mgr_signal_master_is_alive +* NAME +* osm_sm_state_mgr_signal_master_is_alive +* +* DESCRIPTION +* Signals that the remote Master SM is alive. +* Need to clear the retry_number variable. +* +* SYNOPSIS +*/ +void osm_sm_state_mgr_signal_master_is_alive(IN osm_sm_t *sm); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + +/****f* OpenSM: SM State Manager/osm_sm_state_mgr_check_legality +* NAME +* osm_sm_state_mgr_check_legality +* +* DESCRIPTION +* Checks the legality of the signal received, according to the +* current state of the SM state machine. +* +* SYNOPSIS +*/ +ib_api_status_t osm_sm_state_mgr_check_legality(IN osm_sm_t *sm, + IN osm_sm_signal_t signal); +/* +* PARAMETERS +* sm +* [in] Pointer to an osm_sm_t object. +* +* signal +* [in] Signal to the state SM engine. +* +* RETURN VALUES +* None. +* +* NOTES +* +* SEE ALSO +* State Manager +*********/ + +void osm_report_sm_state(osm_sm_t *sm); + END_C_DECLS #endif /* _OSM_SM_H_ */ diff --git a/opensm/include/opensm/osm_sm_state_mgr.h b/opensm/include/opensm/osm_sm_state_mgr.h index 3007554..bdc0078 100644 --- a/opensm/include/opensm/osm_sm_state_mgr.h +++ b/opensm/include/opensm/osm_sm_state_mgr.h @@ -102,13 +102,8 @@ struct osm_sm; */ typedef struct _osm_sm_state_mgr { struct osm_sm *sm; - cl_spinlock_t state_lock; - cl_timer_t polling_timer; - uint32_t retry_number; - ib_net64_t master_guid; osm_subn_t *p_subn; osm_log_t *p_log; - osm_remote_sm_t *p_polling_sm; } osm_sm_state_mgr_t; /* @@ -116,30 +111,12 @@ typedef struct _osm_sm_state_mgr { * sm * Pointer to the SM object. * -* state_lock -* Spinlock guarding the state and processes. -* -* polling_timer -* Timer for polling. -* -* retry_number -* Used in Standby state - to count the number of retries -* of queries to the master SM. 
-* -* master_guid -* Port GUID of master SM. -* * p_subn * Pointer to the Subnet object for this subnet. * * p_log * Pointer to the log object. * -* p_polling_sm -* Pointer to a osm_remote_sm_t object. When our SM needs -* to poll on a remote sm, this will be the pointer of the -* polled SM. -* * SEE ALSO * SM State Manager object *********/ @@ -237,90 +214,5 @@ osm_sm_state_mgr_init(IN osm_sm_state_mgr_t * const p_sm_mgr, * osm_sm_state_mgr_destroy *********/ -/****f* OpenSM: SM State Manager/osm_sm_state_mgr_process -* NAME -* osm_sm_state_mgr_process -* -* DESCRIPTION -* Processes and maintains the states of the SM. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, - IN osm_sm_signal_t signal); -/* -* PARAMETERS -* p_sm_mgr -* [in] Pointer to an osm_sm_state_mgr_t object. -* -* signal -* [in] Signal to the state SM engine. -* -* RETURN VALUES -* None. -* -* NOTES -* -* SEE ALSO -* State Manager -*********/ - -/****f* OpenSM: SM State Manager/osm_sm_state_mgr_signal_master_is_alive -* NAME -* osm_sm_state_mgr_signal_master_is_alive -* -* DESCRIPTION -* Signals that the remote Master SM is alive. -* Need to clear the retry_number variable. -* -* SYNOPSIS -*/ -void -osm_sm_state_mgr_signal_master_is_alive(IN osm_sm_state_mgr_t * const p_sm_mgr); -/* -* PARAMETERS -* p_sm_mgr -* [in] Pointer to an osm_sm_state_mgr_t object. -* -* RETURN VALUES -* None. -* -* NOTES -* -* SEE ALSO -* State Manager -*********/ - -/****f* OpenSM: SM State Manager/osm_sm_state_mgr_check_legality -* NAME -* osm_sm_state_mgr_check_legality -* -* DESCRIPTION -* Checks the legality of the signal received, according to the -* current state of the SM state machine. -* -* SYNOPSIS -*/ -ib_api_status_t -osm_sm_state_mgr_check_legality(IN osm_sm_state_mgr_t * const p_sm_mgr, - IN osm_sm_signal_t signal); -/* -* PARAMETERS -* p_sm_mgr -* [in] Pointer to an osm_sm_state_mgr_t object. -* -* signal -* [in] Signal to the state SM engine. 
-* -* RETURN VALUES -* None. -* -* NOTES -* -* SEE ALSO -* State Manager -*********/ - END_C_DECLS #endif /* _OSM_SM_STATE_MGR_H_ */ diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c index 37dae43..f2cc550 100644 --- a/opensm/opensm/osm_sm.c +++ b/opensm/opensm/osm_sm.c @@ -81,6 +81,7 @@ extern void osm_trap_rcv_process(IN void *context, IN void *data); extern void osm_vla_rcv_process(IN void *context, IN void *data); extern void osm_state_mgr_process(IN osm_sm_t *sm, IN osm_signal_t signal); +extern void osm_sm_state_mgr_polling_callback(IN void *context); /********************************************************************** **********************************************************************/ @@ -159,6 +160,8 @@ void osm_sm_construct(IN osm_sm_t * const p_sm) p_sm->state = OSM_SM_STATE_INIT; p_sm->sm_trans_id = OSM_SM_INITIAL_TID_VALUE; cl_spinlock_construct(&p_sm->signal_lock); + cl_spinlock_construct(&p_sm->state_lock); + cl_timer_construct(&p_sm->polling_timer); cl_event_construct(&p_sm->signal_event); cl_event_construct(&p_sm->subnet_up_event); cl_event_wheel_construct(&p_sm->trap_aging_tracker); @@ -170,7 +173,6 @@ void osm_sm_construct(IN osm_sm_t * const p_sm) osm_link_mgr_construct(&p_sm->link_mgr); osm_drop_mgr_construct(&p_sm->drop_mgr); osm_sweep_fail_ctrl_construct(&p_sm->sweep_fail_ctrl); - osm_sm_state_mgr_construct(&p_sm->sm_state_mgr); osm_mcast_mgr_construct(&p_sm->mcast_mgr); } @@ -197,6 +199,7 @@ void osm_sm_shutdown(IN osm_sm_t * const p_sm) if (signal_event) cl_event_signal(&p_sm->signal_event); + cl_timer_stop(&p_sm->polling_timer); cl_timer_stop(&p_sm->sweep_timer); cl_thread_destroy(&p_sm->sweeper); @@ -231,14 +234,15 @@ void osm_sm_destroy(IN osm_sm_t * const p_sm) osm_ucast_mgr_destroy(&p_sm->ucast_mgr); osm_link_mgr_destroy(&p_sm->link_mgr); osm_drop_mgr_destroy(&p_sm->drop_mgr); - osm_sm_state_mgr_destroy(&p_sm->sm_state_mgr); osm_mcast_mgr_destroy(&p_sm->mcast_mgr); cl_event_wheel_destroy(&p_sm->trap_aging_tracker); 
cl_timer_destroy(&p_sm->sweep_timer); + cl_timer_destroy(&p_sm->polling_timer); cl_event_destroy(&p_sm->signal_event); cl_event_destroy(&p_sm->subnet_up_event); cl_spinlock_destroy(&p_sm->signal_lock); cl_spinlock_destroy(&p_sm->mgrp_lock); + cl_spinlock_destroy(&p_sm->state_lock); osm_log(p_sm->p_log, OSM_LOG_SYS, "Exiting SM\n"); /* Format Waived */ OSM_LOG_EXIT(p_sm->p_log); @@ -274,6 +278,10 @@ osm_sm_init(IN osm_sm_t * const p_sm, if (status != CL_SUCCESS) goto Exit; + status = cl_spinlock_init(&p_sm->state_lock); + if (status != CL_SUCCESS) + goto Exit; + status = cl_event_init(&p_sm->signal_event, FALSE); if (status != CL_SUCCESS) goto Exit; @@ -286,6 +294,11 @@ osm_sm_init(IN osm_sm_t * const p_sm, if (status != CL_SUCCESS) goto Exit; + status = cl_timer_init(&p_sm->polling_timer, + osm_sm_state_mgr_polling_callback, p_sm); + if (status != CL_SUCCESS) + goto Exit; + cl_qlist_init(&p_sm->mgrp_list); status = cl_spinlock_init(&p_sm->mgrp_lock); @@ -325,10 +338,6 @@ osm_sm_init(IN osm_sm_t * const p_sm, if (status != IB_SUCCESS) goto Exit; - status = osm_sm_state_mgr_init(&p_sm->sm_state_mgr, p_sm); - if (status != IB_SUCCESS) - goto Exit; - status = osm_mcast_mgr_init(&p_sm->mcast_mgr, p_sm); if (status != IB_SUCCESS) goto Exit; @@ -388,6 +397,10 @@ osm_sm_init(IN osm_sm_t * const p_sm, if (p_sm->pkey_disp_h == CL_DISP_INVALID_HANDLE) goto Exit; + p_subn->sm_state = p_subn->opt.sm_inactive ? + IB_SMINFO_STATE_NOTACTIVE : IB_SMINFO_STATE_DISCOVERING; + osm_report_sm_state(p_sm); + /* * Now that the component objects are initialized, start * the sweeper thread if the user wants sweeping. 
diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index 4f0bf72..fab90bf 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -63,43 +63,40 @@ #include #include #include -#include #include #include /********************************************************************** **********************************************************************/ -static void __osm_report_sm_state(IN const osm_sm_state_mgr_t * p_sm_mgr) +void osm_report_sm_state(osm_sm_t *sm) { char buf[64]; - const char *state_str = osm_get_sm_mgr_state_str(p_sm_mgr->p_subn->sm_state); + const char *state_str = osm_get_sm_mgr_state_str(sm->p_subn->sm_state); - osm_log(p_sm_mgr->p_log, OSM_LOG_SYS, "Entering %s state\n", state_str); + osm_log(sm->p_log, OSM_LOG_SYS, "Entering %s state\n", state_str); snprintf(buf, sizeof(buf), "ENTERING SM %s STATE", state_str); - osm_log_msg_box(p_sm_mgr->p_log, OSM_LOG_VERBOSE, __FUNCTION__, buf); + osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, buf); } #if 0 /********************************************************************** **********************************************************************/ -static void -__osm_sm_state_mgr_send_local_port_info_req(IN osm_sm_state_mgr_t * p_sm_mgr) +static void __osm_sm_state_mgr_send_local_port_info_req(osm_sm_t *sm) { osm_madw_context_t context; osm_port_t *p_port; - ib_net64_t port_guid = p_sm_mgr->p_subn->sm_port_guid; + ib_net64_t port_guid = sm->p_subn->sm_port_guid; ib_api_status_t status; - OSM_LOG_ENTER(p_sm_mgr->p_log, - __osm_sm_state_mgr_send_local_port_info_req); + OSM_LOG_ENTER(sm->p_log, __osm_sm_state_mgr_send_local_port_info_req); /* * Send a query of SubnGet(PortInfo) to our own port, in order to * update the master_sm_base_lid of the subnet. 
*/ memset(&context, 0, sizeof(context)); - p_port = osm_get_port_by_guid(p_sm_mgr->p_subn, port_guid); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sm_state_mgr_send_local_port_info_req: ERR 3205: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); @@ -109,135 +106,121 @@ __osm_sm_state_mgr_send_local_port_info_req(IN osm_sm_state_mgr_t * p_sm_mgr) context.pi_context.port_guid = port_guid; context.pi_context.node_guid = p_port->p_node->node_info.node_guid; context.pi_context.set_method = FALSE; - context.pi_context.ignore_errors = FALSE; /* mark the update_master_sm_base_lid with TRUE - we want to update it */ /* with the new master lid value. */ context.pi_context.update_master_sm_base_lid = TRUE; context.pi_context.light_sweep = FALSE; context.pi_context.active_transition = FALSE; - status = osm_req_get(p_sm_mgr->p_req, - osm_physp_get_dr_path_ptr - (p_port->p_physp), + status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_port->p_physp), IB_MAD_ATTR_PORT_INFO, cl_hton32(p_port->p_physp->port_num), CL_DISP_MSGID_NONE, &context); - if (status != IB_SUCCESS) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + if (status != IB_SUCCESS) + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sm_state_mgr_send_local_port_info_req: ERR 3202: " "Failure requesting PortInfo (%s)\n", ib_get_err_str(status)); - } Exit: - OSM_LOG_EXIT(p_sm_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } #endif /********************************************************************** **********************************************************************/ -static void -__osm_sm_state_mgr_send_master_sm_info_req(IN osm_sm_state_mgr_t * p_sm_mgr) +static void __osm_sm_state_mgr_send_master_sm_info_req(osm_sm_t *sm) { osm_madw_context_t context; const osm_port_t *p_port; ib_api_status_t status; - OSM_LOG_ENTER(p_sm_mgr->p_log, - __osm_sm_state_mgr_send_master_sm_info_req); + 
OSM_LOG_ENTER(sm->p_log, __osm_sm_state_mgr_send_master_sm_info_req); memset(&context, 0, sizeof(context)); - if (p_sm_mgr->p_subn->sm_state == IB_SMINFO_STATE_STANDBY) { + if (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY) { /* * We are in STANDBY state - this means we need to poll on the master * SM (according to master_guid) * Send a query of SubnGet(SMInfo) to the subn master_sm_base_lid object. */ - p_port = - osm_get_port_by_guid(p_sm_mgr->p_subn, - p_sm_mgr->master_guid); + p_port = osm_get_port_by_guid(sm->p_subn, sm->master_sm_guid); } else { /* * We are not in STANDBY - this means we are in MASTER state - so we need - * to poll on the SM that is saved in p_polling_sm under p_sm_mgr. + * to poll on the SM that is saved in p_polling_sm under sm. * Send a query of SubnGet(SMInfo) to that SM. */ - p_port = p_sm_mgr->p_polling_sm->p_port; + p_port = sm->p_polling_sm->p_port; } if (p_port == NULL) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sm_state_mgr_send_master_sm_info_req: ERR 3203: " "No port object for GUID 0x%016" PRIx64 "\n", - cl_ntoh64(p_sm_mgr->master_guid)); + cl_ntoh64(sm->master_sm_guid)); goto Exit; } context.smi_context.port_guid = p_port->guid; context.smi_context.set_method = FALSE; - status = osm_req_get(p_sm_mgr->sm, - osm_physp_get_dr_path_ptr(p_port->p_physp), + status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_port->p_physp), IB_MAD_ATTR_SM_INFO, 0, CL_DISP_MSGID_NONE, &context); - if (status != IB_SUCCESS) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + if (status != IB_SUCCESS) + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sm_state_mgr_send_master_sm_info_req: ERR 3204: " "Failure requesting SMInfo (%s)\n", ib_get_err_str(status)); - } Exit: - OSM_LOG_EXIT(p_sm_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** **********************************************************************/ -static void __osm_sm_state_mgr_start_polling(IN 
osm_sm_state_mgr_t * p_sm_mgr) +static void __osm_sm_state_mgr_start_polling(osm_sm_t *sm) { - uint32_t sminfo_polling_timeout = - p_sm_mgr->p_subn->opt.sminfo_polling_timeout; + uint32_t timeout = sm->p_subn->opt.sminfo_polling_timeout; cl_status_t cl_status; - OSM_LOG_ENTER(p_sm_mgr->p_log, __osm_sm_state_mgr_start_polling); + OSM_LOG_ENTER(sm->p_log, __osm_sm_state_mgr_start_polling); /* * Init the retry_number back to zero - need to restart counting */ - p_sm_mgr->retry_number = 0; + sm->retry_number = 0; /* * Send a SubnGet(SMInfo) query to the current (or new) master found. */ - __osm_sm_state_mgr_send_master_sm_info_req(p_sm_mgr); + __osm_sm_state_mgr_send_master_sm_info_req(sm); /* * Start a timer that will wake up every sminfo_polling_timeout milliseconds. * The callback of the timer will send a SubnGet(SMInfo) to the Master SM * and restart the timer */ - cl_status = cl_timer_start(&p_sm_mgr->polling_timer, - sminfo_polling_timeout); - if (cl_status != CL_SUCCESS) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + cl_status = cl_timer_start(&sm->polling_timer, timeout); + if (cl_status != CL_SUCCESS) + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sm_state_mgr_start_polling: ERR 3210: " "Failed to start timer\n"); - } - OSM_LOG_EXIT(p_sm_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** **********************************************************************/ -static void __osm_sm_state_mgr_polling_callback(IN void *context) +void osm_sm_state_mgr_polling_callback(IN void *context) { - osm_sm_state_mgr_t *p_sm_mgr = (osm_sm_state_mgr_t *) context; - uint32_t sminfo_polling_timeout = - p_sm_mgr->p_subn->opt.sminfo_polling_timeout; + osm_sm_t *sm = context; + uint32_t timeout = sm->p_subn->opt.sminfo_polling_timeout; cl_status_t cl_status; - OSM_LOG_ENTER(p_sm_mgr->p_log, __osm_sm_state_mgr_polling_callback); + OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_polling_callback); /* * We can be here in one of two 
cases: @@ -245,25 +228,23 @@ static void __osm_sm_state_mgr_polling_callback(IN void *context) * 2. We are a MASTER sm, waiting for a handover from a remote master sm. * If we are not in one of these cases - don't need to restart the poller. */ - if (!((p_sm_mgr->p_subn->sm_state == IB_SMINFO_STATE_MASTER && - p_sm_mgr->p_polling_sm != NULL) || - (p_sm_mgr->p_subn->sm_state == IB_SMINFO_STATE_STANDBY))) { + if (!((sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER && + sm->p_polling_sm != NULL) || + (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY))) goto Exit; - } /* - * If we are a STANDBY sm and the osm_exit_flag is 1, then let's signal - * the subnet_up. This is relevant for the case of running only once. In that - * case - the program is stuck until this signal is received. In other - * cases - it is not relevant whether or not the signal is on - since we are - * currently in exit flow + * If we are a STANDBY sm and the osm_exit_flag is set, then let's + * signal the subnet_up. This is relevant for the case of running only + * once. In that case - the program is stuck until this signal is + * received. 
In other cases - it is not relevant whether or not the + * signal is on - since we are currently in exit flow */ - if (p_sm_mgr->p_subn->sm_state == IB_SMINFO_STATE_STANDBY && - osm_exit_flag == 1) { - osm_log(p_sm_mgr->p_log, OSM_LOG_VERBOSE, - "__osm_sm_state_mgr_polling_callback: " + if (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY && osm_exit_flag) { + osm_log(sm->p_log, OSM_LOG_VERBOSE, + "osm_sm_state_mgr_polling_callback: " "Signalling subnet_up_event\n"); - cl_event_signal(&p_sm_mgr->p_subn->p_osm->sm.subnet_up_event); + cl_event_signal(&sm->subnet_up_event); goto Exit; } @@ -272,152 +253,81 @@ static void __osm_sm_state_mgr_polling_callback(IN void *context) * If it reached the max_retry_number in the subnet opt - call * osm_sm_state_mgr_process with signal OSM_SM_SIGNAL_POLLING_TIMEOUT */ - p_sm_mgr->retry_number++; - osm_log(p_sm_mgr->p_log, OSM_LOG_VERBOSE, + sm->retry_number++; + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_sm_state_mgr_polling_callback: " - "Retry number:%d\n", p_sm_mgr->retry_number); + "Retry number:%d\n", sm->retry_number); - if (p_sm_mgr->retry_number >= - p_sm_mgr->p_subn->opt.polling_retry_number) { - osm_log(p_sm_mgr->p_log, OSM_LOG_DEBUG, + if (sm->retry_number >= sm->p_subn->opt.polling_retry_number) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_sm_state_mgr_polling_callback: " "Reached polling_retry_number value in retry_number. 
" "Go to DISCOVERY state\n"); - osm_sm_state_mgr_process(p_sm_mgr, - OSM_SM_SIGNAL_POLLING_TIMEOUT); + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_POLLING_TIMEOUT); goto Exit; } /* Send a SubnGet(SMInfo) request to the remote sm (depends on our state) */ - __osm_sm_state_mgr_send_master_sm_info_req(p_sm_mgr); + __osm_sm_state_mgr_send_master_sm_info_req(sm); /* restart the timer */ - cl_status = cl_timer_start(&p_sm_mgr->polling_timer, - sminfo_polling_timeout); - if (cl_status != CL_SUCCESS) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + cl_status = cl_timer_start(&sm->polling_timer, timeout); + if (cl_status != CL_SUCCESS) + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sm_state_mgr_polling_callback: ERR 3211: " "Failed to restart timer\n"); - } Exit: - OSM_LOG_EXIT(p_sm_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return; } /********************************************************************** **********************************************************************/ -void osm_sm_state_mgr_construct(IN osm_sm_state_mgr_t * const p_sm_mgr) -{ - memset(p_sm_mgr, 0, sizeof(*p_sm_mgr)); - cl_spinlock_construct(&p_sm_mgr->state_lock); - cl_timer_construct(&p_sm_mgr->polling_timer); -} - -/********************************************************************** - **********************************************************************/ -void osm_sm_state_mgr_destroy(IN osm_sm_state_mgr_t * const p_sm_mgr) -{ - CL_ASSERT(p_sm_mgr); - - OSM_LOG_ENTER(p_sm_mgr->p_log, osm_sm_state_mgr_destroy); - - cl_spinlock_destroy(&p_sm_mgr->state_lock); - cl_timer_destroy(&p_sm_mgr->polling_timer); - - OSM_LOG_EXIT(p_sm_mgr->p_log); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_sm_state_mgr_init(IN osm_sm_state_mgr_t * const p_sm_mgr, IN osm_sm_t * sm) +static void __osm_sm_state_mgr_signal_error(osm_sm_t *sm, + IN const osm_sm_signal_t signal) { - cl_status_t status; - - 
OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_init); - - osm_sm_state_mgr_construct(p_sm_mgr); - - p_sm_mgr->sm = sm; - p_sm_mgr->p_log = sm->p_log; - p_sm_mgr->p_subn = sm->p_subn; - - p_sm_mgr->p_subn->sm_state = p_sm_mgr->p_subn->opt.sm_inactive ? - IB_SMINFO_STATE_NOTACTIVE : IB_SMINFO_STATE_DISCOVERING; - - __osm_report_sm_state(p_sm_mgr); - - status = cl_spinlock_init(&p_sm_mgr->state_lock); - if (status != CL_SUCCESS) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, - "osm_sm_state_mgr_init: ERR 3201: " - "Spinlock init failed (%s)\n", CL_STATUS_MSG(status)); - } - - status = cl_timer_init(&p_sm_mgr->polling_timer, - __osm_sm_state_mgr_polling_callback, p_sm_mgr); - - if (status != CL_SUCCESS) { - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, - "osm_sm_state_mgr_init: ERR 3206: " - "Timer init failed (%s)\n", CL_STATUS_MSG(status)); - } - - OSM_LOG_EXIT(p_sm_mgr->p_log); - return (status); -} - -/********************************************************************** - **********************************************************************/ -static void -__osm_sm_state_mgr_signal_error(IN const osm_sm_state_mgr_t * const p_sm_mgr, - IN const osm_sm_signal_t signal) -{ - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sm_state_mgr_signal_error: ERR 3207: " "Invalid signal %s in state %s\n", osm_get_sm_mgr_signal_str(signal), - osm_get_sm_mgr_state_str(p_sm_mgr->p_subn->sm_state)); + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); } /********************************************************************** **********************************************************************/ -void -osm_sm_state_mgr_signal_master_is_alive(IN osm_sm_state_mgr_t * const p_sm_mgr) +void osm_sm_state_mgr_signal_master_is_alive(osm_sm_t *sm) { - OSM_LOG_ENTER(p_sm_mgr->p_log, osm_sm_state_mgr_signal_master_is_alive); - p_sm_mgr->retry_number = 0; - OSM_LOG_EXIT(p_sm_mgr->p_log); + OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_signal_master_is_alive); + sm->retry_number 
= 0; + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** **********************************************************************/ -ib_api_status_t -osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, - IN osm_sm_signal_t signal) +ib_api_status_t osm_sm_state_mgr_process(osm_sm_t *sm, + IN osm_sm_signal_t signal) { ib_api_status_t status = IB_SUCCESS; - CL_ASSERT(p_sm_mgr); + CL_ASSERT(sm); - OSM_LOG_ENTER(p_sm_mgr->p_log, osm_sm_state_mgr_process); + OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_process); /* * The state lock prevents many race conditions from screwing * up the state transition process. */ - cl_spinlock_acquire(&p_sm_mgr->state_lock); + cl_spinlock_acquire(&sm->state_lock); - if (osm_log_is_active(p_sm_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_sm_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_sm_state_mgr_process: " "Received signal %s in state %s\n", osm_get_sm_mgr_signal_str(signal), - osm_get_sm_mgr_state_str(p_sm_mgr->p_subn->sm_state)); - } + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); - switch (p_sm_mgr->p_subn->sm_state) { + switch (sm->p_subn->sm_state) { case IB_SMINFO_STATE_DISCOVERING: switch (signal) { case OSM_SM_SIGNAL_DISCOVERY_COMPLETED: @@ -425,34 +335,33 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * Update the state of the SM to MASTER */ /* Turn on the moved_to_master_state flag */ - p_sm_mgr->p_subn->moved_to_master_state = TRUE; + sm->p_subn->moved_to_master_state = TRUE; /* Turn on the first_time_master_sweep flag */ - if (p_sm_mgr->p_subn->first_time_master_sweep == FALSE) - p_sm_mgr->p_subn->first_time_master_sweep = - TRUE; - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_MASTER; - __osm_report_sm_state(p_sm_mgr); + if (sm->p_subn->first_time_master_sweep == FALSE) + sm->p_subn->first_time_master_sweep = TRUE; + sm->p_subn->sm_state = IB_SMINFO_STATE_MASTER; + 
osm_report_sm_state(sm); /* * Make sure to set the subnet master_sm_base_lid * to the sm_base_lid value */ - p_sm_mgr->p_subn->master_sm_base_lid = - p_sm_mgr->p_subn->sm_base_lid; + sm->p_subn->master_sm_base_lid = + sm->p_subn->sm_base_lid; break; case OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED_DONE: /* * Finished all discovery actions - move to STANDBY * start the polling */ - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; - __osm_report_sm_state(p_sm_mgr); + sm->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + osm_report_sm_state(sm); /* * Since another SM is doing the LFT config - we should not * ignore the results of it */ - p_sm_mgr->p_subn->ignore_existing_lfts = FALSE; + sm->p_subn->ignore_existing_lfts = FALSE; - __osm_sm_state_mgr_start_polling(p_sm_mgr); + __osm_sm_state_mgr_start_polling(sm); break; case OSM_SM_SIGNAL_HANDOVER: /* @@ -463,7 +372,7 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, */ break; default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } @@ -479,19 +388,17 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * case 2: Got a signal to move to DISCOVERING * Move to DISCOVERING state and start sweeping */ - p_sm_mgr->p_subn->sm_state = - IB_SMINFO_STATE_DISCOVERING; - __osm_report_sm_state(p_sm_mgr); - p_sm_mgr->p_subn->coming_out_of_standby = TRUE; - osm_sm_signal(&p_sm_mgr->p_subn->p_osm->sm, - OSM_SIGNAL_EXIT_STBY); + sm->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; + osm_report_sm_state(sm); + sm->p_subn->coming_out_of_standby = TRUE; + osm_sm_signal(sm, OSM_SIGNAL_EXIT_STBY); break; case OSM_SM_SIGNAL_DISABLE: /* * Update the state to NOT_ACTIVE */ - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_NOTACTIVE; - __osm_report_sm_state(p_sm_mgr); + sm->p_subn->sm_state = IB_SMINFO_STATE_NOTACTIVE; + osm_report_sm_state(sm); break; case OSM_SM_SIGNAL_HANDOVER: /* @@ -499,26 +406,24 @@ 
osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * OPTIONAL: send ACKNOWLEDGE */ /* Turn on the moved_to_master_state flag */ - p_sm_mgr->p_subn->moved_to_master_state = TRUE; + sm->p_subn->moved_to_master_state = TRUE; /* Turn on the first_time_master_sweep flag */ - if (p_sm_mgr->p_subn->first_time_master_sweep == FALSE) - p_sm_mgr->p_subn->first_time_master_sweep = - TRUE; + if (sm->p_subn->first_time_master_sweep == FALSE) + sm->p_subn->first_time_master_sweep = TRUE; /* Turn on the force_heavy_sweep - we want a * heavy sweep to occur on the first sweep of this SM. */ - p_sm_mgr->p_subn->force_heavy_sweep = TRUE; + sm->p_subn->force_heavy_sweep = TRUE; - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_MASTER; - __osm_report_sm_state(p_sm_mgr); + sm->p_subn->sm_state = IB_SMINFO_STATE_MASTER; + osm_report_sm_state(sm); /* * Make sure to set the subnet master_sm_base_lid * to the sm_base_lid value */ - p_sm_mgr->p_subn->master_sm_base_lid = - p_sm_mgr->p_subn->sm_base_lid; - p_sm_mgr->p_subn->coming_out_of_standby = TRUE; - osm_sm_signal(&p_sm_mgr->p_subn->p_osm->sm, - OSM_SIGNAL_EXIT_STBY); + sm->p_subn->master_sm_base_lid = + sm->p_subn->sm_base_lid; + sm->p_subn->coming_out_of_standby = TRUE; + osm_sm_signal(sm, OSM_SIGNAL_EXIT_STBY); break; case OSM_SM_SIGNAL_ACKNOWLEDGE: /* @@ -526,7 +431,7 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, */ break; default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } @@ -539,12 +444,12 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * Update the state to STANDBY * start the polling */ - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; - __osm_report_sm_state(p_sm_mgr); - __osm_sm_state_mgr_start_polling(p_sm_mgr); + sm->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + osm_report_sm_state(sm); + __osm_sm_state_mgr_start_polling(sm); break; default: - 
__osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } @@ -573,23 +478,22 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * We also want to clear the p_polling_sm object - since we are * done polling on that remote sm - we got a handover from it. */ - osm_log(p_sm_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "osm_sm_state_mgr_process: " "Forcing heavy sweep. " "Received OSM_SM_SIGNAL_HANDOVER or OSM_SM_SIGNAL_POLLING_TIMEOUT\n"); - p_sm_mgr->p_polling_sm = NULL; - p_sm_mgr->p_subn->force_heavy_sweep = TRUE; - osm_sm_signal(&p_sm_mgr->p_subn->p_osm->sm, - OSM_SIGNAL_SWEEP); + sm->p_polling_sm = NULL; + sm->p_subn->force_heavy_sweep = TRUE; + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); break; case OSM_SM_SIGNAL_HANDOVER_SENT: /* * Just sent a HANDOVER signal - move to STANDBY * start the polling */ - p_sm_mgr->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; - __osm_report_sm_state(p_sm_mgr); - __osm_sm_state_mgr_start_polling(p_sm_mgr); + sm->p_subn->sm_state = IB_SMINFO_STATE_STANDBY; + osm_report_sm_state(sm); + __osm_sm_state_mgr_start_polling(sm); break; case OSM_SM_SIGNAL_WAIT_FOR_HANDOVER: /* @@ -599,61 +503,58 @@ osm_sm_state_mgr_process(IN osm_sm_state_mgr_t * const p_sm_mgr, * we should move back to discovering, since something must * have happened to it. 
*/ - __osm_sm_state_mgr_start_polling(p_sm_mgr); + __osm_sm_state_mgr_start_polling(sm); break; case OSM_SM_SIGNAL_DISCOVER: - p_sm_mgr->p_subn->sm_state = - IB_SMINFO_STATE_DISCOVERING; - __osm_report_sm_state(p_sm_mgr); + sm->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; + osm_report_sm_state(sm); break; default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } break; default: - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_sm_state_mgr_process: ERR 3208: " "Invalid state %s\n", - osm_get_sm_mgr_state_str(p_sm_mgr->p_subn->sm_state)); + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); } - cl_spinlock_release(&p_sm_mgr->state_lock); + cl_spinlock_release(&sm->state_lock); - OSM_LOG_EXIT(p_sm_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } /********************************************************************** **********************************************************************/ -ib_api_status_t -osm_sm_state_mgr_check_legality(IN osm_sm_state_mgr_t * const p_sm_mgr, - IN osm_sm_signal_t signal) +ib_api_status_t osm_sm_state_mgr_check_legality(osm_sm_t *sm, + IN osm_sm_signal_t signal) { ib_api_status_t status = IB_SUCCESS; - CL_ASSERT(p_sm_mgr); + CL_ASSERT(sm); - OSM_LOG_ENTER(p_sm_mgr->p_log, osm_sm_state_mgr_check_legality); + OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_check_legality); /* * The state lock prevents many race conditions from screwing * up the state transition process. 
*/ - cl_spinlock_acquire(&p_sm_mgr->state_lock); + cl_spinlock_acquire(&sm->state_lock); - if (osm_log_is_active(p_sm_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_sm_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_sm_state_mgr_check_legality: " "Received signal %s in state %s\n", osm_get_sm_mgr_signal_str(signal), - osm_get_sm_mgr_state_str(p_sm_mgr->p_subn->sm_state)); - } + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); - switch (p_sm_mgr->p_subn->sm_state) { + switch (sm->p_subn->sm_state) { case IB_SMINFO_STATE_DISCOVERING: switch (signal) { case OSM_SM_SIGNAL_DISCOVERY_COMPLETED: @@ -662,7 +563,7 @@ osm_sm_state_mgr_check_legality(IN osm_sm_state_mgr_t * const p_sm_mgr, status = IB_SUCCESS; break; default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } @@ -678,7 +579,7 @@ osm_sm_state_mgr_check_legality(IN osm_sm_state_mgr_t * const p_sm_mgr, status = IB_SUCCESS; break; default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } @@ -690,7 +591,7 @@ osm_sm_state_mgr_check_legality(IN osm_sm_state_mgr_t * const p_sm_mgr, status = IB_SUCCESS; break; default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } @@ -703,23 +604,23 @@ osm_sm_state_mgr_check_legality(IN osm_sm_state_mgr_t * const p_sm_mgr, status = IB_SUCCESS; break; default: - __osm_sm_state_mgr_signal_error(p_sm_mgr, signal); + __osm_sm_state_mgr_signal_error(sm, signal); status = IB_INVALID_PARAMETER; break; } break; default: - osm_log(p_sm_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_sm_state_mgr_check_legality: ERR 3209: " "Invalid state %s\n", - osm_get_sm_mgr_state_str(p_sm_mgr->p_subn->sm_state)); + 
osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); status = IB_INVALID_PARAMETER; } - cl_spinlock_release(&p_sm_mgr->state_lock); + cl_spinlock_release(&sm->state_lock); - OSM_LOG_EXIT(p_sm_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } diff --git a/opensm/opensm/osm_sminfo_rcv.c b/opensm/opensm/osm_sminfo_rcv.c index 1e9e5cf..e179385 100644 --- a/opensm/opensm/osm_sminfo_rcv.c +++ b/opensm/opensm/osm_sminfo_rcv.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include /********************************************************************** @@ -280,8 +280,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, } /* check legality of the needed transition in the SM state machine */ - status = osm_sm_state_mgr_check_legality(&sm->sm_state_mgr, - sm_signal); + status = osm_sm_state_mgr_check_legality(sm, sm_signal); if (status != IB_SUCCESS) { osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_sminfo_rcv_process_set_request: ERR 2F07: " @@ -318,12 +317,12 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, "Received a STANDBY signal. Updating " "sm_state_mgr master_guid: 0x%016" PRIx64 "\n", cl_ntoh64(sm_smi->guid)); - sm->sm_state_mgr.master_guid = sm_smi->guid; + sm->master_sm_guid = sm_smi->guid; } /* call osm_sm_state_mgr_process with the received signal. */ CL_PLOCK_RELEASE(sm->p_lock); - status = osm_sm_state_mgr_process(&sm->sm_state_mgr, sm_signal); + status = osm_sm_state_mgr_process(sm, sm_signal); if (status != IB_SUCCESS) osm_log(sm->p_log, OSM_LOG_ERROR, @@ -371,7 +370,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, "__osm_sminfo_rcv_process_get_sm: " "Found master SM. Updating sm_state_mgr master_guid: 0x%016" PRIx64 "\n", cl_ntoh64(p_sm->p_port->guid)); - sm->sm_state_mgr.master_guid = p_sm->p_port->guid; + sm->master_sm_guid = p_sm->p_port->guid; break; case IB_SMINFO_STATE_DISCOVERING: case IB_SMINFO_STATE_STANDBY: @@ -386,8 +385,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, "Found higher SM. 
Updating sm_state_mgr master_guid:" " 0x%016" PRIx64 "\n", cl_ntoh64(p_sm->p_port->guid)); - sm->sm_state_mgr.master_guid = - p_sm->p_port->guid; + sm->master_sm_guid = p_sm->p_port->guid; } break; default: @@ -402,20 +400,19 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, case IB_SMINFO_STATE_MASTER: /* This means the master is alive */ /* Signal that to the SM state mgr */ - osm_sm_state_mgr_signal_master_is_alive(&sm->sm_state_mgr); + osm_sm_state_mgr_signal_master_is_alive(sm); break; case IB_SMINFO_STATE_STANDBY: /* This should be the response from the sm we are polling. */ /* If it is - then signal master is alive */ - if (sm->sm_state_mgr.master_guid == p_sm->p_port->guid) { + if (sm->master_sm_guid == p_sm->p_port->guid) { /* Make sure that it is an SM with higher priority than us. If we started polling it when it was master, and it moved to standby - then it might be with a lower priority than us - and then we don't want to continue polling it. */ if (__osm_sminfo_rcv_remote_sm_is_higher (sm, p_smi) == TRUE) - osm_sm_state_mgr_signal_master_is_alive - (&sm->sm_state_mgr); + osm_sm_state_mgr_signal_master_is_alive(sm); } break; default: @@ -430,10 +427,9 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, /* If this is a response due to our polling, this means that we are waiting for a handover from this SM, and it is still alive - signal that. */ - if (sm->sm_state_mgr.p_polling_sm != NULL) { - osm_sm_state_mgr_signal_master_is_alive(&sm-> - sm_state_mgr); - } else { + if (sm->p_polling_sm) + osm_sm_state_mgr_signal_master_is_alive(sm); + else { /* This is a response we got while sweeping the subnet. We will handle a case of handover needed later on, when the sweep is done and all SMs are recongnized. 
*/ diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 20883e4..c53ed45 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include @@ -790,7 +789,7 @@ __osm_state_mgr_send_handover(IN osm_sm_t * const sm, "Handing over mastership. Updating sm_state_mgr master_guid: %016" PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid), p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN"); - sm->sm_state_mgr.master_guid = p_port->guid; + sm->master_sm_guid = p_port->guid; context.smi_context.port_guid = p_port->guid; context.smi_context.set_method = TRUE; @@ -1106,8 +1105,7 @@ _repeat_discovery: osm_drop_mgr_process(&sm->drop_mgr); /* Move to DISCOVERING state */ - osm_sm_state_mgr_process(&sm->sm_state_mgr, - OSM_SM_SIGNAL_DISCOVER); + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER); return; } @@ -1123,7 +1121,7 @@ _repeat_discovery: * Call the sm_state_mgr with signal * MASTER_OR_HIGHER_SM_DETECTED_DONE */ - osm_sm_state_mgr_process(&sm->sm_state_mgr, + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED_DONE); osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, "ENTERING STANDBY STATE"); @@ -1149,7 +1147,7 @@ _repeat_discovery: /* need to handover the mastership * to the remote sm, and move to standby */ __osm_state_mgr_send_handover(sm, p_remote_sm); - osm_sm_state_mgr_process(&sm->sm_state_mgr, + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_HANDOVER_SENT); sm->state = OSM_SM_STATE_STANDBY; return; @@ -1163,8 +1161,8 @@ _repeat_discovery: * need to wait for that SM to relinquish control * of its portion of the subnet. C14-60.2.1. * Also - need to start polling on that SM. 
*/ - sm->sm_state_mgr.p_polling_sm = p_remote_sm; - osm_sm_state_mgr_process(&sm->sm_state_mgr, + sm->p_polling_sm = p_remote_sm; + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_WAIT_FOR_HANDOVER); return; } @@ -1180,8 +1178,7 @@ _repeat_discovery: * DISCOVERY_COMPLETED */ if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING) - osm_sm_state_mgr_process(&sm->sm_state_mgr, - OSM_SM_SIGNAL_DISCOVERY_COMPLETED); + osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED); osm_pkey_mgr_process(sm->p_subn->p_osm); -- 1.5.4.rc5 From jackm at dev.mellanox.co.il Sun Feb 10 05:57:49 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Sun, 10 Feb 2008 15:57:49 +0200 Subject: [ofa-general] Re: [PATCH 2 of 2] IB/mlx4: shrinking WQE In-Reply-To: References: <200801281040.59398.jackm@dev.mellanox.co.il> Message-ID: <200802101557.50021.jackm@dev.mellanox.co.il> On Friday 08 February 2008 23:30, Roland Dreier wrote: > Thanks, applied -- we might as well use this feature I guess. > Although I'm not convinced this is really that useful; the only use I > see for it would be speeding up IPoIB with S/G and checksum offload, > when we have to size send WQEs for the worst case but most packets are > smaller. But Eli's latest work seems to use selective signaling for > the send queue, so this change doesn't actually help. > > I also made a few small changes to places like this: > > > + ind += DIV_ROUND_UP(size * 16, 1 << qp->sq.wqe_shift); > > For this case, the compiler can generate better code if we use "1U <<" > to get an unsigned divisor; in that case it can use a shift instead of > needing to use a divide operation to keep the sign correct. > I looked over your version (including the changes) and it looks good. 
- Jack From erezz at Voltaire.COM Sun Feb 10 06:06:31 2008 From: erezz at Voltaire.COM (Erez Zilber) Date: Sun, 10 Feb 2008 16:06:31 +0200 Subject: [Stgt-devel] [ewg] Re: [ofa-general] [ANNOUNCE] open iSCSI over iSER target RPMis available In-Reply-To: <47AB0CA9.4020904@scalableinformatics.com> References: <47A87586.6010904@Voltaire.COM><47AA28C3.7090003@scalableinformatics.com><47AAC047.4000306@Voltaire.COM> <47AB0CA9.4020904@scalableinformatics.com> Message-ID: <47AF04E7.6060808@Voltaire.COM> Joe Landman wrote: > > Erez Zilber wrote: > >>> * READ: 920 MB/sec > >>> * WRITE: 850 MB/sec > >> Not getting anything even remotely close to this. Are there more > >> details on configuration somewhere? I followed the web page as > indicated. > >> > > > > Are you running iSCSI over TCP or iSCSI over iSER (over InfiniBand)? Our > > results are with iSER. > > I followed the instructions on the web pages that were pointed to for > iSER. Are there updated pages? > For the initiator side, you can see the documentation that comes with OFED. For the target side, use the wiki page (updated it only last week): https://wiki.openfabrics.org/tiki-index.php?page=ISER-target I saw that you were asking about a RPM. There are RPMs for SuSE & RedHat on the wiki page. These RPMs will be included in OFED 1.4. Erez > Is there a way to tell whether or not > the RDMA path is being used? > Yes - on the initiator side, do the following: seed1:~ # iscsiadm -m session iser: [1] 192.168.10.63:3260,1 iqn.2001-04.com.noni-seed1 You can see that iSER is used. 
On the target side, do the following: noni:~ # tgtadm --lld iscsi --op show --mode target Target 1: iqn.2001-04.com.noni-seed1 System information: Driver: iscsi Status: running I_T nexus information: I_T nexus: 1 Initiator: iqn.seed1 Connection: 0 RDMA IP Address: 192.168.10.81 <-- "RDMA IP Address" means that iSER is used LUN information: LUN: 0 Type: controller SCSI ID: deadbeaf1:0 SCSI SN: beaf10 Size: 0 Online: Yes Poweron/Reset: Yes Removable media: No Backing store: No backing store LUN: 1 Type: disk SCSI ID: deadbeaf1:1 SCSI SN: beaf11 Size: 82G Online: Yes Poweron/Reset: No Removable media: No Backing store: /dev/sds Account information: ACL information: ALL Erez From erezz at Voltaire.COM Sun Feb 10 06:12:43 2008 From: erezz at Voltaire.COM (Erez Zilber) Date: Sun, 10 Feb 2008 16:12:43 +0200 Subject: [ofa-general] Re: [Stgt-devel] [ANNOUNCE] open iSCSI over iSER target RPM is available In-Reply-To: References: <47A87586.6010904@Voltaire.COM> Message-ID: <47AF065B.8080007@Voltaire.COM> Bart Van Assche wrote: > On Feb 5, 2008 3:41 PM, Erez Zilber wrote: > >> stgt (SCSI target) is an open-source framework for storage target >> drivers. It supports iSCSI over iSER among other storage target drivers. >> >> Voltaire added a git tree for stgt that will be added to OFED 1.4: >> http://www2.openfabrics.org/git/?p=~dorons/tgt.git;a=summary >> >> Until OFED 1.4 gets released, it is possible to install the stgt RPM on >> top of OFED 1.3. For more details about how to install and use stgt, >> please refer to https://wiki.openfabrics.org/tiki-index.php?page=ISER-target >> >> Some performance numbers that were measured by OSC (using SDR cards): >> >> * READ: 920 MB/sec >> * WRITE: 850 MB/sec >> >> We hope to have DDR measurements numbers soon. >> > > Hello Erez, > > Can you please post more information about how these numbers were > obtained (test program and configuration parameters) ? > > Bart Van Assche. > I will post more info. 
As mentioned above, I still didn't have a chance to run performance tests myself. The numbers are taken from measurements done in OSC by Pete Wyckoff. Anyway, after I run the performance tests (also on DDR cards), I will post instructions in the wiki page. Erez From eli at dev.mellanox.co.il Sun Feb 10 08:22:02 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Sun, 10 Feb 2008 18:22:02 +0200 Subject: [ofa-general] Re: [PATCH 6/16 v4] IB/mthca: Add checksum offload support In-Reply-To: <47AEB0BF.1080504@voltaire.com> References: <1201710676.28794.172.camel@mtls03> <47AEB0BF.1080504@voltaire.com> Message-ID: <47AF24AA.4040206@dev.mellanox.co.il> Or Gerlitz wrote: > Eli Cohen wrote: >> --- a/drivers/infiniband/hw/mthca/mthca_main.c >> +++ b/drivers/infiniband/hw/mthca/mthca_main.c >> @@ -267,6 +267,10 @@ static int mthca_dev_lim(struct mthca_dev *mdev, >> struct mthca_dev_lim *dev_lim) >> if (dev_lim->flags & DEV_LIM_FLAG_SRQ) >> mdev->mthca_flags |= MTHCA_FLAG_SRQ; >> >> + if (mthca_is_memfree(mdev)) >> + if (dev_lim->flags & DEV_LIM_FLAG_IPOIB_CSUM) >> + mdev->device_cap_flags |= IB_DEVICE_IP_CSUM; >> + > ... >> @@ -1109,6 +1113,8 @@ static int __mthca_init_one(struct pci_dev >> *pdev, int hca_type) >> if (err) >> goto err_cmd; >> >> + mdev->ib_dev.flags = mdev->device_cap_flags; > >> Roland Dreier wrote: >> I don't see any place that ca->flags ever gets set; in fact if I >> delete the flags member of struct ib_device from my tree, it all still >> compiles fine. > > OK, Roland, correct, in my review I failed to catch the fact that the > way to go by the HW driver is set the device_cap_flags and by the ULP to > issue device query and not rely on the flags field. > >> So have you actually tested any of these checksum offload code paths? > > yes, although the vast majority of the testing here is under the ofed > form of these patches (a point to fix!) where Eli updated the ofed ones > after getting feedback/comments from me and others. 
> > To remove doubt "[PATCH 4/16 v4] IB/ipoib: Add checksum offload support" > works well over Mellanox device b/c mthca and mlx4 sets the flags field, > over other devices the behavior is not defined. > > So, Eli, this must be fixed for rc5. Also, and more important, lets no > let the missing of 2.6.25 stop the merging of the stateless offloads, > submitting the fixes early will ensure its goes into 2.6.26 > Or, I think you're right and that's how it should have been done. However querying device cap will cause quite a few changes in other patches so I think we should better leave this as it is right now. I will however re-generate the patches for 2.6.25 . From dave at thedillows.org Sun Feb 10 08:25:21 2008 From: dave at thedillows.org (David Dillow) Date: Sun, 10 Feb 2008 11:25:21 -0500 Subject: [ofa-general] [OFED-1.3rc PATCH 0/3] IB/srp: bring OFED SRP initiator up-to-date with 2.6.25rc In-Reply-To: <47AEA234.8070601@voltaire.com> References: <1202515368.5298.23.camel@lap75545.ornl.gov> <47AEA234.8070601@voltaire.com> Message-ID: <1202660721.2626.5.camel@obelisk.thedillows.org> On Sun, 2008-02-10 at 09:05 +0200, Or Gerlitz wrote: > Please send such ofed related patches to the ewg mailing list > (ewg at lists.openfabrics.org), this list is for mainline development, and > its subscribers need not see each of your posting twice, thanks Please forgive my newbie mistake. It would help other new people if various list's charter were spelled out in the listman pages. From a-allenh at aateledata.com Sun Feb 10 09:35:37 2008 From: a-allenh at aateledata.com (Silas Hanson) Date: Mon, 11 Feb 2008 01:35:37 +0800 Subject: [ofa-general] Where have you been? Message-ID: <01c86c4e$672e5e00$81a21679@a-allenh> Hello! I am bored today. I am nice girl that would like to chat with you. Email me at Berit at EHealThies.info only, because I am using my friend's email to write this. If you would like to see my pictures. 
From suri at baymicrosystems.com Sun Feb 10 11:06:58 2008 From: suri at baymicrosystems.com (Suresh Shelvapille) Date: Sun, 10 Feb 2008 14:06:58 -0500 Subject: [ofa-general] AckReq bit in the BTH In-Reply-To: <47AF24AA.4040206@dev.mellanox.co.il> References: <1201710676.28794.172.camel@mtls03> <47AEB0BF.1080504@voltaire.com> <47AF24AA.4040206@dev.mellanox.co.il> Message-ID: <021501c86c18$1f7a1ad0$3414a8c0@md.baymicrosystems.com> Is there a way to control the AckReq bit via the verbs interface (it didn't appear to be the case from the spec)? Can someone please point me to where this bit is being set for RC? Thanks, Suri From sashak at voltaire.com Sun Feb 10 11:51:30 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 10 Feb 2008 19:51:30 +0000 Subject: [ofa-general] [PATCH] opensm: cleanup sm->state field In-Reply-To: <20080210134045.GU11526@sashak.voltaire.com> References: <20080210133133.GT11526@sashak.voltaire.com> <20080210134045.GU11526@sashak.voltaire.com> Message-ID: <20080210195129.GA18444@sashak.voltaire.com> Remove the effectively duplicated sm->state field and refer to sm_state only instead.
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_base.h | 25 +-------- opensm/include/opensm/osm_helper.h | 23 --------- opensm/include/opensm/osm_sm.h | 1 - opensm/opensm/osm_console.c | 27 ++-------- opensm/opensm/osm_helper.c | 19 +------- opensm/opensm/osm_perfmgr.c | 9 +--- opensm/opensm/osm_sm.c | 2 - opensm/opensm/osm_sm_state_mgr.c | 5 +- opensm/opensm/osm_state_mgr.c | 97 ++++++++---------------------------- 9 files changed, 35 insertions(+), 173 deletions(-) diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h index d5e3c27..1a9abf0 100644 --- a/opensm/include/opensm/osm_base.h +++ b/opensm/include/opensm/osm_base.h @@ -740,24 +740,6 @@ typedef enum _osm_thread_state { #define OSM_CAP2_IS_QOS_SUPPORTED (1 << 1) /***********/ -/****d* OpenSM: Base/osm_sm_state_t -* NAME -* osm_sm_state_t -* -* DESCRIPTION -* Enumerates the possible states of the SM object. -* -* SYNOPSIS -*/ -typedef enum _osm_sm_state { - OSM_SM_STATE_NO_STATE = 0, - OSM_SM_STATE_INIT, - OSM_SM_STATE_IDLE, - OSM_SM_STATE_STANDBY, - OSM_SM_STATE_MAX -} osm_sm_state_t; -/***********/ - /****d* OpenSM: Base/osm_signal_t * NAME * osm_signal_t @@ -773,15 +755,14 @@ typedef enum _osm_sm_state { #define OSM_SIGNAL_NONE 0 #define OSM_SIGNAL_SWEEP 1 #define OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST 2 -#define OSM_SIGNAL_EXIT_STBY 3 -#define OSM_SIGNAL_PERFMGR_SWEEP 4 -#define OSM_SIGNAL_MAX 4 +#define OSM_SIGNAL_PERFMGR_SWEEP 3 +#define OSM_SIGNAL_MAX 3 /* status values for sweep managers - can be removed later */ #define OSM_SIGNAL_DONE 16 #define OSM_SIGNAL_DONE_PENDING 17 -typedef uintn_t osm_signal_t; +typedef unsigned int osm_signal_t; /***********/ /****d* OpenSM: Base/osm_sm_signal_t diff --git a/opensm/include/opensm/osm_helper.h b/opensm/include/opensm/osm_helper.h index 5cbb8f0..bd885d7 100644 --- a/opensm/include/opensm/osm_helper.h +++ b/opensm/include/opensm/osm_helper.h @@ -446,29 +446,6 @@ void osm_dump_sa_mad(IN osm_log_t * const p_log, IN const 
ib_sa_mad_t * const p_smp, IN const osm_log_level_t level); -/****f* IBA Base: Types/osm_get_sm_state_str -* NAME -* osm_get_sm_state_str -* -* DESCRIPTION -* Returns a string for the specified SM state. -* -* SYNOPSIS -*/ -const char *osm_get_sm_state_str(IN osm_sm_state_t state); -/* -* PARAMETERS -* state -* [in] SM State value -* -* RETURN VALUES -* Pointer to the state discription string. -* -* NOTES -* -* SEE ALSO -*********/ - /****f* IBA Base: Types/osm_get_sm_signal_str * NAME * osm_get_sm_signal_str diff --git a/opensm/include/opensm/osm_sm.h b/opensm/include/opensm/osm_sm.h index 25d0983..e77222d 100644 --- a/opensm/include/opensm/osm_sm.h +++ b/opensm/include/opensm/osm_sm.h @@ -112,7 +112,6 @@ BEGIN_C_DECLS */ typedef struct osm_sm { osm_thread_state_t thread_state; - osm_sm_state_t state; unsigned signal_mask; cl_spinlock_t signal_lock; cl_spinlock_t state_lock; diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c index 86f89ea..4597bde 100644 --- a/opensm/opensm/osm_console.c +++ b/opensm/opensm/osm_console.c @@ -289,33 +289,16 @@ static char *sa_state_str(osm_sa_state_t state) return ("UNKNOWN"); } -static char *sm_state_mgr_str(osm_sm_state_t state) -{ - switch (state) { - case OSM_SM_STATE_NO_STATE: - return ("No State"); - case OSM_SM_STATE_INIT: - return ("Init"); - case OSM_SM_STATE_IDLE: - return ("Idle"); - case OSM_SM_STATE_STANDBY: - return ("Standby"); - default: - return ("Unknown State"); - } -} - static void print_status(osm_opensm_t * p_osm, FILE * out) { if (out) { cl_plock_acquire(&p_osm->lock); - fprintf(out, " OpenSM Version : %s\n", OSM_VERSION); - fprintf(out, " SM State/Mgr State : %s/%s\n", - sm_state_str(p_osm->subn.sm_state), - sm_state_mgr_str(p_osm->sm.state)); - fprintf(out, " SA State : %s\n", + fprintf(out, " OpenSM Version: %s\n", OSM_VERSION); + fprintf(out, " SM State : %s\n", + sm_state_str(p_osm->subn.sm_state)); + fprintf(out, " SA State : %s\n", sa_state_str(p_osm->sa.state)); - fprintf(out, " 
Routing Engine : %s\n", + fprintf(out, " Routing Engine: %s\n", osm_routing_engine_type_str(p_osm-> routing_engine_used)); #ifdef ENABLE_OSM_PERF_MGR diff --git a/opensm/opensm/osm_helper.c b/opensm/opensm/osm_helper.c index 0c11198..cbc2a99 100644 --- a/opensm/opensm/osm_helper.c +++ b/opensm/opensm/osm_helper.c @@ -2060,15 +2060,7 @@ osm_dump_smp_dr_path(IN osm_log_t * const p_log, } } -const char *const __osm_sm_state_str[] = { - "OSM_SM_STATE_NO_STATE", /* 0 */ - "OSM_SM_STATE_INIT", /* 1 */ - "OSM_SM_STATE_IDLE", /* 2 */ - "OSM_SM_STATE_STANDBY", /* 3 */ - "UNKNOWN STATE!!" /* 4 */ -}; - -const char *const __osm_sm_signal_str[] = { +static const char *const __osm_sm_signal_str[] = { "OSM_SIGNAL_NONE", /* 0 */ "OSM_SIGNAL_SWEEP", /* 1 */ "OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST", /* 2 */ @@ -2079,15 +2071,6 @@ const char *const __osm_sm_signal_str[] = { /********************************************************************** **********************************************************************/ -const char *osm_get_sm_state_str(IN osm_sm_state_t state) -{ - if (state > OSM_SM_STATE_MAX) - state = OSM_SM_STATE_MAX; - return (__osm_sm_state_str[state]); -} - -/********************************************************************** - **********************************************************************/ const char *osm_get_sm_signal_str(IN osm_signal_t signal) { if (signal > OSM_SIGNAL_MAX) diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index 1099445..2b0d6f0 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -790,13 +790,8 @@ void osm_perfmgr_process(osm_perfmgr_t * pm) if (pm->state != PERFMGR_STATE_ENABLED) return; - if (pm->sm->state != OSM_SM_STATE_IDLE && - pm->sm->state != OSM_SM_STATE_STANDBY) - return; - - if (pm->sm->state == OSM_SM_STATE_STANDBY || - (pm->sm->state == OSM_SM_STATE_IDLE && - pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)) + if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY || + 
pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) perfmgr_discovery(pm->subn->p_osm); #if ENABLE_OSM_PERF_MGR_PROFILE diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c index f2cc550..bdbb2e6 100644 --- a/opensm/opensm/osm_sm.c +++ b/opensm/opensm/osm_sm.c @@ -157,7 +157,6 @@ void osm_sm_construct(IN osm_sm_t * const p_sm) { memset(p_sm, 0, sizeof(*p_sm)); p_sm->thread_state = OSM_THREAD_STATE_NONE; - p_sm->state = OSM_SM_STATE_INIT; p_sm->sm_trans_id = OSM_SM_INITIAL_TID_VALUE; cl_spinlock_construct(&p_sm->signal_lock); cl_spinlock_construct(&p_sm->state_lock); @@ -406,7 +405,6 @@ osm_sm_init(IN osm_sm_t * const p_sm, * the sweeper thread if the user wants sweeping. */ p_sm->thread_state = OSM_THREAD_STATE_RUN; - p_sm->state = OSM_SM_STATE_IDLE; status = cl_thread_init(&p_sm->sweeper, __osm_sm_sweeper, p_sm, "opensm sweeper"); if (status != IB_SUCCESS) diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index fab90bf..9c1c1f3 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -391,7 +391,7 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t *sm, sm->p_subn->sm_state = IB_SMINFO_STATE_DISCOVERING; osm_report_sm_state(sm); sm->p_subn->coming_out_of_standby = TRUE; - osm_sm_signal(sm, OSM_SIGNAL_EXIT_STBY); + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); break; case OSM_SM_SIGNAL_DISABLE: /* @@ -399,6 +399,7 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t *sm, */ sm->p_subn->sm_state = IB_SMINFO_STATE_NOTACTIVE; osm_report_sm_state(sm); + osm_vendor_set_sm(sm->mad_ctrl.h_bind, FALSE); break; case OSM_SM_SIGNAL_HANDOVER: /* @@ -423,7 +424,7 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t *sm, sm->p_subn->master_sm_base_lid = sm->p_subn->sm_base_lid; sm->p_subn->coming_out_of_standby = TRUE; - osm_sm_signal(sm, OSM_SIGNAL_EXIT_STBY); + osm_sm_signal(sm, OSM_SIGNAL_SWEEP); break; case OSM_SM_SIGNAL_ACKNOWLEDGE: /* diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 
c53ed45..2f3c366 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -94,37 +94,6 @@ static void __osm_state_mgr_up_msg(IN const osm_sm_t *sm) /********************************************************************** **********************************************************************/ -static void -__osm_state_mgr_signal_warning(IN osm_sm_t *sm, - IN const osm_signal_t signal) -{ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_state_mgr_signal_warning: " - "Invalid signal %s(%lu) in state %s\n", - osm_get_sm_signal_str(signal), signal, - osm_get_sm_state_str(sm->state)); -} - -/********************************************************************** - **********************************************************************/ -static void -__osm_state_mgr_signal_error(IN osm_sm_t *sm, - IN const osm_signal_t signal) -{ - /* the Request for IDLE processing can come async to the state so it - * really is just verbose ... */ - if (signal == OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST) - __osm_state_mgr_signal_warning(sm, signal); - else - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_signal_error: ERR 3303: " - "Invalid signal %s(%lu) in state %s\n", - osm_get_sm_signal_str(signal), signal, - osm_get_sm_state_str(sm->state)); -} - -/********************************************************************** - **********************************************************************/ static void __osm_state_mgr_reset_node_count(IN cl_map_item_t * const p_map_item, IN void *context) { @@ -1046,6 +1015,18 @@ static void do_sweep(osm_sm_t * sm) ib_api_status_t status; osm_remote_sm_t *p_remote_sm; + if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER && + sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING) + return; + + if (sm->p_subn->coming_out_of_standby) + /* + * Need to force re-write of sm_base_lid to all ports + * to do that we want all the ports to be considered + * foriegn + */ + __osm_state_mgr_clean_known_lids(sm); + sm->master_sm_found = 0; /* 
@@ -1116,7 +1097,6 @@ _repeat_discovery: /* discovery completed - check other sm presense */ if (sm->master_sm_found) { - sm->state = OSM_SM_STATE_STANDBY; /* * Call the sm_state_mgr with signal * MASTER_OR_HIGHER_SM_DETECTED_DONE @@ -1149,7 +1129,6 @@ _repeat_discovery: __osm_state_mgr_send_handover(sm, p_remote_sm); osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_HANDOVER_SENT); - sm->state = OSM_SM_STATE_STANDBY; return; } else { /* We are the highest sm - check to see if there is @@ -1305,6 +1284,8 @@ _repeat_discovery: static void do_process_mgrp_queue(osm_sm_t * sm) { + if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER) + return; osm_mcast_mgr_process_mgroups(&sm->mcast_mgr); wait_for_pending_transactions(&sm->p_subn->p_osm->stats); } @@ -1320,58 +1301,22 @@ void osm_state_mgr_process(IN osm_sm_t *sm, IN osm_signal_t signal) "osm_state_mgr_process: " "Received signal %s in state %s\n", osm_get_sm_signal_str(signal), - osm_get_sm_state_str(sm->state)); - - switch (sm->state) { - case OSM_SM_STATE_IDLE: - switch (signal) { - case OSM_SIGNAL_SWEEP: - /* - * If the osm_sm_state_mgr is in NOT-ACTIVE state - - * stay in IDLE - */ - if (sm->p_subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) { - osm_vendor_set_sm(sm->mad_ctrl.h_bind, FALSE); - break; - } - - do_sweep(sm); - break; + osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); - case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST: - do_process_mgrp_queue(sm); - break; - - default: - __osm_state_mgr_signal_error(sm, signal); - break; - } + switch (signal) { + case OSM_SIGNAL_SWEEP: + do_sweep(sm); break; - case OSM_SM_STATE_STANDBY: - switch (signal) { - case OSM_SIGNAL_EXIT_STBY: - /* - * Need to force re-write of sm_base_lid to all ports - * to do that we want all the ports to be considered - * foriegn - */ - __osm_state_mgr_clean_known_lids(sm); - sm->state = OSM_SM_STATE_IDLE; - osm_sm_signal(sm, OSM_SIGNAL_SWEEP); - break; - default: - __osm_state_mgr_signal_error(sm, signal); - break; - } - /* stay with the same signal - so 
we can start the sweep */ + case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST: + do_process_mgrp_queue(sm); break; default: CL_ASSERT(FALSE); osm_log(sm->p_log, OSM_LOG_ERROR, "osm_state_mgr_process: ERR 3320: " - "Invalid SM state %u\n", sm->state); + "Invalid SM signal %u\n", signal); break; } -- 1.5.4.rc2.60.gb2e62 From isotopeskn607 at afs-rechtsanwaelte.de Sun Feb 10 12:31:48 2008 From: isotopeskn607 at afs-rechtsanwaelte.de (Brady Griffin) Date: , 10 Feb 2008 22:31:48 +0200 Subject: [ofa-general] Men & Women Designer Footwear from Chanel Gucci Prada Dior Versace Message-ID: <699872510.90883146506687@afs-rechtsanwaelte.de> Welcome in our on-line shop!!! Enjoy DIRECT PRICING at more than 65% OFF on a wide variety of 2008 Collections from Versace, Prada, Chanel, Dior & More. We also carry TOP BRANDS such as Uggs, Gucci, Dsquared, D&G, Bally, Coach and much more. Find Loafers, Boots, High Heels, Sneakers and Casual Shoes from Brand Names at less than WHOLESALE prices. Selection is available for Women and Men, Shipping is FREE WorldWide, Trendy Fashion Footwear Sale of the YEAR! Forget Department Store Prices, Buy Designer Shoes Direct Visit Today! http://designerfeet.net -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ardavis at ichips.intel.com Sun Feb 10 16:46:26 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Sun, 10 Feb 2008 16:46:26 -0800 Subject: [ofa-general] uDAPL libdat2.so version [PATCH] udapl v1 and v2 - dat_create_psp_any() seed value wrong In-Reply-To: References: <47AB7912.5040700@ichips.intel.com> <47AB8A4E.5080409@ichips.intel.com> <47AB8EB6.1040600@ichips.intel.com> <47ABA463.6020707@ichips.intel.com> <47ACE372.5020904@ichips.intel.com> Message-ID: <47AF9AE2.5050006@ichips.intel.com> Tang, Changqing wrote: > I am testing OFED 1.3 udapl v1, I have three nodes, n1, n2, and n3, > if I run two ranks between n1 and n2, it works, n2 and n3, it works again, > but if I run between n1 and n3, it fails with: > > dat_cr_accept() failed: DAT_INTERNAL_ERROR > > What could be the reason ? I did not change anything else except the > node to run. Thanks for help. > > What IPoIB interfaces are configured on the nodes? Can you ping via IPoIB from n1 to n3? Are you using the same IB port on each node? This error could be caused by a physical port mismatch between the connect request and the listen bindings due to the ARP reply. If you have multiple interfaces then one may reply to an ARP directed to the other interfaces on the system. The following configuration will cause the interfaces to ignore ARP requests not directed to their specific IP address. 
Add the following lines to /etc/sysctl.conf net.ipv4.conf.all.arp_ignore=1 net.ipv4.conf.ib0.arp_ignore=1 net.ipv4.conf.ib1.arp_ignore=1 or use sysctl: sysctl -w net.ipv4.conf.all.arp_ignore=1 sysctl -w net.ipv4.conf.ib0.arp_ignore=1 sysctl -w net.ipv4.conf.ib1.arp_ignore=1 -arlin From olof at lixom.net Sun Feb 10 18:22:57 2008 From: olof at lixom.net (Olof Johansson) Date: Sun, 10 Feb 2008 20:22:57 -0600 Subject: [ofa-general] [PATCH] mlx4: fix build break In-Reply-To: References: Message-ID: <20080211022257.GA26656@lixom.net> On Fri, Feb 08, 2008 at 03:16:53PM -0800, Roland Dreier wrote: > mlx4_core: For 64-bit systems, vmap() kernel queue buffers Hi, The above patch caused this to pop up on powerpc allyesconfig, looks like a missing include file: drivers/net/mlx4/alloc.c: In function 'mlx4_buf_alloc': drivers/net/mlx4/alloc.c:162: error: implicit declaration of function 'vmap' drivers/net/mlx4/alloc.c:162: error: 'VM_MAP' undeclared (first use in this function) drivers/net/mlx4/alloc.c:162: error: (Each undeclared identifier is reported only once drivers/net/mlx4/alloc.c:162: error: for each function it appears in.) drivers/net/mlx4/alloc.c:162: warning: assignment makes pointer from integer without a cast drivers/net/mlx4/alloc.c: In function 'mlx4_buf_free': drivers/net/mlx4/alloc.c:187: error: implicit declaration of function 'vunmap' Signed-off-by: Olof Johansson diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index 521dc03..75ef9d0 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mlx4.h" From enfabrique at cox.net Sun Feb 10 22:34:08 2008 From: enfabrique at cox.net (Tamika Davis) Date: Mon, 11 Feb 2008 07:34:08 +0100 Subject: [ofa-general] Industry Standard Software at Nominal Fee Message-ID: <247662845.93644719525175@cox.net> An HTML attachment was scrubbed... 
URL: From xma at us.ibm.com Sun Feb 10 22:41:47 2008 From: xma at us.ibm.com (Shirley Ma) Date: Sun, 10 Feb 2008 22:41:47 -0800 Subject: [ofa-general] Re: [PATCH 2/16 v4] IB/ipoib: Add s/g support In-Reply-To: Message-ID: Hello Roland & Eli, We have seen memory allocation failures when allocating a large tx_ring. For example, with a 1K tx_ring size and a kernel page size of 4K, tx_ring will need at least 36 pages (MAX_SKB_FRAGS = 64K/4K + 2 = 18) of contiguous memory per port. Either each mapping needs to be dynamically allocated or the allocation should be based on page size for tx_ring. struct ipoib_tx_buf { struct sk_buff *skb; u64 mapping[MAX_SKB_FRAGS + 1]; }; Thanks Shirley Roland Dreier Sent by: general-bounces at lists.openfabrics.org 02/08/08 02:32 PM cc: openfabrics Subject: [ofa-general] Re: [PATCH 2/16 v4] IB/ipoib: Add s/g support Thanks, applied... > --- a/drivers/infiniband/ulp/ipoib/ipoib.h > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > +static inline int ipoib_dma_map_tx(struct ib_device *ca, > + struct ipoib_tx_buf *tx_req) I didn't see why this needed to be in a header-- I just moved it to ipoib_ib.c. Also > + int frags; > + int i; > + > + mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb), > + DMA_TO_DEVICE); > + if (unlikely(ib_dma_mapping_error(ca, mapping[0]))) > + return -EIO; > + > + frags = skb_shinfo(skb)->nr_frags; Not sure what the advantage of having a local variable that is only used once to hold the value of nr_frags, so I got rid of it. - R. _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed...
Name: graycol.gif Type: image/gif Size: 105 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: pic31887.gif Type: image/gif Size: 1255 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: From a-anned at abovenbeyond.com Sun Feb 10 23:46:51 2008 From: a-anned at abovenbeyond.com (Armando Pritchett) Date: Mon, 11 Feb 2008 15:46:51 +0800 Subject: [ofa-general] Can we talk? Message-ID: <01c86cc5$51652780$2bbb58da@a-anned> Hello! I am tired this afternoon. I am nice girl that would like to chat with you. Email me at Caroline at TheHealCare.info only, because I am using my friend's email to write this. You will see some of my private pics. From ogerlitz at voltaire.com Mon Feb 11 00:34:49 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 11 Feb 2008 10:34:49 +0200 Subject: [ofa-general] Re: [PATCH 6/16 v4] IB/mthca: Add checksum offload support In-Reply-To: <47AF24AA.4040206@dev.mellanox.co.il> References: <1201710676.28794.172.camel@mtls03> <47AEB0BF.1080504@voltaire.com> <47AF24AA.4040206@dev.mellanox.co.il> Message-ID: <47B008A9.2060600@voltaire.com> Eli Cohen wrote: > I think you're right and that's how it should have been done. However > querying device cap will cause quite a few changes in other patches so I think we > should better leave this as it is right now. I will however re-generate the > patches for 2.6.25 . you can not leave this as is for ofed 1.3, b/c as Roland said, no one sets the value of ca->flags in the NON mellanox hw drivers (eg ehca, ipath) so ipoib basically tests a random value, do you think it can remain that way? Basically, I think you can query the device once and have the attributes stored at the priv structure, so you can use it for deciding on the S/G, checksum, LSO (RSS...) capabilities, etc. Or. 
From eli at dev.mellanox.co.il Mon Feb 11 00:40:17 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Mon, 11 Feb 2008 10:40:17 +0200 Subject: [ofa-general] Re: [PATCH 6/16 v4] IB/mthca: Add checksum offload support In-Reply-To: <47B008A9.2060600@voltaire.com> References: <1201710676.28794.172.camel@mtls03> <47AEB0BF.1080504@voltaire.com> <47AF24AA.4040206@dev.mellanox.co.il> <47B008A9.2060600@voltaire.com> Message-ID: <47B009F1.9090109@dev.mellanox.co.il> Or Gerlitz wrote: > Eli Cohen wrote: > >> I think you're right and that's how it should have been done. However >> querying device cap will cause quite a few changes in other patches so >> I think we should better leave this as it is right now. I will however >> re-generate the patches for 2.6.25 . > > you can not leave this as is for ofed 1.3, b/c as Roland said, no one > sets the value of ca->flags in the NON mellanox hw drivers (eg ehca, > ipath) so ipoib basically tests a random value, do you think it can > remain that way? I think the flags member is cleared when the object is allocated so there should be no problem with this. I will re-check though. > > Basically, I think you can query the device once and have the attributes > stored at the priv structure, so you can use it for deciding on the S/G, > checksum, LSO (RSS...) capabilities, etc. > Yes, that's what I have in mind to do. From noreply at eoxiamail.com Sun Feb 10 23:33:06 2008 From: noreply at eoxiamail.com (Airtist) Date: Mon, 11 Feb 2008 08:33:06 +0100 Subject: [ofa-general] Toujours plus de telechargement gratuit et legal Message-ID: <431abb046ef68e2e0a19a6868ee8c99e@www.eoxiamail.com> An HTML attachment was scrubbed...
URL: From vlad at lists.openfabrics.org Mon Feb 11 03:04:13 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 11 Feb 2008 03:04:13 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080211-0200 daily build status Message-ID: <20080211110413.7E2E8E608F6@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 
Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: Build failed on ia64 with linux-2.6.24 Log: from /home/vlad/tmp/ofa_1_3_kernel-20080211-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/sdp/sdp_main.c:74: include/net/tcp.h: In function 'tcp_v4_check': include/net/tcp.h:846: error: implicit declaration of function 'csum_tcpudp_magic' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080211-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/sdp/sdp_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080211-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/sdp] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080211-0200_linux-2.6.24_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080211-0200_linux-2.6.24_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From a-allenh at acsdataline.com Mon Feb 11 06:02:01 2008 From: a-allenh at acsdataline.com (Darrin Barrett) Date: Mon, 11 Feb 2008 15:02:01 +0100 Subject: [ofa-general] What are you up to? Message-ID: <01c86cbf$0e07ba80$ee2b0654@a-allenh> Hello! I am bored tonight. I am nice girl that would like to chat with you. 
Email me at Astrid at TheHealCare.info only, because I am using my friend's email to write this. Hope you will like my pictures. From 9obzc0eh-emk at beer.com Mon Feb 11 07:32:56 2008 From: 9obzc0eh-emk at beer.com (Trina Friend) Date: Mon, 11 Feb 2008 23:32:56 +0800 Subject: [ofa-general] Can we talk? Message-ID: <086160485.78290890851228@beer.com> Hello! I am bored tonight. I am nice girl that would like to chat with you. Email me at Ingrid at TheHealCare.info only, because I am using my friend's email to write this. I would like to share some of my pics. From swise at opengridcomputing.com Mon Feb 11 07:39:38 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 11 Feb 2008 09:39:38 -0600 Subject: [ofa-general] [PATCH 2/3] RDMA/cxgb3: fix page shift calculation in build_phys_page_list() In-Reply-To: References: Message-ID: <47B06C3A.7080601@opengridcomputing.com> Bryan, I assume you sign-off on this patch? Bryan S Rosenburg wrote: > > On Mon Jan 21 12:39:36 PST 2008, Steve Wise wrote: > > RDMA/cxgb3: fix page shift calculation in build_phys_page_list() > > > > The existing logic incorrectly maps this buffer list: > > > > 0: addr 0x10001000, size 0x1000 > > 1: addr 0x10002000, size 0x1000 > > > > To this bogus page list: > > > > 0: 0x10000000 > > 1: 0x10002000 > > > > The shift calculation must also take into account the address of the > first > > entry masked by the page_mask as well as the last address+size rounded > > up to the next page size. > > I think the problem can still occur, even with the patch, if the buffer > list has just one entry. > > A single entry (addr 0x10001000, size 0x2000) will get converted to page > address 0x10000000 with a page size of 0x4000. The patch as it stands > doesn't address the single buffer case, but in fact it allows the > subsequent single-buffer special case to be eliminated entirely. 
> Because the mask now includes the (page adjusted) starting and ending > addresses, the general case works for the single buffer case as well: > > ================================================================================ > > > diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c > b/drivers/infiniband/hw/cxgb3/iwch_mem.c > index 73bfd16..b8797c6 100644 > --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c > +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c > @@ -136,14 +136,8 @@ int build_phys_page_list(struct ib_phys_buf > *buffer_list, > > /* Find largest page shift we can use to cover buffers */ > for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) > - if (num_phys_buf > 1) { > - if ((1ULL << *shift) & mask) > - break; > - } else > - if (1ULL << *shift >= > - buffer_list[0].size + > - (buffer_list[0].addr & ((1ULL << *shift) - 1))) > - break; > + if ((1ULL << *shift) & mask) > + break; > > buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); > buffer_list[0].addr &= ~0ull << *shift; > ================================================================================ > > > Don't try this without applying Steve's patch first. > > Incidentally, I've been tracking down exactly the bug that Steve fixed, > but in mthca_reg_phys_mr() rather than in the cxgb3 > build_phys_page_list(). I'll submit a patch for mthca, unless someone > else applies Steve's fix there soon. > > - Bryan Rosenburg - IBM Research > > From tziporet at mellanox.co.il Mon Feb 11 07:59:11 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 11 Feb 2008 17:59:11 +0200 Subject: [ofa-general] OFED meeting agenda for Feb 11 Message-ID: <6C2C79E72C305246B504CBA17B5500C9034B29F6@mtlexch01.mtl.com> > This is the agenda for today: > > 1. OFED 1.3-rc4 testing status - all 2. Critical bugs review: 903 blo P3 Othe pasha at mellanox.co.il mvapich fails to build on Redhat EL5 - update 1 905 blo P3 RHEL sean.hefty at intel.com scale-up issue with rdma_cm, requests rejected when exceed... 
890 blo P1 Othe swise at opengridcomputing.com Intermittent stall starting MPI jobs due to lost NetEvent 874 cri P3 Othe jeremy.brown at qlogic.com Intel MPI (IMB test) hangs intermittently on the qlogic HCA 895 cri P3 Othe jim at mellanox.com kernel panic while running multiple test on sdp 846 cri P2 RHEL jim at mellanox.com SDP crash on RHEL5 ppc64 running netserver 906 cri P1 All raisch at de.ibm.com fail to destroy ipoib rx QP 760 maj P3 All eli at mellanox.co.il UDP performance on Rx is lower than Tx 894 maj P2 SLES jackm at dev.mellanox.co.il IPoIB connectivity lost during heavy testing on memfree 3. Open discussion > Tziporet -------------- next part -------------- An HTML attachment was scrubbed... URL: From rosnbrg at us.ibm.com Mon Feb 11 08:07:17 2008 From: rosnbrg at us.ibm.com (Bryan S Rosenburg) Date: Mon, 11 Feb 2008 11:07:17 -0500 Subject: [ofa-general] [PATCH 2/3] RDMA/cxgb3: fix page shift calculation in build_phys_page_list() In-Reply-To: <47B06C3A.7080601@opengridcomputing.com> Message-ID: Steve Wise wrote on 02/11/2008 10:39:38 AM: > Bryan, I assume you sign-off on this patch? Steve, yes, I "sign-off" on this patch, although I posted it just for discussion. I haven't tested (or even compiled) it for cxgb3. Let me know if you want me to do anything more formal. - Bryan > > > Bryan S Rosenburg wrote: > > > > On Mon Jan 21 12:39:36 PST 2008, Steve Wise wrote: > > > RDMA/cxgb3: fix page shift calculation in build_phys_page_list() > > > > > > The existing logic incorrectly maps this buffer list: > > > > > > 0: addr 0x10001000, size 0x1000 > > > 1: addr 0x10002000, size 0x1000 > > > > > > To this bogus page list: > > > > > > 0: 0x10000000 > > > 1: 0x10002000 > > > > > > The shift calculation must also take into account the address of the > > first > > > entry masked by the page_mask as well as the last address+size rounded > > > up to the next page size. 
> > > > I think the problem can still occur, even with the patch, if the buffer > > list has just one entry. > > > > A single entry (addr 0x10001000, size 0x2000) will get converted to page > > address 0x10000000 with a page size of 0x4000. The patch as it stands > > doesn't address the single buffer case, but in fact it allows the > > subsequent single-buffer special case to be eliminated entirely. > > Because the mask now includes the (page adjusted) starting and ending > > addresses, the general case works for the single buffer case as well: > > > > ================================================================================ > > > > > > diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c > > b/drivers/infiniband/hw/cxgb3/iwch_mem.c > > index 73bfd16..b8797c6 100644 > > --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c > > +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c > > @@ -136,14 +136,8 @@ int build_phys_page_list(struct ib_phys_buf > > *buffer_list, > > > > /* Find largest page shift we can use to cover buffers */ > > for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) > > - if (num_phys_buf > 1) { > > - if ((1ULL << *shift) & mask) > > - break; > > - } else > > - if (1ULL << *shift >= > > - buffer_list[0].size + > > - (buffer_list[0].addr & ((1ULL << *shift) - 1))) > > - break; > > + if ((1ULL << *shift) & mask) > > + break; > > > > buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); > > buffer_list[0].addr &= ~0ull << *shift; > > ================================================================================ > > > > > > Don't try this without applying Steve's patch first. > > > > Incidentally, I've been tracking down exactly the bug that Steve fixed, > > but in mthca_reg_phys_mr() rather than in the cxgb3 > > build_phys_page_list(). I'll submit a patch for mthca, unless someone > > else applies Steve's fix there soon. > > > > - Bryan Rosenburg - IBM Research > > > > > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From jeff at garzik.org Mon Feb 11 08:09:25 2008 From: jeff at garzik.org (Jeff Garzik) Date: Mon, 11 Feb 2008 11:09:25 -0500 Subject: [ofa-general] Re: [PATCH 2.6.25] cxgb3: Handle ARP completions that mark neighbors stale. In-Reply-To: <20080206180519.4680.34741.stgit@dell3.ogc.int> References: <20080206180519.4680.34741.stgit@dell3.ogc.int> Message-ID: <47B07335.5090709@garzik.org> Steve Wise wrote: > cxgb3: Handle ARP completions that mark neighbors stale. > > When ARP completes due to a request rather than a reply the neighbor is > marked NUD_STALE instead of reachable (see arp_process()). The handler > for the resulting netevent needs to check also for NUD_STALE. > > Failure to use the arp entry can cause RDMA connection failures. > > Signed-off-by: Steve Wise applied From swise at opengridcomputing.com Mon Feb 11 08:10:18 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 11 Feb 2008 10:10:18 -0600 Subject: [ofa-general] [PATCH 2/3] RDMA/cxgb3: fix page shift calculation in build_phys_page_list() In-Reply-To: References: Message-ID: <47B0736A.5030806@opengridcomputing.com> Bryan S Rosenburg wrote: > > Steve Wise wrote on 02/11/2008 10:39:38 AM: > > Bryan, I assume you sign-off on this patch? > > Steve, yes, I "sign-off" on this patch, although I posted it just for > discussion. I haven't tested (or even compiled) it for cxgb3. Let me > know if you want me to do anything more formal. > Oh. I thought you said it fixed a bug? :) Any testing you can do would be great. I've posted it for the lustre folks to try out as well. Steve. 
From rosnbrg at us.ibm.com Mon Feb 11 08:24:19 2008 From: rosnbrg at us.ibm.com (Bryan S Rosenburg) Date: Mon, 11 Feb 2008 11:24:19 -0500 Subject: [ofa-general] [PATCH 2/3] RDMA/cxgb3: fix page shift calculation in build_phys_page_list() In-Reply-To: <47B0736A.5030806@opengridcomputing.com> Message-ID: Steve Wise wrote on 02/11/2008 11:10:18 AM: > Bryan S Rosenburg wrote: > > > > Steve Wise wrote on 02/11/2008 10:39:38 AM: > > > Bryan, I assume you sign-off on this patch? > > > > Steve, yes, I "sign-off" on this patch, although I posted it just for > > discussion. I haven't tested (or even compiled) it for cxgb3. Let me > > know if you want me to do anything more formal. > > > > Oh. I thought you said it fixed a bug? :) Any testing you can do would > be great. Sorry if I've misled you. :-( I only have mthca cards to play with. I verified the analogous bug in that driver and tested the fix, although at this point Roland Dreier has implemented a more general fix that takes the alignment of the virtual address into account. - Bryan -------------- next part -------------- An HTML attachment was scrubbed... URL: From chai.15 at osu.edu Mon Feb 11 08:37:54 2008 From: chai.15 at osu.edu (LEI CHAI) Date: Mon, 11 Feb 2008 11:37:54 -0500 Subject: [ofa-general] [ANNOUCE] dapl 2.0.6 release Message-ID: <17e31c17e47f.17e47f17e31c@osu.edu> "--enable-ext-type=ib" solves my problem too. Thanks! Lei ----- Original Message ----- From: Arlin Davis Date: Friday, February 8, 2008 5:11 pm Subject: Re: [ofa-general] [ANNOUCE] dapl 2.0.6 release > Chuck Hartley wrote: > > I'm doing ./configure && make install. I found the --enable-ext- > type=ib > > rebuilt with it successfully. > > > > > sorry for the confusion. I will change the v2 defaults in the > configure > to match the RPM spec file. 
> > -arlin > From jlentini at netapp.com Mon Feb 11 09:25:14 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 11 Feb 2008 12:25:14 -0500 (EST) Subject: [ofa-general] new NFS/RDMA instructions for 2.6.25-rc1 Message-ID: Linux 2.6.25 will be the first official kernel release to contain the NFS/RDMA server. With the client and server now both available in 2.6.25-rc1, we've simplified our NFS/RDMA installation instructions. The new instructions are available here: http://nfs-rdma.sourceforge.net/Documents/README From bfields at fieldses.org Mon Feb 11 09:33:22 2008 From: bfields at fieldses.org (J. Bruce Fields) Date: Mon, 11 Feb 2008 12:33:22 -0500 Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: References: Message-ID: <20080211173322.GA4755@fieldses.org> On Mon, Feb 11, 2008 at 12:25:14PM -0500, James Lentini wrote: > > Linux 2.6.25 will be the first official kernel release to contain the > NFS/RDMA server. With the client and server now both available in > 2.6.25-rc1, we've simplified our NFS/RDMA installation instructions. > The new instructions are available here: > > http://nfs-rdma.sourceforge.net/Documents/README Any reason not to add that to the linux tree, say in Documentation/filesystems/nfs-rdma.txt? --b. 
From sashak at voltaire.com Mon Feb 11 09:59:55 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 11 Feb 2008 17:59:55 +0000 Subject: [ofa-general] [ANNOUNCE] management tarballs release Message-ID: <20080211175955.GV11526@sashak.voltaire.com> Hi, There is a new release of the management (OpenSM and infiniband diagnostics) tarballs available in: http://www.openfabrics.org/downloads/management/ md5sum: 1c9764865b4c4f03529494f4272a7daf libibcommon-1.0.8.tar.gz 879688a264e982600628dfa576c84cea libibumad-1.1.7.tar.gz de275d0aba15b587f00ac1e30ba8b0e8 libibmad-1.1.6.tar.gz 17678a2eaeb0cae9273b091da00cc7dc infiniband-diags-1.3.6.tar.gz 92385653112cd915ec31542eb2d94714 opensm-3.1.9.tar.gz 0f43afda110cfc285ef89637fcbf6ed7 opensm-3.2.0.tar.gz opensm-3.1.9 is recent OFED version. opensm-3.2.0 is recent master. The rest is the same. Sasha From sales at davesgadgetworld.com Mon Feb 11 10:10:56 2008 From: sales at davesgadgetworld.com (Dave's Gadget World) Date: Mon, 11 Feb 2008 10:10:56 -0800 Subject: [ofa-general] Trackstick GPS now available at Daves Gadget World Message-ID: <20080211101056.515044125@davesgadgetworld.com> Plain text message -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: Divider_Horizontal.gif Type: image/gif Size: 108 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 1-TSII-2T.jpg Type: image/gif Size: 7026 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: btn_addtocart.gif Type: image/gif Size: 2535 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: Bullet_MoreInfo.gif Type: image/gif Size: 117 bytes Desc: not available URL: From jim at mellanox.com Mon Feb 11 12:42:26 2008 From: jim at mellanox.com (Jim Mott) Date: Mon, 11 Feb 2008 12:42:26 -0800 Subject: [ofa-general] [PATCH 1/1] SDP - Fix compile problem on 2.6.24 ia64 Message-ID: Signed-off-by: Jim Mott --- =================================================================== diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c index 8f7c8f9..dcc60e3 100644 --- a/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/drivers/infiniband/ulp/sdp/sdp_main.c @@ -45,6 +45,7 @@ but for SDP HW checksum is always set, so ... */ #include +#include #include static inline diff --git a/kernel_patches/backport/2.6.21/sdp_ia64.patch b/kernel_patches/backport/2.6.21/sdp_ia64.patch index b123e2b..f789cb2 100644 --- a/kernel_patches/backport/2.6.21/sdp_ia64.patch +++ b/kernel_patches/backport/2.6.21/sdp_ia64.patch @@ -1,9 +1,9 @@ -Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c +Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== ---- ofed_1_2.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:34.000000000 +0300 -+++ ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:47.000000000 +0300 -@@ -30,26 +30,6 @@ - * SOFTWARE. +--- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 11:34:32.000000000 -0800 ++++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 12:20:09.000000000 -0800 +@@ -39,27 +39,6 @@ + * 2 of the License, or(at your option) any later version. */ -#if defined(__ia64__) @@ -12,6 +12,7 @@ Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c - but for SDP HW checksum is always set, so ... 
*/ - -#include +-#include -#include - -static inline diff --git a/kernel_patches/backport/2.6.22/sdp_ia64.patch b/kernel_patches/backport/2.6.22/sdp_ia64.patch index b123e2b..f789cb2 100644 --- a/kernel_patches/backport/2.6.22/sdp_ia64.patch +++ b/kernel_patches/backport/2.6.22/sdp_ia64.patch @@ -1,9 +1,9 @@ -Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c +Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== ---- ofed_1_2.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:34.000000000 +0300 -+++ ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:47.000000000 +0300 -@@ -30,26 +30,6 @@ - * SOFTWARE. +--- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 11:34:32.000000000 -0800 ++++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 12:20:09.000000000 -0800 +@@ -39,27 +39,6 @@ + * 2 of the License, or(at your option) any later version. */ -#if defined(__ia64__) @@ -12,6 +12,7 @@ Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c - but for SDP HW checksum is always set, so ... */ - -#include +-#include -#include - -static inline diff --git a/kernel_patches/backport/2.6.22_suse10_3/sdp_ia64.patch b/kernel_patches/backport/2.6.22_suse10_3/sdp_ia64.patch index b123e2b..f789cb2 100644 --- a/kernel_patches/backport/2.6.22_suse10_3/sdp_ia64.patch +++ b/kernel_patches/backport/2.6.22_suse10_3/sdp_ia64.patch @@ -1,9 +1,9 @@ -Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c +Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== ---- ofed_1_2.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:34.000000000 +0300 -+++ ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:47.000000000 +0300 -@@ -30,26 +30,6 @@ - * SOFTWARE. 
+--- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 11:34:32.000000000 -0800 ++++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 12:20:09.000000000 -0800 +@@ -39,27 +39,6 @@ + * 2 of the License, or(at your option) any later version. */ -#if defined(__ia64__) @@ -12,6 +12,7 @@ Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c - but for SDP HW checksum is always set, so ... */ - -#include +-#include -#include - -static inline diff --git a/kernel_patches/backport/2.6.23/sdp_ia64.patch b/kernel_patches/backport/2.6.23/sdp_ia64.patch index b123e2b..f789cb2 100644 --- a/kernel_patches/backport/2.6.23/sdp_ia64.patch +++ b/kernel_patches/backport/2.6.23/sdp_ia64.patch @@ -1,9 +1,9 @@ -Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c +Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== ---- ofed_1_2.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:34.000000000 +0300 -+++ ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c 2007-05-16 16:28:47.000000000 +0300 -@@ -30,26 +30,6 @@ - * SOFTWARE. +--- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 11:34:32.000000000 -0800 ++++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c 2008-02-11 12:20:09.000000000 -0800 +@@ -39,27 +39,6 @@ + * 2 of the License, or(at your option) any later version. */ -#if defined(__ia64__) @@ -12,6 +12,7 @@ Index: ofed_1_2/drivers/infiniband/ulp/sdp/sdp_main.c - but for SDP HW checksum is always set, so ... */ - -#include +-#include -#include - -static inline From jlentini at netapp.com Mon Feb 11 12:56:17 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 11 Feb 2008 15:56:17 -0500 (EST) Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: <20080211173322.GA4755@fieldses.org> References: <20080211173322.GA4755@fieldses.org> Message-ID: On Mon, 11 Feb 2008, J. 
Bruce Fields wrote: > On Mon, Feb 11, 2008 at 12:25:14PM -0500, James Lentini wrote: > > > > Linux 2.6.25 will be the first official kernel release to contain the > > NFS/RDMA server. With the client and server now both available in > > 2.6.25-rc1, we've simplified our NFS/RDMA installation instructions. > > The new instructions are available here: > > > > http://nfs-rdma.sourceforge.net/Documents/README > > Any reason not to add that to the linux tree, say in > Documentation/filesystems/nfs-rdma.txt? > > --b. That sounds like a good idea Bruce. The current document is strictly a HOWTO. Should we add sections on the design and implementation? From teresa at home-mail.info Mon Feb 11 09:32:50 2008 From: teresa at home-mail.info (=?iso-2022-jp?B?GyRCIVo4N0EqJS0lYyVDJTclcyUwMEZGYiFbGyhC?=) Date: Tue, 12 Feb 2008 02:32:50 +0900 Subject: [ofa-general] =?iso-2022-jp?b?GyRCIiMhISMyIzAjMCM4Ry8hISQqGyhC?= =?iso-2022-jp?b?GyRCJF4kSCRhJW0hPCVzJEdAODNoJHI6RiU5JT8hPCVIGyhC?= =?iso-2022-jp?b?GyRCISEiIxsoQg==?= Message-ID: <20080211205655.254B9E609FB@openfabrics.org> $BK\(-Ev(-$K(-I,(-MW(-$J(-$H(-$-(-$K(-;H(-$((-$k(-JX(-Mx(-$J(-%m(-!<(-%s(B $B(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(B $B!!!!!!!!!!%W(-%m(-%Q(-%F(-%#(-%U(-%!(-%$(-%J(-%s(-%9(--j(-(B $B!!!!!!!!!!(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(,(0(B $B!!!!!!!A!A!A!!Jk$i$7!&%+%(%k!&?76bMx!!(B2.50$B%Q!<%;%s%H!!!A!A!A(B $B!!!!!!!!(B~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ $B!z$*?=$79~$_$O$3$A$i!!!!(Bhttp://profs01.first-loan.info/ $B!!"#%W%m%Q%F%#%U%!%$%J%s%9$J$i!"!V:G9b#1#0#0#0K|1_$^$G!W(B $B!!!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E(B $B!!!!!!:G9b#1#0#0#0K|1_$^$G!"I,MW$K1~$8$FI,MW$J6b3[$,$4MxMQ2DG=!#(B $B!!!!!!"(?7EP>l!!L5C4J]#1#0#0#0K|1_%3!<%9EP>l!*!*(B $B!!!!!!"(?3::$N7k2L$K$h$C$F$O!"$44uK>$K$=$($J$$>l9g$,$"$j$^$9!#(B $B!!"#%W%m%Q%F%#%U%!%$%J%s%9$J$i!"!V6bMx!!G/#2!%#5#0!s!A#8!%#9#0!s!W(B 
$B!!!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E(B $B!!!!!!"(J?@.(B19$BG/(B10$B7n(B1$BF|8=:_(B $B!!!!!!"($*l=j$bB?:L!W(B $B!!!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E!E(B $B!!!!!!;00f=;M'6d9T$N(BATM$B!"(B at B$B&+(BNK$B!J(Bam/pm$B$K at _CV!K!"%$!<%M%C%H(BATM$B!"(B $B!!!!!!%m!<%=%s(BATM$B!"%;%V%s6d9T(BATM$B!"$f$&$A$g(BATM$BEy$G$4MxMQ2DG=!#(B $B!!!!!!"(%+!<%I$Nl9g$OG/(B6.0$B!s!"(B300$BK|1_D6(B400$BK|1_(B $B!!0J2<$N>l9g$OG/(B7.0$B!s!"(B200$BK|1_D6(B300$BK|1_0J2<$N>l9g$OG/(B8.0$B!s!"(B100$BK|1_D6(B $B!!(B200$BK|1_0J2<$N>l9g$OG/(B10.0$B!s!"(B100$BK|1_0J2<$N>l9g$OG/(B12.0$B!s(B $B!|$*?=$79~$_$$$?$@$1$kJ}(B/$B$*?=$79~$_;~$NG/Np$,K~(B20$B:P0J>eK~(B65$B:P0J2<$NJ}(B $B!!$G!"0BDj$7$?<}F~$N$"$kJ}!#(B $B!|$*;H$$$_$A(B/$B86B'<+M3!J;v6H at -;q6b$K$O$4MxMQ$$$?$@$1$^$;$s!#!K(B $B!|$47 at Ls4|4V(B/5$BG/Kh!JEv=i$47 at LsF|$h$j(B5$BG/8e$N1~EvF|!K$N<+F099?7(B $B!|$*MxB)7W;;J}K!(B/$BKhF|$N:G=i$N;D9b!_G/MxN(!`(B365$B!_!N=i2s$*l9g$,$"$j$^$9!#(B $B!|$*?=$79~$_!"$4MxMQ$K$"$?$C$F$N$/$o$7$$>r7o$O!"E9F,$N at bL@=q!"(B $B!!%[!<%`%Z!<%8!"$^$?$O%U%j!<%@%$%d%k$G$43NG'$/$@$5$$!#(B $B!!!Z%[!<%`%Z!<%8![!!!!!!(Bhttp://profs01.first-loan.info/ $B!|J?@.(B19$BG/(B10$B7n(B1$BF|8=:_(B ..................................................................... $B!!!!!!!!!!!!!!!!!!!!!!!z!!$4MxMQ$O7W2hE*$K!!!z(B ..................................................................... 
- $BG[?.$K$D$$$F(B ----------------------------------------------- $B7G:\FbMF$K$D$$$F$N>\:Y!">&IJ!&%5!<%S%9$K$D$$$F$O!"(B $B%a!<%kCf$G$40FFb$7$F$$$k3F4k6H$N$*Ld$$9g$o$;Ak8}$r$43NG'$/$@$5$$!#(B $B$^$?!"(BHTML$B7A<0$G$*Aw$j$9$k%a!<%k$K$O%&%'%C%V%S!<%3%s$,;HMQ$5$l$F$$$^$9!#(B $B$3$N%a!<%k$OFCDjEE;R%a!<%kAw?.E, at 52=K!!&2~@5FCDj>&e(B $BAw?.$7$F$*$j$^$9$,!"$4ITMW$JJ}$K$OBgJQ$4LBOG$r$*$+$1$$$?$7$^$7$?!#(B $B"#G[?.85(B $B%P%C%-%s%0!!%3%9%a(B $B"#=j:_CO(B $B!!El5~ETJ85~6hK\6?(B3-5-2 $B"#G[?.Dd;_!"$^$?$O?4$"$?$j$,$J$$>l9g!J0lHV?WB.$JJ}K!$G$9!K(B info at bkosume352.com $B"(%a!<%k$,Jx$l$F8+$($k>l9g$O!V(BMS$B%4%7%C%/!W$d!V(BOsaka$BEyI}!W$J$IEyI}%U%)%s(B $B%H$G$4Mw$/$@$5$$!#(B ----------------------------------------------------------------------- From bfields at fieldses.org Mon Feb 11 13:00:44 2008 From: bfields at fieldses.org (J. Bruce Fields) Date: Mon, 11 Feb 2008 16:00:44 -0500 Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: References: <20080211173322.GA4755@fieldses.org> Message-ID: <20080211210044.GA4561@fieldses.org> On Mon, Feb 11, 2008 at 03:56:17PM -0500, James Lentini wrote: > > > On Mon, 11 Feb 2008, J. Bruce Fields wrote: > > > On Mon, Feb 11, 2008 at 12:25:14PM -0500, James Lentini wrote: > > > > > > Linux 2.6.25 will be the first official kernel release to contain the > > > NFS/RDMA server. With the client and server now both available in > > > 2.6.25-rc1, we've simplified our NFS/RDMA installation instructions. > > > The new instructions are available here: > > > > > > http://nfs-rdma.sourceforge.net/Documents/README > > > > Any reason not to add that to the linux tree, say in > > Documentation/filesystems/nfs-rdma.txt? > > > > --b. > > That sounds like a good idea Bruce. The current document is strictly a > HOWTO. Should we add sections on the design and implementation? Sure, that'd be great. But I think it'd be fine to submit the howto pretty much as it is and add the rest later. --b. 
From Dominick.Kyle at alplm.org Mon Feb 11 15:47:19 2008 From: Dominick.Kyle at alplm.org (Dominick Kyle) Date: Mon, 11 Feb 2008 22:47:19 -0100 Subject: [ofa-general] Gamble and win. Mega variety of slots. Message-ID: <5IX983EJXVWDA985@alplm.org> Gaff at our Casino and you will pull of surely enough. The privileges of our Casino: Mega range of most popular gambles Fast payouts 24/7 Live chat support Big jackpot http://hitwebplaying.com From legsz at ac-rulez.de Sun Feb 10 13:55:26 2008 From: legsz at ac-rulez.de (Darin Nieves) Date: Mon, 10 Feb 2008 21:55:26 +0000 Subject: [ofa-general] Designer Footwear from Gucci Prada Chanel & More, buy direct, forget department store prices Message-ID: <287921060.21847795097081@ac-rulez.de> Welcome in our on-line shop!!! Enjoy DIRECT PRICING at more than 65% OFF on a wide variety of 2008 Collections from Versace, Prada, Chanel, Dior & More. We also carry TOP BRANDS such as Uggs, Gucci, Dsquared, D&G, Bally, Coach and much more. Find Loafers, Boots, High Heels, Sneakers and Casual Shoes from Brand Names at less than WHOLESALE prices. Selection is available for Women and Men, Shipping is FREE WorldWide, Trendy Fashion Footwear Sale of the YEAR! Forget Department Store Prices, Buy Designer Shoes Direct Visit Today! http://sshoes.byethost13.com/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From arlin.r.davis at intel.com Mon Feb 11 13:58:06 2008 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 11 Feb 2008 13:58:06 -0800 Subject: [ofa-general] [PATCH] dapl v1: dat_psp_create_any fails, return code and conn_qual seed value incorrect Message-ID: <000001c86cf9$30ffdbf0$ff0da8c0@amr.corp.intel.com> Fix v1 udapl for OFED 1.3 rc4, Bug 910: The OFA dapl provider is checking for incorrect return code from rdma_bind_addr and rdma_listen calls. 
This causes an error to be returned back to the consumer instead of correctly incrementing the seed port and re-calling the OFA provider until a valid port is issued. The seed value (1000) is also incorrect and should start at a non-privileged port (1024) to avoid EPERM errors when seeding the starting port value. Patch against dat1.2 branch: Signed-off by: Arlin Davis diff --git a/dapl/common/dapl_psp_create_any.c b/dapl/common/dapl_psp_create_any.c index a2057f5..f3ec594 100644 --- a/dapl/common/dapl_psp_create_any.c +++ b/dapl/common/dapl_psp_create_any.c @@ -82,7 +82,7 @@ dapl_psp_create_any ( DAPL_SP *sp_ptr; DAPL_EVD *evd_ptr; DAT_RETURN dat_status; - static DAT_CONN_QUAL hint_conn_qual = 1000; /* seed value */ + static DAT_CONN_QUAL hint_conn_qual = 1024; /* seed value */ DAT_CONN_QUAL lcl_conn_qual; DAT_CONN_QUAL limit_conn_qual; diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c index ae12658..1826941 100755 --- a/dapl/openib_cma/dapl_ib_cm.c +++ b/dapl/openib_cma/dapl_ib_cm.c @@ -701,7 +701,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia_ptr, if (rdma_bind_addr(conn->cm_id,(struct sockaddr *)&addr)) { - if (errno == EBUSY) + if ((errno == EBUSY) || (errno == EADDRINUSE)) dat_status = DAT_CONN_QUAL_IN_USE; else dat_status = @@ -724,7 +724,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia_ptr, if (rdma_listen(conn->cm_id,64)) { /* backlog to 64 */ - if (errno == EBUSY) + if ((errno == EBUSY) || (errno == EADDRINUSE)) dat_status = DAT_CONN_QUAL_IN_USE; else dat_status = From arlin.r.davis at intel.com Mon Feb 11 13:58:30 2008 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 11 Feb 2008 13:58:30 -0800 Subject: [ofa-general] [PATCH] dapl v2: dat_psp_create_any fails, return code and conn_qual seed value incorrect Message-ID: <000101c86cf9$3e1e5190$ff0da8c0@amr.corp.intel.com> Fix v2 udapl for OFED 1.3 rc4, Bug 910: The OFA dapl provider is checking for incorrect return code from rdma_bind_addr and rdma_listen calls. 
This causes an error to be returned back to the consumer instead of correctly incrementing the seed port and re-calling the OFA provider until a valid port is issued. The seed value (1000) is also incorrect and should start at a non-privileged port (1024) to avoid EPERM errors when seeding the starting port value. Patch against master branch (v2): Signed-off by: Arlin Davis diff --git a/dapl/common/dapl_psp_create_any.c b/dapl/common/dapl_psp_create_any.c index a2768fb..e2faa4a 100644 --- a/dapl/common/dapl_psp_create_any.c +++ b/dapl/common/dapl_psp_create_any.c @@ -82,7 +82,7 @@ dapl_psp_create_any ( DAPL_SP *sp_ptr; DAPL_EVD *evd_ptr; DAT_RETURN dat_status; - static DAT_CONN_QUAL hint_conn_qual = 1000; /* seed value */ + static DAT_CONN_QUAL hint_conn_qual = 1024; /* seed value */ DAT_CONN_QUAL lcl_conn_qual; DAT_CONN_QUAL limit_conn_qual; diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c index e65debc..811789e 100755 --- a/dapl/openib_cma/dapl_ib_cm.c +++ b/dapl/openib_cma/dapl_ib_cm.c @@ -690,7 +690,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia_ptr, ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID); if (rdma_bind_addr(conn->cm_id,(struct sockaddr *)&addr)) { - if (errno == EBUSY) + if ((errno == EBUSY) || (errno == EADDRINUSE)) dat_status = DAT_CONN_QUAL_IN_USE; else dat_status = @@ -713,7 +713,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia_ptr, if (rdma_listen(conn->cm_id,64)) { /* backlog to 64 */ - if (errno == EBUSY) + if ((errno == EBUSY) || (errno == EADDRINUSE)) dat_status = DAT_CONN_QUAL_IN_USE; else dat_status = From rdreier at cisco.com Mon Feb 11 14:18:24 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 11 Feb 2008 14:18:24 -0800 Subject: [ofa-general] Re: [PATCH] mlx4: fix build break In-Reply-To: <20080211022257.GA26656@lixom.net> (Olof Johansson's message of "Sun, 10 Feb 2008 20:22:57 -0600") References: <20080211022257.GA26656@lixom.net> Message-ID: Thanks, applied. 
Jack, I thought you guys tested the build on powerpc. How did this sneak through? From rdreier at cisco.com Mon Feb 11 14:25:27 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 11 Feb 2008 14:25:27 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get one build fix: Olof Johansson (1): mlx4_core: Fix build break (missing include) drivers/net/mlx4/alloc.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index 521dc03..75ef9d0 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mlx4.h" From rdreier at cisco.com Mon Feb 11 14:28:42 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 11 Feb 2008 14:28:42 -0800 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: <4e6a6b3c0802090857l5fa3935bq782df0a138e10129@mail.gmail.com> (Eli Cohen's message of "Sat, 9 Feb 2008 18:57:36 +0200") References: <1201710660.28794.170.camel@mtls03> <4e6a6b3c0802090857l5fa3935bq782df0a138e10129@mail.gmail.com> Message-ID: > I set these flags for mlx4 and mthca in patches 5/16 and 6/16 respectively. Ah I see. Seems strange to use device->flags for just the checksum offload stuff and device_cap_flags for everything else though. I don't see any major issues with moving device_cap_flags into struct ib_device and not forcing a device query call, but I guess we need to convince ourselves that the flags would never change at runtime, and anyway that needs to be a separate change from this IPoIB stuff. > I my machines I can see the flags set by inspecting /sys/class/net/ib*/features I guess that's an OFED patch? 
Is there any interest in submitting it upstream. - R. From rdreier at cisco.com Mon Feb 11 14:40:12 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 11 Feb 2008 14:40:12 -0800 Subject: [ofa-general] Demand paging for memory regions (was Re: MMU Notifiers V6) In-Reply-To: (Christoph Lameter's message of "Sat, 9 Feb 2008 13:46:34 -0800 (PST)") References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> Message-ID: [Adding general at lists.openfabrics.org to get the IB/RDMA people involved] This thread has patches that add support for notifying drivers when a process's memory map changes. The hope is that this is useful for letting RDMA devices handle registered memory without pinning the underlying pages, by updating the RDMA device's translation tables whenever the host kernel's tables change. Is anyone interested in working on using this for drivers/infiniband? I am interested in participating, but I don't think I have enough time to do this by myself. Also, at least naively it seems that this is only useful for hardware that has support for this type of demand paging, and can handle not-present pages, generating interrupts for page faults, etc. I know that Mellanox HCAs should have this support; are there any other devices that can do this? The beginning of this thread is at . - R. From arlin.r.davis at intel.com Mon Feb 11 16:32:59 2008 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 11 Feb 2008 16:32:59 -0800 Subject: [ofa-general] [PATCH] dapl v2: use max cma backlog as listen backlog Message-ID: <000301c86d0e$d1e49f50$ff0da8c0@amr.corp.intel.com> DAPL OFA provider: set listen backlog to default cma backlog max. Patch against master branch. 
Signed-off by: Arlin Davis diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c index 811789e..04b9e41 100755 --- a/dapl/openib_cma/dapl_ib_cm.c +++ b/dapl/openib_cma/dapl_ib_cm.c @@ -711,7 +711,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia_ptr, " listen(conn=%p cm_id=%d)\n", sp_ptr->cm_srvc_handle,conn->cm_id); - if (rdma_listen(conn->cm_id,64)) { /* backlog to 64 */ + if (rdma_listen(conn->cm_id,0)) { /* max cma backlog */ if ((errno == EBUSY) || (errno == EADDRINUSE)) dat_status = DAT_CONN_QUAL_IN_USE; From arlin.r.davis at intel.com Mon Feb 11 16:32:42 2008 From: arlin.r.davis at intel.com (Arlin Davis) Date: Mon, 11 Feb 2008 16:32:42 -0800 Subject: [ofa-general] [PATCH] dapl v1: use max cma backlog as listen backlog Message-ID: <000201c86d0e$c76369d0$ff0da8c0@amr.corp.intel.com> DAPL OFA provider: set listen backlog to default cma backlog max. Patch against dat1.2 branch. Signed-off by: Arlin Davis diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c index 1826941..68e5909 100755 --- a/dapl/openib_cma/dapl_ib_cm.c +++ b/dapl/openib_cma/dapl_ib_cm.c @@ -722,7 +722,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia_ptr, " listen(conn=%p cm_id=%d)\n", sp_ptr->cm_srvc_handle,conn->cm_id); - if (rdma_listen(conn->cm_id,64)) { /* backlog to 64 */ + if (rdma_listen(conn->cm_id,0)) { /* max cma backlog */ if ((errno == EBUSY) || (errno == EADDRINUSE)) dat_status = DAT_CONN_QUAL_IN_USE; From _l_smith at acantho.com Mon Feb 11 14:56:40 2008 From: _l_smith at acantho.com (kean roald) Date: Mon, 11 Feb 2008 22:56:40 +0000 Subject: [ofa-general] Shocking mpeg4 with Rihanna + 4 korean lesbians! Message-ID: <000501c86d10$02fc601c$20159cae@tbnxpb> #VjlbsdAvril Lavigne New pornos. #pWDeZOThe photo is Partof! #jlbsdiOnly 1 day trial - get this New pornos now! #pWDeZO Download it now! -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From dwsilandm at siland.it Mon Feb 11 17:47:01 2008 From: dwsilandm at siland.it (Sean Langford) Date: Tue, 12 Feb 2008 09:47:01 +0800 Subject: [ofa-general] Save on quality software! Message-ID: <01c86d5c$375bc8c0$d62d9fdb@dwsilandm> Brilliant opportunity to get software right at the same time you need it without waiting for a CD to be delivered. Just pay money and download your soft. Low prices, discounts and special offers! Most popular localized software in German, French, Italian, Spanish, English and many other languages of the world! We are glad to help you to install your software. Feel free to ask questions and receive highly professional consultations. If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/devinvalencia980/ The best software products at the best prices. From 20gilles.labat at ctba.fr Mon Feb 11 20:14:30 2008 From: 20gilles.labat at ctba.fr (Bud Enriquez) Date: Tue, 12 Feb 2008 13:14:30 +0900 Subject: [ofa-general] {Viagra_onli2_de} Message-ID: <01c86d79$3358ff00$fbb0fbde@20gilles.labat> Haben Sie endlich wieder Spass am Leben! Preise die keine Konkurrenz kennen - Kein peinlicher Arz tbesuch erforderlich - Bequem und diskret online bestellen. - Visa verifizierter Onlineshop - keine versteckte Kosten - Diskrete Verpackung und Zahlung - Kostenlose, arztliche Telefon-Beratung - Kein langes Warten - Auslieferung innerhalb von 2-3 Tagen Originalmedikamente Ciiaaaaaalis... 10 Pack. 21,00 Euro Viiaaaagra... 10 Pack. 11,00 Euro Jetzt bestellen - und vier Pillen umsonst erhalten (bitte warten Sie einen Moment bis die Seite vollstandig geladen ist) -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ayano.sakurai at bvillemn.net Mon Feb 11 20:49:42 2008 From: ayano.sakurai at bvillemn.net (ayano.sakurai at bvillemn.net) Date: Tue, 12 Feb 2008 10:19:42 +0530 Subject: [ofa-general] My Love Message-ID: <47B12566.1070201@bvillemn.net> Powerful Love http://72.145.32.27/ From maiko9932 at cow.livedoor.com Mon Feb 11 21:16:33 2008 From: maiko9932 at cow.livedoor.com (maiko9932 at cow.livedoor.com) Date: Tue, 12 Feb 2008 14:16:33 +0900 Subject: [ofa-general] =?iso-2022-jp?b?GyRCJSglQyVBJEo/TTpKJHJMNU5BGyhC?= =?iso-2022-jp?b?GyRCJEc+UjJwISobKEI=?= Message-ID: <20080212051715.0B0DAE60363@openfabrics.org> □■――――――――――――――――――□■ ★┌──┐★┌──┐★┌──┐★┌──┐★ ★│完全│★│無料│★│紹介│★│人妻│★ ★└──┘★└──┘★└──┘★└──┘★  ・ご近所に素敵な奥様  ・熟れたカラダを持て余す未亡人  ・セックスし足りないヤングミセス etc...  そんな方達との出会いを求めてませんか? □■――――――――――――――――――□■ そういう貴方に取って置きの安心コミュニティサイトを紹介いたします! このサイトは人妻がメインの割り切り大人恋愛主義! 全ての機能を無料で使いたいのは男も女も一緒♪ ぜひ利用して、豊かなセックスライフを貴方に!  ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ 完全無料! http://www.di-girl.com/?ff  ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ ̄ 旦那は子供ができてからセックスレスになってしまって… 毎日オナニーばかりしてます…こんな私をいじめてくれませんか?         ↓↓メールする↓↓         http://www.di-girl.com/?ff 見た目は若いけど、どうなんだろう?? 実際見て判断してもらいたいからまず連絡先交換しない?          ↓↓メールする↓↓        http://www.di-girl.com/?ff ※セキュリティ上名前非表示、文章等は若干変更させていただいています。       ※安心コミュニティサイト☆※      あなたの希望にマッチした女性が必ず見つかります。  しかも、完全無料だから一度体感してみてください。        ↓↓↓↓安心click↓↓↓安心click↓↓↓        http://www.di-girl.com/?ff From campingdarisi at libero.it Mon Feb 11 21:26:49 2008 From: campingdarisi at libero.it (Haywood Rouse) Date: Mon, 11 Feb 2008 23:26:49 -0600 Subject: [ofa-general] Read below and Get the Size you deserve Message-ID: <721493591.55596958445172@libero.it> Please read one of the testimonials:"Although my experience with different cock enlargement methods was certainly very negative, I still couldn't help looking for some efficient one. I'm so happy I tried your VPXL. With it my length has gone up by an inch and a half and I have gained nearly a whole inch in width. 
Thanks a lot." Finis, Oneonta.Enjoy your life with the our products. http://geocities.com/meghancarr468/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From tziporet at dev.mellanox.co.il Mon Feb 11 21:30:52 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 12 Feb 2008 07:30:52 +0200 Subject: [ofa-general] Re: [PATCH] mlx4: fix build break In-Reply-To: References: <20080211022257.GA26656@lixom.net> Message-ID: <47B12F0C.8070407@mellanox.co.il> Roland Dreier wrote: > Thanks, applied. > > Jack, I thought you guys tested the build on powerpc. How did this > sneak through? > > We OFED on PPC - not clear how this happened We will check our process again Tziporet From rdreier at cisco.com Mon Feb 11 22:21:58 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 11 Feb 2008 22:21:58 -0800 Subject: [ofa-general] CM sysfs-related oops on device driver reload In-Reply-To: <000001c86aec$97eda860$7ae1180a@amr.corp.intel.com> (Sean Hefty's message of "Fri, 8 Feb 2008 23:22:56 -0800") References: <000001c86a20$23e1fdd0$8be0180a@amr.corp.intel.com> <000001c86aec$97eda860$7ae1180a@amr.corp.intel.com> Message-ID: > Was this function the cause of the problem? Was the cm port object freed while > references were still on one of the cm counter objects? To be honest, I haven't figured out the exact bug yet. I made a bunch of changes to make the counter_groups dynamically allocated, based on this from Documentation/kobject.txt: No structure should EVER have more than one kobject embedded within it. If it does, the reference counting for the object is sure to be messed up and incorrect, and your code will be buggy. So do not do this. but thinking about it further, I think the original way should be OK. So I want to go back and look for a smaller patch that still fixes things, and understand what the breakage was. 
From eli at dev.mellanox.co.il Mon Feb 11 23:13:34 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 12 Feb 2008 09:13:34 +0200 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: References: <1201710660.28794.170.camel@mtls03> <4e6a6b3c0802090857l5fa3935bq782df0a138e10129@mail.gmail.com> Message-ID: <47B1471E.20501@dev.mellanox.co.il> Roland Dreier wrote: > > I set these flags for mlx4 and mthca in patches 5/16 and 6/16 respectively. > > Ah I see. Seems strange to use device->flags for just the checksum > offload stuff and device_cap_flags for everything else though. I > don't see any major issues with moving device_cap_flags into struct > ib_device and not forcing a device query call, but I guess we need to > convince ourselves that the flags would never change at runtime, and > anyway that needs to be a separate change from this IPoIB stuff. I think the cleanest way is to call query device caps and save a copy of these flags in the private data. If we agree on this approach I can send patches with this change. > > > I my machines I can see the flags set by inspecting /sys/class/net/ib*/features > > I guess that's an OFED patch? Is there any interest in submitting it > upstream. All the patches I sent were checked also on the "for-2.6.25" branch and gone through basic testing. I also checked their variants on ofed. I think we should push them upstream. From jackm at dev.mellanox.co.il Mon Feb 11 23:43:33 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 12 Feb 2008 09:43:33 +0200 Subject: [ofa-general] Re: [PATCH] mlx4: fix build break In-Reply-To: References: <20080211022257.GA26656@lixom.net> Message-ID: <200802120943.34388.jackm@dev.mellanox.co.il> On Tuesday 12 February 2008 00:18, Roland Dreier wrote: > Thanks, applied. > > Jack, I thought you guys tested the build on powerpc. How did this > sneak through? > It did not sneak through, because the problem does not exist in the OFED git. 
The following commit was performed to git://git.openfabrics.org/ofed_1_3/linux-2.6.git on Sept 25, 2007: ======================================= commit 4a5709b81dfd249c98271801ddc01decb7acd466 Author: Eli Cohen Date: Tue Sep 25 12:41:30 2007 +0200 add missing include file. ia64 requires it. Signed-off-by: Eli Cohen diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index f8d63d3..704a56b 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mlx4.h" ======================================== I guess this just fell through the cracks with regard to posting it to the list. Sorry about that. - Jack From ogerlitz at voltaire.com Tue Feb 12 00:08:08 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 12 Feb 2008 10:08:08 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <476C2F62.2020900@linux.vnet.ibm.com> References: <476C2F62.2020900@linux.vnet.ibm.com> Message-ID: <47B153E8.2090803@voltaire.com> Pradeep Satyanarayana wrote: > I have seen sporadic errors while running the HCAs in connected mode. > These errors appear to be related to the speeds of the different HCAs. > Increasing the retry counts solves the problem. Hi Pradeep, I see now that you have sent tonight this patch (posted on Dec 2007 to the mailing list and never discussed) to be included in ofed 1.3. I think more details are needed here on the problem; from the above three lines it seems to be more of a workaround than a solution. What is the problem here? > I looked at the RFC as regards to warnings about retries. The warnings > is to make sure that the IB timeouts do not interfere with TCP timeouts. > The TCP timeout are so much larger than the IB timeouts (even with > non zero values) that we are nowhere close to interfering with TCP > timeouts. IP provides "unreliable datagram service" to upper layers, hence I don't really see a point in implementing it over a reliable HW transport. 
This was discussed on the list, and suggestions on how to move to IPoIB/CM over UC transports were made, not yet an implementation... Saying all that, I don't think we want to have --any RNR retries--, as for retries, I am open to hear what others think. Or. > > Signed-off-by: Pradeep Satyanarayana > --- > > --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-12-21 16:06:49.000000000 -0500 > +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-12-21 16:07:28.000000000 -0500 > @@ -990,8 +990,8 @@ static int ipoib_cm_send_req(struct net_ > req.responder_resources = 4; > req.remote_cm_response_timeout = 20; > req.local_cm_response_timeout = 20; > - req.retry_count = 0; /* RFC draft warns against retries */ > - req.rnr_retry_count = 0; /* RFC draft warns against retries */ > + req.retry_count = 3; > + req.rnr_retry_count = 3; > req.max_cm_retries = 15; > req.srq = ipoib_cm_has_srq(dev); > return ib_send_cm_req(id, &req); From ogerlitz at voltaire.com Tue Feb 12 00:56:16 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 12 Feb 2008 10:56:16 +0200 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offloadsupport In-Reply-To: References: <1201710660.28794.170.camel@mtls03> <4e6a6b3c0802090857l5fa3935bq782df0a138e10129@mail.gmail.com> Message-ID: <47B15F30.6090706@voltaire.com> Roland Dreier wrote: > Seems strange to use device->flags for just the checksum > offload stuff and device_cap_flags for everything else though. I > don't see any major issues with moving device_cap_flags into struct > ib_device and not forcing a device query call I also don't see major issue with this approach, but for better isolation between the ULPs and HW drivers, I would prefer to remain with the approach that requires the ULP to issue device query. Or. 
From maricel.estay at icce.cl Tue Feb 12 00:27:02 2008 From: maricel.estay at icce.cl (Resultados Concretos) Date: Tue, 12 Feb 2008 04:27:02 -0400 Subject: [ofa-general] Oferta Enero Message-ID: <20070123.SAMUEUSQMTBSYIBT@icce.cl> E-mail Masivos para tu campaña de Marketing $50.000 COMPLETA INSTALACIÓN DEL SOFTWARE DE ENVÍO EN UN MAXIMO DE 3 ORDENADORES + SOPORTE DE LUNES A DOMINGO + CONFIGURACION DE UNA CUENTA DE SALIDA ATRAVEZ DE UN SERVIDOR ESPECIAL QUE NO SE SATURA Y PUEDE MANDAR UN TOTAL DE 300.000 CORREOS DIARIOS NO PIERDA MAS SU DINERO EN PUBLICIDAD EL MARKETING DEL FUTURO YA ESTA AQUÍ Fonos: 8-7060025 o 3135931 VALOR INCLUYE UNA BASE DE DATOS DE REGALO DE 100.000 REGISTROS ACTUALIZADOS A NOVIEMBRE DE 2007 Y UN EXTRACTOR DE CORREOS -------------- next part -------------- An HTML attachment was scrubbed... URL: From broadcastinga1 at tfx.de Mon Feb 11 01:13:33 2008 From: broadcastinga1 at tfx.de (Estella Stallings) Date: Tue, 11 Feb 2008 10:13:33 +0100 Subject: [ofa-general] Save today 60% Off ALL Designer Footwear such as Gucci Prada Chanel Message-ID: <348000400.70613574145620@tfx.de> Welcome in our on-line shop!!! Enjoy DIRECT PRICING at more than 65% OFF on a wide variety of 2008 Collections from Versace, Prada, Chanel, Dior & More. We also carry TOP BRANDS such as Uggs, Gucci, Dsquared, D&G, Bally, Coach and much more. Find Loafers, Boots, High Heels, Sneakers and Casual Shoes from Brand Names at less than WHOLESALE prices. Selection is available for Women and Men, Shipping is FREE WorldWide, Trendy Fashion Footwear Sale of the YEAR! Forget Department Store Prices, Buy Designer Shoes Direct Visit Today! http://sshoes.freehostia.com/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From marisab at canadadrugs.com Tue Feb 12 01:31:58 2008 From: marisab at canadadrugs.com (marisab at canadadrugs.com) Date: Tue, 12 Feb 2008 14:31:58 +0500 Subject: [ofa-general] Valentine Invitation Message-ID: <000501c86d5a$1d096ba0$c055b45c@xhpf> Blind Love http://77.81.113.80/ From vlad at lists.openfabrics.org Tue Feb 12 03:07:59 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 12 Feb 2008 03:07:59 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080212-0200 daily build status Message-ID: <20080212110759.2C928E60ADA@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 
with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From benches at j-harbor.com Tue Feb 12 03:11:14 2008 From: benches at j-harbor.com (Lindsay Smith) Date: Tue, 12 Feb 2008 13:11:14 +0200 Subject: [ofa-general] M!cro soft Office_2OO8 for MAC 79. 
Retail 923 #save 2331# Message-ID: <000501c86d66$63e36d80$0100007f@juybhnr> kmt software officeready 4 pro adobe audition 2.0 - 49 ulead mediastudio pro v8.0 with extras - 79 microsoft vista ultimate - 89 paste #getsoftfast .com# ln lnternet Explorer Take away # before you paste ln lnternet Explorer corel painter ix for mac - 39 sas jmp statistical discovery 7 - 129 php maker 5 - 39 adobe framemaker 8.0 - 69 From tziporet at dev.mellanox.co.il Tue Feb 12 03:57:36 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 12 Feb 2008 13:57:36 +0200 Subject: [ofa-general] Re: [ewg] [ANNOUNCE] management tarballs release In-Reply-To: <20080211175955.GV11526@sashak.voltaire.com> References: <20080211175955.GV11526@sashak.voltaire.com> Message-ID: <47B189B0.9060807@mellanox.co.il> Sasha Khapyorsky wrote: > Hi, > > There is a new release of the management (OpenSM and infiniband > diagnostics) tarballs available in: > > http://www.openfabrics.org/downloads/management/ > > md5sum: > > 1c9764865b4c4f03529494f4272a7daf libibcommon-1.0.8.tar.gz > 879688a264e982600628dfa576c84cea libibumad-1.1.7.tar.gz > de275d0aba15b587f00ac1e30ba8b0e8 libibmad-1.1.6.tar.gz > 17678a2eaeb0cae9273b091da00cc7dc infiniband-diags-1.3.6.tar.gz > 92385653112cd915ec31542eb2d94714 opensm-3.1.9.tar.gz > 0f43afda110cfc285ef89637fcbf6ed7 opensm-3.2.0.tar.gz > > opensm-3.1.9 is recent OFED version. opensm-3.2.0 is recent master. > The rest is the same. > > > Sasha Is this the last planed release of OSM for OFED 1.3? 
Thanks, Tziporet From jeremy06 at free.fr Tue Feb 12 04:29:18 2008 From: jeremy06 at free.fr (Odessa Gold) Date: Tue, 12 Feb 2008 13:29:18 +0100 Subject: [ofa-general] Software Range Expansion - Price Downfall Message-ID: <134882603.75286118612899@free.fr> Industry standard software at nominal feeAnybody who is going to purchase legal PC and Mac software at low prices will definitely find necessary software products here, hether he/she is a corporate buyer, or owner of a small company, or just purchasing software for his/her own needs.View what we got to propose http://geocities.com/lennyhurley568/Most demanding software are:*Microsoft Windows XP Professional with SP2: Retail price today - $269.99; Our only today - $49.95 *Microsoft Office 2007 Enterprise: Retail price today - $899.00; Our only - $79.95 *Microsoft Visual Basic RAD Professional v1.01: Retail price for this time - $45.00; Our just - $19.95 *Acronis Disk Editor v6.0.360: Retail price now - $29.99; Our only for today - $19.95 *Adobe Acrobat 3D: Retail price for now - $995.00; Our only for today - $59.95 *Corel Designer 10.0: Retail price this day - $310.00; Our just - $39.95 *Macromedia Flash Professional 8: Retail price now - $699.00; Our now just - $49.95 *Corel Procreate KPT Effects: Retail price for this time - $199.00; Our now just - $19.95Check what we have to propose http://geocities.com/lennyhurley568/ Therefore Ill lie with himWhen. Dian no queen of virgins. If it should proveThat thou art. What will Count Rousillon do. Follow us.In what he did. Sickens but to speak a truth. Is but sluttish if itsmell so. My lord your son with a patch. -------------- next part -------------- An HTML attachment was scrubbed... URL: From 99ways at email.msn.com Tue Feb 12 05:44:15 2008 From: 99ways at email.msn.com (Stefan Lake) Date: Tue, 12 Feb 2008 21:44:15 +0800 Subject: [ofa-general] What are you up to? Message-ID: <01c86dc0$696da180$9d284fda@99ways> Hello! I am tired this evening. 
I am nice girl that would like to chat with you. Email me at Sandra at TheHealCare.info only, because I am using my friend's email to write this. You will see some of my private pics. From lillianwright at sympatico.ca Tue Feb 12 05:25:01 2008 From: lillianwright at sympatico.ca (Kendal Dye) Date: Tue, 12 Feb 2008 14:25:01 +0100 Subject: [ofa-general] Read this and choose your New Reality Message-ID: <250451889.25004646231072@sympatico.ca> Read other men's opinions about our product:"I've always been embarrassed to take my clothes off as I was insecure about my cock size. I tried the VPXL and I'm a whole new man. I'm much more confident now and I'm happy to know that my girlfriend is satisfied with our sexual life". Bertrand, Floral Park.Order our VPXL and start a new life of success and happiness. http://geocities.com/gilbertmclaughlin246/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From swise at opengridcomputing.com Tue Feb 12 06:29:48 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 12 Feb 2008 08:29:48 -0600 Subject: [ofa-general] UCMA_MAX_BACKLOG Message-ID: <47B1AD5C.6040904@opengridcomputing.com> Hey Sean, From ucma.c: > enum { > UCMA_MAX_BACKLOG = 128 > }; Why the arbitrary max limit? 128 doesn't seem very large. And the result if the backlog is exceeded is a connection teardown for iWARP. From sashak at voltaire.com Tue Feb 12 08:45:31 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 12 Feb 2008 16:45:31 +0000 Subject: [ofa-general] Re: [ewg] [ANNOUNCE] management tarballs release In-Reply-To: <47B189B0.9060807@mellanox.co.il> References: <20080211175955.GV11526@sashak.voltaire.com> <47B189B0.9060807@mellanox.co.il> Message-ID: <20080212164531.GA16074@sashak.voltaire.com> On 13:57 Tue 12 Feb , Tziporet Koren wrote: > Is this the last planed release of OSM for OFED 1.3? It would be ideal (if there will not be new bugs or so). 
Sasha From sean.hefty at intel.com Tue Feb 12 09:00:40 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 12 Feb 2008 09:00:40 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B153E8.2090803@voltaire.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> Message-ID: <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> >Saying all that, I don't think we want to have --any RNR retries--, as >for retries, I am open to hear what others think. I'm really not all that familiar with ipoib protocol, but if it's being implemented over an RC connection, then adding an RNR retry seems to make sense to me. I believe using UC is better, but if it's over RC, I don't know that we want to take the hit of tearing down and re-establishing the connection just because we have a fast sender. (This is just an opinion based on no fact whatsoever.) - Sean From sean.hefty at intel.com Tue Feb 12 09:07:16 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 12 Feb 2008 09:07:16 -0800 Subject: [ofa-general] RE: UCMA_MAX_BACKLOG In-Reply-To: <47B1AD5C.6040904@opengridcomputing.com> References: <47B1AD5C.6040904@opengridcomputing.com> Message-ID: <000101c86d99$b827d060$ff0da8c0@amr.corp.intel.com> >> enum { >> UCMA_MAX_BACKLOG = 128 >> }; > > >Why the arbitrary max limit? 128 doesn't seem very large. And the >result if the backlog is exceeded is a connection teardown for iWARP. The purpose is to prevent an app from queuing an unlimited number of connect request events. - Sean From sashak at voltaire.com Tue Feb 12 09:38:35 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 12 Feb 2008 17:38:35 +0000 Subject: [ofa-general] [PATCH] opensm: remove redundant moving_to_master flag Message-ID: <20080212173835.GD16074@sashak.voltaire.com> As currently used this flag just duplicates first_time_master_sweep flag. Replace its usage. 
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_subnet.h | 6 ------ opensm/opensm/osm_console.c | 2 -- opensm/opensm/osm_sm_state_mgr.c | 10 ++-------- opensm/opensm/osm_state_mgr.c | 19 +++++-------------- 4 files changed, 7 insertions(+), 30 deletions(-) diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h index b5f2b6d..967c067 100644 --- a/opensm/include/opensm/osm_subnet.h +++ b/opensm/include/opensm/osm_subnet.h @@ -561,7 +561,6 @@ typedef struct _osm_subn { boolean_t subnet_initialization_error; boolean_t force_heavy_sweep; boolean_t in_sweep_hop_0; - boolean_t moved_to_master_state; boolean_t first_time_master_sweep; boolean_t coming_out_of_standby; unsigned need_update; @@ -665,11 +664,6 @@ typedef struct _osm_subn { * switch info we need to signal somehow not to continue * the sweeping. * -* moved_to_master_state -* Used for the writing of "SUBNET UP" into /var/log/messages. -* Will be TRUE when the SM switches to Master state, and returned -* to FALSE once the sunbet is up. -* * first_time_master_sweep * This flag is used for the PortInfo setting. 
On the first sweep as master * (meaning after moving from Standby|Discovering state), the SM must send diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c index 4597bde..7075817 100644 --- a/opensm/opensm/osm_console.c +++ b/opensm/opensm/osm_console.c @@ -335,13 +335,11 @@ static void print_status(osm_opensm_t * p_osm, FILE * out) " Ignore existing lfts : %d\n" " Subnet Init errors : %d\n" " In sweep hop 0 : %d\n" - " Moved to master state : %d\n" " First time master sweep : %d\n" " Coming out of standby : %d\n", p_osm->subn.ignore_existing_lfts, p_osm->subn.subnet_initialization_error, p_osm->subn.in_sweep_hop_0, - p_osm->subn.moved_to_master_state, p_osm->subn.first_time_master_sweep, p_osm->subn.coming_out_of_standby); fprintf(out, "\n"); diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index 9c1c1f3..eff9f19 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -334,11 +334,8 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t *sm, /* * Update the state of the SM to MASTER */ - /* Turn on the moved_to_master_state flag */ - sm->p_subn->moved_to_master_state = TRUE; /* Turn on the first_time_master_sweep flag */ - if (sm->p_subn->first_time_master_sweep == FALSE) - sm->p_subn->first_time_master_sweep = TRUE; + sm->p_subn->first_time_master_sweep = TRUE; sm->p_subn->sm_state = IB_SMINFO_STATE_MASTER; osm_report_sm_state(sm); /* @@ -406,11 +403,8 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t *sm, * Update the state to MASTER, and start sweeping * OPTIONAL: send ACKNOWLEDGE */ - /* Turn on the moved_to_master_state flag */ - sm->p_subn->moved_to_master_state = TRUE; /* Turn on the first_time_master_sweep flag */ - if (sm->p_subn->first_time_master_sweep == FALSE) - sm->p_subn->first_time_master_sweep = TRUE; + sm->p_subn->first_time_master_sweep = TRUE; /* Turn on the force_heavy_sweep - we want a * heavy sweep to occur on the first sweep of this SM. 
*/ sm->p_subn->force_heavy_sweep = TRUE; diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 2f3c366..7234053 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -77,15 +77,10 @@ static void __osm_state_mgr_up_msg(IN const osm_sm_t *sm) /* * This message should be written only once - when the * SM moves to Master state and the subnet is up for - * the first time. The change of state is marked with - * the subnet flag moved_to_master_state + * the first time. */ - if (sm->p_subn->moved_to_master_state == TRUE) { - osm_log(sm->p_log, OSM_LOG_SYS, "SUBNET UP\n"); /* Format Waived */ - /* clear the signal */ - sm->p_subn->moved_to_master_state = FALSE; - } else - osm_log(sm->p_log, OSM_LOG_INFO, "SUBNET UP\n"); /* Format Waived */ + osm_log(sm->p_log, sm->p_subn->first_time_master_sweep ? + OSM_LOG_SYS : OSM_LOG_INFO, "SUBNET UP\n"); osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, sm->p_subn->opt.sweep_interval ? @@ -1257,14 +1252,10 @@ _repeat_discovery: osm_log_msg_box(sm->p_log, OSM_LOG_ERROR, __FUNCTION__, "ERRORS DURING INITIALIZATION"); } else { - /* The subnet is up correctly - set the first_time_master_sweep - * flag (if it is on) to FALSE. */ - if (sm->p_subn->first_time_master_sweep == TRUE) - sm->p_subn->first_time_master_sweep = FALSE; - sm->p_subn->need_update = 0; - + sm->p_subn->need_update = 0; osm_dump_all(sm->p_subn->p_osm); __osm_state_mgr_up_msg(sm); + sm->p_subn->first_time_master_sweep = FALSE; if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) osm_sa_db_file_dump(sm->p_subn->p_osm); -- 1.5.4.rc5 From sashak at voltaire.com Tue Feb 12 09:41:43 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 12 Feb 2008 17:41:43 +0000 Subject: [ofa-general] [PATCH] opensm: kill drop_mgr, link_mgr and mcast_mgr SM sub-objects Message-ID: <20080212174143.GE16074@sashak.voltaire.com> Remove dummy drop_mgr, link_mgr and mcast_mgr SM sub-objects. 
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_sa.h | 1 + opensm/include/opensm/osm_sm.h | 6 - opensm/opensm/osm_drop_mgr.c | 196 +++++++++-------------- opensm/opensm/osm_link_mgr.c | 127 +++++---------- opensm/opensm/osm_mcast_mgr.c | 344 ++++++++++++++++++---------------------- opensm/opensm/osm_perfmgr.c | 4 +- opensm/opensm/osm_sm.c | 19 --- opensm/opensm/osm_state_mgr.c | 21 ++- 8 files changed, 290 insertions(+), 428 deletions(-) diff --git a/opensm/include/opensm/osm_sa.h b/opensm/include/opensm/osm_sa.h index c97cea6..a150695 100644 --- a/opensm/include/opensm/osm_sa.h +++ b/opensm/include/opensm/osm_sa.h @@ -61,6 +61,7 @@ #include #include #include +#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { diff --git a/opensm/include/opensm/osm_sm.h b/opensm/include/opensm/osm_sm.h index e77222d..f4339e4 100644 --- a/opensm/include/opensm/osm_sm.h +++ b/opensm/include/opensm/osm_sm.h @@ -63,11 +63,8 @@ #include #include #include -#include -#include #include #include -#include #include #include @@ -139,10 +136,7 @@ typedef struct osm_sm { osm_sm_mad_ctrl_t mad_ctrl; osm_lid_mgr_t lid_mgr; osm_ucast_mgr_t ucast_mgr; - osm_link_mgr_t link_mgr; - osm_drop_mgr_t drop_mgr; osm_sweep_fail_ctrl_t sweep_fail_ctrl; - osm_mcast_mgr_t mcast_mgr; cl_disp_reg_handle_t ni_disp_h; cl_disp_reg_handle_t pi_disp_h; cl_disp_reg_handle_t nd_disp_h; diff --git a/opensm/opensm/osm_drop_mgr.c b/opensm/opensm/osm_drop_mgr.c index 40534ab..8b0a36e 100644 --- a/opensm/opensm/osm_drop_mgr.c +++ b/opensm/opensm/osm_drop_mgr.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -69,56 +68,17 @@ /********************************************************************** **********************************************************************/ -void osm_drop_mgr_construct(IN osm_drop_mgr_t * const p_mgr) -{ - CL_ASSERT(p_mgr); - memset(p_mgr, 0, sizeof(*p_mgr)); -} - -/********************************************************************** 
- **********************************************************************/ -void osm_drop_mgr_destroy(IN osm_drop_mgr_t * const p_mgr) -{ - CL_ASSERT(p_mgr); - - OSM_LOG_ENTER(p_mgr->p_log, osm_drop_mgr_destroy); - - OSM_LOG_EXIT(p_mgr->p_log); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_drop_mgr_init(IN osm_drop_mgr_t * const p_mgr, IN osm_sm_t * sm) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER(sm->p_log, osm_drop_mgr_init); - - osm_drop_mgr_construct(p_mgr); - - p_mgr->sm = sm; - p_mgr->p_log = sm->p_log; - p_mgr->p_subn = sm->p_subn; - p_mgr->p_lock = sm->p_lock; - - OSM_LOG_EXIT(p_mgr->p_log); - return (status); -} - -/********************************************************************** - **********************************************************************/ static void -__osm_drop_mgr_remove_router(IN const osm_drop_mgr_t * const p_mgr, +__osm_drop_mgr_remove_router(osm_sm_t *sm, IN const ib_net64_t portguid) { osm_router_t *p_rtr; cl_qmap_t *p_rtr_guid_tbl; - p_rtr_guid_tbl = &p_mgr->p_subn->rtr_guid_tbl; + p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl; p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid); if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_remove_router: " "Cleaned router for port guid 0x%016" PRIx64 "\n", cl_ntoh64(portguid)); @@ -129,7 +89,7 @@ __osm_drop_mgr_remove_router(IN const osm_drop_mgr_t * const p_mgr, /********************************************************************** **********************************************************************/ static void -drop_mgr_clean_physp(IN const osm_drop_mgr_t * const p_mgr, +drop_mgr_clean_physp(osm_sm_t *sm, IN osm_physp_t * p_physp) { osm_physp_t *p_remote_physp; @@ -137,7 +97,7 @@ drop_mgr_clean_physp(IN const osm_drop_mgr_t * 
const p_mgr, p_remote_physp = osm_physp_get_remote(p_physp); if (p_remote_physp) { - p_remote_port = osm_get_port_by_guid(p_mgr->p_subn, + p_remote_port = osm_get_port_by_guid(sm->p_subn, p_remote_physp->port_guid); if (p_remote_port) { @@ -149,14 +109,14 @@ drop_mgr_clean_physp(IN const osm_drop_mgr_t * const p_mgr, if (p_remote_port->discovery_count && osm_physp_get_port_state(p_remote_physp) == IB_LINK_ACTIVE) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "drop_mgr_clean_physp: " "Forcing new heavy sweep. Remote " "port 0x%016" PRIx64 " port num: 0x%X " "was recognized in ACTIVE state\n", cl_ntoh64(p_remote_physp->port_guid), p_remote_physp->port_num); - p_mgr->p_subn->force_heavy_sweep = TRUE; + sm->p_subn->force_heavy_sweep = TRUE; } /* If the remote node is ca or router - need to remove the remote port, @@ -164,7 +124,7 @@ drop_mgr_clean_physp(IN const osm_drop_mgr_t * const p_mgr, discovery count of the remote port. */ if (!p_remote_physp->p_node->sw) { p_remote_port->discovery_count = 0; - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "drop_mgr_clean_physp: Resetting discovery count of node: " "0x%016" PRIx64 " port num:0x%X\n", cl_ntoh64(osm_node_get_node_guid @@ -173,7 +133,7 @@ drop_mgr_clean_physp(IN const osm_drop_mgr_t * const p_mgr, } } - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "drop_mgr_clean_physp: " "Unlinking local node 0x%016" PRIx64 ", port 0x%X" "\n\t\t\t\tand remote node 0x%016" PRIx64 @@ -188,7 +148,7 @@ drop_mgr_clean_physp(IN const osm_drop_mgr_t * const p_mgr, } - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "drop_mgr_clean_physp: Clearing node 0x%016" PRIx64 " physical port number 0x%X\n", cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)), @@ -200,7 +160,7 @@ drop_mgr_clean_physp(IN const osm_drop_mgr_t * const p_mgr, /********************************************************************** 
**********************************************************************/ static void -__osm_drop_mgr_remove_port(IN const osm_drop_mgr_t * const p_mgr, +__osm_drop_mgr_remove_port(osm_sm_t *sm, IN osm_port_t * p_port) { ib_net64_t port_guid; @@ -218,58 +178,58 @@ __osm_drop_mgr_remove_port(IN const osm_drop_mgr_t * const p_mgr, ib_mad_notice_attr_t notice; ib_api_status_t status; - OSM_LOG_ENTER(p_mgr->p_log, __osm_drop_mgr_remove_port); + OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_remove_port); port_guid = osm_port_get_guid(p_port); - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_remove_port: " "Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); p_port_check = - (osm_port_t *) cl_qmap_remove(&p_mgr->p_subn->port_guid_tbl, + (osm_port_t *) cl_qmap_remove(&sm->p_subn->port_guid_tbl, port_guid); if (p_port_check != p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_drop_mgr_remove_port: ERR 0101: " "Port 0x%016" PRIx64 " not in guid table\n", cl_ntoh64(port_guid)); goto Exit; } - p_sm_guid_tbl = &p_mgr->p_subn->sm_guid_tbl; + p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl; p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid); if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) { /* need to remove this item */ - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_remove_port: " "Cleaned SM for port guid\n"); free(p_sm); } - __osm_drop_mgr_remove_router(p_mgr, port_guid); + __osm_drop_mgr_remove_router(sm, port_guid); osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_remove_port: " "Clearing abandoned LID range [0x%X,0x%X]\n", min_lid_ho, max_lid_ho); - p_port_lid_tbl = &p_mgr->p_subn->port_lid_tbl; + p_port_lid_tbl = &sm->p_subn->port_lid_tbl; for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++) 
cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL); - drop_mgr_clean_physp(p_mgr, p_port->p_physp); + drop_mgr_clean_physp(sm, p_port->p_physp); p_mcm = (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list); while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) { p_mgrp = - (osm_mgrp_t *) cl_qmap_get(&p_mgr->p_subn->mgrp_mlid_tbl, + (osm_mgrp_t *) cl_qmap_get(&sm->p_subn->mgrp_mlid_tbl, p_mcm->mlid); if (p_mgrp != - (osm_mgrp_t *) cl_qmap_end(&p_mgr->p_subn->mgrp_mlid_tbl)) { - osm_mgrp_remove_port(p_mgr->p_subn, p_mgr->p_log, + (osm_mgrp_t *) cl_qmap_end(&sm->p_subn->mgrp_mlid_tbl)) { + osm_mgrp_remove_port(sm->p_subn, sm->p_log, p_mgrp, p_port->guid); osm_mcm_info_delete((osm_mcm_info_t *) p_mcm); } @@ -290,30 +250,30 @@ __osm_drop_mgr_remove_port(IN const osm_drop_mgr_t * const p_mgr, /* endport ceases to be reachable */ notice.g_or_v.generic.trap_num = CL_HTON16(65); /* The sm_base_lid is saved in network order already. */ - notice.issuer_lid = p_mgr->p_subn->sm_base_lid; + notice.issuer_lid = sm->p_subn->sm_base_lid; /* following C14-72.1.2 and table 119 p725 */ /* we need to provide the GID */ - port_gid.unicast.prefix = p_mgr->p_subn->opt.subnet_prefix; + port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; port_gid.unicast.interface_id = port_guid; memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid), sizeof(ib_gid_t)); /* According to page 653 - the issuer gid in this case of trap is the SM gid, since the SM is the initiator of this trap. 
*/ - notice.issuer_gid.unicast.prefix = p_mgr->p_subn->opt.subnet_prefix; - notice.issuer_gid.unicast.interface_id = p_mgr->p_subn->sm_port_guid; + notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; + notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid; - status = osm_report_notice(p_mgr->p_log, p_mgr->p_subn, &notice); + status = osm_report_notice(sm->p_log, sm->p_subn, &notice); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_drop_mgr_remove_port: ERR 0103: " "Error sending trap reports (%s)\n", ib_get_err_str(status)); goto Exit; } - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_INFO)) { - osm_log(p_mgr->p_log, OSM_LOG_INFO, + if (osm_log_is_active(sm->p_log, OSM_LOG_INFO)) { + osm_log(sm->p_log, OSM_LOG_INFO, "__osm_drop_mgr_remove_port: " "Removed port with GUID:0x%016" PRIx64 " LID range [0x%X,0x%X] of node:%s\n", @@ -323,27 +283,27 @@ __osm_drop_mgr_remove_port(IN const osm_drop_mgr_t * const p_mgr, } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** **********************************************************************/ static void -__osm_drop_mgr_remove_switch(IN const osm_drop_mgr_t * const p_mgr, +__osm_drop_mgr_remove_switch(osm_sm_t *sm, IN osm_node_t * p_node) { osm_switch_t *p_sw; cl_qmap_t *p_sw_guid_tbl; ib_net64_t node_guid; - OSM_LOG_ENTER(p_mgr->p_log, __osm_drop_mgr_remove_switch); + OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_remove_switch); node_guid = osm_node_get_node_guid(p_node); - p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl; + p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl; p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid); if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_drop_mgr_remove_switch: ERR 0102: " "Node 0x%016" PRIx64 " not in switch table\n",
cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -352,13 +312,13 @@ __osm_drop_mgr_remove_switch(IN const osm_drop_mgr_t * const p_mgr, osm_switch_delete(&p_sw); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** **********************************************************************/ static boolean_t -__osm_drop_mgr_process_node(IN const osm_drop_mgr_t * const p_mgr, +__osm_drop_mgr_process_node(osm_sm_t *sm, IN osm_node_t * p_node) { osm_physp_t *p_physp; @@ -369,9 +329,9 @@ __osm_drop_mgr_process_node(IN const osm_drop_mgr_t * const p_mgr, ib_net64_t port_guid; boolean_t return_val = FALSE; - OSM_LOG_ENTER(p_mgr->p_log, __osm_drop_mgr_process_node); + OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_process_node); - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_process_node: " "Unreachable node 0x%016" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -386,25 +346,25 @@ __osm_drop_mgr_process_node(IN const osm_drop_mgr_t * const p_mgr, if (p_physp) { port_guid = osm_physp_get_port_guid(p_physp); - p_port = osm_get_port_by_guid(p_mgr->p_subn, port_guid); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (p_port) - __osm_drop_mgr_remove_port(p_mgr, p_port); + __osm_drop_mgr_remove_port(sm, p_port); else - drop_mgr_clean_physp(p_mgr, p_physp); + drop_mgr_clean_physp(sm, p_physp); } } return_val = TRUE; if (p_node->sw) - __osm_drop_mgr_remove_switch(p_mgr, p_node); + __osm_drop_mgr_remove_switch(sm, p_node); p_node_check = - (osm_node_t *) cl_qmap_remove(&p_mgr->p_subn->node_guid_tbl, + (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl, osm_node_get_node_guid(p_node)); if (p_node_check != p_node) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_drop_mgr_process_node: ERR 0105: " "Node 0x%016" PRIx64 " not in guid table\n", cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -413,14 +373,14 @@ 
__osm_drop_mgr_process_node(IN const osm_drop_mgr_t * const p_mgr, /* free memory allocated to node */ osm_node_delete(&p_node); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (return_val); } /********************************************************************** **********************************************************************/ static void -__osm_drop_mgr_check_node(IN const osm_drop_mgr_t * const p_mgr, +__osm_drop_mgr_check_node(osm_sm_t *sm, IN osm_node_t * p_node) { ib_net64_t node_guid; @@ -428,12 +388,12 @@ __osm_drop_mgr_check_node(IN const osm_drop_mgr_t * const p_mgr, osm_port_t *p_port; ib_net64_t port_guid; - OSM_LOG_ENTER(p_mgr->p_log, __osm_drop_mgr_check_node); + OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_check_node); node_guid = osm_node_get_node_guid(p_node); if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_drop_mgr_check_node: ERR 0107: " "Node 0x%016" PRIx64 " is not a switch node\n", cl_ntoh64(node_guid)); @@ -443,59 +403,59 @@ __osm_drop_mgr_check_node(IN const osm_drop_mgr_t * const p_mgr, /* Make sure we have a switch object for this node */ if (!p_node->sw) { /* We do not have switch info for this node */ - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_check_node: " "Node 0x%016" PRIx64 " no switch in table\n", cl_ntoh64(node_guid)); - __osm_drop_mgr_process_node(p_mgr, p_node); + __osm_drop_mgr_process_node(sm, p_node); goto Exit; } /* Make sure we have a port object for port zero */ p_physp = osm_node_get_physp_ptr(p_node, 0); if (!p_physp) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_check_node: " "Node 0x%016" PRIx64 " no valid physical port 0\n", cl_ntoh64(node_guid)); - __osm_drop_mgr_process_node(p_mgr, p_node); + __osm_drop_mgr_process_node(sm, p_node); goto Exit; } port_guid = osm_physp_get_port_guid(p_physp); - p_port = 
osm_get_port_by_guid(p_mgr->p_subn, port_guid); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_check_node: " "Node 0x%016" PRIx64 " has no port object\n", cl_ntoh64(node_guid)); - __osm_drop_mgr_process_node(p_mgr, p_node); + __osm_drop_mgr_process_node(sm, p_node); goto Exit; } if (p_port->discovery_count == 0) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_check_node: " "Node 0x%016" PRIx64 " port has discovery count zero\n", cl_ntoh64(node_guid)); - __osm_drop_mgr_process_node(p_mgr, p_node); + __osm_drop_mgr_process_node(sm, p_node); goto Exit; } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return; } /********************************************************************** **********************************************************************/ -void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) +void osm_drop_mgr_process(osm_sm_t *sm) { cl_qmap_t *p_node_guid_tbl; cl_qmap_t *p_port_guid_tbl; @@ -506,14 +466,14 @@ void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) ib_net64_t port_guid; ib_net64_t node_guid; - CL_ASSERT(p_mgr); + CL_ASSERT(sm); - OSM_LOG_ENTER(p_mgr->p_log, osm_drop_mgr_process); + OSM_LOG_ENTER(sm->p_log, osm_drop_mgr_process); - p_node_guid_tbl = &p_mgr->p_subn->node_guid_tbl; - p_port_guid_tbl = &p_mgr->p_subn->port_guid_tbl; + p_node_guid_tbl = &sm->p_subn->node_guid_tbl; + p_port_guid_tbl = &sm->p_subn->port_guid_tbl; - CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) { @@ -524,9 +484,9 @@ void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) CL_ASSERT(cl_qmap_key(&p_node->map_item) == osm_node_get_node_guid(p_node)); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) 
{ + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { node_guid = osm_node_get_node_guid(p_node); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_drop_mgr_process: " "Checking node 0x%016" PRIx64 "\n", cl_ntoh64(node_guid)); @@ -538,7 +498,7 @@ void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) should therefore be removed from the subnet object. */ if (p_node->discovery_count == 0) - __osm_drop_mgr_process_node(p_mgr, p_node); + __osm_drop_mgr_process_node(sm, p_node); } /* @@ -554,9 +514,9 @@ void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) p_next_node = (osm_node_t *) cl_qmap_next(&p_next_node->map_item); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { node_guid = osm_node_get_node_guid(p_node); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_drop_mgr_process: " "Checking full discovery of node 0x%016" PRIx64 "\n", cl_ntoh64(node_guid)); @@ -566,7 +526,7 @@ void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) continue; /* We are handling a switch node */ - __osm_drop_mgr_check_node(p_mgr, p_node); + __osm_drop_mgr_check_node(sm, p_node); } p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl); @@ -578,9 +538,9 @@ void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) CL_ASSERT(cl_qmap_key(&p_port->map_item) == osm_port_get_guid(p_port)); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { port_guid = osm_port_get_guid(p_port); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_drop_mgr_process: " "Checking port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); @@ -590,9 +550,9 @@ void osm_drop_mgr_process(IN const osm_drop_mgr_t * const p_mgr) If the port is unreachable, remove it from the guid table. 
*/ if (p_port->discovery_count == 0) - __osm_drop_mgr_remove_port(p_mgr, p_port); + __osm_drop_mgr_remove_port(sm, p_port); } - CL_PLOCK_RELEASE(p_mgr->p_lock); - OSM_LOG_EXIT(p_mgr->p_log); + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG_EXIT(sm->p_log); } diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index 19cb27d..723d9f6 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include @@ -60,44 +59,8 @@ /********************************************************************** **********************************************************************/ -void osm_link_mgr_construct(IN osm_link_mgr_t * const p_mgr) -{ - memset(p_mgr, 0, sizeof(*p_mgr)); -} - -/********************************************************************** - **********************************************************************/ -void osm_link_mgr_destroy(IN osm_link_mgr_t * const p_mgr) -{ - OSM_LOG_ENTER(p_mgr->p_log, osm_link_mgr_destroy); - - OSM_LOG_EXIT(p_mgr->p_log); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_link_mgr_init(IN osm_link_mgr_t * const p_mgr, IN osm_sm_t * sm) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER(sm->p_log, osm_link_mgr_init); - - osm_link_mgr_construct(p_mgr); - - p_mgr->sm = sm; - p_mgr->p_log = sm->p_log; - p_mgr->p_subn = sm->p_subn; - p_mgr->p_lock = sm->p_lock; - - OSM_LOG_EXIT(p_mgr->p_log); - return (status); -} - -/********************************************************************** - **********************************************************************/ static boolean_t -__osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, +__osm_link_mgr_set_physp_pi(osm_sm_t *sm, IN osm_physp_t * const p_physp, IN uint8_t const port_state) { @@ -114,7 +77,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * 
const p_mgr, boolean_t send_set = FALSE; osm_physp_t *p_remote_physp; - OSM_LOG_ENTER(p_mgr->p_log, __osm_link_mgr_set_physp_pi); + OSM_LOG_ENTER(sm->p_log, __osm_link_mgr_set_physp_pi); p_node = osm_physp_get_node_ptr(p_physp); @@ -127,7 +90,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, For base port 0 the following parameters are not valid (p822, table 145). */ if (!p_node->sw) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_link_mgr_set_physp_pi: ERR 4201: " "Cannot find switch by guid: 0x%" PRIx64 "\n", cl_ntoh64(p_node->node_info.node_guid)); @@ -138,8 +101,8 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, == FALSE) { /* This means the switch doesn't support enhanced port 0. Can skip it. */ - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_link_mgr_set_physp_pi: " "Skipping port 0, GUID 0x%016" PRIx64 "\n", @@ -184,12 +147,12 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, /* The following fields are relevant only for CA port, router, or Enh. 
SP0 */ if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH || port_num == 0) { - p_pi->m_key = p_mgr->p_subn->opt.m_key; + p_pi->m_key = sm->p_subn->opt.m_key; if (memcmp(&p_pi->m_key, &p_old_pi->m_key, sizeof(p_pi->m_key))) send_set = TRUE; - p_pi->subnet_prefix = p_mgr->p_subn->opt.subnet_prefix; + p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix; if (memcmp(&p_pi->subnet_prefix, &p_old_pi->subnet_prefix, sizeof(p_pi->subnet_prefix))) @@ -201,24 +164,24 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, send_set = TRUE; /* we are initializing the ports with our local sm_base_lid */ - p_pi->master_sm_base_lid = p_mgr->p_subn->sm_base_lid; + p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid; if (memcmp(&p_pi->master_sm_base_lid, &p_old_pi->master_sm_base_lid, sizeof(p_pi->master_sm_base_lid))) send_set = TRUE; p_pi->m_key_lease_period = - p_mgr->p_subn->opt.m_key_lease_period; + sm->p_subn->opt.m_key_lease_period; if (memcmp(&p_pi->m_key_lease_period, &p_old_pi->m_key_lease_period, sizeof(p_pi->m_key_lease_period))) send_set = TRUE; if (esp0 == FALSE) - p_pi->mkey_lmc = p_mgr->p_subn->opt.lmc; + p_pi->mkey_lmc = sm->p_subn->opt.lmc; else { - if (p_mgr->p_subn->opt.lmc_esp0) - p_pi->mkey_lmc = p_mgr->p_subn->opt.lmc; + if (sm->p_subn->opt.lmc_esp0) + p_pi->mkey_lmc = sm->p_subn->opt.lmc; else p_pi->mkey_lmc = 0; } @@ -227,7 +190,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, send_set = TRUE; ib_port_info_set_timeout(p_pi, - p_mgr->p_subn->opt. + sm->p_subn->opt. subnet_timeout); if (ib_port_info_get_timeout(p_pi) != ib_port_info_get_timeout(p_old_pi)) @@ -242,7 +205,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, if (osm_node_get_type(osm_physp_get_node_ptr(p_physp)) == IB_NODE_TYPE_ROUTER) { ib_port_info_set_hoq_lifetime(p_pi, - p_mgr->p_subn-> + sm->p_subn-> opt. 
leaf_head_of_queue_lifetime); } else @@ -254,23 +217,23 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, (osm_physp_get_node_ptr(p_remote_physp)) != IB_NODE_TYPE_SWITCH) { ib_port_info_set_hoq_lifetime(p_pi, - p_mgr-> + sm-> p_subn-> opt. leaf_head_of_queue_lifetime); ib_port_info_set_vl_stall_count(p_pi, - p_mgr-> + sm-> p_subn-> opt. leaf_vl_stall_count); } else { ib_port_info_set_hoq_lifetime(p_pi, - p_mgr-> + sm-> p_subn-> opt. head_of_queue_lifetime); ib_port_info_set_vl_stall_count(p_pi, - p_mgr-> + sm-> p_subn-> opt. vl_stall_count); @@ -284,9 +247,9 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, } ib_port_info_set_phy_and_overrun_err_thd(p_pi, - p_mgr->p_subn->opt. + sm->p_subn->opt. local_phy_errors_threshold, - p_mgr->p_subn->opt. + sm->p_subn->opt. overrun_errors_threshold); if (memcmp(&p_pi->error_threshold, &p_old_pi->error_threshold, sizeof(p_pi->error_threshold))) @@ -302,12 +265,12 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, sizeof(p_pi->link_width_enabled))) send_set = TRUE; - if (p_mgr->p_subn->opt.force_link_speed && - (p_mgr->p_subn->opt.force_link_speed != 15 || + if (sm->p_subn->opt.force_link_speed && + (sm->p_subn->opt.force_link_speed != 15 || ib_port_info_get_link_speed_enabled(p_pi) != ib_port_info_get_link_speed_sup(p_pi))) { ib_port_info_set_link_speed_enabled(p_pi, - p_mgr->p_subn->opt. + sm->p_subn->opt. 
force_link_speed); if (memcmp(&p_pi->link_speed, &p_old_pi->link_speed, sizeof(p_pi->link_speed))) @@ -316,9 +279,9 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, /* calc new op_vls and mtu */ op_vls = - osm_physp_calc_link_op_vls(p_mgr->p_log, p_mgr->p_subn, + osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp); - mtu = osm_physp_calc_link_mtu(p_mgr->p_log, p_physp); + mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp); ib_port_info_set_neighbor_mtu(p_pi, mtu); if (ib_port_info_get_neighbor_mtu(p_pi) != @@ -331,7 +294,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, send_set = TRUE; /* provide the vl_high_limit from the qos mgr */ - if (p_mgr->p_subn->opt.qos && + if (sm->p_subn->opt.qos && p_physp->vl_high_limit != p_old_pi->vl_high_limit) { send_set = TRUE; p_pi->vl_high_limit = p_physp->vl_high_limit; @@ -362,27 +325,25 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, PortInfoSet to every port. */ if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num - && p_mgr->p_subn->first_time_master_sweep == TRUE) + && sm->p_subn->first_time_master_sweep == TRUE) send_set = TRUE; if (send_set) - status = osm_req_set(p_mgr->sm, - osm_physp_get_dr_path_ptr(p_physp), - payload, - sizeof(payload), + status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp), + payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, cl_hton32(port_num), CL_DISP_MSGID_NONE, &context); Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return send_set; } /********************************************************************** **********************************************************************/ static osm_signal_t -__osm_link_mgr_process_node(IN osm_link_mgr_t * const p_mgr, +__osm_link_mgr_process_node(osm_sm_t *sm, IN osm_node_t * const p_node, IN const uint8_t link_state) { @@ -392,10 +353,10 @@ __osm_link_mgr_process_node(IN osm_link_mgr_t * const p_mgr, uint8_t current_state; osm_signal_t signal = OSM_SIGNAL_DONE; - 
OSM_LOG_ENTER(p_mgr->p_log, __osm_link_mgr_process_node); + OSM_LOG_ENTER(sm->p_log, __osm_link_mgr_process_node); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_link_mgr_process_node: " "Node 0x%" PRIx64 " going to %s\n", cl_ntoh64(osm_node_get_node_guid(p_node)), @@ -428,46 +389,46 @@ __osm_link_mgr_process_node(IN osm_link_mgr_t * const p_mgr, */ if (link_state != IB_LINK_NO_CHANGE && link_state <= current_state) - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_link_mgr_process_node: " "Physical port 0x%X already %s. Skipping\n", p_physp->port_num, ib_get_port_state_str(current_state)); - else if (__osm_link_mgr_set_physp_pi(p_mgr, p_physp, + else if (__osm_link_mgr_set_physp_pi(sm, p_physp, link_state)) signal = OSM_SIGNAL_DONE_PENDING; } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (signal); } /********************************************************************** **********************************************************************/ osm_signal_t -osm_link_mgr_process(IN osm_link_mgr_t * const p_mgr, +osm_link_mgr_process(osm_sm_t *sm, IN const uint8_t link_state) { cl_qmap_t *p_node_guid_tbl; osm_node_t *p_node; osm_signal_t signal = OSM_SIGNAL_DONE; - OSM_LOG_ENTER(p_mgr->p_log, osm_link_mgr_process); + OSM_LOG_ENTER(sm->p_log, osm_link_mgr_process); - p_node_guid_tbl = &p_mgr->p_subn->node_guid_tbl; + p_node_guid_tbl = &sm->p_subn->node_guid_tbl; - CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl); p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item)) { - if (__osm_link_mgr_process_node(p_mgr, p_node, link_state) == + if (__osm_link_mgr_process_node(sm, p_node, link_state) == OSM_SIGNAL_DONE_PENDING) signal = 
OSM_SIGNAL_DONE_PENDING; } - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (signal); } diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c index ca42a9f..44312a0 100644 --- a/opensm/opensm/osm_mcast_mgr.c +++ b/opensm/opensm/osm_mcast_mgr.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -121,23 +120,23 @@ static void __osm_mcast_mgr_purge_tree_node(IN osm_mtree_node_t * p_mtn) /********************************************************************** **********************************************************************/ static void -__osm_mcast_mgr_purge_tree(IN osm_mcast_mgr_t * const p_mgr, +__osm_mcast_mgr_purge_tree(osm_sm_t *sm, IN osm_mgrp_t * const p_mgrp) { - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_purge_tree); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_purge_tree); if (p_mgrp->p_root) __osm_mcast_mgr_purge_tree_node(p_mgrp->p_root); p_mgrp->p_root = NULL; - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** **********************************************************************/ static float -osm_mcast_mgr_compute_avg_hops(osm_mcast_mgr_t * const p_mgr, +osm_mcast_mgr_compute_avg_hops(osm_sm_t *sm, const osm_mgrp_t * const p_mgrp, const osm_switch_t * const p_sw) { @@ -148,7 +147,7 @@ osm_mcast_mgr_compute_avg_hops(osm_mcast_mgr_t * const p_mgr, const osm_mcm_port_t *p_mcm_port; const cl_qmap_t *p_mcm_tbl; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_compute_avg_hops); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_compute_avg_hops); p_mcm_tbl = &p_mgrp->mcm_port_tbl; @@ -164,12 +163,12 @@ osm_mcast_mgr_compute_avg_hops(osm_mcast_mgr_t * const p_mgr, Acquire the port object for this port guid, then create the new worker object to build the list. 
*/ - p_port = osm_get_port_by_guid(p_mgr->p_subn, + p_port = osm_get_port_by_guid(sm->p_subn, ib_gid_get_guid(&p_mcm_port-> port_gid)); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_compute_avg_hops: ERR 0A18: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(ib_gid_get_guid @@ -189,7 +188,7 @@ osm_mcast_mgr_compute_avg_hops(osm_mcast_mgr_t * const p_mgr, if (num_ports != 0) avg_hops = (float)(hops / num_ports); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (avg_hops); } @@ -198,7 +197,7 @@ osm_mcast_mgr_compute_avg_hops(osm_mcast_mgr_t * const p_mgr, of the group HCAs **********************************************************************/ static float -osm_mcast_mgr_compute_max_hops(osm_mcast_mgr_t * const p_mgr, +osm_mcast_mgr_compute_max_hops(osm_sm_t *sm, const osm_mgrp_t * const p_mgrp, const osm_switch_t * const p_sw) { @@ -208,7 +207,7 @@ osm_mcast_mgr_compute_max_hops(osm_mcast_mgr_t * const p_mgr, const osm_mcm_port_t *p_mcm_port; const cl_qmap_t *p_mcm_tbl; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_compute_max_hops); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_compute_max_hops); p_mcm_tbl = &p_mgrp->mcm_port_tbl; @@ -224,12 +223,12 @@ osm_mcast_mgr_compute_max_hops(osm_mcast_mgr_t * const p_mgr, Acquire the port object for this port guid, then create the new worker object to build the list. 
*/ - p_port = osm_get_port_by_guid(p_mgr->p_subn, + p_port = osm_get_port_by_guid(sm->p_subn, ib_gid_get_guid(&p_mcm_port-> port_gid)); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_compute_max_hops: ERR 0A1A: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(ib_gid_get_guid @@ -249,7 +248,7 @@ osm_mcast_mgr_compute_max_hops(osm_mcast_mgr_t * const p_mgr, max_hops = 10001; /* see later - we use it to realize no hops */ } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (float)(max_hops); } @@ -259,8 +258,7 @@ osm_mcast_mgr_compute_max_hops(osm_mcast_mgr_t * const p_mgr, a switch with the lowest average hop count to the members of the multicast group. **********************************************************************/ -static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_mcast_mgr_t * - const p_mgr, +static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_sm_t *sm, const osm_mgrp_t * const p_mgrp) { @@ -276,9 +274,9 @@ static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_mcast_mgr_t * boolean_t use_avg_hops = FALSE; /* use max hops for root */ #endif - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_find_optimal_switch); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_find_optimal_switch); - p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; + p_sw_tbl = &sm->p_subn->sw_guid_tbl; CL_ASSERT(!osm_mgrp_is_empty(p_mgrp)); @@ -290,15 +288,15 @@ static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_mcast_mgr_t * if (use_avg_hops) hops = - osm_mcast_mgr_compute_avg_hops(p_mgr, p_mgrp, p_sw); + osm_mcast_mgr_compute_avg_hops(sm, p_mgrp, p_sw); else hops = - osm_mcast_mgr_compute_max_hops(p_mgr, p_mgrp, p_sw); + osm_mcast_mgr_compute_max_hops(sm, p_mgrp, p_sw); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { sw_guid_ho = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + 
osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_mcast_mgr_find_optimal_switch: " "Switch 0x%016" PRIx64 ", hops = %f\n", sw_guid_ho, hops); @@ -310,37 +308,36 @@ static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_mcast_mgr_t * } } - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { + if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { if (p_best_sw) { sw_guid_ho = cl_ntoh64(osm_node_get_node_guid (p_best_sw->p_node)); - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_mcast_mgr_find_optimal_switch: " "Best switch is 0x%" PRIx64 ", hops = %f\n", sw_guid_ho, best_hops); } else { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_mcast_mgr_find_optimal_switch: " "No multicast capable switches detected\n"); } } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return ((osm_switch_t *) p_best_sw); } /********************************************************************** This function returns the existing or optimal root swtich for the tree. **********************************************************************/ -static osm_switch_t *__osm_mcast_mgr_find_root_switch(osm_mcast_mgr_t * - const p_mgr, +static osm_switch_t *__osm_mcast_mgr_find_root_switch(osm_sm_t *sm, const osm_mgrp_t * const p_mgrp) { const osm_switch_t *p_sw = NULL; - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_find_root_switch); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_find_root_switch); /* We always look for the best multicast tree root switch. @@ -348,54 +345,16 @@ static osm_switch_t *__osm_mcast_mgr_find_root_switch(osm_mcast_mgr_t * the root will be always on the first switch attached to it. - Very bad ... 
*/ - p_sw = __osm_mcast_mgr_find_optimal_switch(p_mgr, p_mgrp); + p_sw = __osm_mcast_mgr_find_optimal_switch(sm, p_mgrp); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return ((osm_switch_t *) p_sw); } /********************************************************************** **********************************************************************/ -void osm_mcast_mgr_construct(IN osm_mcast_mgr_t * const p_mgr) -{ - memset(p_mgr, 0, sizeof(*p_mgr)); -} - -/********************************************************************** - **********************************************************************/ -void osm_mcast_mgr_destroy(IN osm_mcast_mgr_t * const p_mgr) -{ - CL_ASSERT(p_mgr); - - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_destroy); - - OSM_LOG_EXIT(p_mgr->p_log); -} - -/********************************************************************** - **********************************************************************/ -ib_api_status_t -osm_mcast_mgr_init(IN osm_mcast_mgr_t * const p_mgr, IN osm_sm_t * sm) -{ - ib_api_status_t status = IB_SUCCESS; - - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_init); - - osm_mcast_mgr_construct(p_mgr); - - p_mgr->sm = sm; - p_mgr->p_log = sm->p_log; - p_mgr->p_subn = sm->p_subn; - p_mgr->p_lock = sm->p_lock; - - OSM_LOG_EXIT(p_mgr->p_log); - return (status); -} - -/********************************************************************** - **********************************************************************/ static osm_signal_t -__osm_mcast_mgr_set_tbl(IN osm_mcast_mgr_t * const p_mgr, +__osm_mcast_mgr_set_tbl(osm_sm_t *sm, IN osm_switch_t * const p_sw) { osm_node_t *p_node; @@ -410,9 +369,9 @@ __osm_mcast_mgr_set_tbl(IN osm_mcast_mgr_t * const p_mgr, ib_net16_t block[IB_MCAST_BLOCK_SIZE]; osm_signal_t signal = OSM_SIGNAL_DONE; - CL_ASSERT(p_mgr); + CL_ASSERT(sm); - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_set_tbl); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_set_tbl); CL_ASSERT(p_sw); @@ -438,22 +397,21 @@ 
__osm_mcast_mgr_set_tbl(IN osm_mcast_mgr_t * const p_mgr, while (osm_mcast_tbl_get_block(p_tbl, block_num, (uint8_t) position, block)) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_mcast_mgr_set_tbl: " "Writing MFT block 0x%X\n", block_id_ho); } block_id_ho = block_num + (position << 28); - status = osm_req_set(p_mgr->sm, p_path, (void *)block, - sizeof(block), + status = osm_req_set(sm, p_path, (void *)block, sizeof(block), IB_MAD_ATTR_MCAST_FWD_TBL, cl_hton32(block_id_ho), CL_DISP_MSGID_NONE, &mad_context); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_set_tbl: ERR 0A02: " "Sending multicast fwd. tbl. block failed (%s)\n", ib_get_err_str(status)); @@ -467,7 +425,7 @@ __osm_mcast_mgr_set_tbl(IN osm_mcast_mgr_t * const p_mgr, } } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (signal); } @@ -477,7 +435,7 @@ __osm_mcast_mgr_set_tbl(IN osm_mcast_mgr_t * const p_mgr, contains the group members that must be routed from this switch. 
**********************************************************************/ static void -__osm_mcast_mgr_subdivide(osm_mcast_mgr_t * const p_mgr, +__osm_mcast_mgr_subdivide(osm_sm_t *sm, osm_mgrp_t * const p_mgrp, osm_switch_t * const p_sw, cl_qlist_t * const p_list, @@ -489,7 +447,7 @@ __osm_mcast_mgr_subdivide(osm_mcast_mgr_t * const p_mgr, boolean_t ignore_existing; osm_mcast_work_obj_t *p_wobj; - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_subdivide); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_subdivide); mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); @@ -519,7 +477,7 @@ __osm_mcast_mgr_subdivide(osm_mcast_mgr_t * const p_mgr, */ uint64_t node_guid_ho = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_subdivide: ERR 0A03: " "Error routing MLID 0x%X through switch 0x%" PRIx64 "\n" @@ -535,7 +493,7 @@ __osm_mcast_mgr_subdivide(osm_mcast_mgr_t * const p_mgr, if (port_num > array_size) { uint64_t node_guid_ho = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_subdivide: ERR 0A04: " "Error routing MLID 0x%X through switch 0x%" PRIx64 "\n" @@ -554,29 +512,29 @@ __osm_mcast_mgr_subdivide(osm_mcast_mgr_t * const p_mgr, cl_qlist_insert_tail(&list_array[port_num], &p_wobj->list_item); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** **********************************************************************/ static void -__osm_mcast_mgr_purge_list(osm_mcast_mgr_t * const p_mgr, +__osm_mcast_mgr_purge_list(osm_sm_t *sm, cl_qlist_t * const p_list) { osm_mcast_work_obj_t *p_wobj; - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_purge_list); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_purge_list); while ((p_wobj = (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) != (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) { - 
osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_purge_list: ERR 0A06: " "Unable to route for port 0x%" PRIx64 "\n", osm_port_get_guid(p_wobj->p_port)); __osm_mcast_work_obj_delete(p_wobj); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } /********************************************************************** @@ -586,7 +544,7 @@ __osm_mcast_mgr_purge_list(osm_mcast_mgr_t * const p_mgr, The function returns the newly created mtree node element. **********************************************************************/ -static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, +static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t *sm, osm_mgrp_t * const p_mgrp, osm_switch_t * const p_sw, cl_qlist_t * const p_list, @@ -606,7 +564,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, uint16_t mlid_ho; osm_mcast_tbl_t *p_tbl; - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_branch); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_branch); CL_ASSERT(p_sw); CL_ASSERT(p_list); @@ -616,8 +574,8 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, node_guid_ho = cl_ntoh64(node_guid); mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_mcast_mgr_branch: " "Routing MLID 0x%X through switch 0x%" PRIx64 ", %u nodes at depth %u\n", @@ -637,7 +595,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, /* This switch doesn't do multicast. Clean-up. 
*/ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_branch: ERR 0A14: " "Switch 0x%" PRIx64 " does not support multicast\n", node_guid_ho); @@ -645,7 +603,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, /* Deallocate all the work objects on this branch of the tree. */ - __osm_mcast_mgr_purge_list(p_mgr, p_list); + __osm_mcast_mgr_purge_list(sm, p_list); goto Exit; } @@ -655,14 +613,14 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, We are unable to continue routing down this leg of the tree. Clean-up. */ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_branch: ERR 0A15: " "Insufficient memory to build multicast tree\n"); /* Deallocate all the work objects on this branch of the tree. */ - __osm_mcast_mgr_purge_list(p_mgr, p_list); + __osm_mcast_mgr_purge_list(sm, p_list); goto Exit; } @@ -677,10 +635,10 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, */ list_array = malloc(sizeof(cl_qlist_t) * max_children); if (list_array == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_branch: ERR 0A16: " "Unable to allocate list array\n"); - __osm_mcast_mgr_purge_list(p_mgr, p_list); + __osm_mcast_mgr_purge_list(sm, p_list); goto Exit; } @@ -689,7 +647,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, for (i = 0; i < max_children; i++) cl_qlist_init(&list_array[i]); - __osm_mcast_mgr_subdivide(p_mgr, p_mgrp, p_sw, p_list, list_array, + __osm_mcast_mgr_subdivide(sm, p_mgrp, p_sw, p_list, list_array, max_children); p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); @@ -699,8 +657,8 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, we're at the root of the spanning tree. 
*/ if (depth > 1) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_mcast_mgr_branch: " "Adding upstream port 0x%X\n", upstream_port); } @@ -736,8 +694,8 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, if (count == 0) continue; /* No routes down this port. */ - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_mcast_mgr_branch: " "Routing %zu destinations via switch port 0x%X\n", count, i); @@ -773,7 +731,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, CL_ASSERT(p_remote_physp); p_mtn->child_array[i] = - __osm_mcast_mgr_branch(p_mgr, p_mgrp, + __osm_mcast_mgr_branch(sm, p_mgrp, p_remote_node->sw, p_port_list, depth, osm_physp_get_port_num @@ -792,8 +750,8 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, CL_ASSERT(cl_is_qlist_empty(p_port_list)); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "__osm_mcast_mgr_branch: " "Found leaf for port 0x%016" PRIx64 " on switch port 0x%X\n", @@ -807,14 +765,14 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_mcast_mgr_t * const p_mgr, free(list_array); Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (p_mtn); } /********************************************************************** **********************************************************************/ static ib_api_status_t -__osm_mcast_mgr_build_spanning_tree(osm_mcast_mgr_t * const p_mgr, +__osm_mcast_mgr_build_spanning_tree(osm_sm_t *sm, osm_mgrp_t * const p_mgrp) { const cl_qmap_t *p_mcm_tbl; @@ -828,7 +786,7 @@ 
__osm_mcast_mgr_build_spanning_tree(osm_mcast_mgr_t * const p_mgr, uint8_t max_depth = 0; uint32_t count; - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_build_spanning_tree); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_build_spanning_tree); cl_qlist_init(&port_list); @@ -838,13 +796,13 @@ __osm_mcast_mgr_build_spanning_tree(osm_mcast_mgr_t * const p_mgr, on multicast forwarding table information if the user wants to preserve existing multicast routes. */ - __osm_mcast_mgr_purge_tree(p_mgr, p_mgrp); + __osm_mcast_mgr_purge_tree(sm, p_mgrp); p_mcm_tbl = &p_mgrp->mcm_port_tbl; num_ports = cl_qmap_count(p_mcm_tbl); if (num_ports == 0) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_mcast_mgr_build_spanning_tree: " "MLID 0x%X has no members - nothing to do\n", cl_ntoh16(osm_mgrp_get_mlid(p_mgrp))); @@ -868,9 +826,9 @@ __osm_mcast_mgr_build_spanning_tree(osm_mcast_mgr_t * const p_mgr, Locate the switch around which to create the spanning tree for this multicast group. */ - p_sw = __osm_mcast_mgr_find_root_switch(p_mgr, p_mgrp); + p_sw = __osm_mcast_mgr_find_root_switch(sm, p_mgrp); if (p_sw == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_build_spanning_tree: ERR 0A08: " "Unable to locate a suitable switch for group 0x%X\n", cl_ntoh16(osm_mgrp_get_mlid(p_mgrp))); @@ -889,11 +847,11 @@ __osm_mcast_mgr_build_spanning_tree(osm_mcast_mgr_t * const p_mgr, Acquire the port object for this port guid, then create the new worker object to build the list. 
*/ - p_port = osm_get_port_by_guid(p_mgr->p_subn, + p_port = osm_get_port_by_guid(sm->p_subn, ib_gid_get_guid(&p_mcm_port-> port_gid)); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_build_spanning_tree: ERR 0A09: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(ib_gid_get_guid @@ -903,7 +861,7 @@ __osm_mcast_mgr_build_spanning_tree(osm_mcast_mgr_t * const p_mgr, p_wobj = __osm_mcast_work_obj_new(p_port); if (p_wobj == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_mcast_mgr_build_spanning_tree: ERR 0A10: " "Insufficient memory to route port 0x%016" PRIx64 "\n", @@ -915,16 +873,16 @@ __osm_mcast_mgr_build_spanning_tree(osm_mcast_mgr_t * const p_mgr, } count = cl_qlist_count(&port_list); - p_mgrp->p_root = __osm_mcast_mgr_branch(p_mgr, p_mgrp, p_sw, + p_mgrp->p_root = __osm_mcast_mgr_branch(sm, p_mgrp, p_sw, &port_list, 0, 0, &max_depth); - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_mcast_mgr_build_spanning_tree: " "Configured MLID 0x%X for %u ports, max tree depth = %u\n", cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)), count, max_depth); Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } @@ -933,7 +891,7 @@ Exit: /********************************************************************** **********************************************************************/ void -osm_mcast_mgr_set_table(IN osm_mcast_mgr_t * const p_mgr, +osm_mcast_mgr_set_table(osm_sm_t *sm, IN const osm_mgrp_t * const p_mgrp, IN const osm_mtree_node_t * const p_mtn) { @@ -944,15 +902,15 @@ osm_mcast_mgr_set_table(IN osm_mcast_mgr_t * const p_mgr, osm_mcast_tbl_t *p_tbl; osm_switch_t *p_sw; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_set_table); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_set_table); mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); p_sw = osm_mtree_node_get_switch_ptr(p_mtn); CL_ASSERT(p_sw); - if 
(osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, + if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { + osm_log(sm->p_log, OSM_LOG_VERBOSE, "osm_mcast_mgr_set_table: " "Configuring MLID 0x%X on switch 0x%" PRIx64 "\n", mlid_ho, osm_node_get_node_guid(p_sw->p_node)); @@ -977,27 +935,27 @@ osm_mcast_mgr_set_table(IN osm_mcast_mgr_t * const p_mgr, osm_mcast_tbl_set(p_tbl, mlid_ho, i); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } #endif /********************************************************************** **********************************************************************/ static void -__osm_mcast_mgr_clear(IN osm_mcast_mgr_t * const p_mgr, +__osm_mcast_mgr_clear(osm_sm_t *sm, IN osm_mgrp_t * const p_mgrp) { osm_switch_t *p_sw; cl_qmap_t *p_sw_tbl; osm_mcast_tbl_t *p_mcast_tbl; - OSM_LOG_ENTER(p_mgr->p_log, __osm_mcast_mgr_clear); + OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_clear); /* Walk the switches and clear the routing entries for this MLID. */ - p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; + p_sw_tbl = &sm->p_subn->sw_guid_tbl; p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { p_mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); @@ -1005,7 +963,7 @@ __osm_mcast_mgr_clear(IN osm_mcast_mgr_t * const p_mgr, p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); } - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); } #if 0 @@ -1014,7 +972,7 @@ __osm_mcast_mgr_clear(IN osm_mcast_mgr_t * const p_mgr, Lock must be held on entry. 
**********************************************************************/ ib_api_status_t -osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, +osm_mcast_mgr_process_single(osm_sm_t *sm, IN ib_net16_t const mlid, IN ib_net64_t const port_guid, IN uint8_t const join_state) @@ -1029,15 +987,15 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, osm_mcast_tbl_t *p_mcast_tbl; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_process_single); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_single); CL_ASSERT(mlid); CL_ASSERT(port_guid); mlid_ho = cl_ntoh16(mlid); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_single: " "Attempting to add port 0x%" PRIx64 " to MLID 0x%X, " "\n\t\t\t\tjoin state = 0x%X\n", @@ -1047,9 +1005,9 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, /* Acquire the Port object. 
*/ - p_port = osm_get_port_by_guid(p_mgr->p_subn, port_guid); + p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_single: ERR 0A01: " "Unable to acquire port object for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); @@ -1059,7 +1017,7 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, p_physp = p_port->p_physp; if (p_physp == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_single: ERR 0A05: " "Unable to acquire phsyical port object for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); @@ -1069,7 +1027,7 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, p_remote_physp = osm_physp_get_remote(p_physp); if (p_remote_physp == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_single: ERR 0A11: " "Unable to acquire remote phsyical port object " "for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); @@ -1084,7 +1042,7 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, sw_guid = osm_node_get_node_guid(p_remote_node); if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_single: ERR 0A22: " "Remote node not a switch node 0x%" PRIx64 "\n", cl_ntoh64(sw_guid)); @@ -1093,7 +1051,7 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, } if (!p_remote_node->sw) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_single: ERR 0A12: " "No switch object 0x%" PRIx64 "\n", cl_ntoh64(sw_guid)); status = IB_ERROR; @@ -1121,14 +1079,14 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, } else { if (join_state & IB_JOIN_STATE_SEND_ONLY) { if (osm_log_is_active - (p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + (sm->p_log, OSM_LOG_DEBUG)) 
{ + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_single: " "Success. Nothing to do for send" "only member\n"); } } else { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_single: ERR 0A13: " "Unknown join state 0x%X\n", join_state); @@ -1137,15 +1095,15 @@ osm_mcast_mgr_process_single(IN osm_mcast_mgr_t * const p_mgr, } } } else { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_single: " "Unable to add port\n"); } } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } #endif @@ -1154,7 +1112,7 @@ Exit: lock must already be held on entry **********************************************************************/ static ib_api_status_t -osm_mcast_mgr_process_tree(IN osm_mcast_mgr_t * const p_mgr, +osm_mcast_mgr_process_tree(osm_sm_t *sm, IN osm_mgrp_t * const p_mgrp, IN osm_mcast_req_type_t req_type, ib_net64_t port_guid) @@ -1163,12 +1121,12 @@ osm_mcast_mgr_process_tree(IN osm_mcast_mgr_t * const p_mgr, ib_net16_t mlid; boolean_t ui_mcast_fdb_assign_func_defined; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_process_tree); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_tree); mlid = osm_mgrp_get_mlid(p_mgrp); - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_tree: " "Processing multicast group 0x%X\n", cl_ntoh16(mlid)); } @@ -1176,16 +1134,16 @@ osm_mcast_mgr_process_tree(IN osm_mcast_mgr_t * const p_mgr, /* If there are no switches in the subnet, then we have nothing to do. 
*/ - if (cl_qmap_count(&p_mgr->p_subn->sw_guid_tbl) == 0) { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) { + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_tree: " "No switches in subnet. Nothing to do\n"); } goto Exit; } - if (p_mgr->p_subn->opt.pfn_ui_mcast_fdb_assign) + if (sm->p_subn->opt.pfn_ui_mcast_fdb_assign) ui_mcast_fdb_assign_func_defined = TRUE; else ui_mcast_fdb_assign_func_defined = FALSE; @@ -1201,34 +1159,34 @@ osm_mcast_mgr_process_tree(IN osm_mcast_mgr_t * const p_mgr, */ if (ui_mcast_fdb_assign_func_defined == FALSE || req_type == OSM_MCAST_REQ_TYPE_CREATE) - __osm_mcast_mgr_clear(p_mgr, p_mgrp); + __osm_mcast_mgr_clear(sm, p_mgrp); /* If a UI function is defined, then we will call it here. If not - the use the regular build spanning tree function */ if (ui_mcast_fdb_assign_func_defined == FALSE) { - status = __osm_mcast_mgr_build_spanning_tree(p_mgr, p_mgrp); + status = __osm_mcast_mgr_build_spanning_tree(sm, p_mgrp); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "osm_mcast_mgr_process_tree: ERR 0A17: " "Unable to create spanning tree (%s)\n", ib_get_err_str(status)); goto Exit; } } else { - if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_tree: " "Invoking UI function pfn_ui_mcast_fdb_assign\n"); } - p_mgr->p_subn->opt.pfn_ui_mcast_fdb_assign(p_mgr->p_subn->opt. + sm->p_subn->opt.pfn_ui_mcast_fdb_assign(sm->p_subn->opt. ui_mcast_fdb_assign_ctx, mlid, req_type, port_guid); } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return (status); } @@ -1237,18 +1195,18 @@ Exit: NOTE : The lock should be held externally! 
**********************************************************************/ static ib_api_status_t -mcast_mgr_process_mgrp(IN osm_mcast_mgr_t * const p_mgr, +mcast_mgr_process_mgrp(osm_sm_t *sm, IN osm_mgrp_t * const p_mgrp, IN osm_mcast_req_type_t req_type, IN ib_net64_t port_guid) { ib_api_status_t status; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_process_mgrp); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_mgrp); - status = osm_mcast_mgr_process_tree(p_mgr, p_mgrp, req_type, port_guid); + status = osm_mcast_mgr_process_tree(sm, p_mgrp, req_type, port_guid); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, + osm_log(sm->p_log, OSM_LOG_ERROR, "mcast_mgr_process_mgrp: ERR 0A19: " "Unable to create spanning tree (%s)\n", ib_get_err_str(status)); @@ -1260,48 +1218,48 @@ mcast_mgr_process_mgrp(IN osm_mcast_mgr_t * const p_mgr, * Not a well known group */ if (cl_qmap_count(&p_mgrp->mcm_port_tbl) == 0 && !p_mgrp->well_known) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "mcast_mgr_process_mgrp: " "Destroying mgrp with lid:0x%X\n", cl_ntoh16(p_mgrp->mlid)); /* Send a Report to any InformInfo registered for Trap 67 : MCGroup delete */ - osm_mgrp_send_delete_notice(p_mgr->p_subn, p_mgr->p_log, + osm_mgrp_send_delete_notice(sm->p_subn, sm->p_log, p_mgrp); - cl_qmap_remove_item(&p_mgr->p_subn->mgrp_mlid_tbl, + cl_qmap_remove_item(&sm->p_subn->mgrp_mlid_tbl, (cl_map_item_t *) p_mgrp); osm_mgrp_delete(p_mgrp); } Exit: - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); return status; } /********************************************************************** **********************************************************************/ -osm_signal_t osm_mcast_mgr_process(IN osm_mcast_mgr_t * const p_mgr) +osm_signal_t osm_mcast_mgr_process(osm_sm_t *sm) { osm_signal_t signal; osm_switch_t *p_sw; cl_qmap_t *p_sw_tbl; cl_qmap_t *p_mcast_tbl; - cl_qlist_t *p_list = &p_mgr->p_subn->p_osm->sm.mgrp_list; + cl_qlist_t *p_list = 
&sm->mgrp_list; osm_mgrp_t *p_mgrp; osm_mgrp_t *p_next_mgrp; boolean_t pending_transactions = FALSE; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_process); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process); - p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; - p_mcast_tbl = &p_mgr->p_subn->mgrp_mlid_tbl; + p_sw_tbl = &sm->p_subn->sw_guid_tbl; + p_mcast_tbl = &sm->p_subn->mgrp_mlid_tbl; /* While holding the lock, iterate over all the established multicast groups, servicing each in turn. Then, download the multicast tables to the switches. */ - CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); p_mgrp = (osm_mgrp_t *) cl_qmap_head(p_mcast_tbl); while (p_mgrp != (osm_mgrp_t *) cl_qmap_end(p_mcast_tbl)) { @@ -1309,7 +1267,7 @@ osm_signal_t osm_mcast_mgr_process(IN osm_mcast_mgr_t * const p_mgr) of the subnet. Not due to a specific multicast request. So the request type is subnet_change and the port guid is 0. */ p_next_mgrp = (osm_mgrp_t *) cl_qmap_next(&p_mgrp->map_item); - mcast_mgr_process_mgrp(p_mgr, p_mgrp, + mcast_mgr_process_mgrp(sm, p_mgrp, OSM_MCAST_REQ_TYPE_SUBNET_CHANGE, 0); p_mgrp = p_next_mgrp; } @@ -1319,7 +1277,7 @@ osm_signal_t osm_mcast_mgr_process(IN osm_mcast_mgr_t * const p_mgr) */ p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { - signal = __osm_mcast_mgr_set_tbl(p_mgr, p_sw); + signal = __osm_mcast_mgr_set_tbl(sm, p_sw); if (signal == OSM_SIGNAL_DONE_PENDING) pending_transactions = TRUE; p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); @@ -1330,9 +1288,9 @@ osm_signal_t osm_mcast_mgr_process(IN osm_mcast_mgr_t * const p_mgr) free(p); } - CL_PLOCK_RELEASE(p_mgr->p_lock); + CL_PLOCK_RELEASE(sm->p_lock); - OSM_LOG_EXIT(p_mgr->p_log); + OSM_LOG_EXIT(sm->p_log); if (pending_transactions == TRUE) return (OSM_SIGNAL_DONE_PENDING); @@ -1343,13 +1301,13 @@ osm_signal_t osm_mcast_mgr_process(IN osm_mcast_mgr_t * const p_mgr) 
/********************************************************************** **********************************************************************/ static -osm_mgrp_t *__get_mgrp_by_mlid(IN osm_mcast_mgr_t * const p_mgr, +osm_mgrp_t *__get_mgrp_by_mlid(osm_sm_t *sm, IN ib_net16_t const mlid) { cl_map_item_t *map_item; - map_item = cl_qmap_get(&p_mgr->p_subn->mgrp_mlid_tbl, mlid); - if (map_item == cl_qmap_end(&p_mgr->p_subn->mgrp_mlid_tbl)) { + map_item = cl_qmap_get(&sm->p_subn->mgrp_mlid_tbl, mlid); + if (map_item == cl_qmap_end(&sm->p_subn->mgrp_mlid_tbl)) { return NULL; } return (osm_mgrp_t *) map_item; @@ -1359,9 +1317,9 @@ osm_mgrp_t *__get_mgrp_by_mlid(IN osm_mcast_mgr_t * const p_mgr, This is the function that is invoked during idle time to handle the process request for mcast groups where join/leave/delete was required. **********************************************************************/ -osm_signal_t osm_mcast_mgr_process_mgroups(osm_mcast_mgr_t * p_mgr) +osm_signal_t osm_mcast_mgr_process_mgroups(osm_sm_t *sm) { - cl_qlist_t *p_list = &p_mgr->p_subn->p_osm->sm.mgrp_list; + cl_qlist_t *p_list = &sm->mgrp_list; osm_switch_t *p_sw; cl_qmap_t *p_sw_tbl; osm_mgrp_t *p_mgrp; @@ -1371,10 +1329,10 @@ osm_signal_t osm_mcast_mgr_process_mgroups(osm_mcast_mgr_t * p_mgr) osm_mcast_req_type_t req_type; ib_net64_t port_guid; - OSM_LOG_ENTER(p_mgr->p_log, osm_mcast_mgr_process_mgroups); + OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_mgroups); /* we need a lock to make sure the p_mgrp is not change other ways */ - CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); + CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); while (!cl_is_qlist_empty(p_list)) { ctx = (osm_mcast_mgr_ctxt_t *) cl_qlist_remove_head(p_list); @@ -1389,7 +1347,7 @@ osm_signal_t osm_mcast_mgr_process_mgroups(osm_mcast_mgr_t * p_mgr) /* since we delayed the execution we prefer to pass the mlid as the mgrp identifier and then find it or abort */ - p_mgrp = __get_mgrp_by_mlid(p_mgr, mlid); + p_mgrp = __get_mgrp_by_mlid(sm, mlid); if 
(!p_mgrp) continue; @@ -1397,35 +1355,35 @@ osm_signal_t osm_mcast_mgr_process_mgroups(osm_mcast_mgr_t * p_mgr) * we processed the group we can skip doing anything */ if (p_mgrp->last_change_id == p_mgrp->last_tree_id) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_mgroups: " "Skip processing mgrp with lid:0x%X change id:%u\n", cl_ntoh16(mlid), p_mgrp->last_change_id); continue; } - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, + osm_log(sm->p_log, OSM_LOG_DEBUG, "osm_mcast_mgr_process_mgroups: " "Processing mgrp with lid:0x%X change id:%u\n", cl_ntoh16(mlid), p_mgrp->last_change_id); - mcast_mgr_process_mgrp(p_mgr, p_mgrp, req_type, port_guid); + mcast_mgr_process_mgrp(sm, p_mgrp, req_type, port_guid); } /* Walk the switches and download the tables for each. */ - p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; + p_sw_tbl = &sm->p_subn->sw_guid_tbl; p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { - ret = __osm_mcast_mgr_set_tbl(p_mgr, p_sw); + ret = __osm_mcast_mgr_set_tbl(sm, p_sw); if (ret == OSM_SIGNAL_DONE_PENDING) signal = ret; p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); } - osm_dump_mcast_routes(p_mgr->p_subn->p_osm); + osm_dump_mcast_routes(sm->p_subn->p_osm); - CL_PLOCK_RELEASE(p_mgr->p_lock); - OSM_LOG_EXIT(p_mgr->p_log); + CL_PLOCK_RELEASE(sm->p_lock); + OSM_LOG_EXIT(sm->p_log); return signal; } diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index 2b0d6f0..8375396 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -561,6 +561,8 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) * Discovery stuff. 
* Basically this code should not be here, but merged with main OpenSM **********************************************************************/ +extern void osm_drop_mgr_process(IN osm_sm_t *sm); + static int sweep_hop_1(osm_sm_t * sm) { ib_api_status_t status = IB_SUCCESS; @@ -772,7 +774,7 @@ static int perfmgr_discovery(osm_opensm_t * osm) goto _exit; _drop: - osm_drop_mgr_process(&osm->sm.drop_mgr); + osm_drop_mgr_process(&osm->sm); _exit: return ret; diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c index bdbb2e6..b576c26 100644 --- a/opensm/opensm/osm_sm.c +++ b/opensm/opensm/osm_sm.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include @@ -169,10 +168,7 @@ void osm_sm_construct(IN osm_sm_t * const p_sm) osm_sm_mad_ctrl_construct(&p_sm->mad_ctrl); osm_lid_mgr_construct(&p_sm->lid_mgr); osm_ucast_mgr_construct(&p_sm->ucast_mgr); - osm_link_mgr_construct(&p_sm->link_mgr); - osm_drop_mgr_construct(&p_sm->drop_mgr); osm_sweep_fail_ctrl_construct(&p_sm->sweep_fail_ctrl); - osm_mcast_mgr_construct(&p_sm->mcast_mgr); } /********************************************************************** @@ -231,9 +227,6 @@ void osm_sm_destroy(IN osm_sm_t * const p_sm) OSM_LOG_ENTER(p_sm->p_log, osm_sm_destroy); osm_lid_mgr_destroy(&p_sm->lid_mgr); osm_ucast_mgr_destroy(&p_sm->ucast_mgr); - osm_link_mgr_destroy(&p_sm->link_mgr); - osm_drop_mgr_destroy(&p_sm->drop_mgr); - osm_mcast_mgr_destroy(&p_sm->mcast_mgr); cl_event_wheel_destroy(&p_sm->trap_aging_tracker); cl_timer_destroy(&p_sm->sweep_timer); cl_timer_destroy(&p_sm->polling_timer); @@ -325,22 +318,10 @@ osm_sm_init(IN osm_sm_t * const p_sm, if (status != IB_SUCCESS) goto Exit; - status = osm_link_mgr_init(&p_sm->link_mgr, p_sm); - if (status != IB_SUCCESS) - goto Exit; - - status = osm_drop_mgr_init(&p_sm->drop_mgr, p_sm); - if (status != IB_SUCCESS) - goto Exit; - status = osm_sweep_fail_ctrl_init(&p_sm->sweep_fail_ctrl, p_sm); if (status != IB_SUCCESS) goto Exit; - status = 
osm_mcast_mgr_init(&p_sm->mcast_mgr, p_sm); - if (status != IB_SUCCESS) - goto Exit; - p_sm->ni_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_INFO, osm_ni_rcv_process, p_sm); if (p_sm->ni_disp_h == CL_DISP_INVALID_HANDLE) diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 7234053..516327e 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -68,7 +68,12 @@ #include #include -osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm); +extern void osm_drop_mgr_process(IN osm_sm_t *sm); +extern osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm); +extern osm_signal_t osm_pkey_mgr_process(IN osm_opensm_t * p_osm); +extern osm_signal_t osm_mcast_mgr_process(IN osm_sm_t *sm); +extern osm_signal_t osm_mcast_mgr_process_mgroups(IN osm_sm_t *sm); +extern osm_signal_t osm_link_mgr_process(IN osm_sm_t *sm, IN uint8_t state); /********************************************************************** **********************************************************************/ @@ -1078,7 +1083,7 @@ _repeat_discovery: "SM PORT DOWN"); /* Run the drop manager - we want to clear all records */ - osm_drop_mgr_process(&sm->drop_mgr); + osm_drop_mgr_process(sm); /* Move to DISCOVERING state */ osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER); @@ -1144,7 +1149,7 @@ _repeat_discovery: } /* Need to continue with lid assignment */ - osm_drop_mgr_process(&sm->drop_mgr); + osm_drop_mgr_process(sm); /* * If we are not MASTER already - this means that we are @@ -1206,7 +1211,7 @@ _repeat_discovery: "SWITCHES CONFIGURED FOR UNICAST"); if (!sm->p_subn->opt.disable_multicast) { - osm_mcast_mgr_process(&sm->mcast_mgr); + osm_mcast_mgr_process(sm); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, @@ -1221,21 +1226,21 @@ _repeat_discovery: * other parameters provided by the Set(PortInfo) Packet. 
*/ - osm_link_mgr_process(&sm->link_mgr, IB_LINK_NO_CHANGE); + osm_link_mgr_process(sm, IB_LINK_NO_CHANGE); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, "LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE"); - osm_link_mgr_process(&sm->link_mgr, IB_LINK_ARMED); + osm_link_mgr_process(sm, IB_LINK_ARMED); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; osm_log_msg_box(sm->p_log, OSM_LOG_VERBOSE, __FUNCTION__, "LINKS ARMED - SET LINKS TO ACTIVE STATE"); - osm_link_mgr_process(&sm->link_mgr, IB_LINK_ACTIVE); + osm_link_mgr_process(sm, IB_LINK_ACTIVE); if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) return; @@ -1277,7 +1282,7 @@ static void do_process_mgrp_queue(osm_sm_t * sm) { if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER) return; - osm_mcast_mgr_process_mgroups(&sm->mcast_mgr); + osm_mcast_mgr_process_mgroups(sm); wait_for_pending_transactions(&sm->p_subn->p_osm->stats); } -- 1.5.4.rc5 From mashirle at us.ibm.com Mon Feb 11 23:29:46 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Mon, 11 Feb 2008 23:29:46 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> Message-ID: <1202801386.4019.89.camel@localhost.localdomain> On Tue, 2008-02-12 at 09:00 -0800, Sean Hefty wrote: > >Saying all that, I don't think we want to have --any RNR retries--, as > >for retries, I am open to hear what others think. > > I'm really not all that familiar with ipoib protocol, but if it's being > implemented over an RC connection, then adding an RNR retry seems to make sense > to me. I believe using UC is better, but if it's over RC, I don't know that we > want to take the hit of tearing down and re-establishing the connection just > because we have a fast sender. 
(This is just an opinion based on no fact > whatsoever.) > > - Sean Did anyone ever run IPoIB-CM (multiple sockets and multiple connections) between ipath and mthca or connectX and mthca? I guess there might be a similar issue there, mismatched send rates. thanks Shirley From jim at mellanox.com Tue Feb 12 09:31:58 2008 From: jim at mellanox.com (Jim Mott) Date: Tue, 12 Feb 2008 09:31:58 -0800 Subject: [ofa-general] SDP performance with bzcopy testing help needed Message-ID: Now that SDP is shipping with a non-zero default value for sdp_zcopy_thresh (64K), I need some feedback from the list. Does anybody except me see a performance gain on large messages? 1) netperf server node - export LD_PRELOAD=libsdp.so - netserver 2) netperf client node - export LD_PRELOAD=libsdp.so 3) Run some tests with bzcopy enabled on client - cat /sys/module/ib_sdp/sdp_zcopy_thresh 65536 (You might have to hunt for it in parameters) - netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 -- -m 64K - netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 -- -m 128K - netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 -- -m 1M 4) Run the tests with bzcopy disabled on the client - echo 0 > /sys/module/ib_sdp/sdp_zcopy_thresh - netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 -- -m 64K - netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 -- -m 128K - netperf -C -c -P 0 -t TCP_STREAM -H 193.168.10.143 -l 60 -- -m 1M When I run these tests on ConnectX DDR through a switch, I see bandwidth numbers that look like: 64K 128K 1M SDP 8215.17 6429.09 6862.66 BZCOPY 8748.00 9997.07 9847.76 Looking at uS/KB transferred we see: 64K 128K 1M LCL RMT LCL RMT LCL RMT SDP 1.025 1.243 1.391 1.493 1.274 1.407 BZCOPY 0.966 1.148 0.838 1.014 0.603 0.984 Note that it is important that you use "-m" instead of "-r" because you want to present the largest possible buffers (netperf "Send Message Size") to SDP.
Pinning user memory instead of copying through the kernel is only a win if you can amortize the costs over a big chunk of memory. From humomrczhoni at best-giving.com.tw Fri Feb 8 06:53:06 2008 From: humomrczhoni at best-giving.com.tw (Simone institute) Date: Fri, 08 Feb 2008 10:53:06 -0400 Subject: [ofa-general] Physician Contact List Message-ID: <494477z8tom0$o5829lg0$5598p7x0@Delldim5150 Special Package for this week Fully Licensed Physicians in the USA 788,961 in total * 17,946 emails Coverage in many different areas of medicine such as Endocrinology, Pathology, Urology, Neurology, Plastic Surgery, Psychiatry, Cardiology and much more Over a dozen sortable fields Database of American Pharma Companies 47,000 names and emails of the major positions American Hospitals complete contact information for CEO's, CFO's, Directors and more - over 23,000 listings in total for more than 7,000 hospitals in the USA Complete and Accurate Listing for Dental Service Providers 597,000 dentists and dental services ( a $350 value!) US Chiropractor List Over than 100k chiropractors practicing in the USA Price for this week only = $398 for all 5 datasets reply by email: sulliva_robert.1972 at hotmail.com good until feb 10 From sashak at voltaire.com Tue Feb 12 10:18:06 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 12 Feb 2008 18:18:06 +0000 Subject: [ofa-general] [PATCH RFC] opensm: drop unused parameter in OSM_LOG_ENTER macro Message-ID: <20080212181806.GF16074@sashak.voltaire.com> __func__ macro is used in the OSM_LOG_ENTER() to show an actual function name, so the second parameter is not really useful here. OTOH it makes it harder to grep over OpenSM source code, when searches are by function names it generates a lot of unrelated matches. If so, what about removing this second parameter (as in this patch)?
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_log.h | 2 +- opensm/libvendor/osm_pkt_randomizer.c | 10 +- opensm/libvendor/osm_vendor_al.c | 42 +++--- opensm/libvendor/osm_vendor_ibumad.c | 26 ++-- opensm/libvendor/osm_vendor_ibumad_sa.c | 10 +- opensm/libvendor/osm_vendor_mlx.c | 22 ++-- opensm/libvendor/osm_vendor_mlx_anafa.c | 22 ++-- opensm/libvendor/osm_vendor_mlx_dispatcher.c | 12 +- opensm/libvendor/osm_vendor_mlx_hca.c | 10 +- opensm/libvendor/osm_vendor_mlx_hca_anafa.c | 4 +- opensm/libvendor/osm_vendor_mlx_hca_pfs.c | 14 +- opensm/libvendor/osm_vendor_mlx_hca_sim.c | 16 +- opensm/libvendor/osm_vendor_mlx_ibmgt.c | 4 +- opensm/libvendor/osm_vendor_mlx_rmpp_ctx.c | 4 +- opensm/libvendor/osm_vendor_mlx_sa.c | 12 +- opensm/libvendor/osm_vendor_mlx_sender.c | 6 +- opensm/libvendor/osm_vendor_mlx_sim.c | 7 +- opensm/libvendor/osm_vendor_mlx_ts.c | 6 +- opensm/libvendor/osm_vendor_mlx_ts_anafa.c | 4 +- opensm/libvendor/osm_vendor_mlx_txn.c | 14 +- opensm/libvendor/osm_vendor_mtl.c | 26 ++-- opensm/libvendor/osm_vendor_mtl_hca_guid.c | 10 +- opensm/libvendor/osm_vendor_mtl_transaction_mgr.c | 12 +- opensm/libvendor/osm_vendor_test.c | 16 +- opensm/libvendor/osm_vendor_ts.c | 28 ++-- opensm/libvendor/osm_vendor_umadt.c | 12 +- opensm/opensm/main.c | 2 +- opensm/opensm/osm_db_files.c | 10 +- opensm/opensm/osm_drop_mgr.c | 10 +- opensm/opensm/osm_inform.c | 16 +- opensm/opensm/osm_lid_mgr.c | 18 ++-- opensm/opensm/osm_lin_fwd_rcv.c | 2 +- opensm/opensm/osm_link_mgr.c | 6 +- opensm/opensm/osm_mad_pool.c | 10 +- opensm/opensm/osm_mcast_fwd_rcv.c | 2 +- opensm/opensm/osm_mcast_mgr.c | 34 +++--- opensm/opensm/osm_multicast.c | 4 +- opensm/opensm/osm_node_desc_rcv.c | 4 +- opensm/opensm/osm_node_info_rcv.c | 22 ++-- opensm/opensm/osm_opensm.c | 2 +- opensm/opensm/osm_perfmgr.c | 24 ++-- opensm/opensm/osm_pkey.c | 6 +- opensm/opensm/osm_pkey_mgr.c | 2 +- opensm/opensm/osm_pkey_rcv.c | 2 +- opensm/opensm/osm_port.c | 6 +- opensm/opensm/osm_port_info_rcv.c | 
16 +- opensm/opensm/osm_qos.c | 2 +- opensm/opensm/osm_qos_parser.y | 4 +- opensm/opensm/osm_qos_policy.c | 5 +- opensm/opensm/osm_req.c | 4 +- opensm/opensm/osm_resp.c | 4 +- opensm/opensm/osm_sa.c | 8 +- opensm/opensm/osm_sa_class_port_info.c | 4 +- opensm/opensm/osm_sa_guidinfo_record.c | 8 +- opensm/opensm/osm_sa_informinfo.c | 16 +- opensm/opensm/osm_sa_lft_record.c | 4 +- opensm/opensm/osm_sa_link_record.c | 10 +- opensm/opensm/osm_sa_mad_ctrl.c | 14 +- opensm/opensm/osm_sa_mcmember_record.c | 22 ++-- opensm/opensm/osm_sa_mft_record.c | 4 +- opensm/opensm/osm_sa_multipath_record.c | 22 ++-- opensm/opensm/osm_sa_node_record.c | 8 +- opensm/opensm/osm_sa_path_record.c | 26 ++-- opensm/opensm/osm_sa_pkey_record.c | 8 +- opensm/opensm/osm_sa_portinfo_record.c | 10 +- opensm/opensm/osm_sa_response.c | 2 +- opensm/opensm/osm_sa_service_record.c | 14 +- opensm/opensm/osm_sa_slvl_record.c | 6 +- opensm/opensm/osm_sa_sminfo_record.c | 6 +- opensm/opensm/osm_sa_sw_info_record.c | 8 +- opensm/opensm/osm_sa_vlarb_record.c | 8 +- opensm/opensm/osm_service.c | 6 +- opensm/opensm/osm_slvl_map_rcv.c | 2 +- opensm/opensm/osm_sm.c | 16 +- opensm/opensm/osm_sm_mad_ctrl.c | 22 ++-- opensm/opensm/osm_sm_state_mgr.c | 14 +- opensm/opensm/osm_sminfo_rcv.c | 12 +- opensm/opensm/osm_state_mgr.c | 28 ++-- opensm/opensm/osm_sw_info_rcv.c | 12 +- opensm/opensm/osm_sweep_fail_ctrl.c | 4 +- opensm/opensm/osm_trap_rcv.c | 10 +- opensm/opensm/osm_ucast_ftree.c | 39 +++--- opensm/opensm/osm_ucast_lash.c | 12 +- opensm/opensm/osm_ucast_mgr.c | 18 ++-- opensm/opensm/osm_ucast_updn.c | 18 ++-- opensm/opensm/osm_vl15intf.c | 12 +- opensm/opensm/osm_vl_arb_rcv.c | 2 +- opensm/osmtest/osmt_inform.c | 14 +- opensm/osmtest/osmt_multicast.c | 6 +- opensm/osmtest/osmt_service.c | 20 ++-- opensm/osmtest/osmt_slvl_vl_arb.c | 14 +- opensm/osmtest/osmtest.c | 142 ++++++++++---------- 92 files changed, 596 insertions(+), 603 deletions(-) diff --git a/opensm/include/opensm/osm_log.h 
b/opensm/include/opensm/osm_log.h index a024f66..bfd2e96 100644 --- a/opensm/include/opensm/osm_log.h +++ b/opensm/include/opensm/osm_log.h @@ -74,7 +74,7 @@ BEGIN_C_DECLS #define LOG_ENTRY_SIZE_MAX 4096 #define BUF_SIZE LOG_ENTRY_SIZE_MAX #define __func__ __FUNCTION__ -#define OSM_LOG_ENTER( OSM_LOG_PTR, NAME ) \ +#define OSM_LOG_ENTER( OSM_LOG_PTR ) \ osm_log( OSM_LOG_PTR, OSM_LOG_FUNCS, \ "%s: [\n", __func__); #define OSM_LOG_EXIT( OSM_LOG_PTR ) \ diff --git a/opensm/libvendor/osm_pkt_randomizer.c b/opensm/libvendor/osm_pkt_randomizer.c index 2fce57f..092700b 100644 --- a/opensm/libvendor/osm_pkt_randomizer.c +++ b/opensm/libvendor/osm_pkt_randomizer.c @@ -75,7 +75,7 @@ __osm_pkt_randomizer_is_path_in_fault_paths(IN osm_log_t * p_log, osm_dr_path_t *p_found_dr_path; uint8_t ind1, ind2; - OSM_LOG_ENTER(p_log, __osm_pkt_randomizer_is_path_in_fault_paths); + OSM_LOG_ENTER(p_log); for (ind1 = 0; ind1 < p_pkt_rand->num_paths_initialized; ind1++) { found_path = TRUE; @@ -131,7 +131,7 @@ __osm_pkt_randomizer_process_path(IN osm_log_t * p_log, char buf[BUF_SIZE]; char line[BUF_SIZE]; - OSM_LOG_ENTER(p_log, __osm_pkt_randomizer_process_path); + OSM_LOG_ENTER(p_log); if (rand_value_init == FALSE) { int seed; @@ -238,7 +238,7 @@ osm_pkt_randomizer_mad_drop(IN osm_log_t * p_log, boolean_t res = FALSE; osm_dr_path_t dr_path; - OSM_LOG_ENTER(p_log, osm_pkt_randomizer_mad_drop); + OSM_LOG_ENTER(p_log); p_smp = (ib_smp_t *) p_mad; @@ -273,7 +273,7 @@ osm_pkt_randomizer_init(IN OUT osm_pkt_randomizer_t ** pp_pkt_randomizer, uint8_t tmp; ib_api_status_t res = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_pkt_randomizer_init); + OSM_LOG_ENTER(p_log); *pp_pkt_randomizer = malloc(sizeof(osm_pkt_randomizer_t)); if (*pp_pkt_randomizer == NULL) { @@ -332,7 +332,7 @@ void osm_pkt_randomizer_destroy(IN OUT osm_pkt_randomizer_t ** pp_pkt_randomizer, IN osm_log_t * p_log) { - OSM_LOG_ENTER(p_log, osm_pkt_randomizer_destroy); + OSM_LOG_ENTER(p_log); if (*pp_pkt_randomizer != NULL) { 
free((*pp_pkt_randomizer)->fault_dr_paths); diff --git a/opensm/libvendor/osm_vendor_al.c b/opensm/libvendor/osm_vendor_al.c index 7d497c5..6694eb9 100644 --- a/opensm/libvendor/osm_vendor_al.c +++ b/opensm/libvendor/osm_vendor_al.c @@ -139,7 +139,7 @@ __osm_al_convert_wcs(IN ib_wc_status_t const wc_status) static void __osm_al_ca_err_callback(IN ib_async_event_rec_t * p_async_rec) { osm_vendor_t *p_vend = (osm_vendor_t *) p_async_rec->context; - OSM_LOG_ENTER(p_vend->p_log, __osm_al_ca_err_callback); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_ERROR, "__osm_al_ca_err_callback: ERR 3B01: " @@ -155,7 +155,7 @@ static void __osm_al_ca_destroy_callback(IN void *context) { osm_al_bind_info_t *p_bind = (osm_al_bind_info_t *) context; osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, __osm_al_ca_destroy_callback); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_INFO, "__osm_al_ca_destroy_callback: " @@ -171,7 +171,7 @@ static void __osm_al_err_callback(IN ib_async_event_rec_t * p_async_rec) osm_al_bind_info_t *p_bind = (osm_al_bind_info_t *) p_async_rec->context; osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, __osm_al_err_callback); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_ERROR, "__osm_al_err_callback: ERR 3B02: " @@ -193,7 +193,7 @@ __osm_al_send_callback(IN void *mad_svc_context, IN ib_mad_element_t * p_elem) osm_vend_wrap_t *const p_vw = osm_madw_get_vend_ptr(p_madw); ib_mad_t *p_mad; - OSM_LOG_ENTER(p_vend->p_log, __osm_al_send_callback); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); CL_ASSERT(p_vw->h_av); @@ -271,7 +271,7 @@ __osm_al_rcv_callback(IN void *mad_svc_context, IN ib_mad_element_t * p_elem) ib_mad_t *p_new_mad; osm_mad_addr_t mad_addr; - OSM_LOG_ENTER(p_vend->p_log, __osm_al_rcv_callback); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_elem->context1 == NULL); CL_ASSERT(p_elem->context2 == NULL); @@ -352,7 +352,7 @@ osm_vendor_init(IN osm_vendor_t * const 
p_vend, IN osm_log_t * const p_log, IN const uint32_t timeout) { ib_api_status_t status; - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); p_vend->p_log = p_log; @@ -383,7 +383,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, ib_api_status_t status; osm_vendor_t *p_vend; - OSM_LOG_ENTER(p_log, osm_vendor_new); + OSM_LOG_ENTER(p_log); p_vend = malloc(sizeof(*p_vend)); if (p_vend == NULL) { @@ -425,7 +425,7 @@ __osm_ca_info_init(IN osm_vendor_t * const p_vend, { ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, __osm_ca_info_init); + OSM_LOG_ENTER(p_vend->p_log); p_ca_info->guid = ca_guid; @@ -476,7 +476,7 @@ void osm_ca_info_destroy(IN osm_vendor_t * const p_vend, IN osm_ca_info_t * const p_ca_info) { - OSM_LOG_ENTER(p_vend->p_log, osm_ca_info_destroy); + OSM_LOG_ENTER(p_vend->p_log); if (p_ca_info->p_attr) free(p_ca_info->p_attr); @@ -494,7 +494,7 @@ osm_ca_info_t *osm_ca_info_new(IN osm_vendor_t * const p_vend, ib_api_status_t status; osm_ca_info_t *p_ca_info; - OSM_LOG_ENTER(p_vend->p_log, osm_ca_info_new); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(ca_guid); @@ -525,7 +525,7 @@ __osm_vendor_get_ca_guids(IN osm_vendor_t * const p_vend, { ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, __osm_vendor_get_ca_guids); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_guids); CL_ASSERT(p_num_guids); @@ -622,7 +622,7 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, ib_net64_t *p_ca_guid = NULL; osm_ca_info_t *p_ca_info; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); CL_ASSERT(p_vend->p_ca_info == NULL); @@ -708,7 +708,7 @@ osm_vendor_get_ca_guid(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca_info; uint32_t ca; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_ca_guid); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(port_guid); /* @@ -763,7 +763,7 @@ osm_vendor_get_port_num(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca_info; uint32_t ca; - 
OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_port_num); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(port_guid); /* @@ -816,7 +816,7 @@ __osm_vendor_open_ca(IN osm_vendor_t * const p_vend, ib_net64_t ca_guid; ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, __osm_vendor_open_ca); + OSM_LOG_ENTER(p_vend->p_log); ca_guid = osm_vendor_get_ca_guid(p_vend, port_guid); if (ca_guid == 0) { @@ -891,7 +891,7 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, ib_mad_svc_t mad_svc; ib_av_attr_t av; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_bind); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_user_bind); CL_ASSERT(p_mad_pool); @@ -1038,7 +1038,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, osm_vendor_t *p_vend = p_bind->p_vend; ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); @@ -1083,7 +1083,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) osm_vendor_t *p_vend = p_bind->p_vend; ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_put); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); CL_ASSERT(p_vw->p_elem); @@ -1120,7 +1120,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, ib_mad_element_t *p_elem; ib_av_attr_t av; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_send); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw->h_bind == h_bind); CL_ASSERT(p_vw->p_elem); @@ -1255,7 +1255,7 @@ ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) ib_av_attr_t av; ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_local_lid_change); + OSM_LOG_ENTER(p_vend->p_log); /* The only thing we need to do is refresh the directed @@ -1297,7 +1297,7 @@ void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) ib_api_status_t status; ib_port_attr_mod_t attr_mod; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); memset(&attr_mod, 0, sizeof(attr_mod)); diff --git 
a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 38c6628..061e5b8 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -101,7 +101,7 @@ static void clear_madw(osm_vendor_t * p_vend) umad_match_t *m, *e, *old_m; ib_net64_t old_tid; - OSM_LOG_ENTER(p_vend->p_log, clear_madw); + OSM_LOG_ENTER(p_vend->p_log); pthread_mutex_lock(&p_vend->match_tbl_mutex); for (m = p_vend->mtbl.tbl, e = m + p_vend->mtbl.max; m < e; m++) { if (m->tid) { @@ -250,7 +250,7 @@ static void *umad_receiver(void *p_ptr) void *umad = 0; int mad_agent, length; - OSM_LOG_ENTER(p_ur->p_log, umad_receiver); + OSM_LOG_ENTER(p_ur->p_log); for (;;) { if (!umad && @@ -451,7 +451,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, { int r, n_cas; - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); p_vend->p_log = p_log; p_vend->timeout = timeout; @@ -492,7 +492,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, { osm_vendor_t *p_vend = NULL; - OSM_LOG_ENTER(p_log, osm_vendor_new); + OSM_LOG_ENTER(p_log); if (!timeout) { osm_log(p_log, OSM_LOG_ERROR, @@ -549,7 +549,7 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, unsigned done = 0; int r, i, j; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend && p_num_ports); @@ -614,7 +614,7 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, CL_ASSERT(p_vend); - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_open_port); + OSM_LOG_ENTER(p_vend->p_log); if (p_vend->umad_port_id >= 0) { umad_port_id = p_vend->umad_port_id; @@ -770,7 +770,7 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, int umad_port_id; uint8_t rmpp_version; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_bind); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_user_bind); CL_ASSERT(p_mad_pool); @@ -940,7 +940,7 @@ void osm_vendor_unbind(IN osm_bind_handle_t h_bind) osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind; 
osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_unbind); + OSM_LOG_ENTER(p_vend->p_log); pthread_mutex_lock(&p_vend->cb_mutex); p_bind->mad_recv_callback = __osm_vendor_recv_dummy_cb; @@ -959,7 +959,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind; osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "osm_vendor_get: " "Acquiring UMAD for p_madw = %p, size = %u\n", p_vw, mad_size); @@ -986,7 +986,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) osm_vendor_t *p_vend = p_bind->p_vend; osm_madw_t *p_madw; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_put); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); @@ -1026,7 +1026,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, uint32_t paylen = 0; #endif - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_send); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw->h_bind == h_bind); CL_ASSERT(p_mad == umad_get_mad(p_vw->umad)); @@ -1124,7 +1124,7 @@ ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind; osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_local_lid_change); + OSM_LOG_ENTER(p_vend->p_log); ; OSM_LOG_EXIT(p_vend->p_log); return (0); @@ -1137,7 +1137,7 @@ void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) osm_umad_bind_info_t *p_bind = (osm_umad_bind_info_t *) h_bind; osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); if (TRUE == is_sm_val) { p_vend->issmfd = open(p_vend->issm_path, O_NONBLOCK); if (p_vend->issmfd < 0) { diff --git a/opensm/libvendor/osm_vendor_ibumad_sa.c b/opensm/libvendor/osm_vendor_ibumad_sa.c index 24f70bb..837d782 100644 --- a/opensm/libvendor/osm_vendor_ibumad_sa.c 
+++ b/opensm/libvendor/osm_vendor_ibumad_sa.c @@ -81,7 +81,7 @@ __osmv_sa_mad_rcv_cb(IN osm_madw_t * p_madw, ib_sa_mad_t *p_sa_mad; ib_net16_t mad_status; - OSM_LOG_ENTER(p_bind->p_log, __osmv_sa_mad_rcv_cb); + OSM_LOG_ENTER(p_bind->p_log); if (!p_req_madw) { osm_log(p_bind->p_log, OSM_LOG_DEBUG, @@ -191,7 +191,7 @@ static void __osmv_sa_mad_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) osmv_query_req_t *p_query_req_copy = NULL; osmv_query_res_t query_res; - OSM_LOG_ENTER(p_bind->p_log, __osmv_sa_mad_err_cb); + OSM_LOG_ENTER(p_bind->p_log); /* Obtain the sent context etc */ p_query_req_copy = @@ -244,7 +244,7 @@ osmv_bind_sa(IN osm_vendor_t * const p_vend, osmv_sa_bind_info_t *p_sa_bind_info; cl_status_t cl_status; - OSM_LOG_ENTER(p_log, osmv_bind_sa); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "osmv_bind_sa: " @@ -367,7 +367,7 @@ __osmv_send_sa_req(IN osmv_sa_bind_info_t * p_bind, boolean_t sync; osmv_query_req_t *p_query_req_copy; - OSM_LOG_ENTER(p_log, __osmv_send_sa_req); + OSM_LOG_ENTER(p_log); /* since the sm_lid might change we obtain it every send @@ -502,7 +502,7 @@ osmv_query_sa(IN osm_bind_handle_t h_bind, osm_log_t *p_log = p_bind->p_log; ib_api_status_t status; - OSM_LOG_ENTER(p_log, osmv_query_sa); + OSM_LOG_ENTER(p_log); /* Set the request information. 
*/ sa_mad_data.method = IB_MAD_METHOD_GETTABLE; diff --git a/opensm/libvendor/osm_vendor_mlx.c b/opensm/libvendor/osm_vendor_mlx.c index 0b49689..683f56d 100644 --- a/opensm/libvendor/osm_vendor_mlx.c +++ b/opensm/libvendor/osm_vendor_mlx.c @@ -69,7 +69,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, ib_api_status_t status; osm_vendor_t *p_vend; - OSM_LOG_ENTER(p_log, osm_vendor_new); + OSM_LOG_ENTER(p_log); CL_ASSERT(p_log); @@ -104,7 +104,7 @@ void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) osm_bind_handle_t bind_h; osm_log_t *p_log; - OSM_LOG_ENTER((*pp_vend)->p_log, osm_vendor_delete); + OSM_LOG_ENTER((*pp_vend)->p_log); p_log = (*pp_vend)->p_log; /* go over the bind handles , unbind them and remove from list */ @@ -151,7 +151,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); p_vend->p_transport_info = NULL; p_vend->p_log = p_log; @@ -324,7 +324,7 @@ void osm_vendor_unbind(IN osm_bind_handle_t h_bind) cl_qlist_t *const p_bh_list = (cl_qlist_t * const)&p_bo->p_vendor->bind_handles; - OSM_LOG_ENTER(p_log, osm_vendor_unbind); + OSM_LOG_ENTER(p_log); /* go over all the items in the list and remove the specific item */ p_item = cl_qlist_head(p_bh_list); @@ -365,7 +365,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, osm_vendor_t const *p_vend = p_bo->p_vendor; uint32_t act_mad_size; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); @@ -422,7 +422,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, ib_mad_t *p_mad; osm_log_t *p_log = p_bo->p_vendor->p_log; osm_mad_pool_t *p_mad_pool = p_bo->p_osm_pool; - OSM_LOG_ENTER(p_log, osm_vendor_send); + OSM_LOG_ENTER(p_log); if (NULL == h_bind || NULL == p_madw || NULL == (p_mad = osm_madw_get_mad_ptr(p_madw)) || @@ -534,7 +534,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) osm_vendor_t const *p_vend = 
p_bo->p_vendor; if (p_bo->is_closing != TRUE) { - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_put); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); CL_ASSERT(p_vw->p_mad); @@ -563,7 +563,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) { osm_vendor_t const *p_vend = ((osmv_bind_obj_t *) h_bind)->p_vendor; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_local_lid_change); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "osm_vendor_local_lid_change: " "Change of LID.\n"); @@ -589,7 +589,7 @@ void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) VAPI_hca_attr_t attr_mod; VAPI_hca_attr_mask_t attr_mask; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); memset(&attr_mod, 0, sizeof(attr_mod)); memset(&attr_mask, 0, sizeof(attr_mask)); @@ -625,7 +625,7 @@ static void __osm_vendor_internal_unbind(osm_bind_handle_t h_bind) osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; osm_log_t *p_log = p_bo->p_vendor->p_log; - OSM_LOG_ENTER(p_log, __osm_vendor_internal_unbind); + OSM_LOG_ENTER(p_log); /* "notifying" all that from now on no new sends can be done */ p_bo->txn_mgr.p_event_wheel->closing = TRUE; @@ -687,7 +687,7 @@ __osmv_get_send_txn(IN osm_bind_handle_t h_bind, osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_get_send_txn); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); CL_ASSERT(NULL != pp_txn); key = tid = cl_ntoh64(p_mad->trans_id); diff --git a/opensm/libvendor/osm_vendor_mlx_anafa.c b/opensm/libvendor/osm_vendor_mlx_anafa.c index 447e488..bb04530 100644 --- a/opensm/libvendor/osm_vendor_mlx_anafa.c +++ b/opensm/libvendor/osm_vendor_mlx_anafa.c @@ -76,7 +76,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, ib_api_status_t status; osm_vendor_t *p_vend; - OSM_LOG_ENTER(p_log, osm_vendor_new); + 
OSM_LOG_ENTER(p_log); CL_ASSERT(p_log); @@ -110,7 +110,7 @@ void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) osm_bind_handle_t bind_h; osm_log_t *p_log; - OSM_LOG_ENTER((*pp_vend)->p_log, osm_vendor_delete); + OSM_LOG_ENTER((*pp_vend)->p_log); p_log = (*pp_vend)->p_log; /* go over the bind handles , unbind them and remove from list */ @@ -165,7 +165,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, char device_file[16]; int device_fd; - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); p_vend->p_log = p_log; p_vend->resp_timeout = timeout; @@ -341,7 +341,7 @@ void osm_vendor_unbind(IN osm_bind_handle_t h_bind) cl_qlist_t *const p_bh_list = (cl_qlist_t * const)&p_bo->p_vendor->bind_handles; - OSM_LOG_ENTER(p_log, osm_vendor_unbind); + OSM_LOG_ENTER(p_log); /* go over all the items in the list and remove the specific item */ p_item = cl_qlist_head(&p_bo->p_vendor->bind_handles); @@ -379,7 +379,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, osm_vendor_t const *p_vend = p_bo->p_vendor; uint32_t act_mad_size; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); @@ -435,7 +435,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, osmv_txn_ctx_t *p_txn = NULL; ib_mad_t *p_mad; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, osm_vendor_send); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); if (NULL == h_bind || NULL == p_madw || NULL == (p_mad = osm_madw_get_mad_ptr(p_madw)) || @@ -539,7 +539,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; osm_vendor_t const *p_vend = p_bo->p_vendor; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_put); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); CL_ASSERT(p_vw->p_mad); @@ -566,7 +566,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) { osm_vendor_t const *p_vend = ((osmv_bind_obj_t *) 
h_bind)->p_vendor; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_local_lid_change); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "osm_vendor_local_lid_change: " "Change of LID.\n"); @@ -591,7 +591,7 @@ void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) int ioctl_ret; osm_ts_set_port_info_ioctl port_info; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); port_info.port = 0; /* anafa has only 1 port */ port_info.port_info.valid_fields = IB_PORT_IS_SM; @@ -623,7 +623,7 @@ static void __osm_vendor_internal_unbind(osm_bind_handle_t h_bind) osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; osm_log_t *p_log = p_bo->p_vendor->p_log; - OSM_LOG_ENTER(p_log, __osm_vendor_internal_unbind); + OSM_LOG_ENTER(p_log); /* "notifying" all that from now on no new sends can be done */ p_bo->txn_mgr.p_event_wheel->closing = TRUE; @@ -678,7 +678,7 @@ __osmv_get_send_txn(IN osm_bind_handle_t h_bind, osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_get_send_txn); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); CL_ASSERT(NULL != pp_txn); key = tid = cl_ntoh64(p_mad->trans_id); diff --git a/opensm/libvendor/osm_vendor_mlx_dispatcher.c b/opensm/libvendor/osm_vendor_mlx_dispatcher.c index 3956234..d476382 100644 --- a/opensm/libvendor/osm_vendor_mlx_dispatcher.c +++ b/opensm/libvendor/osm_vendor_mlx_dispatcher.c @@ -123,7 +123,7 @@ osmv_dispatch_mad(IN osm_bind_handle_t h_bind, osmv_txn_ctx_t *p_txn = NULL; osm_log_t *p_log = p_bo->p_vendor->p_log; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, osmv_dispatch_mad); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); CL_ASSERT(NULL != h_bind && NULL != p_mad && NULL != p_mad_addr); @@ -243,7 +243,7 @@ __osmv_dispatch_simple_mad(IN osm_bind_handle_t h_bind, osm_madw_t *p_req_madw = NULL; osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, 
__osmv_dispatch_simple_mad); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); /* Build the MAD wrapper to be returned to the user. * The actual storage for the MAD is allocated there. @@ -306,7 +306,7 @@ __osmv_dispatch_rmpp_mad(IN osm_bind_handle_t h_bind, osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; osm_madw_t *p_madw; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_dispatch_rmpp_mad); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); if (NULL == p_txn) { if (FALSE == osmv_rmpp_is_data(p_mad) @@ -397,7 +397,7 @@ __osmv_dispatch_rmpp_snd(IN osm_bind_handle_t h_bind, uint32_t new_wl = cl_ntoh32(((ib_rmpp_mad_t *) p_mad)->paylen_newwin); osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_dispatch_rmpp_snd); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); if (TRUE == osmv_rmpp_is_abort_stop(p_mad)) { @@ -496,7 +496,7 @@ __osmv_dispatch_rmpp_rcv(IN osm_bind_handle_t h_bind, uint64_t key = osmv_txn_get_key(p_txn); uint64_t tid = osmv_txn_get_tid(p_txn); - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_dispatch_rmpp_rcv); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); if (TRUE == osmv_rmpp_is_ack(p_mad)) { osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, @@ -653,7 +653,7 @@ __osmv_dispatch_accept_seg(IN osm_bind_handle_t h_bind, osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; uint64_t tid = osmv_txn_get_tid(p_txn); - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_dispatch_accept_seg); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); if (seg_num != p_recv_ctx->expected_seg) { osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, diff --git a/opensm/libvendor/osm_vendor_mlx_hca.c b/opensm/libvendor/osm_vendor_mlx_hca.c index d1cf762..e98e272 100644 --- a/opensm/libvendor/osm_vendor_mlx_hca.c +++ b/opensm/libvendor/osm_vendor_mlx_hca.c @@ -94,7 +94,7 @@ __osm_vendor_get_ca_ids(IN osm_vendor_t * const p_vend, ib_api_status_t status; VAPI_ret_t vapi_res; - OSM_LOG_ENTER(p_vend->p_log, __osm_vendor_get_ca_ids); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_hca_ids); 
CL_ASSERT(p_num_guids); @@ -177,7 +177,7 @@ __osm_ca_info_init(IN osm_vendor_t * const p_vend, IB_gid_t *p_port_gid; uint16_t maxNumGids; - OSM_LOG_ENTER(p_vend->p_log, __osm_ca_info_init); + OSM_LOG_ENTER(p_vend->p_log); /* get the HCA handle */ vapi_res = EVAPI_get_hca_hndl(ca_id, &hca_hndl); @@ -271,7 +271,7 @@ osm_ca_info_destroy(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca; uint8_t i; - OSM_LOG_ENTER(p_vend->p_log, osm_ca_info_destroy); + OSM_LOG_ENTER(p_vend->p_log); for (i = 0; i < num_ca; i++) { p_ca = &p_ca_info[i]; @@ -312,7 +312,7 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca_infos = NULL; uint32_t attr_array_sz = *p_num_ports; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); @@ -418,7 +418,7 @@ osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, ib_net64_t port_guid; uint32_t ca, portIdx, ca_count; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_guid_ca_and_port); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); diff --git a/opensm/libvendor/osm_vendor_mlx_hca_anafa.c b/opensm/libvendor/osm_vendor_mlx_hca_anafa.c index 3a0b404..81506e4 100644 --- a/opensm/libvendor/osm_vendor_mlx_hca_anafa.c +++ b/opensm/libvendor/osm_vendor_mlx_hca_anafa.c @@ -89,7 +89,7 @@ __osm_ca_info_init(IN osm_vendor_t * const p_vend, osm_ts_get_port_info_ioctl port_info; struct ib_get_dev_info_ioctl dev_info; - OSM_LOG_ENTER(p_vend->p_log, __osm_ca_info_init); + OSM_LOG_ENTER(p_vend->p_log); /* query HCA guid */ ioctl_ret = ioctl(p_tpot_info->device_fd, TS_IB_IOCGDEVINFO, &dev_info); @@ -159,7 +159,7 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, osm_ca_info_t ca_info; uint32_t attr_array_sz = *p_num_ports; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); /* anafa has one port - the user didnt supply enough storage space */ diff --git a/opensm/libvendor/osm_vendor_mlx_hca_pfs.c 
b/opensm/libvendor/osm_vendor_mlx_hca_pfs.c index de86496..512b7bf 100644 --- a/opensm/libvendor/osm_vendor_mlx_hca_pfs.c +++ b/opensm/libvendor/osm_vendor_mlx_hca_pfs.c @@ -137,7 +137,7 @@ __parse_ca_info_file(IN osm_vendor_t * const p_vend, int num_ports; uint32_t len; - OSM_LOG_ENTER(p_vend->p_log, __parse_ca_info_file); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "__parse_ca_info_file: " "Querying CA %d.\n", idx); @@ -268,7 +268,7 @@ __parse_port_info_file(IN osm_vendor_t * const p_vend, int lid, sm_lid, lmc, sm_sl; uint32_t len; - OSM_LOG_ENTER(p_vend->p_log, __parse_port_info_file); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "__parse_port_info_file: " @@ -419,7 +419,7 @@ __get_port_guid_from_port_gid_tbl(IN osm_vendor_t * const p_vend, int g[8]; uint32_t len; - OSM_LOG_ENTER(p_vend->p_log, __get_port_guid_from_port_gid_tbl); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "__get_port_guid_from_port_gid_tbl: " @@ -486,7 +486,7 @@ __osm_ca_info_init(IN osm_vendor_t * const p_vend, pfs_ca_info_t pfs_ca_info; - OSM_LOG_ENTER(p_vend->p_log, __osm_ca_info_init); + OSM_LOG_ENTER(p_vend->p_log); /* parse the CA info file */ if (__parse_ca_info_file(p_vend, idx, &pfs_ca_info) != IB_SUCCESS) @@ -556,7 +556,7 @@ osm_ca_info_destroy(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca; uint8_t i; - OSM_LOG_ENTER(p_vend->p_log, osm_ca_info_destroy); + OSM_LOG_ENTER(p_vend->p_log); for (i = 0; i < num_ca; i++) { p_ca = &p_ca_info[i]; @@ -594,7 +594,7 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca_infos = NULL; uint32_t attr_array_sz = *p_num_ports; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); @@ -691,7 +691,7 @@ osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, uint8_t port_num; ib_api_status_t status = IB_ERROR; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_guid_ca_and_port); + 
OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); diff --git a/opensm/libvendor/osm_vendor_mlx_hca_sim.c b/opensm/libvendor/osm_vendor_mlx_hca_sim.c index 975c94f..b6c0193 100644 --- a/opensm/libvendor/osm_vendor_mlx_hca_sim.c +++ b/opensm/libvendor/osm_vendor_mlx_hca_sim.c @@ -183,7 +183,7 @@ __parse_ca_info_file(IN osm_vendor_t * const p_vend, int num_ports; uint32_t len; - OSM_LOG_ENTER(p_vend->p_log, __parse_ca_info_file); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "__parse_ca_info_file: " "Querying CA %d.\n", idx); @@ -314,7 +314,7 @@ __parse_port_info_file(IN osm_vendor_t * const p_vend, int lid, sm_lid, lmc, sm_sl; uint32_t len; - OSM_LOG_ENTER(p_vend->p_log, __parse_port_info_file); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "__parse_port_info_file: " @@ -465,7 +465,7 @@ __get_port_guid_from_port_gid_tbl(IN osm_vendor_t * const p_vend, int g[8]; uint32_t len; - OSM_LOG_ENTER(p_vend->p_log, __get_port_guid_from_port_gid_tbl); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "__get_port_guid_from_port_gid_tbl: " @@ -532,7 +532,7 @@ __osm_ca_info_init(IN osm_vendor_t * const p_vend, sim_ca_info_t sim_ca_info; - OSM_LOG_ENTER(p_vend->p_log, __osm_ca_info_init); + OSM_LOG_ENTER(p_vend->p_log); /* parse the CA info file */ if (__parse_ca_info_file(p_vend, idx, &sim_ca_info) != IB_SUCCESS) @@ -602,7 +602,7 @@ osm_ca_info_destroy(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca; uint8_t i; - OSM_LOG_ENTER(p_vend->p_log, osm_ca_info_destroy); + OSM_LOG_ENTER(p_vend->p_log); for (i = 0; i < num_ca; i++) { p_ca = &p_ca_info[i]; @@ -640,7 +640,7 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, osm_ca_info_t *p_ca_infos = NULL; uint32_t attr_array_sz = *p_num_ports; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); @@ -737,7 +737,7 @@ osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, uint8_t port_num; 
ib_api_status_t status = IB_ERROR; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_guid_ca_and_port); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); @@ -809,7 +809,7 @@ osm_vendor_get_guid_by_ca_and_port(IN osm_vendor_t * const p_vend, uint32_t ca_count = 0; ib_api_status_t status = IB_ERROR; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_guid_by_ca_and_port); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); diff --git a/opensm/libvendor/osm_vendor_mlx_ibmgt.c b/opensm/libvendor/osm_vendor_mlx_ibmgt.c index b3d72f7..3bdc08a 100644 --- a/opensm/libvendor/osm_vendor_mlx_ibmgt.c +++ b/opensm/libvendor/osm_vendor_mlx_ibmgt.c @@ -375,7 +375,7 @@ osmv_transport_mad_send(IN const osm_bind_handle_t h_bind, IB_MGT_ret_t ret; ib_mad_t *p_mad = p_ib_mad; - OSM_LOG_ENTER(p_vend->p_log, osmv_transport_mad_send); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_bo->p_vendor->p_transport_info); @@ -482,7 +482,7 @@ void osmv_transport_done(IN const osm_bind_handle_t h_bind) int i; cl_qlist_t *p_list = NULL; - OSM_LOG_ENTER(p_log, osmv_transport_done); + OSM_LOG_ENTER(p_log); CL_ASSERT(p_bo); diff --git a/opensm/libvendor/osm_vendor_mlx_rmpp_ctx.c b/opensm/libvendor/osm_vendor_mlx_rmpp_ctx.c index af1fd23..bbd42c3 100644 --- a/opensm/libvendor/osm_vendor_mlx_rmpp_ctx.c +++ b/opensm/libvendor/osm_vendor_mlx_rmpp_ctx.c @@ -120,7 +120,7 @@ osmv_rmpp_send_ctx_get_seg(IN osmv_rmpp_send_ctx_t * p_send_ctx, uint32_t num_segs, paylen = 0; ib_rmpp_mad_t *p_rmpp_mad; - OSM_LOG_ENTER(p_send_ctx->p_log, osmv_rmpp_send_ctx_get_seg); + OSM_LOG_ENTER(p_send_ctx->p_log); CL_ASSERT(p_send_ctx); st = osmv_rmpp_sar_get_mad_seg(&p_send_ctx->sar, seg_idx, p_buf); @@ -236,7 +236,7 @@ osmv_rmpp_recv_ctx_store_mad_seg(IN osmv_rmpp_recv_ctx_t * p_recv_ctx, cl_list_obj_t *p_obj = NULL; void *p_list_mad; - OSM_LOG_ENTER(p_recv_ctx->p_log, osmv_rmpp_recv_ctx_store_mad_seg); + OSM_LOG_ENTER(p_recv_ctx->p_log); CL_ASSERT(p_recv_ctx); p_list_mad = malloc(MAD_BLOCK_SIZE); diff --git 
a/opensm/libvendor/osm_vendor_mlx_sa.c b/opensm/libvendor/osm_vendor_mlx_sa.c index aeb8542..efd04bd 100644 --- a/opensm/libvendor/osm_vendor_mlx_sa.c +++ b/opensm/libvendor/osm_vendor_mlx_sa.c @@ -83,7 +83,7 @@ __osmv_sa_mad_rcv_cb(IN osm_madw_t * p_madw, ib_sa_mad_t *p_sa_mad; ib_net16_t mad_status; - OSM_LOG_ENTER(p_bind->p_log, __osmv_sa_mad_rcv_cb); + OSM_LOG_ENTER(p_bind->p_log); if (!p_req_madw) { osm_log(p_bind->p_log, OSM_LOG_DEBUG, @@ -189,7 +189,7 @@ void __osmv_sa_mad_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) osmv_query_req_t *p_query_req_copy = NULL; osmv_query_res_t query_res; - OSM_LOG_ENTER(p_bind->p_log, __osmv_sa_mad_err_cb); + OSM_LOG_ENTER(p_bind->p_log); /* Obtain the sent context etc */ p_query_req_copy = @@ -237,7 +237,7 @@ __osmv_get_lid_and_sm_lid_by_port_guid(IN osm_vendor_t * const p_vend, uint32_t num_ports; uint32_t port_num; - OSM_LOG_ENTER(p_vend->p_log, __osmv_get_lid_and_sm_lid_by_port_guid); + OSM_LOG_ENTER(p_vend->p_log); /* use prevous values if current time is close enough to previous query */ if (cl_get_time_stamp_sec() <= *p_lids_update_time_sec + 30) { @@ -319,7 +319,7 @@ osmv_bind_sa(IN osm_vendor_t * const p_vend, osmv_sa_bind_info_t *p_sa_bind_info; cl_status_t cl_status; - OSM_LOG_ENTER(p_log, osmv_bind_sa); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "osmv_bind_sa: " @@ -457,7 +457,7 @@ __osmv_send_sa_req(IN osmv_sa_bind_info_t * p_bind, boolean_t sync; osmv_query_req_t *p_query_req_copy; - OSM_LOG_ENTER(p_log, __osmv_send_sa_req); + OSM_LOG_ENTER(p_log); /* since the sm_lid might change we obtain it every send @@ -590,7 +590,7 @@ osmv_query_sa(IN osm_bind_handle_t h_bind, osm_log_t *p_log = p_bind->p_log; ib_api_status_t status; - OSM_LOG_ENTER(p_log, osmv_query_sa); + OSM_LOG_ENTER(p_log); /* Set the request information. 
*/ sa_mad_data.method = IB_MAD_METHOD_GETTABLE; diff --git a/opensm/libvendor/osm_vendor_mlx_sender.c b/opensm/libvendor/osm_vendor_mlx_sender.c index 6e3fc3c..a0bdef8 100644 --- a/opensm/libvendor/osm_vendor_mlx_sender.c +++ b/opensm/libvendor/osm_vendor_mlx_sender.c @@ -71,7 +71,7 @@ osmv_simple_send_madw(IN osm_bind_handle_t h_bind, ib_mad_t *p_mad = (ib_mad_t *) mad_buf; uint64_t key = 0; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, osmv_simple_send_madw); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); CL_ASSERT(p_madw->mad_size <= MAD_BLOCK_SIZE); @@ -144,7 +144,7 @@ osmv_rmpp_send_madw(IN osm_bind_handle_t h_bind, osmv_rmpp_send_ctx_t *p_send_ctx = osmv_txn_get_rmpp_send_ctx(p_txn); osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, osmv_rmpp_send_madw); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); total_segs = osmv_rmpp_send_ctx_get_num_segs(p_send_ctx); CL_ASSERT(total_segs >= 1); @@ -317,7 +317,7 @@ __osmv_rmpp_send_segment(IN osm_bind_handle_t h_bind, uint32_t timeout = p_bo->p_vendor->resp_timeout; uint64_t key; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_rmpp_send_segment); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); #ifdef OSMV_RANDOM_DROP if (TRUE == osmv_random_drop()) { diff --git a/opensm/libvendor/osm_vendor_mlx_sim.c b/opensm/libvendor/osm_vendor_mlx_sim.c index c700759..3ec4ac4 100644 --- a/opensm/libvendor/osm_vendor_mlx_sim.c +++ b/opensm/libvendor/osm_vendor_mlx_sim.c @@ -92,8 +92,7 @@ void __osmv_ibms_receiver_callback(void *p_ctx, ibms_mad_msg_t * p_mad) return; { - OSM_LOG_ENTER(p_bo->p_vendor->p_log, - __osmv_ibms_receiver_callback); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); /* some logging */ osm_log(p_bo->p_vendor->p_log, OSM_LOG_DEBUG, @@ -253,7 +252,7 @@ osmv_transport_mad_send(IN const osm_bind_handle_t h_bind, const ib_mad_t *p_mad_hdr = p_mad; - OSM_LOG_ENTER(p_vend->p_log, osmv_transport_mad_send); + OSM_LOG_ENTER(p_vend->p_log); memset(&mad_msg, 0, sizeof(mad_msg)); @@ -418,7 +417,7 @@ void osm_vendor_set_sm(IN 
osm_bind_handle_t h_bind, IN boolean_t is_sm_val) int ret; ibms_cap_msg_t cap_msg; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); cap_msg.mask = IB_PORT_CAP_IS_SM; if (is_sm_val) diff --git a/opensm/libvendor/osm_vendor_mlx_ts.c b/opensm/libvendor/osm_vendor_mlx_ts.c index f5ca136..e9c50e3 100644 --- a/opensm/libvendor/osm_vendor_mlx_ts.c +++ b/opensm/libvendor/osm_vendor_mlx_ts.c @@ -84,7 +84,7 @@ void __osmv_TOPSPIN_receiver_thr(void *p_ctx) osmv_bind_obj_t *const p_bo = (osmv_bind_obj_t *) p_ctx; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_TOPSPIN_receiver_thr); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); /* Make sure the p_bo object is still relevant */ if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing) @@ -285,7 +285,7 @@ osmv_transport_mad_send(IN const osm_bind_handle_t h_bind, const ib_mad_t *p_mad_hdr = p_mad; - OSM_LOG_ENTER(p_vend->p_log, osmv_transport_mad_send); + OSM_LOG_ENTER(p_vend->p_log); memset(&ts_mad, 0, sizeof(ts_mad)); @@ -484,7 +484,7 @@ void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) ((osmv_TOPSPIN_transport_mgr_t *) (p_bo->p_transp_mgr))->device_fd; struct ib_set_port_info_ioctl set_port_data; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); memset(&set_port_data, 0, sizeof(set_port_data)); diff --git a/opensm/libvendor/osm_vendor_mlx_ts_anafa.c b/opensm/libvendor/osm_vendor_mlx_ts_anafa.c index 9cbe1b6..1acd706 100644 --- a/opensm/libvendor/osm_vendor_mlx_ts_anafa.c +++ b/opensm/libvendor/osm_vendor_mlx_ts_anafa.c @@ -80,7 +80,7 @@ void __osmv_TOPSPIN_ANAFA_receiver_thr(void *p_ctx) osmv_bind_obj_t *const p_bo = (osmv_bind_obj_t *) p_ctx; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_TOPSPIN_ANAFA_receiver_thr); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); /* Make sure the p_bo object is still relevant */ if ((p_bo->magic_ptr != p_bo) || p_bo->is_closing) @@ -272,7 +272,7 @@ 
osmv_transport_mad_send(IN const osm_bind_handle_t h_bind, const ib_mad_t *p_mad_hdr = p_mad; - OSM_LOG_ENTER(p_vend->p_log, osmv_transport_mad_send); + OSM_LOG_ENTER(p_vend->p_log); /* Make sure the p_bo object is still relevant */ if (p_bo->magic_ptr != p_bo) diff --git a/opensm/libvendor/osm_vendor_mlx_txn.c b/opensm/libvendor/osm_vendor_mlx_txn.c index c158b9d..a2da75a 100644 --- a/opensm/libvendor/osm_vendor_mlx_txn.c +++ b/opensm/libvendor/osm_vendor_mlx_txn.c @@ -71,7 +71,7 @@ osmv_txn_init(IN osm_bind_handle_t h_bind, osmv_txn_ctx_t *p_txn; osmv_bind_obj_t *p_bo = (osmv_bind_obj_t *) h_bind; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, osmv_txn_init); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); CL_ASSERT(NULL != h_bind && NULL != pp_txn); @@ -227,7 +227,7 @@ osmv_txn_done(IN osm_bind_handle_t h_bind, osmv_txn_ctx_t *p_ctx; osmv_bind_obj_t *const p_bo = (osmv_bind_obj_t *) h_bind; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, osmv_txn_done); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); CL_ASSERT(h_bind); @@ -280,7 +280,7 @@ void osmv_txn_abort_rmpp_txns(osm_bind_handle_t h_bind) osmv_rmpp_send_ctx_t *p_send_ctx; cl_qmap_t *p_map = p_bo->txn_mgr.p_txn_map; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, osmv_txn_abort_rmpp_txns); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); while (FALSE == cl_is_qmap_empty(p_map)) { @@ -364,7 +364,7 @@ __osmv_txnmgr_lookup(IN osmv_txn_mgr_t * p_tx_mgr, uint64_t tmp_key; - OSM_LOG_ENTER(p_tx_mgr->p_log, __osmv_txnmgr_lookup); + OSM_LOG_ENTER(p_tx_mgr->p_log); CL_ASSERT(p_tx_mgr); CL_ASSERT(pp_txn); @@ -441,7 +441,7 @@ __osmv_txnmgr_remove_txn(IN osmv_txn_mgr_t * p_tx_mgr, cl_map_obj_t *p_obj; cl_map_item_t *p_item; - OSM_LOG_ENTER(p_tx_mgr->p_log, __osmv_txnmgr_remove_txn); + OSM_LOG_ENTER(p_tx_mgr->p_log); CL_ASSERT(p_tx_mgr); CL_ASSERT(pp_txn); @@ -474,7 +474,7 @@ void __osmv_txn_all_done(osm_bind_handle_t h_bind) cl_map_obj_t *p_obj; osmv_txn_ctx_t *p_txn; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_txn_all_done); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); 
p_item = cl_qmap_head(p_bo->txn_mgr.p_txn_map); while (p_item != cl_qmap_end(p_bo->txn_mgr.p_txn_map)) { @@ -535,7 +535,7 @@ __osmv_txn_timeout_cb(IN uint64_t key, osm_mad_addr_t *p_mad_addr; boolean_t invoke_err_cb = FALSE; - OSM_LOG_ENTER(p_bo->p_vendor->p_log, __osmv_txn_timeout_cb); + OSM_LOG_ENTER(p_bo->p_vendor->p_log); /* Don't try to acquire a lock on the Bind Object - * it's taken by the mechanism that drives the timeout based events! diff --git a/opensm/libvendor/osm_vendor_mtl.c b/opensm/libvendor/osm_vendor_mtl.c index d8f6715..7f6c0cd 100644 --- a/opensm/libvendor/osm_vendor_mtl.c +++ b/opensm/libvendor/osm_vendor_mtl.c @@ -186,7 +186,7 @@ void __osm_vendor_clear_sm(IN osm_bind_handle_t h_bind) VAPI_hca_attr_t attr_mod; VAPI_hca_attr_mask_t attr_mask; - OSM_LOG_ENTER(p_vend->p_log, __osm_vendor_clear_sm); + OSM_LOG_ENTER(p_vend->p_log); memset(&attr_mod, 0, sizeof(attr_mod)); memset(&attr_mask, 0, sizeof(attr_mask)); @@ -222,7 +222,7 @@ void osm_vendor_destroy(IN osm_vendor_t * const p_vend) { osm_vendor_mgt_bind_t *vendor_mgt_bind_p; IB_MGT_ret_t mgt_ret; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_destroy); + OSM_LOG_ENTER(p_vend->p_log); if (p_vend->h_al != NULL) { vendor_mgt_bind_p = (osm_vendor_mgt_bind_t *) p_vend->h_al; @@ -312,7 +312,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, osm_vendor_mgt_bind_t *ib_mgt_hdl_p; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); p_vend->p_log = p_log; @@ -352,7 +352,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, ib_api_status_t status; osm_vendor_t *p_vend; - OSM_LOG_ENTER(p_log, osm_vendor_new); + OSM_LOG_ENTER(p_log); CL_ASSERT(p_log); @@ -392,7 +392,7 @@ __osm_mtl_rcv_callback(IN IB_MGT_mad_hndl_t mad_hndl, ib_mad_t *mad_buf_p; osm_log_t *const p_log = bind_info_p->p_vend->p_log; - OSM_LOG_ENTER(p_log, __osm_mtl_rcv_callback); + OSM_LOG_ENTER(p_log); /* if it is a response MAD we mustbe able to get the request */ if 
(ib_mad_is_response((ib_mad_t *) payload_p)) { @@ -525,7 +525,7 @@ __osm_mtl_send_callback(IN IB_MGT_mad_hndl_t mad_hndl, osm_vend_wrap_t *p_vw; uint8_t is_resp; - OSM_LOG_ENTER(p_log, __osm_mtl_send_callback); + OSM_LOG_ENTER(p_log); /* obtain the madp from the wrid */ __osm_set_p_madw_and_resp_by_wrid(wrid, &is_resp, &madw_p); @@ -609,7 +609,7 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, osm_vendor_mgt_bind_t *ib_mgt_hdl_p; IB_MGT_ret_t mgt_ret; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_bind); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_user_bind); CL_ASSERT(p_mad_pool); @@ -819,7 +819,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, osm_mtl_bind_info_t *p_bind = (osm_mtl_bind_info_t *) h_bind; osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); /* HACK: We know we can not send through IB_MGT */ @@ -865,7 +865,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) osm_vendor_t *p_vend = p_bind->p_vend; osm_madw_t *p_madw; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_put); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); CL_ASSERT(p_vw->mad_buf_p); @@ -909,7 +909,7 @@ osm_mtl_send_mad(IN osm_mtl_bind_info_t * p_bind, IN osm_madw_t * const p_madw) uint64_t wrid; uint32_t qpn; - OSM_LOG_ENTER(p_vend->p_log, osm_mtl_send_mad); + OSM_LOG_ENTER(p_vend->p_log); /* * For all sends other than directed route SM MADs, @@ -1004,7 +1004,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, osm_vend_wrap_t *const p_vw = osm_madw_get_vend_ptr(p_madw); ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_send); + OSM_LOG_ENTER(p_vend->p_log); /* * If a response is expected to this MAD, then preallocate @@ -1056,7 +1056,7 @@ ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) { osm_vendor_t *p_vend = ((osm_mtl_bind_info_t *) h_bind)->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_local_lid_change); + 
OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "osm_vendor_local_lid_change: DEBUG 2202: " "Change of LID.\n"); @@ -1076,7 +1076,7 @@ void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) VAPI_hca_attr_t attr_mod; VAPI_hca_attr_mask_t attr_mask; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); memset(&attr_mod, 0, sizeof(attr_mod)); memset(&attr_mask, 0, sizeof(attr_mask)); diff --git a/opensm/libvendor/osm_vendor_mtl_hca_guid.c b/opensm/libvendor/osm_vendor_mtl_hca_guid.c index 34e7ad0..d78af31 100644 --- a/opensm/libvendor/osm_vendor_mtl_hca_guid.c +++ b/opensm/libvendor/osm_vendor_mtl_hca_guid.c @@ -114,7 +114,7 @@ __osm_vendor_get_ca_ids(IN osm_vendor_t * const p_vend, ib_api_status_t status; VAPI_ret_t vapi_res; - OSM_LOG_ENTER(p_vend->p_log, __osm_vendor_get_ca_ids); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_hca_ids); CL_ASSERT(p_num_guids); @@ -197,7 +197,7 @@ __osm_ca_info_init(IN osm_vendor_t * const p_vend, IB_gid_t *p_port_gid; uint16_t maxNumGids; - OSM_LOG_ENTER(p_vend->p_log, __osm_ca_info_init); + OSM_LOG_ENTER(p_vend->p_log); /* get the HCA handle */ vapi_res = EVAPI_get_hca_hndl(ca_id, &hca_hndl); @@ -288,7 +288,7 @@ void osm_ca_info_destroy(IN osm_vendor_t * const p_vend, IN osm_ca_info_t * const p_ca_info) { - OSM_LOG_ENTER(p_vend->p_log, osm_ca_info_destroy); + OSM_LOG_ENTER(p_vend->p_log); if (p_ca_info->p_attr) { if (p_ca_info->p_attr->num_ports) { @@ -323,7 +323,7 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, VAPI_hca_id_t *p_ca_ids = NULL; osm_ca_info_t *p_ca_info; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); @@ -436,7 +436,7 @@ osm_vendor_get_guid_ca_and_port(IN osm_vendor_t * const p_vend, ib_net64_t port_guid; uint32_t ca, portIdx, ca_count; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_guid_ca_and_port); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); diff --git 
a/opensm/libvendor/osm_vendor_mtl_transaction_mgr.c b/opensm/libvendor/osm_vendor_mtl_transaction_mgr.c index 30d8bd0..6e8afb0 100644 --- a/opensm/libvendor/osm_vendor_mtl_transaction_mgr.c +++ b/opensm/libvendor/osm_vendor_mtl_transaction_mgr.c @@ -73,7 +73,7 @@ void __osm_transaction_mgr_callback(IN void *context) cl_list_t retry_madw_p_list; /* this list will include all the madw_p that were retried and need to be removed. */ osm_madw_t *madw_p; - OSM_LOG_ENTER(p_vend->p_log, __osm_transaction_mgr_callback); + OSM_LOG_ENTER(p_vend->p_log); trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; @@ -273,7 +273,7 @@ void osm_transaction_mgr_init(IN osm_vendor_t * const p_vend) { cl_status_t cl_status; osm_transaction_mgr_t *trans_mgr_p; - OSM_LOG_ENTER(p_vend->p_log, osm_transaction_mgr_init); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend->p_transaction_mgr == NULL); @@ -319,7 +319,7 @@ void osm_transaction_mgr_destroy(IN osm_vendor_t * const p_vend) cl_map_item_t *p_map_item; osm_madw_req_t *osm_madw_req_p; - OSM_LOG_ENTER(p_vend->p_log, osm_transaction_mgr_destroy); + OSM_LOG_ENTER(p_vend->p_log); trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; @@ -390,7 +390,7 @@ osm_transaction_mgr_insert_madw(IN osm_bind_handle_t * const p_bind, uint64_t key; const ib_mad_t *mad_p = p_madw->p_mad; - OSM_LOG_ENTER(p_vend->p_log, osm_transaction_mgr_insert_madw); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(mad_p); @@ -450,7 +450,7 @@ osm_transaction_mgr_erase_madw(IN osm_vendor_t * const p_vend, osm_madw_req_t *osm_madw_req_p; uint64_t key; cl_map_item_t *p_map_item; - OSM_LOG_ENTER(p_vend->p_log, osm_transaction_mgr_erase_madw); + OSM_LOG_ENTER(p_vend->p_log); trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; @@ -501,7 +501,7 @@ osm_transaction_mgr_get_madw_for_tid(IN osm_vendor_t * const p_vend, osm_madw_req_t *osm_madw_req_p; cl_map_item_t *p_map_item; uint64_t key; - OSM_LOG_ENTER(p_vend->p_log, 
osm_transaction_mgr_get_madw_for_tid); + OSM_LOG_ENTER(p_vend->p_log); trans_mgr_p = (osm_transaction_mgr_t *) p_vend->p_transaction_mgr; diff --git a/opensm/libvendor/osm_vendor_test.c b/opensm/libvendor/osm_vendor_test.c index 30a43f9..cda8cf7 100644 --- a/opensm/libvendor/osm_vendor_test.c +++ b/opensm/libvendor/osm_vendor_test.c @@ -91,7 +91,7 @@ ib_api_status_t osm_vendor_init(IN osm_vendor_t * const p_vend, IN osm_log_t * const p_log, IN const uint32_t timeout) { - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); CL_ASSERT(p_vend); CL_ASSERT(p_log); @@ -109,7 +109,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, { ib_api_status_t status; osm_vendor_t *p_vend; - OSM_LOG_ENTER(p_log, osm_vendor_new); + OSM_LOG_ENTER(p_log); CL_ASSERT(p_log); @@ -135,7 +135,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, { osm_vendor_t *p_vend; ib_mad_t *p_mad; - OSM_LOG_ENTER(h_bind->p_vend->p_log, osm_vendor_get); + OSM_LOG_ENTER(h_bind->p_vend->p_log); UNUSED_PARAM(p_vend_wrap); @@ -165,7 +165,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, { osm_vendor_t *p_vend; - OSM_LOG_ENTER(h_bind->p_vend->p_log, osm_vendor_put); + OSM_LOG_ENTER(h_bind->p_vend->p_log); UNUSED_PARAM(p_vend_wrap); @@ -193,7 +193,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, { osm_vendor_t *p_vend = h_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_send); + OSM_LOG_ENTER(p_vend->p_log); UNUSED_PARAM(p_vend_wrap); UNUSED_PARAM(p_mad_addr); @@ -218,7 +218,7 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, { osm_bind_handle_t h_bind; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_bind); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vend); CL_ASSERT(p_bind_info); @@ -256,7 +256,7 @@ osm_vendor_get_ports(IN osm_vendor_t * const p_vend, IN ib_net64_t * const p_guids, IN uint32_t * const num_guids) { - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_ports); + OSM_LOG_ENTER(p_vend->p_log); *p_guids = CL_NTOH64(0x0000000000001234); *num_guids = 1; @@ -271,7 +271,7 @@ 
ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) { osm_vendor_t *p_vend = h_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_local_lid_change); + OSM_LOG_ENTER(p_vend->p_log); OSM_LOG_EXIT(p_vend->p_log); diff --git a/opensm/libvendor/osm_vendor_ts.c b/opensm/libvendor/osm_vendor_ts.c index 365d609..6f20b82 100644 --- a/opensm/libvendor/osm_vendor_ts.c +++ b/opensm/libvendor/osm_vendor_ts.c @@ -148,7 +148,7 @@ void __osm_vendor_clear_sm(IN osm_bind_handle_t h_bind) VAPI_hca_attr_t attr_mod; VAPI_hca_attr_mask_t attr_mask; - OSM_LOG_ENTER(p_vend->p_log, __osm_vendor_clear_sm); + OSM_LOG_ENTER(p_vend->p_log); memset(&attr_mod, 0, sizeof(attr_mod)); memset(&attr_mask, 0, sizeof(attr_mask)); @@ -184,7 +184,7 @@ void osm_vendor_construct(IN osm_vendor_t * const p_vend) **********************************************************************/ void osm_vendor_destroy(IN osm_vendor_t * const p_vend) { - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_destroy); + OSM_LOG_ENTER(p_vend->p_log); osm_transaction_mgr_destroy(p_vend); /* Destroy the poller threads */ @@ -218,7 +218,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); p_vend->p_log = p_log; p_vend->p_transaction_mgr = NULL; @@ -242,7 +242,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, ib_api_status_t status; osm_vendor_t *p_vend; - OSM_LOG_ENTER(p_log, osm_vendor_new); + OSM_LOG_ENTER(p_log); CL_ASSERT(p_log); @@ -280,7 +280,7 @@ __osm_ts_rcv_callback(IN osm_ts_bind_info_t * p_bind, ib_mad_t *p_mad_buf; osm_log_t *const p_log = p_bind->p_vend->p_log; - OSM_LOG_ENTER(p_log, __osm_ts_rcv_callback); + OSM_LOG_ENTER(p_log); /* if it is a response MAD we mustbe able to get the request */ if (ib_mad_is_response((ib_mad_t *) p_mad)) { @@ -381,7 +381,7 @@ __osm_ts_send_callback(IN osm_ts_bind_info_t * bind_info_p, osm_log_t *const p_log = bind_info_p->p_vend->p_log; osm_vend_wrap_t 
*p_vw; - OSM_LOG_ENTER(p_log, __osm_ts_send_callback); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "__osm_ts_send_callback: INFO 1008: " @@ -452,7 +452,7 @@ void __osm_vendor_ts_poller(IN void *p_ptr) osm_mad_addr_t mad_addr; osm_ts_bind_info_t *const p_bind = (osm_ts_bind_info_t *) p_ptr; - OSM_LOG_ENTER(p_bind->p_vend->p_log, __osm_vendor_ts_poller); + OSM_LOG_ENTER(p_bind->p_vend->p_log); /* we set the type of cancelation for this thread */ pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); @@ -525,7 +525,7 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, int ts_ioctl_ret; int qpn; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_bind); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_mad_pool); @@ -650,7 +650,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, osm_ts_bind_info_t *p_bind = (osm_ts_bind_info_t *) h_bind; osm_vendor_t *p_vend = p_bind->p_vend; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); @@ -693,7 +693,7 @@ osm_vendor_put(IN osm_bind_handle_t h_bind, IN osm_vend_wrap_t * const p_vw) osm_vendor_t *p_vend = p_bind->p_vend; osm_madw_t *p_madw; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_put); + OSM_LOG_ENTER(p_vend->p_log); CL_ASSERT(p_vw); CL_ASSERT(p_vw->p_mad_buf); @@ -735,7 +735,7 @@ osm_ts_send_mad(IN osm_ts_bind_info_t * p_bind, IN osm_madw_t * const p_madw) int ret; ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, osm_ts_send_mad); + OSM_LOG_ENTER(p_vend->p_log); /* * Copy the MAD over to the sent mad @@ -799,7 +799,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, osm_vend_wrap_t *const p_vw = osm_madw_get_vend_ptr(p_madw); ib_api_status_t status; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_send); + OSM_LOG_ENTER(p_vend->p_log); /* * If a response is expected to this MAD, then preallocate @@ -855,7 +855,7 @@ ib_api_status_t osm_vendor_local_lid_change(IN osm_bind_handle_t h_bind) { osm_vendor_t *p_vend = ((osm_ts_bind_info_t *) h_bind)->p_vend; - OSM_LOG_ENTER(p_vend->p_log, 
osm_vendor_local_lid_change); + OSM_LOG_ENTER(p_vend->p_log); osm_log(p_vend->p_log, OSM_LOG_DEBUG, "osm_vendor_local_lid_change: DEBUG 2202: " "Change of LID.\n"); @@ -875,7 +875,7 @@ void osm_vendor_set_sm(IN osm_bind_handle_t h_bind, IN boolean_t is_sm_val) VAPI_hca_attr_t attr_mod; VAPI_hca_attr_mask_t attr_mask; - OSM_LOG_ENTER(p_vend->p_log, osm_vendor_set_sm); + OSM_LOG_ENTER(p_vend->p_log); memset(&attr_mod, 0, sizeof(attr_mod)); memset(&attr_mask, 0, sizeof(attr_mask)); diff --git a/opensm/libvendor/osm_vendor_umadt.c b/opensm/libvendor/osm_vendor_umadt.c index b68d6c1..7a11eef 100644 --- a/opensm/libvendor/osm_vendor_umadt.c +++ b/opensm/libvendor/osm_vendor_umadt.c @@ -137,7 +137,7 @@ osm_vendor_t *osm_vendor_new(IN osm_log_t * const p_log, ib_api_status_t status; umadt_obj_t *p_umadt_obj; - OSM_LOG_ENTER(p_log, osm_vendor_new); + OSM_LOG_ENTER(p_log); p_umadt_obj = malloc(sizeof(umadt_obj_t)); if (p_umadt_obj) { @@ -165,7 +165,7 @@ void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) uint32_t count, i; mad_bind_info_t *p_mad_bind_info; - OSM_LOG_ENTER(p_umadt_obj->p_log, osm_vendor_delete); + OSM_LOG_ENTER(p_umadt_obj->p_log); cl_spinlock_acquire(&p_umadt_obj->register_lock); p_mad_bind_info = @@ -202,7 +202,7 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, char *error; umadt_obj_t *p_umadt_obj = (umadt_obj_t *) p_vend; - OSM_LOG_ENTER(p_log, osm_vendor_init); + OSM_LOG_ENTER(p_log); p_umadt_obj->p_log = p_log; p_umadt_obj->timeout = timeout; @@ -265,7 +265,7 @@ osm_vendor_get_ports(IN osm_vendor_t * const p_vend, umadt_obj_t *p_umadt_obj = (umadt_obj_t *) p_vend; - OSM_LOG_ENTER(p_umadt_obj->p_log, osm_vendor_get_ports); + OSM_LOG_ENTER(p_umadt_obj->p_log); CL_ASSERT(p_guids); CL_ASSERT(p_num_guids); @@ -388,7 +388,7 @@ ib_mad_t *osm_vendor_get(IN osm_bind_handle_t h_bind, mad_bind_info_t *p_mad_bind_info = (mad_bind_info_t *) h_bind; umadt_obj_t *p_umadt_obj = p_mad_bind_info->p_umadt_obj; ib_mad_t *p_mad; - OSM_LOG_ENTER(p_umadt_obj->p_log, 
osm_vendor_get); + OSM_LOG_ENTER(p_umadt_obj->p_log); CL_ASSERT(h_bind); @@ -743,7 +743,7 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, RegisterClassStruct *p_umadt_reg_class; umadt_obj_t *p_umadt_obj; - OSM_LOG_ENTER(((umadt_obj_t *) p_vend)->p_log, osm_vendor_bind); + OSM_LOG_ENTER(((umadt_obj_t *) p_vend)->p_log); CL_ASSERT(p_vend); diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 187bc47..38608be 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -453,7 +453,7 @@ parse_ignore_guids_file(IN char *guids_file_name, IN osm_opensm_t * p_osm) ib_api_status_t status = IB_SUCCESS; unsigned int port_num; - OSM_LOG_ENTER(&p_osm->log, parse_ignore_guids_file); + OSM_LOG_ENTER(&p_osm->log); fh = fopen(guids_file_name, "r"); if (fh == NULL) { diff --git a/opensm/opensm/osm_db_files.c b/opensm/opensm/osm_db_files.c index cce26be..05e085f 100644 --- a/opensm/opensm/osm_db_files.c +++ b/opensm/opensm/osm_db_files.c @@ -164,7 +164,7 @@ int osm_db_init(IN osm_db_t * const p_db, IN osm_log_t * p_log) osm_db_imp_t *p_db_imp; struct stat dstat; - OSM_LOG_ENTER(p_log, osm_db_init); + OSM_LOG_ENTER(p_log); p_db_imp = (osm_db_imp_t *) malloc(sizeof(osm_db_imp_t)); CL_ASSERT(p_db_imp != NULL); @@ -213,7 +213,7 @@ osm_db_domain_t *osm_db_domain_init(IN osm_db_t * const p_db, osm_log_t *p_log = p_db->p_log; FILE *p_file; - OSM_LOG_ENTER(p_log, osm_db_domain_init); + OSM_LOG_ENTER(p_log); /* allocate a new domain object */ p_domain = (osm_db_domain_t *) malloc(sizeof(osm_db_domain_t)); @@ -281,7 +281,7 @@ int osm_db_restore(IN osm_db_domain_t * p_domain) char *endptr = NULL; unsigned int line_num; - OSM_LOG_ENTER(p_log, osm_db_restore); + OSM_LOG_ENTER(p_log); /* take the lock on the domain */ cl_spinlock_acquire(&p_domain_imp->lock); @@ -452,7 +452,7 @@ int osm_db_store(IN osm_db_domain_t * p_domain) int status = 0; char *p_tmp_file_name; - OSM_LOG_ENTER(p_log, osm_db_store); + OSM_LOG_ENTER(p_log); p_domain_imp = (osm_db_domain_imp_t *) 
p_domain->p_domain_imp; p_tmp_file_name = @@ -622,7 +622,7 @@ int osm_db_delete(IN osm_db_domain_t * p_domain, IN char *const p_key) char *p_prev_val = NULL; int res; - OSM_LOG_ENTER(p_log, osm_db_delete); + OSM_LOG_ENTER(p_log); cl_spinlock_acquire(&p_domain_imp->lock); if (st_delete(p_domain_imp->p_hash, diff --git a/opensm/opensm/osm_drop_mgr.c b/opensm/opensm/osm_drop_mgr.c index 0956d17..785e9ec 100644 --- a/opensm/opensm/osm_drop_mgr.c +++ b/opensm/opensm/osm_drop_mgr.c @@ -173,7 +173,7 @@ static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port) ib_mad_notice_attr_t notice; ib_api_status_t status; - OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_remove_port); + OSM_LOG_ENTER(sm->p_log); port_guid = osm_port_get_guid(p_port); osm_log(sm->p_log, OSM_LOG_VERBOSE, @@ -289,7 +289,7 @@ static void __osm_drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node) cl_qmap_t *p_sw_guid_tbl; ib_net64_t node_guid; - OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_remove_switch); + OSM_LOG_ENTER(sm->p_log); node_guid = osm_node_get_node_guid(p_node); p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl; @@ -321,7 +321,7 @@ __osm_drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node) ib_net64_t port_guid; boolean_t return_val = FALSE; - OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_process_node); + OSM_LOG_ENTER(sm->p_log); osm_log(sm->p_log, OSM_LOG_VERBOSE, "__osm_drop_mgr_process_node: " @@ -378,7 +378,7 @@ static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node) osm_port_t *p_port; ib_net64_t port_guid; - OSM_LOG_ENTER(sm->p_log, __osm_drop_mgr_check_node); + OSM_LOG_ENTER(sm->p_log); node_guid = osm_node_get_node_guid(p_node); @@ -458,7 +458,7 @@ void osm_drop_mgr_process(osm_sm_t * sm) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_drop_mgr_process); + OSM_LOG_ENTER(sm->p_log); p_node_guid_tbl = &sm->p_subn->node_guid_tbl; p_port_guid_tbl = &sm->p_subn->port_guid_tbl; diff --git a/opensm/opensm/osm_inform.c b/opensm/opensm/osm_inform.c index 
151b1dc..9ed9b73 100644 --- a/opensm/opensm/osm_inform.c +++ b/opensm/opensm/osm_inform.c @@ -89,7 +89,7 @@ static void dump_all_informs(IN osm_subn_t const *p_subn, IN osm_log_t * p_log) { cl_list_item_t *p_list_item; - OSM_LOG_ENTER(p_log, dump_all_informs); + OSM_LOG_ENTER(p_log); if (!osm_log_is_active(p_log, OSM_LOG_DEBUG)) goto Exit; @@ -118,7 +118,7 @@ __match_inf_rec(IN const cl_list_item_t * const p_list_item, IN void *context) cl_status_t status = CL_NOT_FOUND; ib_gid_t all_zero_gid; - OSM_LOG_ENTER(p_log, __match_inf_rec); + OSM_LOG_ENTER(p_log); if (memcmp(&p_infr->report_addr, &p_infr_rec->report_addr, sizeof(p_infr_rec->report_addr))) { @@ -241,7 +241,7 @@ osm_infr_t *osm_infr_get_by_rec(IN osm_subn_t const *p_subn, { cl_list_item_t *p_list_item; - OSM_LOG_ENTER(p_log, osm_infr_get_by_rec); + OSM_LOG_ENTER(p_log); dump_all_informs(p_subn, p_log); @@ -270,7 +270,7 @@ void osm_infr_insert_to_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, IN osm_infr_t * p_infr) { - OSM_LOG_ENTER(p_log, osm_infr_insert_to_db); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "osm_infr_insert_to_db: " @@ -303,7 +303,7 @@ void osm_infr_remove_from_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, IN osm_infr_t * p_infr) { - OSM_LOG_ENTER(p_log, osm_infr_remove_from_db); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "osm_infr_remove_from_db: " @@ -340,7 +340,7 @@ static ib_api_status_t __osm_send_report(IN osm_infr_t * p_infr_rec, /* the info ib_api_status_t status; osm_log_t *p_log = p_infr_rec->sa->p_log; - OSM_LOG_ENTER(p_log, __osm_send_report); + OSM_LOG_ENTER(p_log); /* HACK: who switches or uses the src and dest GIDs in the grh_info ?? 
*/ @@ -421,7 +421,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, osm_port_t *p_src_port; osm_port_t *p_dest_port; - OSM_LOG_ENTER(p_log, __match_notice_to_inf_rec); + OSM_LOG_ENTER(p_log); /* matching rules * InformInfo Notice @@ -616,7 +616,7 @@ osm_report_notice(IN osm_log_t * const p_log, osm_infr_t *p_infr_rec; osm_infr_t *p_next_infr_rec; - OSM_LOG_ENTER(p_log, osm_report_notice); + OSM_LOG_ENTER(p_log); /* * we must make sure we are ready for this... diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c index 3ceb145..557131e 100644 --- a/opensm/opensm/osm_lid_mgr.c +++ b/opensm/opensm/osm_lid_mgr.c @@ -123,7 +123,7 @@ void osm_lid_mgr_destroy(IN osm_lid_mgr_t * const p_mgr) { cl_list_item_t *p_item; - OSM_LOG_ENTER(p_mgr->p_log, osm_lid_mgr_destroy); + OSM_LOG_ENTER(p_mgr->p_log); cl_ptr_vector_destroy(&p_mgr->used_lids); p_item = cl_qlist_remove_head(&p_mgr->free_ranges); @@ -149,7 +149,7 @@ static void __osm_lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) uint16_t lmc_mask; boolean_t lids_ok; - OSM_LOG_ENTER(p_mgr->p_log, __osm_lid_mgr_validate_db); + OSM_LOG_ENTER(p_mgr->p_log); if (p_mgr->p_subn->opt.lmc) lmc_mask = ~((1 << p_mgr->p_subn->opt.lmc) - 1); @@ -245,7 +245,7 @@ osm_lid_mgr_init(IN osm_lid_mgr_t * const p_mgr, IN osm_sm_t *sm) { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sm->p_log, osm_lid_mgr_init); + OSM_LOG_ENTER(sm->p_log); osm_lid_mgr_construct(p_mgr); @@ -327,7 +327,7 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) uint16_t lmc_mask; uint16_t req_lid, num_lids; - OSM_LOG_ENTER(p_mgr->p_log, __osm_lid_mgr_init_sweep); + OSM_LOG_ENTER(p_mgr->p_log); if (p_mgr->p_subn->opt.lmc) lmc_mask = ~((1 << p_mgr->p_subn->opt.lmc) - 1); @@ -763,7 +763,7 @@ __osm_lid_mgr_get_port_lid(IN osm_lid_mgr_t * const p_mgr, int lid_changed = 0; uint16_t lmc_mask; - OSM_LOG_ENTER(p_mgr->p_log, __osm_lid_mgr_get_port_lid); + OSM_LOG_ENTER(p_mgr->p_log); if (p_mgr->p_subn->opt.lmc) lmc_mask = 
~((1 << p_mgr->p_subn->opt.lmc) - 1); @@ -906,7 +906,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, uint8_t port_num; boolean_t send_set = FALSE; - OSM_LOG_ENTER(p_mgr->p_log, __osm_lid_mgr_set_physp_pi); + OSM_LOG_ENTER(p_mgr->p_log); /* Don't bother doing anything if this Physical Port is not valid. @@ -1172,7 +1172,7 @@ __osm_lid_mgr_process_our_sm_node(IN osm_lid_mgr_t * const p_mgr) uint16_t max_lid_ho; boolean_t res = TRUE; - OSM_LOG_ENTER(p_mgr->p_log, __osm_lid_mgr_process_our_sm_node); + OSM_LOG_ENTER(p_mgr->p_log); /* Acquire our own port object. @@ -1231,7 +1231,7 @@ osm_signal_t osm_lid_mgr_process_sm(IN osm_lid_mgr_t * const p_mgr) { osm_signal_t signal = OSM_SIGNAL_DONE_PENDING; - OSM_LOG_ENTER(p_mgr->p_log, osm_lid_mgr_process_sm); + OSM_LOG_ENTER(p_mgr->p_log); CL_ASSERT(p_mgr->p_subn->sm_port_guid); @@ -1283,7 +1283,7 @@ osm_signal_t osm_lid_mgr_process_subnet(IN osm_lid_mgr_t * const p_mgr) CL_ASSERT(p_mgr); - OSM_LOG_ENTER(p_mgr->p_log, osm_lid_mgr_process_subnet); + OSM_LOG_ENTER(p_mgr->p_log); CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock); diff --git a/opensm/opensm/osm_lin_fwd_rcv.c b/opensm/opensm/osm_lin_fwd_rcv.c index 7d9d1af..09edd1a 100644 --- a/opensm/opensm/osm_lin_fwd_rcv.c +++ b/opensm/opensm/osm_lin_fwd_rcv.c @@ -70,7 +70,7 @@ void osm_lft_rcv_process(IN void *context, IN void *data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_lft_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index c810b56..8ca5786 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -77,7 +77,7 @@ __osm_link_mgr_set_physp_pi(osm_sm_t * sm, boolean_t send_set = FALSE; osm_physp_t *p_remote_physp; - OSM_LOG_ENTER(sm->p_log, __osm_link_mgr_set_physp_pi); + OSM_LOG_ENTER(sm->p_log); p_node = osm_physp_get_node_ptr(p_physp); @@ -352,7 +352,7 @@ __osm_link_mgr_process_node(osm_sm_t * sm, uint8_t current_state; osm_signal_t signal = OSM_SIGNAL_DONE; 
- OSM_LOG_ENTER(sm->p_log, __osm_link_mgr_process_node); + OSM_LOG_ENTER(sm->p_log); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) osm_log(sm->p_log, OSM_LOG_DEBUG, @@ -409,7 +409,7 @@ osm_signal_t osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state) osm_node_t *p_node; osm_signal_t signal = OSM_SIGNAL_DONE; - OSM_LOG_ENTER(sm->p_log, osm_link_mgr_process); + OSM_LOG_ENTER(sm->p_log); p_node_guid_tbl = &sm->p_subn->node_guid_tbl; diff --git a/opensm/opensm/osm_mad_pool.c b/opensm/opensm/osm_mad_pool.c index f9ef54c..8af3d2a 100644 --- a/opensm/opensm/osm_mad_pool.c +++ b/opensm/opensm/osm_mad_pool.c @@ -77,7 +77,7 @@ void osm_mad_pool_destroy(IN osm_mad_pool_t * const p_pool) ib_api_status_t osm_mad_pool_init(IN osm_mad_pool_t * const p_pool, IN osm_log_t * const p_log) { - OSM_LOG_ENTER(p_log, osm_mad_pool_init); + OSM_LOG_ENTER(p_log); p_pool->p_log = p_log; @@ -95,7 +95,7 @@ osm_madw_t *osm_mad_pool_get(IN osm_mad_pool_t * const p_pool, osm_madw_t *p_madw; ib_mad_t *p_mad; - OSM_LOG_ENTER(p_pool->p_log, osm_mad_pool_get); + OSM_LOG_ENTER(p_pool->p_log); CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE); CL_ASSERT(total_size); @@ -153,7 +153,7 @@ osm_madw_t *osm_mad_pool_get_wrapper(IN osm_mad_pool_t * const p_pool, { osm_madw_t *p_madw; - OSM_LOG_ENTER(p_pool->p_log, osm_mad_pool_get_wrapper); + OSM_LOG_ENTER(p_pool->p_log); CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE); CL_ASSERT(total_size); @@ -192,7 +192,7 @@ osm_madw_t *osm_mad_pool_get_wrapper_raw(IN osm_mad_pool_t * const p_pool) { osm_madw_t *p_madw; - OSM_LOG_ENTER(p_pool->p_log, osm_mad_pool_get_wrapper_raw); + OSM_LOG_ENTER(p_pool->p_log); p_madw = malloc(sizeof(*p_madw)); if (!p_madw) @@ -215,7 +215,7 @@ osm_madw_t *osm_mad_pool_get_wrapper_raw(IN osm_mad_pool_t * const p_pool) void osm_mad_pool_put(IN osm_mad_pool_t * const p_pool, IN osm_madw_t * const p_madw) { - OSM_LOG_ENTER(p_pool->p_log, osm_mad_pool_put); + OSM_LOG_ENTER(p_pool->p_log); CL_ASSERT(p_madw); diff --git 
a/opensm/opensm/osm_mcast_fwd_rcv.c b/opensm/opensm/osm_mcast_fwd_rcv.c index 3233def..2c97945 100644 --- a/opensm/opensm/osm_mcast_fwd_rcv.c +++ b/opensm/opensm/osm_mcast_fwd_rcv.c @@ -77,7 +77,7 @@ void osm_mft_rcv_process(IN void *context, IN void *data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_mft_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c index 3b6d05a..1a62ce5 100644 --- a/opensm/opensm/osm_mcast_mgr.c +++ b/opensm/opensm/osm_mcast_mgr.c @@ -122,7 +122,7 @@ static void __osm_mcast_mgr_purge_tree_node(IN osm_mtree_node_t * p_mtn) static void __osm_mcast_mgr_purge_tree(osm_sm_t * sm, IN osm_mgrp_t * const p_mgrp) { - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_purge_tree); + OSM_LOG_ENTER(sm->p_log); if (p_mgrp->p_root) __osm_mcast_mgr_purge_tree_node(p_mgrp->p_root); @@ -146,7 +146,7 @@ osm_mcast_mgr_compute_avg_hops(osm_sm_t * sm, const osm_mcm_port_t *p_mcm_port; const cl_qmap_t *p_mcm_tbl; - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_compute_avg_hops); + OSM_LOG_ENTER(sm->p_log); p_mcm_tbl = &p_mgrp->mcm_port_tbl; @@ -206,7 +206,7 @@ osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, const osm_mcm_port_t *p_mcm_port; const cl_qmap_t *p_mcm_tbl; - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_compute_max_hops); + OSM_LOG_ENTER(sm->p_log); p_mcm_tbl = &p_mgrp->mcm_port_tbl; @@ -273,7 +273,7 @@ static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_sm_t * sm, boolean_t use_avg_hops = FALSE; /* use max hops for root */ #endif - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_find_optimal_switch); + OSM_LOG_ENTER(sm->p_log); p_sw_tbl = &sm->p_subn->sw_guid_tbl; @@ -334,7 +334,7 @@ static osm_switch_t *__osm_mcast_mgr_find_root_switch(osm_sm_t * sm, { const osm_switch_t *p_sw = NULL; - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_find_root_switch); + OSM_LOG_ENTER(sm->p_log); /* We always look for the best multicast tree root switch. 
@@ -367,7 +367,7 @@ __osm_mcast_mgr_set_tbl(osm_sm_t * sm, IN osm_switch_t * const p_sw) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_set_tbl); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_sw); @@ -443,7 +443,7 @@ __osm_mcast_mgr_subdivide(osm_sm_t * sm, boolean_t ignore_existing; osm_mcast_work_obj_t *p_wobj; - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_subdivide); + OSM_LOG_ENTER(sm->p_log); mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); @@ -517,7 +517,7 @@ static void __osm_mcast_mgr_purge_list(osm_sm_t * sm, cl_qlist_t * const p_list) { osm_mcast_work_obj_t *p_wobj; - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_purge_list); + OSM_LOG_ENTER(sm->p_log); while ((p_wobj = (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) != (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) { @@ -558,7 +558,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, uint16_t mlid_ho; osm_mcast_tbl_t *p_tbl; - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_branch); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_sw); CL_ASSERT(p_list); @@ -779,7 +779,7 @@ __osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp) uint8_t max_depth = 0; uint32_t count; - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_build_spanning_tree); + OSM_LOG_ENTER(sm->p_log); cl_qlist_init(&port_list); @@ -895,7 +895,7 @@ osm_mcast_mgr_set_table(osm_sm_t * sm, osm_mcast_tbl_t *p_tbl; osm_switch_t *p_sw; - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_set_table); + OSM_LOG_ENTER(sm->p_log); mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); p_sw = osm_mtree_node_get_switch_ptr(p_mtn); @@ -940,7 +940,7 @@ static void __osm_mcast_mgr_clear(osm_sm_t * sm, IN osm_mgrp_t * const p_mgrp) cl_qmap_t *p_sw_tbl; osm_mcast_tbl_t *p_mcast_tbl; - OSM_LOG_ENTER(sm->p_log, __osm_mcast_mgr_clear); + OSM_LOG_ENTER(sm->p_log); /* Walk the switches and clear the routing entries for @@ -978,7 +978,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, osm_mcast_tbl_t *p_mcast_tbl; ib_api_status_t status = IB_SUCCESS; - 
OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_single); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(mlid); CL_ASSERT(port_guid); @@ -1111,7 +1111,7 @@ osm_mcast_mgr_process_tree(osm_sm_t * sm, ib_net16_t mlid; boolean_t ui_mcast_fdb_assign_func_defined; - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_tree); + OSM_LOG_ENTER(sm->p_log); mlid = osm_mgrp_get_mlid(p_mgrp); @@ -1192,7 +1192,7 @@ mcast_mgr_process_mgrp(osm_sm_t * sm, { ib_api_status_t status; - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_mgrp); + OSM_LOG_ENTER(sm->p_log); status = osm_mcast_mgr_process_tree(sm, p_mgrp, req_type, port_guid); if (status != IB_SUCCESS) { @@ -1238,7 +1238,7 @@ osm_signal_t osm_mcast_mgr_process(osm_sm_t * sm) osm_mgrp_t *p_next_mgrp; boolean_t pending_transactions = FALSE; - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process); + OSM_LOG_ENTER(sm->p_log); p_sw_tbl = &sm->p_subn->sw_guid_tbl; p_mcast_tbl = &sm->p_subn->mgrp_mlid_tbl; @@ -1317,7 +1317,7 @@ osm_signal_t osm_mcast_mgr_process_mgroups(osm_sm_t * sm) osm_mcast_req_type_t req_type; ib_net64_t port_guid; - OSM_LOG_ENTER(sm->p_log, osm_mcast_mgr_process_mgroups); + OSM_LOG_ENTER(sm->p_log); /* we need a lock to make sure the p_mgrp is not change other ways */ CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c index 72a9e74..77e436f 100644 --- a/opensm/opensm/osm_multicast.c +++ b/opensm/opensm/osm_multicast.c @@ -288,7 +288,7 @@ osm_mgrp_send_delete_notice(IN osm_subn_t * const p_subn, ib_mad_notice_attr_t notice; ib_api_status_t status; - OSM_LOG_ENTER(p_log, osm_mgrp_send_delete_notice); + OSM_LOG_ENTER(p_log); /* prepare the needed info */ @@ -330,7 +330,7 @@ osm_mgrp_send_create_notice(IN osm_subn_t * const p_subn, ib_mad_notice_attr_t notice; ib_api_status_t status; - OSM_LOG_ENTER(p_log, osm_mgrp_send_create_notice); + OSM_LOG_ENTER(p_log); /* prepare the needed info */ diff --git a/opensm/opensm/osm_node_desc_rcv.c b/opensm/opensm/osm_node_desc_rcv.c index 
6c9c8ea..4268526 100644 --- a/opensm/opensm/osm_node_desc_rcv.c +++ b/opensm/opensm/osm_node_desc_rcv.c @@ -70,7 +70,7 @@ __osm_nd_rcv_process_nd(IN osm_sm_t * sm, char *tmp_desc; char print_desc[IB_NODE_DESCRIPTION_SIZE + 1]; - OSM_LOG_ENTER(sm->p_log, __osm_nd_rcv_process_nd); + OSM_LOG_ENTER(sm->p_log); memcpy(&p_node->node_desc.description, p_nd, sizeof(*p_nd)); @@ -110,7 +110,7 @@ void osm_nd_rcv_process(IN void *context, IN void *data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_nd_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_node_info_rcv.c b/opensm/opensm/osm_node_info_rcv.c index 844dfcf..776539b 100644 --- a/opensm/opensm/osm_node_info_rcv.c +++ b/opensm/opensm/osm_node_info_rcv.c @@ -137,7 +137,7 @@ __osm_ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t *p_neighbor_node; osm_physp_t *p_physp; - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_set_links); + OSM_LOG_ENTER(sm->p_log); /* A special case exists in which the node we're trying to @@ -279,7 +279,7 @@ __osm_ni_rcv_process_new_node(IN osm_sm_t * sm, ib_smp_t *p_smp; uint8_t port_num; - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_new_node); + OSM_LOG_ENTER(sm->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); @@ -329,7 +329,7 @@ __osm_ni_rcv_get_node_desc(IN osm_sm_t * sm, ib_smp_t *p_smp; uint8_t port_num; - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_get_node_desc); + OSM_LOG_ENTER(sm->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); @@ -367,7 +367,7 @@ __osm_ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm, IN osm_node_t * const p_node, IN const osm_madw_t * const p_madw) { - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_new_ca_or_router); + OSM_LOG_ENTER(sm->p_log); __osm_ni_rcv_process_new_node(sm, p_node, p_madw); @@ -401,7 +401,7 @@ __osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm, osm_dr_path_t *p_dr_path; osm_bind_handle_t h_bind; - 
OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_existing_ca_or_router); + OSM_LOG_ENTER(sm->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); @@ -506,7 +506,7 @@ __osm_ni_rcv_process_switch(IN osm_sm_t * sm, osm_dr_path_t dr_path; ib_smp_t *p_smp; - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_switch); + OSM_LOG_ENTER(sm->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); @@ -539,7 +539,7 @@ __osm_ni_rcv_process_existing_switch(IN osm_sm_t * sm, IN osm_node_t * const p_node, IN const osm_madw_t * const p_madw) { - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_existing_switch); + OSM_LOG_ENTER(sm->p_log); /* If this switch has already been probed during this sweep, @@ -571,7 +571,7 @@ __osm_ni_rcv_process_new_switch(IN osm_sm_t * sm, IN osm_node_t * const p_node, IN const osm_madw_t * const p_madw) { - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_new_switch); + OSM_LOG_ENTER(sm->p_log); __osm_ni_rcv_process_switch(sm, p_node, p_madw); @@ -605,7 +605,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, osm_ni_context_t *p_ni_context; uint8_t port_num; - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_new); + OSM_LOG_ENTER(sm->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); @@ -759,7 +759,7 @@ __osm_ni_rcv_process_existing(IN osm_sm_t * sm, osm_ni_context_t *p_ni_context; uint8_t port_num; - OSM_LOG_ENTER(sm->p_log, __osm_ni_rcv_process_existing); + OSM_LOG_ENTER(sm->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); @@ -817,7 +817,7 @@ void osm_ni_rcv_process(IN void *context, IN void *data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_ni_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c index fa517d0..1760f22 100644 --- a/opensm/opensm/osm_opensm.c +++ b/opensm/opensm/osm_opensm.c @@ -394,7 +394,7 @@ osm_opensm_bind(IN osm_opensm_t * const 
p_osm, IN const ib_net64_t guid) { ib_api_status_t status; - OSM_LOG_ENTER(&p_osm->log, osm_opensm_bind); + OSM_LOG_ENTER(&p_osm->log); status = osm_sm_bind(&p_osm->sm, guid); if (status != IB_SUCCESS) diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index 8375396..a959782 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -173,7 +173,7 @@ osm_perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context, { osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; - OSM_LOG_ENTER(pm->log, osm_perfmgr_mad_recv_callback); + OSM_LOG_ENTER(pm->log); osm_madw_copy_context(p_madw, p_req_madw); osm_mad_pool_put(pm->mad_pool, p_req_madw); @@ -204,7 +204,7 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw) cl_map_item_t *p_node; __monitored_node_t *p_mon_node; - OSM_LOG_ENTER(pm->log, osm_perfmgr_mad_send_err_callback); + OSM_LOG_ENTER(pm->log); /* go ahead and get the monitored node struct to have the printable * name if needed in messages @@ -258,7 +258,7 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) osm_bind_info_t bind_info; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(pm->log, osm_perfmgr_bind); + OSM_LOG_ENTER(pm->log); if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) { osm_log(pm->log, OSM_LOG_ERROR, @@ -305,7 +305,7 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) **********************************************************************/ static void osm_perfmgr_mad_unbind(osm_perfmgr_t * const pm) { - OSM_LOG_ENTER(pm->log, osm_sa_mad_ctrl_unbind); + OSM_LOG_ENTER(pm->log); if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { osm_log(pm->log, OSM_LOG_ERROR, "osm_perfmgr_mad_unbind: ERR 4C05: No previous bind\n"); @@ -368,7 +368,7 @@ osm_perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, ib_net16_t dest_lid, ib_perfmgt_mad_t *pm_mad = NULL; osm_madw_t *p_madw = NULL; - OSM_LOG_ENTER(perfmgr->log, osm_perfmgr_send_pc_mad); + 
OSM_LOG_ENTER(perfmgr->log); p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, @@ -438,7 +438,7 @@ static void __collect_guids(cl_map_item_t * const p_map_item, void *context) __monitored_node_t *mon_node = NULL; uint32_t size; - OSM_LOG_ENTER(pm->log, __collect_guids); + OSM_LOG_ENTER(pm->log); if (cl_qmap_get(&(pm->monitored_map), node_guid) == cl_qmap_end(&(pm->monitored_map))) { @@ -479,7 +479,7 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) uint64_t node_guid = 0; ib_net32_t remote_qp; - OSM_LOG_ENTER(pm->log, __osm_perfmgr_query_counters); + OSM_LOG_ENTER(pm->log); cl_plock_acquire(pm->lock); node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); @@ -857,7 +857,7 @@ static void perfmgr_sweep(void *arg) **********************************************************************/ void osm_perfmgr_shutdown(osm_perfmgr_t * const pm) { - OSM_LOG_ENTER(pm->log, osm_perfmgr_shutdown); + OSM_LOG_ENTER(pm->log); cl_timer_stop(&pm->sweep_timer); osm_perfmgr_mad_unbind(pm); OSM_LOG_EXIT(pm->log); @@ -867,7 +867,7 @@ void osm_perfmgr_shutdown(osm_perfmgr_t * const pm) **********************************************************************/ void osm_perfmgr_destroy(osm_perfmgr_t * const pm) { - OSM_LOG_ENTER(pm->log, osm_perfmgr_destroy); + OSM_LOG_ENTER(pm->log); free(pm->event_db_dump_file); perfmgr_db_destroy(pm->db); cl_timer_destroy(&pm->sweep_timer); @@ -976,7 +976,7 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node, ib_api_status_t status; ib_net32_t remote_qp; - OSM_LOG_ENTER(pm->log, osm_perfmgr_check_overflow); + OSM_LOG_ENTER(pm->log); if (counter_overflow_16(pc->symbol_err_cnt) || counter_overflow_8(pc->link_err_recover) || @@ -1104,7 +1104,7 @@ static void osm_pc_rcv_process(void *context, void *data) cl_map_item_t *p_node; __monitored_node_t *p_mon_node; - OSM_LOG_ENTER(pm->log, osm_pc_rcv_process); + OSM_LOG_ENTER(pm->log); /* go ahead and get the monitored node struct to 
have the printable * name if needed in messages @@ -1245,7 +1245,7 @@ osm_perfmgr_init(osm_perfmgr_t * const pm, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(log, osm_perfmgr_init); + OSM_LOG_ENTER(log); osm_log(log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); diff --git a/opensm/opensm/osm_pkey.c b/opensm/opensm/osm_pkey.c index 04d009b..1d5df93 100644 --- a/opensm/opensm/osm_pkey.c +++ b/opensm/opensm/osm_pkey.c @@ -404,7 +404,7 @@ osm_port_share_pkey(IN osm_log_t * p_log, osm_physp_t *p_physp1, *p_physp2; boolean_t ret; - OSM_LOG_ENTER(p_log, osm_port_share_pkey); + OSM_LOG_ENTER(p_log); if (!p_port_1 || !p_port_2) { ret = FALSE; @@ -441,7 +441,7 @@ osm_lid_share_pkey(IN osm_log_t * p_log, osm_node_t *p_node1, *p_node2; const cl_ptr_vector_t *const p_port_lid_tbl = &(p_subn->port_lid_tbl); - OSM_LOG_ENTER(p_log, osm_lid_share_pkey); + OSM_LOG_ENTER(p_log); p_port1 = cl_ptr_vector_get(p_port_lid_tbl, lid1); p_port2 = cl_ptr_vector_get(p_port_lid_tbl, lid2); @@ -476,7 +476,7 @@ osm_physp_has_pkey(IN osm_log_t * p_log, const osm_pkey_tbl_t *pkey_tbl; boolean_t res = FALSE; - OSM_LOG_ENTER(p_log, osm_physp_has_pkey); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "osm_physp_has_pkey: " diff --git a/opensm/opensm/osm_pkey_mgr.c b/opensm/opensm/osm_pkey_mgr.c index 33eeb8b..8c8deec 100644 --- a/opensm/opensm/osm_pkey_mgr.c +++ b/opensm/opensm/osm_pkey_mgr.c @@ -508,7 +508,7 @@ osm_signal_t osm_pkey_mgr_process(IN osm_opensm_t * p_osm) CL_ASSERT(p_osm); - OSM_LOG_ENTER(&p_osm->log, osm_pkey_mgr_process); + OSM_LOG_ENTER(&p_osm->log); CL_PLOCK_EXCL_ACQUIRE(&p_osm->lock); diff --git a/opensm/opensm/osm_pkey_rcv.c b/opensm/opensm/osm_pkey_rcv.c index a827e28..851f540 100644 --- a/opensm/opensm/osm_pkey_rcv.c +++ b/opensm/opensm/osm_pkey_rcv.c @@ -70,7 +70,7 @@ void osm_pkey_rcv_process(IN void *context, IN void *data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_pkey_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git 
a/opensm/opensm/osm_port.c b/opensm/opensm/osm_port.c index 653212a..33152d2 100644 --- a/opensm/opensm/osm_port.c +++ b/opensm/opensm/osm_port.c @@ -315,7 +315,7 @@ osm_physp_calc_link_mtu(IN osm_log_t * p_log, IN const osm_physp_t * p_physp) uint8_t mtu; uint8_t remote_mtu; - OSM_LOG_ENTER(p_log, osm_physp_calc_link_mtu); + OSM_LOG_ENTER(p_log); p_remote_physp = osm_physp_get_remote(p_physp); if (p_remote_physp) { @@ -380,7 +380,7 @@ osm_physp_calc_link_op_vls(IN osm_log_t * p_log, uint8_t op_vls; uint8_t remote_op_vls; - OSM_LOG_ENTER(p_log, osm_physp_calc_link_op_vls); + OSM_LOG_ENTER(p_log); p_remote_physp = osm_physp_get_remote(p_physp); if (p_remote_physp) { @@ -471,7 +471,7 @@ __osm_physp_get_dr_physp_set(IN osm_log_t * p_log, uint8_t hop; cl_status_t status = CL_SUCCESS; - OSM_LOG_ENTER(p_log, __osm_physp_get_dr_physp_set); + OSM_LOG_ENTER(p_log); /* find the OSM node */ p_port = osm_get_port_by_guid(p_subn, p_subn->sm_port_guid); diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c index 356cd56..be4fcca 100644 --- a/opensm/opensm/osm_port_info_rcv.c +++ b/opensm/opensm/osm_port_info_rcv.c @@ -75,7 +75,7 @@ __osm_pi_rcv_set_sm(IN osm_sm_t * sm, osm_bind_handle_t h_bind; osm_dr_path_t *p_dr_path; - OSM_LOG_ENTER(sm->p_log, __osm_pi_rcv_set_sm); + OSM_LOG_ENTER(sm->p_log); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) osm_log(sm->p_log, OSM_LOG_DEBUG, @@ -122,7 +122,7 @@ __osm_pi_rcv_process_endport(IN osm_sm_t * sm, cl_qmap_t *p_sm_tbl; osm_remote_sm_t *p_sm; - OSM_LOG_ENTER(sm->p_log, __osm_pi_rcv_process_endport); + OSM_LOG_ENTER(sm->p_log); port_guid = osm_physp_get_port_guid(p_physp); @@ -226,7 +226,7 @@ __osm_pi_rcv_process_switch_port(IN osm_sm_t * sm, uint8_t remote_port_num; osm_dr_path_t path; - OSM_LOG_ENTER(sm->p_log, __osm_pi_rcv_process_switch_port); + OSM_LOG_ENTER(sm->p_log); /* Check the state of the physical port. 
@@ -354,7 +354,7 @@ __osm_pi_rcv_process_ca_or_router_port(IN osm_sm_t * sm, IN osm_physp_t * const p_physp, IN ib_port_info_t * const p_pi) { - OSM_LOG_ENTER(sm->p_log, __osm_pi_rcv_process_ca_or_router_port); + OSM_LOG_ENTER(sm->p_log); UNUSED_PARAM(p_node); @@ -383,7 +383,7 @@ static void get_pkey_table(IN osm_log_t * p_log, uint16_t block_num, max_blocks; uint32_t attr_mod_ho; - OSM_LOG_ENTER(p_log, get_pkey_table); + OSM_LOG_ENTER(p_log); path = *osm_physp_get_dr_path_ptr(p_physp); @@ -447,7 +447,7 @@ __osm_pi_rcv_get_pkey_slvl_vla_tables(IN osm_sm_t * sm, IN osm_node_t * const p_node, IN osm_physp_t * const p_physp) { - OSM_LOG_ENTER(sm->p_log, __osm_pi_rcv_get_pkey_slvl_vla_tables); + OSM_LOG_ENTER(sm->p_log); get_pkey_table(sm->p_log, sm, p_node, p_physp); @@ -467,7 +467,7 @@ osm_pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * const p_node, osm_pi_context_t *p_context; osm_log_level_t level; - OSM_LOG_ENTER(sm->p_log, osm_pi_rcv_process_set); + OSM_LOG_ENTER(sm->p_log); p_context = osm_madw_get_pi_context_ptr(p_madw); @@ -537,7 +537,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) ib_net64_t node_guid; uint8_t port_num; - OSM_LOG_ENTER(sm->p_log, osm_pi_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(sm); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_qos.c b/opensm/opensm/osm_qos.c index 1a6cc05..675189a 100644 --- a/opensm/opensm/osm_qos.c +++ b/opensm/opensm/osm_qos.c @@ -284,7 +284,7 @@ osm_signal_t osm_qos_setup(osm_opensm_t * p_osm) if (!p_osm->subn.opt.qos) return OSM_SIGNAL_DONE; - OSM_LOG_ENTER(&p_osm->log, osm_qos_setup); + OSM_LOG_ENTER(&p_osm->log); qos_build_config(&ca_config, &p_osm->subn.opt.qos_ca_options, &p_osm->subn.opt.qos_options); diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index a98a60f..11da30b 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -2272,7 +2272,7 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) static boolean_t 
first_time = TRUE; p_qos_parser_osm_log = &p_subn->p_osm->log; - OSM_LOG_ENTER(p_qos_parser_osm_log, osm_qos_parse_policy_file); + OSM_LOG_ENTER(p_qos_parser_osm_log); osm_qos_policy_destroy(p_subn->p_qos_policy); p_subn->p_qos_policy = NULL; @@ -2378,7 +2378,7 @@ static void __qos_parser_error(const char *format, ...) char s[256]; va_list pvar; - OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); + OSM_LOG_ENTER(p_qos_parser_osm_log); va_start(pvar, format); vsnprintf(s, 256, format, pvar); diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c index bde1e7e..e01e55f 100644 --- a/opensm/opensm/osm_qos_policy.c +++ b/opensm/opensm/osm_qos_policy.c @@ -777,7 +777,7 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, ib_net16_t pkey; osm_prtn_t * p_prtn; - OSM_LOG_ENTER(p_log, osm_qos_policy_validate); + OSM_LOG_ENTER(p_log); /* set default qos level */ @@ -962,8 +962,7 @@ static osm_qos_level_t * __qos_policy_get_qos_level_by_params( osm_qos_match_rule_t *p_qos_match_rule = NULL; osm_qos_level_t *p_qos_level = NULL; - OSM_LOG_ENTER(&p_qos_policy->p_subn->p_osm->log, - __qos_policy_get_qos_level_by_params); + OSM_LOG_ENTER(&p_qos_policy->p_subn->p_osm->log); if (!p_qos_policy) goto Exit; diff --git a/opensm/opensm/osm_req.c b/opensm/opensm/osm_req.c index 0524ce2..e839f12 100644 --- a/opensm/opensm/osm_req.c +++ b/opensm/opensm/osm_req.c @@ -78,7 +78,7 @@ osm_req_get(IN osm_sm_t * sm, CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_req_get); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_path); CL_ASSERT(attr_id); @@ -159,7 +159,7 @@ osm_req_set(IN osm_sm_t * sm, CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_req_set); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_path); CL_ASSERT(attr_id); diff --git a/opensm/opensm/osm_resp.c b/opensm/opensm/osm_resp.c index 285559a..6d56fa0 100644 --- a/opensm/opensm/osm_resp.c +++ b/opensm/opensm/osm_resp.c @@ -70,7 +70,7 @@ osm_resp_make_resp_smp(IN osm_sm_t * sm, IN const uint8_t * const p_payload, OUT 
ib_smp_t * const p_dest_smp) { - OSM_LOG_ENTER(sm->p_log, osm_resp_make_resp_smp); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_dest_smp); CL_ASSERT(p_src_smp); @@ -115,7 +115,7 @@ osm_resp_send(IN osm_sm_t * sm, osm_madw_t *p_madw; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sm->p_log, osm_resp_send); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_req_madw); CL_ASSERT(p_payload); diff --git a/opensm/opensm/osm_sa.c b/opensm/opensm/osm_sa.c index c286259..82afa6d 100644 --- a/opensm/opensm/osm_sa.c +++ b/opensm/opensm/osm_sa.c @@ -108,7 +108,7 @@ void osm_sa_construct(IN osm_sa_t * const p_sa) void osm_sa_shutdown(IN osm_sa_t * const p_sa) { ib_api_status_t status; - OSM_LOG_ENTER(p_sa->p_log, osm_sa_shutdown); + OSM_LOG_ENTER(p_sa->p_log); cl_timer_stop(&p_sa->sr_timer); @@ -144,7 +144,7 @@ void osm_sa_shutdown(IN osm_sa_t * const p_sa) **********************************************************************/ void osm_sa_destroy(IN osm_sa_t * const p_sa) { - OSM_LOG_ENTER(p_sa->p_log, osm_sa_destroy); + OSM_LOG_ENTER(p_sa->p_log); p_sa->state = OSM_SA_STATE_INIT; @@ -167,7 +167,7 @@ osm_sa_init(IN osm_sm_t * const p_sm, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_sa_init); + OSM_LOG_ENTER(p_log); p_sa->sm = p_sm; p_sa->p_subn = p_subn; @@ -301,7 +301,7 @@ osm_sa_bind(IN osm_sa_t * const p_sa, IN const ib_net64_t port_guid) { ib_api_status_t status; - OSM_LOG_ENTER(p_sa->p_log, osm_sa_bind); + OSM_LOG_ENTER(p_sa->p_log); status = osm_sa_mad_ctrl_bind(&p_sa->mad_ctrl, port_guid); diff --git a/opensm/opensm/osm_sa_class_port_info.c b/opensm/opensm/osm_sa_class_port_info.c index 4f62761..e5574bf 100644 --- a/opensm/opensm/osm_sa_class_port_info.c +++ b/opensm/opensm/osm_sa_class_port_info.c @@ -84,7 +84,7 @@ __osm_cpi_rcv_respond(IN osm_sa_t * sa, ib_gid_t zero_gid; uint8_t rtv; - OSM_LOG_ENTER(sa->p_log, __osm_cpi_rcv_respond); + OSM_LOG_ENTER(sa->p_log); memset(&zero_gid, 0, sizeof(ib_gid_t)); @@ -200,7 +200,7 @@ void osm_cpi_rcv_process(IN void 
*context, IN void *data) osm_madw_t *p_madw = data; const ib_sa_mad_t *p_sa_mad; - OSM_LOG_ENTER(sa->p_log, osm_cpi_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_guidinfo_record.c b/opensm/opensm/osm_sa_guidinfo_record.c index af8ba6e..bfbe63b 100644 --- a/opensm/opensm/osm_sa_guidinfo_record.c +++ b/opensm/opensm/osm_sa_guidinfo_record.c @@ -88,7 +88,7 @@ __osm_gir_rcv_new_gir(IN osm_sa_t * sa, osm_gir_item_t *p_rec_item; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_gir_rcv_new_gir); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -142,7 +142,7 @@ __osm_sa_gir_create_gir(IN osm_sa_t * sa, ib_net64_t port_guid; uint8_t block_num, start_block_num, end_block_num, num_blocks; - OSM_LOG_ENTER(sa->p_log, __osm_sa_gir_create_gir); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -249,7 +249,7 @@ __osm_sa_gir_by_comp_mask_cb(IN cl_map_item_t * const p_map_item, ib_net16_t match_lid = 0; uint8_t match_block_num = 255; - OSM_LOG_ENTER(p_ctxt->sa->p_log, __osm_sa_gir_by_comp_mask_cb); + OSM_LOG_ENTER(p_ctxt->sa->p_log); if (comp_mask & IB_GIR_COMPMASK_LID) match_lid = p_rcvd_rec->lid; @@ -333,7 +333,7 @@ void osm_gir_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_gir_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_informinfo.c b/opensm/opensm/osm_sa_informinfo.c index 92a7fa1..a0cd050 100644 --- a/opensm/opensm/osm_sa_informinfo.c +++ b/opensm/opensm/osm_sa_informinfo.c @@ -101,7 +101,7 @@ __validate_ports_access_rights(IN osm_sa_t * sa, const cl_ptr_vector_t *p_tbl; ib_gid_t zero_gid; - OSM_LOG_ENTER(sa->p_log, __validate_ports_access_rights); + OSM_LOG_ENTER(sa->p_log); /* get the requester physp from the request address */ p_requester_physp = osm_get_physp_by_mad_addr(sa->p_log, @@ -202,7 +202,7 @@ 
__validate_infr(IN osm_sa_t * sa, IN osm_infr_t * p_infr_rec) { boolean_t valid = TRUE; - OSM_LOG_ENTER(sa->p_log, __validate_infr); + OSM_LOG_ENTER(sa->p_log); valid = __validate_ports_access_rights(sa, p_infr_rec); if (!valid) { @@ -230,7 +230,7 @@ __osm_infr_rcv_respond(IN osm_sa_t * sa, ib_inform_info_t *p_resp_infr; ib_api_status_t status; - OSM_LOG_ENTER(sa->p_log, __osm_infr_rcv_respond); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -293,7 +293,7 @@ __osm_sa_inform_info_rec_by_comp_mask(IN osm_sa_t * sa, const osm_physp_t *p_req_physp; osm_iir_item_t *p_rec_item; - OSM_LOG_ENTER(sa->p_log, __osm_sa_inform_info_rec_by_comp_mask); + OSM_LOG_ENTER(sa->p_log); p_rcvd_rec = p_ctxt->p_rcvd_rec; comp_mask = p_ctxt->comp_mask; @@ -388,7 +388,7 @@ osm_infr_rcv_process_get_method(IN osm_sa_t * sa, ib_api_status_t status = IB_SUCCESS; osm_physp_t *p_req_physp; - OSM_LOG_ENTER(sa->p_log, osm_infr_rcv_process_get_method); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); @@ -597,7 +597,7 @@ osm_infr_rcv_process_set_method(IN osm_sa_t * sa, uint8_t resp_time_val; ib_api_status_t res; - OSM_LOG_ENTER(sa->p_log, osm_infr_rcv_process_set_method); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); @@ -774,7 +774,7 @@ void osm_infr_rcv_process(IN void *context, IN void *data) osm_madw_t *p_madw = data; ib_sa_mad_t *p_sa_mad; - OSM_LOG_ENTER(sa->p_log, osm_infr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); @@ -806,7 +806,7 @@ void osm_infir_rcv_process(IN void *context, IN void *data) osm_madw_t *p_madw = data; ib_sa_mad_t *p_sa_mad; - OSM_LOG_ENTER(sa->p_log, osm_infr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_lft_record.c b/opensm/opensm/osm_sa_lft_record.c index b6a86d5..6e88945 100644 --- a/opensm/opensm/osm_sa_lft_record.c +++ b/opensm/opensm/osm_sa_lft_record.c @@ -83,7 +83,7 @@ 
__osm_lftr_rcv_new_lftr(IN osm_sa_t * sa, osm_lftr_item_t *p_rec_item; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_lftr_rcv_new_lftr); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -241,7 +241,7 @@ void osm_lftr_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_lftr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_link_record.c b/opensm/opensm/osm_sa_link_record.c index 8c7e2e7..9b484e8 100644 --- a/opensm/opensm/osm_sa_link_record.c +++ b/opensm/opensm/osm_sa_link_record.c @@ -128,7 +128,7 @@ __osm_lr_rcv_get_physp_link(IN osm_sa_t * sa, ib_net16_t to_base_lid; ib_net16_t lmc_mask; - OSM_LOG_ENTER(sa->p_log, __osm_lr_rcv_get_physp_link); + OSM_LOG_ENTER(sa->p_log); /* If only one end of the link is specified, determine @@ -241,7 +241,7 @@ __osm_lr_rcv_get_port_links(IN osm_sa_t * sa, uint8_t dest_num_ports; uint8_t dest_port_num; - OSM_LOG_ENTER(sa->p_log, __osm_lr_rcv_get_port_links); + OSM_LOG_ENTER(sa->p_log); if (p_src_port) { if (p_dest_port) { @@ -391,7 +391,7 @@ __osm_lr_rcv_get_end_points(IN osm_sa_t * sa, ib_api_status_t status; ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_lr_rcv_get_end_points); + OSM_LOG_ENTER(sa->p_log); /* Determine what fields are valid and then get a pointer @@ -468,7 +468,7 @@ __osm_lr_rcv_respond(IN osm_sa_t * sa, osm_lr_item_t *p_lr_item; const ib_sa_mad_t *p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); - OSM_LOG_ENTER(sa->p_log, __osm_lr_rcv_respond); + OSM_LOG_ENTER(sa->p_log); num_rec = cl_qlist_count(p_list); /* @@ -613,7 +613,7 @@ void osm_lr_rcv_process(IN void *context, IN void *data) ib_net16_t sa_status; osm_physp_t *p_req_physp; - OSM_LOG_ENTER(sa->p_log, osm_lr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_mad_ctrl.c b/opensm/opensm/osm_sa_mad_ctrl.c index 9a9b4c2..da594ab 
100644 --- a/opensm/opensm/osm_sa_mad_ctrl.c +++ b/opensm/opensm/osm_sa_mad_ctrl.c @@ -73,7 +73,7 @@ __osm_sa_mad_ctrl_disp_done_callback(IN void *context, IN void *p_data) osm_sa_mad_ctrl_t *const p_ctrl = (osm_sa_mad_ctrl_t *) context; osm_madw_t *const p_madw = (osm_madw_t *) p_data; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sa_mad_ctrl_disp_done_callback); + OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); /* @@ -104,7 +104,7 @@ __osm_sa_mad_ctrl_process(IN osm_sa_mad_ctrl_t * const p_ctrl, uint64_t last_dispatched_msg_queue_time_msec; uint32_t num_messages; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sa_mad_ctrl_process); + OSM_LOG_ENTER(p_ctrl->p_log); /* If the dispatcher is showing us that it is overloaded @@ -296,7 +296,7 @@ __osm_sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, osm_sa_mad_ctrl_t *p_ctrl = (osm_sa_mad_ctrl_t *) bind_context; ib_sa_mad_t *p_sa_mad; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sa_mad_ctrl_rcv_callback); + OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); @@ -422,7 +422,7 @@ __osm_sa_mad_ctrl_send_err_callback(IN void *bind_context, osm_sa_mad_ctrl_t *p_ctrl = (osm_sa_mad_ctrl_t *) bind_context; cl_status_t status; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sa_mad_ctrl_send_err_callback); + OSM_LOG_ENTER(p_ctrl->p_log); osm_log(p_ctrl->p_log, OSM_LOG_ERROR, "__osm_sa_mad_ctrl_send_err_callback: ERR 1A06: " @@ -516,7 +516,7 @@ osm_sa_mad_ctrl_init(IN osm_sa_mad_ctrl_t * const p_ctrl, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_sa_mad_ctrl_init); + OSM_LOG_ENTER(p_log); osm_sa_mad_ctrl_construct(p_ctrl); @@ -553,7 +553,7 @@ osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * const p_ctrl, osm_bind_info_t bind_info; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_ctrl->p_log, osm_sa_mad_ctrl_bind); + OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE) { osm_log(p_ctrl->p_log, OSM_LOG_ERROR, @@ -603,7 +603,7 @@ ib_api_status_t osm_sa_mad_ctrl_unbind(IN osm_sa_mad_ctrl_t * const p_ctrl) { ib_api_status_t status 
= IB_SUCCESS; - OSM_LOG_ENTER(p_ctrl->p_log, osm_sa_mad_ctrl_unbind); + OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) { osm_log(p_ctrl->p_log, OSM_LOG_ERROR, diff --git a/opensm/opensm/osm_sa_mcmember_record.c b/opensm/opensm/osm_sa_mcmember_record.c index d057207..1cec9e1 100644 --- a/opensm/opensm/osm_sa_mcmember_record.c +++ b/opensm/opensm/osm_sa_mcmember_record.c @@ -156,7 +156,7 @@ __get_new_mlid(IN osm_sa_t * sa, IN ib_net16_t requested_mlid) uint16_t mlid; /* the result */ uint16_t max_num_mlids; - OSM_LOG_ENTER(sa->p_log, __get_new_mlid); + OSM_LOG_ENTER(sa->p_log); if (requested_mlid && cl_ntoh16(requested_mlid) >= IB_LID_MCAST_START_HO && cl_ntoh16(requested_mlid) < p_subn->max_multicast_lid_ho @@ -358,7 +358,7 @@ __osm_mcmr_rcv_respond(IN osm_sa_t * sa, ib_member_rec_t *p_resp_mcmember_rec; ib_api_status_t status; - OSM_LOG_ENTER(sa->p_log, __osm_mcmr_rcv_respond); + OSM_LOG_ENTER(sa->p_log); /* * Get a MAD to reply. Address of Mad is in the received mad_wrapper @@ -771,7 +771,7 @@ __validate_requested_mgid(IN osm_sa_t * sa, uint16_t signature; boolean_t valid = TRUE; - OSM_LOG_ENTER(sa->p_log, __validate_requested_mgid); + OSM_LOG_ENTER(sa->p_log); /* 14-a: mcast GID must start with 0xFF */ if (p_mcm_rec->mgid.multicast.header[0] != 0xFF) { @@ -869,7 +869,7 @@ __mgrp_request_is_realizable(IN osm_sa_t * sa, uint8_t rate_required, rate, port_rate; osm_log_t *p_log = sa->p_log; - OSM_LOG_ENTER(sa->p_log, __mgrp_request_is_realizable); + OSM_LOG_ENTER(sa->p_log); /* * End of o15-0.2.3 specifies: @@ -1002,7 +1002,7 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa, ib_api_status_t status = IB_SUCCESS; ib_member_rec_t mcm_rec = *p_recvd_mcmember_rec; /* copy for modifications */ - OSM_LOG_ENTER(sa->p_log, osm_mcmr_rcv_create_new_mgrp); + OSM_LOG_ENTER(sa->p_log); /* but what if the given MGID was not 0 ? 
*/ zero_mgid = 1; @@ -1268,7 +1268,7 @@ __osm_mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, uint8_t port_join_state; uint8_t new_join_state; - OSM_LOG_ENTER(sa->p_log, __osm_mcmr_rcv_leave_mgrp); + OSM_LOG_ENTER(sa->p_log); p_mgrp = NULL; p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); @@ -1400,7 +1400,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, osm_mcast_req_type_t req_type; uint8_t join_state; - OSM_LOG_ENTER(sa->p_log, __osm_mcmr_rcv_join_mgrp); + OSM_LOG_ENTER(sa->p_log); p_mgrp = NULL; p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); @@ -1693,7 +1693,7 @@ __osm_mcmr_rcv_new_mcmr(IN osm_sa_t * sa, osm_mcmr_item_t *p_rec_item; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_mcmr_rcv_new_mcmr); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -1742,7 +1742,7 @@ __osm_sa_mcm_by_comp_mask_cb(IN cl_map_item_t * const p_map_item, ib_gid_t port_gid; boolean_t proxy_join = FALSE; - OSM_LOG_ENTER(sa->p_log, __osm_sa_mcm_by_comp_mask_cb); + OSM_LOG_ENTER(sa->p_log); osm_log(sa->p_log, OSM_LOG_DEBUG, "__osm_sa_mcm_by_comp_mask_cb: " @@ -1930,7 +1930,7 @@ __osm_mcmr_query_mgrp(IN osm_sa_t * sa, osm_physp_t *p_req_physp; boolean_t trusted_req; - OSM_LOG_ENTER(sa->p_log, __osm_mcmr_query_mgrp); + OSM_LOG_ENTER(sa->p_log); p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); p_rcvd_rec = (ib_member_rec_t *) ib_sa_mad_get_payload_ptr(p_rcvd_mad); @@ -2134,7 +2134,7 @@ void osm_mcmr_rcv_process(IN void *context, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_mcmr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_mft_record.c b/opensm/opensm/osm_sa_mft_record.c index 30506a6..039282f 100644 --- a/opensm/opensm/osm_sa_mft_record.c +++ b/opensm/opensm/osm_sa_mft_record.c @@ -84,7 +84,7 @@ __osm_mftr_rcv_new_mftr(IN osm_sa_t * sa, ib_api_status_t status = IB_SUCCESS; uint16_t position_block_num; - OSM_LOG_ENTER(sa->p_log, __osm_mftr_rcv_new_mftr); + 
OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -272,7 +272,7 @@ void osm_mftr_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_mftr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_multipath_record.c b/opensm/opensm/osm_sa_multipath_record.c index 032c297..a92fdc1 100644 --- a/opensm/opensm/osm_sa_multipath_record.c +++ b/opensm/opensm/osm_sa_multipath_record.c @@ -192,7 +192,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, osm_qos_level_t *p_qos_level = NULL; uint16_t valid_sl_mask = 0xffff; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_get_path_parms); + OSM_LOG_ENTER(sa->p_log); dest_lid = cl_hton16(dest_lid_ho); @@ -786,7 +786,7 @@ __osm_mpr_rcv_build_pr(IN osm_sa_t * sa, const osm_physp_t *p_src_physp; const osm_physp_t *p_dest_physp; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_build_pr); + OSM_LOG_ENTER(sa->p_log); p_src_physp = p_src_port->p_physp; p_dest_physp = p_dest_port->p_physp; @@ -841,7 +841,7 @@ __osm_mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa, osm_mpr_item_t *p_pr_item; ib_api_status_t status, rev_path_status; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_get_lid_pair_path); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -930,7 +930,7 @@ __osm_mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa, uintn_t src_offset; uintn_t dest_offset; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_get_port_pair_paths); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -1128,7 +1128,7 @@ __osm_mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa, uintn_t iterations; int src_lids, dest_lids; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_get_apm_port_pair_paths); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -1201,7 +1201,7 @@ __osm_mpr_rcv_get_gids(IN osm_sa_t * sa, 
ib_net16_t ib_status = IB_SUCCESS; int i; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_get_gids); + OSM_LOG_ENTER(sa->p_log); for (i = 0; i < ngids; i++, gids++) { if (!ib_gid_is_link_local(gids)) { @@ -1266,7 +1266,7 @@ __osm_mpr_rcv_get_end_points(IN osm_sa_t * sa, ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; ib_gid_t *gids; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_get_end_points); + OSM_LOG_ENTER(sa->p_log); /* Determine what fields are valid and then get a pointer @@ -1326,7 +1326,7 @@ __osm_mpr_rcv_get_apm_paths(IN osm_sa_t * sa, int base_offs, src_lid_ho, dest_lid_ho; int sumA, sumB, minA, minB; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_get_apm_paths); + OSM_LOG_ENTER(sa->p_log); /* * We want to: @@ -1450,7 +1450,7 @@ __osm_mpr_rcv_process_pairs(IN osm_sa_t * sa, osm_port_t **pp_dest_port, **pp_ed; uint32_t max_paths, num_paths, total_paths = 0; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_process_pairs); + OSM_LOG_ENTER(sa->p_log); if (comp_mask & IB_MPR_COMPMASK_NUMBPATH) max_paths = p_mpr->num_path & 0x7F; @@ -1503,7 +1503,7 @@ __osm_mpr_rcv_respond(IN osm_sa_t * sa, osm_mpr_item_t *p_mpr_item; uint32_t i; - OSM_LOG_ENTER(sa->p_log, __osm_mpr_rcv_respond); + OSM_LOG_ENTER(sa->p_log); p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); @@ -1601,7 +1601,7 @@ void osm_mpr_rcv_process(IN void *context, IN void *data) ib_net16_t sa_status; int nsrc, ndest; - OSM_LOG_ENTER(sa->p_log, osm_mpr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_node_record.c b/opensm/opensm/osm_sa_node_record.c index 4af8e58..07c85f3 100644 --- a/opensm/opensm/osm_sa_node_record.c +++ b/opensm/opensm/osm_sa_node_record.c @@ -83,7 +83,7 @@ __osm_nr_rcv_new_nr(IN osm_sa_t * sa, osm_nr_item_t *p_rec_item; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_nr_rcv_new_nr); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ 
-138,7 +138,7 @@ __osm_nr_rcv_create_nr(IN osm_sa_t * sa, uint8_t lmc; ib_net64_t port_guid; - OSM_LOG_ENTER(sa->p_log, __osm_nr_rcv_create_nr); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -217,7 +217,7 @@ __osm_nr_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, IN void *context) ib_net64_t match_port_guid = 0; ib_net16_t match_lid = 0; - OSM_LOG_ENTER(p_ctxt->sa->p_log, __osm_nr_rcv_by_comp_mask); + OSM_LOG_ENTER(p_ctxt->sa->p_log); osm_dump_node_info(p_ctxt->sa->p_log, &p_node->node_info, OSM_LOG_VERBOSE); @@ -336,7 +336,7 @@ void osm_nr_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_nr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c index cc87bc7..fcbc5fd 100644 --- a/opensm/opensm/osm_sa_path_record.c +++ b/opensm/opensm/osm_sa_path_record.c @@ -199,7 +199,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, uint16_t valid_sl_mask = 0xffff; int is_lash; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_get_path_parms); + OSM_LOG_ENTER(sa->p_log); dest_lid = cl_hton16(dest_lid_ho); @@ -795,7 +795,7 @@ __osm_pr_rcv_build_pr(IN osm_sa_t * sa, const osm_physp_t *p_dest_physp; boolean_t is_nonzero_gid = 0; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_build_pr); + OSM_LOG_ENTER(sa->p_log); p_src_physp = p_src_port->p_physp; @@ -867,7 +867,7 @@ __osm_pr_rcv_get_lid_pair_path(IN osm_sa_t * sa, osm_pr_item_t *p_pr_item; ib_api_status_t status, rev_path_status; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_get_lid_pair_path); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -955,7 +955,7 @@ __osm_pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, uintn_t src_offset; uintn_t dest_offset; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_get_port_pair_paths); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) 
osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -1187,7 +1187,7 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, osm_router_t *p_rtr; osm_port_t *p_rtr_port; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_get_end_points); + OSM_LOG_ENTER(sa->p_log); /* Determine what fields are valid and then get a pointer @@ -1398,7 +1398,7 @@ __osm_pr_rcv_process_world(IN osm_sa_t * sa, const osm_port_t *p_dest_port; const osm_port_t *p_src_port; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_process_world); + OSM_LOG_ENTER(sa->p_log); /* Iterate the entire port space over itself. @@ -1446,7 +1446,7 @@ __osm_pr_rcv_process_half(IN osm_sa_t * sa, const cl_qmap_t *p_tbl; const osm_port_t *p_port; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_process_half); + OSM_LOG_ENTER(sa->p_log); /* Iterate over every port, looking for matches... @@ -1497,7 +1497,7 @@ __osm_pr_rcv_process_pair(IN osm_sa_t * sa, IN const ib_net64_t comp_mask, IN cl_qlist_t * const p_list) { - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_process_pair); + OSM_LOG_ENTER(sa->p_log); __osm_pr_rcv_get_port_pair_paths(sa, p_madw, requester_port, p_src_port, p_dest_port, p_dgid, @@ -1532,7 +1532,7 @@ __osm_pr_get_mgrp(IN osm_sa_t * sa, ib_net64_t comp_mask; ib_api_status_t status; - OSM_LOG_ENTER(sa->p_log, __osm_pr_get_mgrp); + OSM_LOG_ENTER(sa->p_log); p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); @@ -1589,7 +1589,7 @@ __osm_pr_match_mgrp_attributes(IN osm_sa_t * sa, uint8_t sl; uint8_t hop_limit; - OSM_LOG_ENTER(sa->p_log, __osm_pr_match_mgrp_attributes); + OSM_LOG_ENTER(sa->p_log); p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); @@ -1651,7 +1651,7 @@ __osm_pr_rcv_check_mcast_dest(IN osm_sa_t * sa, ib_net64_t comp_mask; int is_multicast = 0; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_check_mcast_dest); + OSM_LOG_ENTER(sa->p_log); p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); p_pr = (ib_path_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); @@ 
-1701,7 +1701,7 @@ __osm_pr_rcv_respond(IN osm_sa_t * sa, osm_pr_item_t *p_pr_item; uint32_t i; - OSM_LOG_ENTER(sa->p_log, __osm_pr_rcv_respond); + OSM_LOG_ENTER(sa->p_log); num_rec = cl_qlist_count(p_list); @@ -1849,7 +1849,7 @@ void osm_pr_rcv_process(IN void *context, IN void *data) osm_port_t *requester_port; int ret; - OSM_LOG_ENTER(sa->p_log, osm_pr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_pkey_record.c b/opensm/opensm/osm_sa_pkey_record.c index e21c8a8..d8c8e62 100644 --- a/opensm/opensm/osm_sa_pkey_record.c +++ b/opensm/opensm/osm_sa_pkey_record.c @@ -76,7 +76,7 @@ __osm_sa_pkey_create(IN osm_sa_t * sa, uint16_t lid; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_sa_pkey_create); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -125,7 +125,7 @@ __osm_sa_pkey_check_physp(IN osm_sa_t * sa, ib_net64_t comp_mask = p_ctxt->comp_mask; uint16_t block, num_blocks; - OSM_LOG_ENTER(sa->p_log, __osm_sa_pkey_check_physp); + OSM_LOG_ENTER(sa->p_log); /* we got here with the phys port - all is left is to get the right block */ if (comp_mask & IB_PKEY_COMPMASK_BLOCK) { @@ -156,7 +156,7 @@ __osm_sa_pkey_by_comp_mask(IN osm_sa_t * sa, uint8_t num_ports; const osm_physp_t *p_req_physp; - OSM_LOG_ENTER(sa->p_log, __osm_sa_pkey_by_comp_mask); + OSM_LOG_ENTER(sa->p_log); p_rcvd_rec = p_ctxt->p_rcvd_rec; comp_mask = p_ctxt->comp_mask; @@ -256,7 +256,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_pkey_rec_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_portinfo_record.c b/opensm/opensm/osm_sa_portinfo_record.c index 0cac69c..3e830bd 100644 --- a/opensm/opensm/osm_sa_portinfo_record.c +++ b/opensm/opensm/osm_sa_portinfo_record.c @@ -87,7 +87,7 @@ __osm_pir_rcv_new_pir(IN osm_sa_t * sa, osm_pir_item_t *p_rec_item; ib_api_status_t status = 
IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_pir_rcv_new_pir); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -132,7 +132,7 @@ __osm_sa_pir_create(IN osm_sa_t * sa, uint16_t match_lid_ho; osm_physp_t *p_node_physp; - OSM_LOG_ENTER(sa->p_log, __osm_sa_pir_create); + OSM_LOG_ENTER(sa->p_log); if (p_physp->p_node->sw) { p_node_physp = osm_node_get_physp_ptr(p_physp->p_node, 0); @@ -183,7 +183,7 @@ __osm_sa_pir_check_physp(IN osm_sa_t * sa, const ib_port_info_t *p_comp_pi; const ib_port_info_t *p_pi; - OSM_LOG_ENTER(sa->p_log, __osm_sa_pir_check_physp); + OSM_LOG_ENTER(sa->p_log); p_rcvd_rec = p_ctxt->p_rcvd_rec; comp_mask = p_ctxt->comp_mask; @@ -417,7 +417,7 @@ __osm_sa_pir_by_comp_mask(IN osm_sa_t * sa, uint8_t num_ports; const osm_physp_t *p_req_physp; - OSM_LOG_ENTER(sa->p_log, __osm_sa_pir_by_comp_mask); + OSM_LOG_ENTER(sa->p_log); p_rcvd_rec = p_ctxt->p_rcvd_rec; comp_mask = p_ctxt->comp_mask; @@ -499,7 +499,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_pir_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_response.c b/opensm/opensm/osm_sa_response.c index d63fa70..554eff3 100644 --- a/opensm/opensm/osm_sa_response.c +++ b/opensm/opensm/osm_sa_response.c @@ -69,7 +69,7 @@ osm_sa_send_error(IN osm_sa_t * sa, ib_sa_mad_t *p_sa_mad; ib_api_status_t status; - OSM_LOG_ENTER(sa->p_log, osm_sa_send_error); + OSM_LOG_ENTER(sa->p_log); /* avoid races - if we are exiting - exit */ if (osm_exit_flag) { diff --git a/opensm/opensm/osm_sa_service_record.c b/opensm/opensm/osm_sa_service_record.c index 19de389..ce6a11a 100644 --- a/opensm/opensm/osm_sa_service_record.c +++ b/opensm/opensm/osm_sa_service_record.c @@ -175,7 +175,7 @@ __validate_sr(IN osm_sa_t * sa, IN const osm_madw_t * const p_madw) ib_sa_mad_t *p_sa_mad; ib_service_record_t *p_recvd_service_rec; - OSM_LOG_ENTER(sa->p_log, __validate_sr); + 
OSM_LOG_ENTER(sa->p_log); p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); p_recvd_service_rec = @@ -230,7 +230,7 @@ __osm_sr_rcv_respond(IN osm_sa_t * sa, const ib_sa_mad_t *p_rcvd_mad = osm_madw_get_sa_mad_ptr(p_madw); boolean_t trusted_req = TRUE; - OSM_LOG_ENTER(sa->p_log, __osm_sr_rcv_respond); + OSM_LOG_ENTER(sa->p_log); num_rec = cl_qlist_count(p_list); @@ -627,7 +627,7 @@ osm_sr_rcv_process_get_method(IN osm_sa_t * sa, osm_sr_search_ctxt_t context; osm_physp_t *p_req_physp; - OSM_LOG_ENTER(sa->p_log, osm_sr_rcv_process_get_method); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); @@ -700,7 +700,7 @@ osm_sr_rcv_process_set_method(IN osm_sa_t * sa, osm_sr_item_t *p_sr_item; cl_qlist_t sr_list; - OSM_LOG_ENTER(sa->p_log, osm_sr_rcv_process_set_method); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); @@ -816,7 +816,7 @@ osm_sr_rcv_process_delete_method(IN osm_sa_t * sa, osm_sr_item_t *p_sr_item; cl_qlist_t sr_list; - OSM_LOG_ENTER(sa->p_log, osm_sr_rcv_process_delete_method); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); @@ -888,7 +888,7 @@ void osm_sr_rcv_process(IN void *context, IN void *data) ib_net16_t sa_status = IB_SA_MAD_STATUS_REQ_INVALID; boolean_t valid; - OSM_LOG_ENTER(sa->p_log, osm_sr_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); @@ -950,7 +950,7 @@ void osm_sr_rcv_lease_cb(IN void *context) uint32_t elapsed_time; uint32_t trim_time = 20; /* maxiaml timer refresh is 20 seconds */ - OSM_LOG_ENTER(sa->p_log, osm_sr_rcv_lease_cb); + OSM_LOG_ENTER(sa->p_log); cl_plock_excl_acquire(sa->p_lock); diff --git a/opensm/opensm/osm_sa_slvl_record.c b/opensm/opensm/osm_sa_slvl_record.c index ba13010..4f4cc03 100644 --- a/opensm/opensm/osm_sa_slvl_record.c +++ b/opensm/opensm/osm_sa_slvl_record.c @@ -88,7 +88,7 @@ __osm_sa_slvl_create(IN osm_sa_t * sa, uint16_t lid; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_sa_slvl_create); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ 
-143,7 +143,7 @@ __osm_sa_slvl_by_comp_mask(IN osm_sa_t * sa, uint8_t out_port_start, out_port_end; const osm_physp_t *p_req_physp; - OSM_LOG_ENTER(sa->p_log, __osm_sa_slvl_by_comp_mask); + OSM_LOG_ENTER(sa->p_log); p_rcvd_rec = p_ctxt->p_rcvd_rec; comp_mask = p_ctxt->comp_mask; @@ -246,7 +246,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_slvl_rec_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_sminfo_record.c b/opensm/opensm/osm_sa_sminfo_record.c index b89173c..dddf8ca 100644 --- a/opensm/opensm/osm_sa_sminfo_record.c +++ b/opensm/opensm/osm_sa_sminfo_record.c @@ -93,7 +93,7 @@ __osm_smir_rcv_new_smir(IN osm_sa_t * sa, osm_smir_item_t *p_rec_item; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_smir_rcv_new_smir); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -135,7 +135,7 @@ __osm_sa_smir_by_comp_mask(IN osm_sa_t * sa, const osm_physp_t *const p_req_physp = p_ctxt->p_req_physp; ib_net64_t const comp_mask = p_ctxt->comp_mask; - OSM_LOG_ENTER(sa->p_log, __osm_sa_smir_by_comp_mask); + OSM_LOG_ENTER(sa->p_log); if (comp_mask & IB_SMIR_COMPMASK_GUID) { if (p_rem_sm->smi.guid != p_rcvd_rec->sm_info.guid) @@ -211,7 +211,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_smir_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_sw_info_record.c b/opensm/opensm/osm_sa_sw_info_record.c index f1eddda..01dae81 100644 --- a/opensm/opensm/osm_sa_sw_info_record.c +++ b/opensm/opensm/osm_sa_sw_info_record.c @@ -81,7 +81,7 @@ __osm_sir_rcv_new_sir(IN osm_sa_t * sa, osm_sir_item_t *p_rec_item; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_sir_rcv_new_sir); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -146,7 +146,7 @@ 
__osm_sir_rcv_create_sir(IN osm_sa_t * sa, ib_net16_t min_lid_ho; ib_net16_t max_lid_ho; - OSM_LOG_ENTER(sa->p_log, __osm_sir_rcv_create_sir); + OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { osm_log(sa->p_log, OSM_LOG_DEBUG, @@ -226,7 +226,7 @@ __osm_sir_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, ib_net64_t const comp_mask = p_ctxt->comp_mask; ib_net16_t match_lid = 0; - OSM_LOG_ENTER(p_ctxt->sa->p_log, __osm_sir_rcv_by_comp_mask); + OSM_LOG_ENTER(p_ctxt->sa->p_log); osm_dump_switch_info(p_ctxt->sa->p_log, &p_sw->switch_info, OSM_LOG_VERBOSE); @@ -268,7 +268,7 @@ void osm_sir_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_sir_rcv_process); + OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sa_vlarb_record.c b/opensm/opensm/osm_sa_vlarb_record.c index 3ada071..34b017e 100644 --- a/opensm/opensm/osm_sa_vlarb_record.c +++ b/opensm/opensm/osm_sa_vlarb_record.c @@ -88,7 +88,7 @@ __osm_sa_vl_arb_create(IN osm_sa_t * sa, uint16_t lid; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sa->p_log, __osm_sa_vl_arb_create); + OSM_LOG_ENTER(sa->p_log); p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { @@ -135,7 +135,7 @@ __osm_sa_vl_arb_check_physp(IN osm_sa_t * sa, ib_net64_t comp_mask = p_ctxt->comp_mask; uint8_t block; - OSM_LOG_ENTER(sa->p_log, __osm_sa_vl_arb_check_physp); + OSM_LOG_ENTER(sa->p_log); /* we got here with the phys port - all that's left is to get the right block */ for (block = 1; block <= 4; block++) { @@ -162,7 +162,7 @@ __osm_sa_vl_arb_by_comp_mask(IN osm_sa_t * sa, uint8_t num_ports; const osm_physp_t *p_req_physp; - OSM_LOG_ENTER(sa->p_log, __osm_sa_vl_arb_by_comp_mask); + OSM_LOG_ENTER(sa->p_log); p_rcvd_rec = p_ctxt->p_rcvd_rec; comp_mask = p_ctxt->comp_mask; @@ -262,7 +262,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) CL_ASSERT(sa); - OSM_LOG_ENTER(sa->p_log, osm_vlarb_rec_rcv_process); + 
OSM_LOG_ENTER(sa->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_service.c b/opensm/opensm/osm_service.c index fcbdee3..1c5c127 100644 --- a/opensm/opensm/osm_service.c +++ b/opensm/opensm/osm_service.c @@ -125,7 +125,7 @@ osm_svcr_t *osm_svcr_get_by_rid(IN osm_subn_t const *p_subn, { cl_list_item_t *p_list_item; - OSM_LOG_ENTER(p_log, osm_svcr_get_by_rid); + OSM_LOG_ENTER(p_log); p_list_item = cl_qlist_find_from_head(&p_subn->sa_sr_list, __match_rid_of_svc_rec, @@ -144,7 +144,7 @@ void osm_svcr_insert_to_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, IN osm_svcr_t * p_svcr) { - OSM_LOG_ENTER(p_log, osm_svcr_insert_to_db); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "osm_svcr_insert_to_db: " @@ -159,7 +159,7 @@ void osm_svcr_remove_from_db(IN osm_subn_t * p_subn, IN osm_log_t * p_log, IN osm_svcr_t * p_svcr) { - OSM_LOG_ENTER(p_log, osm_svcr_remove_from_db); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_DEBUG, "osm_svcr_remove_from_db: " diff --git a/opensm/opensm/osm_slvl_map_rcv.c b/opensm/opensm/osm_slvl_map_rcv.c index 2af9be2..dcbd7e8 100644 --- a/opensm/opensm/osm_slvl_map_rcv.c +++ b/opensm/opensm/osm_slvl_map_rcv.c @@ -81,7 +81,7 @@ void osm_slvl_rcv_process(IN void *context, IN void *p_data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_slvl_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c index b576c26..b991f38 100644 --- a/opensm/opensm/osm_sm.c +++ b/opensm/opensm/osm_sm.c @@ -100,7 +100,7 @@ static void __osm_sm_sweeper(IN void *p_ptr) osm_sm_t *const p_sm = (osm_sm_t *) p_ptr; unsigned signals, i; - OSM_LOG_ENTER(p_sm->p_log, __osm_sm_sweeper); + OSM_LOG_ENTER(p_sm->p_log); while (p_sm->thread_state == OSM_THREAD_STATE_RUN) { /* @@ -177,7 +177,7 @@ void osm_sm_shutdown(IN osm_sm_t * const p_sm) { boolean_t signal_event = FALSE; - OSM_LOG_ENTER(p_sm->p_log, osm_sm_shutdown); + OSM_LOG_ENTER(p_sm->p_log); /* * Signal our threads that we're leaving. 
@@ -224,7 +224,7 @@ void osm_sm_shutdown(IN osm_sm_t * const p_sm) **********************************************************************/ void osm_sm_destroy(IN osm_sm_t * const p_sm) { - OSM_LOG_ENTER(p_sm->p_log, osm_sm_destroy); + OSM_LOG_ENTER(p_sm->p_log); osm_lid_mgr_destroy(&p_sm->lid_mgr); osm_ucast_mgr_destroy(&p_sm->ucast_mgr); cl_event_wheel_destroy(&p_sm->trap_aging_tracker); @@ -255,7 +255,7 @@ osm_sm_init(IN osm_sm_t * const p_sm, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_sm_init); + OSM_LOG_ENTER(p_log); p_sm->p_subn = p_subn; p_sm->p_db = p_db; @@ -414,7 +414,7 @@ void osm_sm_signal(osm_sm_t * p_sm, osm_signal_t signal) **********************************************************************/ void osm_sm_sweep(IN osm_sm_t * const p_sm) { - OSM_LOG_ENTER(p_sm->p_log, osm_sm_sweep); + OSM_LOG_ENTER(p_sm->p_log); osm_sm_signal(p_sm, OSM_SIGNAL_SWEEP); OSM_LOG_EXIT(p_sm->p_log); } @@ -426,7 +426,7 @@ osm_sm_bind(IN osm_sm_t * const p_sm, IN const ib_net64_t port_guid) { ib_api_status_t status; - OSM_LOG_ENTER(p_sm->p_log, osm_sm_bind); + OSM_LOG_ENTER(p_sm->p_log); status = osm_sm_mad_ctrl_bind(&p_sm->mad_ctrl, port_guid); @@ -509,7 +509,7 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm, ib_api_status_t status = IB_SUCCESS; osm_mcm_info_t *p_mcm; - OSM_LOG_ENTER(p_sm->p_log, osm_sm_mcgrp_join); + OSM_LOG_ENTER(p_sm->p_log); osm_log(p_sm->p_log, OSM_LOG_VERBOSE, "osm_sm_mcgrp_join: " @@ -620,7 +620,7 @@ osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm, cl_qmap_t *p_tbl; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_sm->p_log, osm_sm_mcgrp_leave); + OSM_LOG_ENTER(p_sm->p_log); osm_log(p_sm->p_log, OSM_LOG_VERBOSE, "osm_sm_mcgrp_leave: " diff --git a/opensm/opensm/osm_sm_mad_ctrl.c b/opensm/opensm/osm_sm_mad_ctrl.c index efbe97a..6806ab1 100644 --- a/opensm/opensm/osm_sm_mad_ctrl.c +++ b/opensm/opensm/osm_sm_mad_ctrl.c @@ -75,7 +75,7 @@ __osm_sm_mad_ctrl_retire_trans_mad(IN osm_sm_mad_ctrl_t * const p_ctrl, { uint32_t outstanding; 
- OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_retire_trans_mad); + OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); /* @@ -133,7 +133,7 @@ __osm_sm_mad_ctrl_disp_done_callback(IN void *context, IN void *p_data) osm_madw_t *const p_madw = (osm_madw_t *) p_data; ib_smp_t *p_smp; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_disp_done_callback); + OSM_LOG_ENTER(p_ctrl->p_log); /* If the MAD that just finished processing was a response, @@ -173,7 +173,7 @@ __osm_sm_mad_ctrl_update_wire_stats(IN osm_sm_mad_ctrl_t * const p_ctrl) { uint32_t mads_on_wire; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_update_wire_stats); + OSM_LOG_ENTER(p_ctrl->p_log); mads_on_wire = cl_atomic_dec(&p_ctrl->p_stats->qp0_mads_outstanding_on_wire); @@ -212,7 +212,7 @@ __osm_sm_mad_ctrl_process_get_resp(IN osm_sm_mad_ctrl_t * const p_ctrl, osm_madw_t *p_old_madw; cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_process_get_resp); + OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); CL_ASSERT(transaction_context); @@ -336,7 +336,7 @@ __osm_sm_mad_ctrl_process_get(IN osm_sm_mad_ctrl_t * const p_ctrl, cl_status_t status; cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_process_get); + OSM_LOG_ENTER(p_ctrl->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); @@ -420,7 +420,7 @@ __osm_sm_mad_ctrl_process_set(IN osm_sm_mad_ctrl_t * const p_ctrl, cl_status_t status; cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_process_set); + OSM_LOG_ENTER(p_ctrl->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); @@ -505,7 +505,7 @@ __osm_sm_mad_ctrl_process_trap(IN osm_sm_mad_ctrl_t * const p_ctrl, cl_status_t status; cl_disp_msgid_t msg_id = CL_DISP_MSGID_NONE; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_process_trap); + OSM_LOG_ENTER(p_ctrl->p_log); p_smp = osm_madw_get_smp_ptr(p_madw); @@ -601,7 +601,7 @@ __osm_sm_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, ib_smp_t 
*p_smp; ib_net16_t status; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_rcv_callback); + OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); @@ -720,7 +720,7 @@ __osm_sm_mad_ctrl_send_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) ib_api_status_t status; ib_smp_t *p_smp; - OSM_LOG_ENTER(p_ctrl->p_log, __osm_sm_mad_ctrl_send_err_cb); + OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT(p_madw); @@ -856,7 +856,7 @@ osm_sm_mad_ctrl_init(IN osm_sm_mad_ctrl_t * const p_ctrl, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_sm_mad_ctrl_init); + OSM_LOG_ENTER(p_log); osm_sm_mad_ctrl_construct(p_ctrl); @@ -894,7 +894,7 @@ osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * const p_ctrl, osm_bind_info_t bind_info; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_ctrl->p_log, osm_sm_mad_ctrl_bind); + OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE) { osm_log(p_ctrl->p_log, OSM_LOG_ERROR, diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index eff9f19..a318323 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -88,7 +88,7 @@ static void __osm_sm_state_mgr_send_local_port_info_req(osm_sm_t *sm) ib_net64_t port_guid = sm->p_subn->sm_port_guid; ib_api_status_t status; - OSM_LOG_ENTER(sm->p_log, __osm_sm_state_mgr_send_local_port_info_req); + OSM_LOG_ENTER(sm->p_log); /* * Send a query of SubnGet(PortInfo) to our own port, in order to * update the master_sm_base_lid of the subnet. 
@@ -136,7 +136,7 @@ static void __osm_sm_state_mgr_send_master_sm_info_req(osm_sm_t *sm) const osm_port_t *p_port; ib_api_status_t status; - OSM_LOG_ENTER(sm->p_log, __osm_sm_state_mgr_send_master_sm_info_req); + OSM_LOG_ENTER(sm->p_log); memset(&context, 0, sizeof(context)); if (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY) { @@ -186,7 +186,7 @@ static void __osm_sm_state_mgr_start_polling(osm_sm_t *sm) uint32_t timeout = sm->p_subn->opt.sminfo_polling_timeout; cl_status_t cl_status; - OSM_LOG_ENTER(sm->p_log, __osm_sm_state_mgr_start_polling); + OSM_LOG_ENTER(sm->p_log); /* * Init the retry_number back to zero - need to restart counting @@ -220,7 +220,7 @@ void osm_sm_state_mgr_polling_callback(IN void *context) uint32_t timeout = sm->p_subn->opt.sminfo_polling_timeout; cl_status_t cl_status; - OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_polling_callback); + OSM_LOG_ENTER(sm->p_log); /* * We can be here in one of two cases: @@ -298,7 +298,7 @@ static void __osm_sm_state_mgr_signal_error(osm_sm_t *sm, **********************************************************************/ void osm_sm_state_mgr_signal_master_is_alive(osm_sm_t *sm) { - OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_signal_master_is_alive); + OSM_LOG_ENTER(sm->p_log); sm->retry_number = 0; OSM_LOG_EXIT(sm->p_log); } @@ -312,7 +312,7 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t *sm, CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_process); + OSM_LOG_ENTER(sm->p_log); /* * The state lock prevents many race conditions from screwing @@ -534,7 +534,7 @@ ib_api_status_t osm_sm_state_mgr_check_legality(osm_sm_t *sm, CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_sm_state_mgr_check_legality); + OSM_LOG_ENTER(sm->p_log); /* * The state lock prevents many race conditions from screwing diff --git a/opensm/opensm/osm_sminfo_rcv.c b/opensm/opensm/osm_sminfo_rcv.c index e179385..7606b5b 100644 --- a/opensm/opensm/osm_sminfo_rcv.c +++ b/opensm/opensm/osm_sminfo_rcv.c @@ -93,7 +93,7 @@ 
__osm_sminfo_rcv_process_get_request(IN osm_sm_t * sm, ib_api_status_t status; ib_sm_info_t *p_remote_smi; - OSM_LOG_ENTER(sm->p_log, __osm_sminfo_rcv_process_get_request); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); @@ -186,7 +186,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, osm_sm_signal_t sm_signal; ib_sm_info_t *p_remote_smi; - OSM_LOG_ENTER(sm->p_log, __osm_sminfo_rcv_process_set_request); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); @@ -342,7 +342,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, { const ib_sm_info_t *p_smi; - OSM_LOG_ENTER(sm->p_log, __osm_sminfo_rcv_process_get_sm); + OSM_LOG_ENTER(sm->p_log); p_smi = &p_sm->smi; @@ -462,7 +462,7 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm, ib_net64_t port_guid; osm_remote_sm_t *p_sm; - OSM_LOG_ENTER(sm->p_log, __osm_sminfo_rcv_process_get_response); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); @@ -565,7 +565,7 @@ __osm_sminfo_rcv_process_set_response(IN osm_sm_t * sm, const ib_smp_t *p_smp; const ib_sm_info_t *p_smi; - OSM_LOG_ENTER(sm->p_log, __osm_sminfo_rcv_process_set_response); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); @@ -608,7 +608,7 @@ void osm_sminfo_rcv_process(IN void *context, IN void *data) ib_smp_t *p_smp; osm_smi_context_t *p_smi_context; - OSM_LOG_ENTER(sm->p_log, osm_sminfo_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index fb60f4d..fa9a273 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -165,7 +165,7 @@ static void __osm_state_mgr_get_sw_info(IN cl_map_item_t * const p_object, osm_sm_t *sm = context; ib_api_status_t status; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_get_sw_info); + OSM_LOG_ENTER(sm->p_log); p_node = p_sw->p_node; p_dr_path = osm_node_get_any_dr_path_ptr(p_node); @@ -200,7 +200,7 @@ __osm_state_mgr_get_remote_port_info(IN osm_sm_t * sm, osm_madw_context_t mad_context; ib_api_status_t status; - 
OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_get_remote_port_info); + OSM_LOG_ENTER(sm->p_log); /* generate a dr path leaving on the physp to the remote node */ p_dr_path = osm_physp_get_dr_path_ptr(p_physp); @@ -244,7 +244,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_sm_t * sm) osm_bind_handle_t h_bind; uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_sweep_hop_0); + OSM_LOG_ENTER(sm->p_log); memset(path_array, 0, sizeof(path_array)); @@ -310,7 +310,7 @@ static ib_api_status_t __osm_state_mgr_clean_known_lids(IN osm_sm_t * sm) cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl); uint32_t i; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_clean_known_lids); + OSM_LOG_ENTER(sm->p_log); /* we need a lock here! */ CL_PLOCK_ACQUIRE(sm->p_lock); @@ -333,7 +333,7 @@ static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_sm_t * sm) ib_api_status_t status; osm_bind_handle_t h_bind; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_notify_lid_change); + OSM_LOG_ENTER(sm->p_log); /* * First, get the bind handle. @@ -374,7 +374,7 @@ static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_sm_t * sm) osm_physp_t *p_physp; uint8_t state; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_is_sm_port_down); + OSM_LOG_ENTER(sm->p_log); port_guid = sm->p_subn->sm_port_guid; @@ -438,7 +438,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) uint8_t num_ports; osm_physp_t *p_ext_physp; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_sweep_hop_1); + OSM_LOG_ENTER(sm->p_log); /* * First, get our own port and node objects. 
@@ -568,7 +568,7 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t * sm) osm_physp_t *p_physp; uint8_t port_num; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_light_sweep_start); + OSM_LOG_ENTER(sm->p_log); p_sw_tbl = &sm->p_subn->sw_guid_tbl; @@ -642,7 +642,7 @@ static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN osm_sm_t * sm) osm_remote_sm_t *p_sm; osm_remote_sm_t *p_sm_res = NULL; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_exists_other_master_sm); + OSM_LOG_ENTER(sm->p_log); p_sm_tbl = &sm->p_subn->sm_guid_tbl; @@ -682,7 +682,7 @@ static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_sm_t * sm) uint8_t highest_sm_priority; ib_net64_t highest_sm_guid; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_get_highest_sm); + OSM_LOG_ENTER(sm->p_log); p_sm_tbl = &sm->p_subn->sm_guid_tbl; @@ -740,7 +740,7 @@ __osm_state_mgr_send_handover(IN osm_sm_t * const sm, const osm_port_t *p_port; ib_api_status_t status; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_send_handover); + OSM_LOG_ENTER(sm->p_log); /* * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given. 
@@ -819,7 +819,7 @@ static void __osm_state_mgr_report_new_ports(IN osm_sm_t * sm) uint16_t min_lid_ho; uint16_t max_lid_ho; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_report_new_ports); + OSM_LOG_ENTER(sm->p_log); CL_PLOCK_ACQUIRE(sm->p_lock); p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl); @@ -902,7 +902,7 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t * sm) uint16_t max_lid_ho; uint16_t lid_ho; - OSM_LOG_ENTER(sm->p_log, __osm_state_mgr_check_tbl_consistency); + OSM_LOG_ENTER(sm->p_log); cl_ptr_vector_construct(&ref_port_lid_tbl); cl_ptr_vector_init(&ref_port_lid_tbl, @@ -1295,7 +1295,7 @@ void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal) { CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_state_mgr_process); + OSM_LOG_ENTER(sm->p_log); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) osm_log(sm->p_log, OSM_LOG_DEBUG, diff --git a/opensm/opensm/osm_sw_info_rcv.c b/opensm/opensm/osm_sw_info_rcv.c index 2cc887a..fb92e6f 100644 --- a/opensm/opensm/osm_sw_info_rcv.c +++ b/opensm/opensm/osm_sw_info_rcv.c @@ -77,7 +77,7 @@ __osm_si_rcv_get_port_info(IN osm_sm_t * sm, const ib_smp_t *p_smp; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sm->p_log, __osm_si_rcv_get_port_info); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_sw); @@ -140,7 +140,7 @@ __osm_si_rcv_get_fwd_tbl(IN osm_sm_t * sm, uint32_t max_block_id_ho; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sm->p_log, __osm_si_rcv_get_fwd_tbl); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_sw); @@ -200,7 +200,7 @@ __osm_si_rcv_get_mcast_fwd_tbl(IN osm_sm_t * sm, uint32_t attr_mod_ho; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sm->p_log, __osm_si_rcv_get_mcast_fwd_tbl); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_sw); @@ -297,7 +297,7 @@ __osm_si_rcv_process_new(IN osm_sm_t * sm, CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, __osm_si_rcv_process_new); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); @@ -406,7 +406,7 @@ __osm_si_rcv_process_existing(IN osm_sm_t * sm, ib_smp_t *p_smp; 
boolean_t is_change_detected = FALSE; - OSM_LOG_ENTER(sm->p_log, __osm_si_rcv_process_existing); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); @@ -499,7 +499,7 @@ void osm_si_rcv_process(IN void *context, IN void *data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_si_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_sweep_fail_ctrl.c b/opensm/opensm/osm_sweep_fail_ctrl.c index 3a5190f..7e80fe0 100644 --- a/opensm/opensm/osm_sweep_fail_ctrl.c +++ b/opensm/opensm/osm_sweep_fail_ctrl.c @@ -59,7 +59,7 @@ static void __osm_sweep_fail_ctrl_disp_callback(IN void *context, { osm_sweep_fail_ctrl_t *const p_ctrl = (osm_sweep_fail_ctrl_t *) context; - OSM_LOG_ENTER(p_ctrl->sm->p_log, __osm_sweep_fail_ctrl_disp_callback); + OSM_LOG_ENTER(p_ctrl->sm->p_log); UNUSED_PARAM(p_data); /* @@ -94,7 +94,7 @@ osm_sweep_fail_ctrl_init(IN osm_sweep_fail_ctrl_t * const p_ctrl, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sm->p_log, osm_sweep_fail_ctrl_init); + OSM_LOG_ENTER(sm->p_log); osm_sweep_fail_ctrl_construct(p_ctrl); p_ctrl->sm = sm; diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c index c896be4..b89f9b9 100644 --- a/opensm/opensm/osm_trap_rcv.c +++ b/opensm/opensm/osm_trap_rcv.c @@ -120,7 +120,7 @@ osm_trap_rcv_aging_tracker_callback(IN uint64_t key, uint8_t port_num; osm_physp_t *p_physp; - OSM_LOG_ENTER(sm->p_log, osm_trap_rcv_aging_tracker_callback); + OSM_LOG_ENTER(sm->p_log); if (osm_exit_flag) /* We got an exit flag - do nothing */ @@ -260,7 +260,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, uint64_t event_wheel_timeout = OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT; boolean_t run_heavy_sweep = FALSE; - OSM_LOG_ENTER(sm->p_log, __osm_trap_rcv_process_request); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); @@ -694,7 +694,7 @@ __osm_trap_rcv_process_sm(IN osm_sm_t * sm, { /* const ib_sm_info_t* p_smi; */ - OSM_LOG_ENTER(sm->p_log, __osm_trap_rcv_process_sm); + OSM_LOG_ENTER(sm->p_log); 
osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_trap_rcv_process_sm: ERR 3807: " @@ -712,7 +712,7 @@ __osm_trap_rcv_process_response(IN osm_sm_t * sm, IN const osm_madw_t * const p_madw) { - OSM_LOG_ENTER(sm->p_log, __osm_trap_rcv_process_response); + OSM_LOG_ENTER(sm->p_log); osm_log(sm->p_log, OSM_LOG_ERROR, "__osm_trap_rcv_process_response: ERR 3808: " @@ -729,7 +729,7 @@ void osm_trap_rcv_process(IN void *context, IN void *data) osm_madw_t *p_madw = data; ib_smp_t *p_smp; - OSM_LOG_ENTER(sm->p_log, osm_trap_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c index bada08e..411d59c 100644 --- a/opensm/opensm/osm_ucast_ftree.c +++ b/opensm/opensm/osm_ucast_ftree.c @@ -1471,8 +1471,7 @@ static int __osm_ftree_fabric_mark_leaf_switches(IN ftree_fabric_t * p_ftree) unsigned i; int res = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, - __osm_ftree_fabric_mark_leaf_switches); + OSM_LOG_ENTER(&p_ftree->p_osm->log); osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "__osm_ftree_fabric_mark_leaf_switches: " @@ -1539,7 +1538,7 @@ static void __osm_ftree_fabric_make_indexing(IN ftree_fabric_t * p_ftree) cl_list_t bfs_list; ftree_sw_tbl_element_t *p_sw_tbl_element; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_make_indexing); + OSM_LOG_ENTER(&p_ftree->p_osm->log); osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "__osm_ftree_fabric_make_indexing: " @@ -1701,8 +1700,7 @@ static int __osm_ftree_fabric_create_leaf_switch_array(IN ftree_fabric_t * unsigned last_leaf_idx; int res = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, - __osm_ftree_fabric_create_leaf_switch_array); + OSM_LOG_ENTER(&p_ftree->p_osm->log); /* create array of ALL the switches that have leaf rank */ all_switches_at_leaf_level = (ftree_sw_t **) @@ -1824,8 +1822,7 @@ static boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * boolean_t res = TRUE; uint8_t i; - OSM_LOG_ENTER(&p_ftree->p_osm->log, - 
__osm_ftree_fabric_validate_topology); + OSM_LOG_ENTER(&p_ftree->p_osm->log); osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "__osm_ftree_fabric_validate_topology: " @@ -2516,7 +2513,7 @@ static void __osm_ftree_fabric_route_to_cns(IN ftree_fabric_t * p_ftree) ib_net16_t hca_lid; unsigned routed_targets_on_leaf; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_route_to_cns); + OSM_LOG_ENTER(&p_ftree->p_osm->log); /* for each leaf switch (in indexing order) */ for (i = 0; i < p_ftree->leaf_switches_num; i++) { @@ -2640,8 +2637,7 @@ static void __osm_ftree_fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree) unsigned port_num_on_switch; unsigned i; - OSM_LOG_ENTER(&p_ftree->p_osm->log, - __osm_ftree_fabric_route_to_non_cns); + OSM_LOG_ENTER(&p_ftree->p_osm->log); p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { @@ -2718,8 +2714,7 @@ static void __osm_ftree_fabric_route_to_switches(IN ftree_fabric_t * p_ftree) ftree_sw_t *p_sw; ftree_sw_t *p_next_sw; - OSM_LOG_ENTER(&p_ftree->p_osm->log, - __osm_ftree_fabric_route_to_switches); + OSM_LOG_ENTER(&p_ftree->p_osm->log); p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { @@ -2761,7 +2756,7 @@ static int __osm_ftree_fabric_populate_nodes(IN ftree_fabric_t * p_ftree) osm_node_t *p_osm_node; osm_node_t *p_next_osm_node; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_populate_nodes); + OSM_LOG_ENTER(&p_ftree->p_osm->log); p_next_osm_node = (osm_node_t *) cl_qmap_head(&p_ftree->p_osm->subn.node_guid_tbl); @@ -2879,7 +2874,7 @@ __osm_ftree_rank_leaf_switches(IN ftree_fabric_t * p_ftree, static uint8_t i = 0; int res = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_rank_leaf_switches); + OSM_LOG_ENTER(&p_ftree->p_osm->log); for (i = 0; i < osm_node_get_num_physp(p_osm_node); i++) { p_osm_port = osm_node_get_physp_ptr(p_osm_node, i); @@ -3243,7 
+3238,7 @@ static int __osm_ftree_fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) unsigned i; cl_list_iterator_t guid_iterator; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_rank_from_roots); + OSM_LOG_ENTER(&p_ftree->p_osm->log); cl_list_init(&ranking_bfs_list, 10); /* Rank all the roots and add them to list */ @@ -3353,7 +3348,7 @@ static int __osm_ftree_fabric_rank_from_hcas(IN ftree_fabric_t * p_ftree) cl_list_t ranking_bfs_list; int res = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_rank_from_hcas); + OSM_LOG_ENTER(&p_ftree->p_osm->log); cl_list_init(&ranking_bfs_list, 10); @@ -3395,7 +3390,7 @@ static int __osm_ftree_fabric_rank(IN ftree_fabric_t * p_ftree) { int res = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_perform_ranking); + OSM_LOG_ENTER(&p_ftree->p_osm->log); if (__osm_ftree_fabric_roots_provided(p_ftree)) res = __osm_ftree_fabric_rank_from_roots(p_ftree); @@ -3424,7 +3419,7 @@ static void __osm_ftree_fabric_set_leaf_rank(IN ftree_fabric_t * p_ftree) ftree_hca_t *p_hca = NULL; ftree_hca_t *p_next_hca; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_set_leaf_rank); + OSM_LOG_ENTER(&p_ftree->p_osm->log); if (!__osm_ftree_fabric_roots_provided(p_ftree)) { /* If root file is not provided, the fabric has to be pure fat-tree @@ -3483,7 +3478,7 @@ static int __osm_ftree_fabric_populate_ports(IN ftree_fabric_t * p_ftree) ftree_sw_t *p_next_sw; int res = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_populate_ports); + OSM_LOG_ENTER(&p_ftree->p_osm->log); p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { @@ -3538,7 +3533,7 @@ static int __osm_ftree_fabric_read_guid_files(IN ftree_fabric_t * p_ftree) { int status = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_fabric_read_guid_files); + OSM_LOG_ENTER(&p_ftree->p_osm->log); if (__osm_ftree_fabric_roots_provided(p_ftree)) { osm_log(&p_ftree->p_osm->log, 
OSM_LOG_DEBUG, @@ -3607,7 +3602,7 @@ static int __osm_ftree_construct_fabric(IN void *context) ftree_fabric_t *p_ftree = context; int status = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_construct_fabric); + OSM_LOG_ENTER(&p_ftree->p_osm->log); __osm_ftree_fabric_clear(p_ftree); @@ -3812,7 +3807,7 @@ static int __osm_ftree_do_routing(IN void *context) ftree_fabric_t *p_ftree = context; int status = 0; - OSM_LOG_ENTER(&p_ftree->p_osm->log, __osm_ftree_do_routing); + OSM_LOG_ENTER(&p_ftree->p_osm->log); if (!p_ftree->fabric_built) { status = -1; diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c index 65f688e..d9845c7 100644 --- a/opensm/opensm/osm_ucast_lash.c +++ b/opensm/opensm/osm_ucast_lash.c @@ -800,7 +800,7 @@ static void free_lash_structures(lash_t * p_lash) unsigned num_switches = p_lash->num_switches; osm_log_t *p_log = &p_lash->p_osm->log; - OSM_LOG_ENTER(p_log, free_lash_structures); + OSM_LOG_ENTER(p_log); // free cdg_vertex_matrix for (i = 0; i < p_lash->vl_min; i++) { @@ -856,7 +856,7 @@ static int init_lash_structures(lash_t * p_lash) int status = IB_SUCCESS; unsigned int i, j, k; - OSM_LOG_ENTER(p_log, init_lash_structures); + OSM_LOG_ENTER(p_log); // initialise cdg_vertex_matrix[num_switches][num_switches][num_switches] p_lash->cdg_vertex_matrix = @@ -947,7 +947,7 @@ static int lash_core(lash_t * p_lash) int status = IB_SUCCESS; int *switch_bitmap = NULL; /* Bitmap to check if we have processed this pair */ - OSM_LOG_ENTER(p_log, lash_core); + OSM_LOG_ENTER(p_log); switch_bitmap = (int *)malloc(num_switches * num_switches * sizeof(int)); @@ -1125,7 +1125,7 @@ static void populate_fwd_tbls(lash_t * p_lash) osm_switch_t *p_sw, *p_next_sw, *p_dst_sw; uint16_t max_lid_ho, lid = 0; - OSM_LOG_ENTER(p_log, populate_fwd_tbls); + OSM_LOG_ENTER(p_log); p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl); @@ -1206,7 +1206,7 @@ static void osm_lash_process_switch(lash_t * p_lash, osm_switch_t * p_sw) osm_physp_t 
*p_current_physp, *p_remote_physp; unsigned switch_a_lash_id, switch_b_lash_id; - OSM_LOG_ENTER(p_log, _osm_lash_process_switch); + OSM_LOG_ENTER(p_log); switch_a_lash_id = get_lash_id(p_sw); port_count = osm_node_get_num_physp(p_sw->p_node); @@ -1359,7 +1359,7 @@ static int lash_process(void *context) osm_log_t *p_log = &p_lash->p_osm->log; int return_status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, lash_process); + OSM_LOG_ENTER(p_log); p_lash->balance_limit = 6; diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c index 88e29e9..0813a23 100644 --- a/opensm/opensm/osm_ucast_mgr.c +++ b/opensm/opensm/osm_ucast_mgr.c @@ -76,7 +76,7 @@ void osm_ucast_mgr_destroy(IN osm_ucast_mgr_t * const p_mgr) { CL_ASSERT(p_mgr); - OSM_LOG_ENTER(p_mgr->p_log, osm_ucast_mgr_destroy); + OSM_LOG_ENTER(p_mgr->p_log); if (p_mgr->lft_buf) free(p_mgr->lft_buf); @@ -91,7 +91,7 @@ osm_ucast_mgr_init(IN osm_ucast_mgr_t * const p_mgr, IN osm_sm_t * sm) { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(sm->p_log, osm_ucast_mgr_init); + OSM_LOG_ENTER(sm->p_log); osm_ucast_mgr_construct(p_mgr); @@ -152,7 +152,7 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr, uint16_t lid_ho; uint8_t hops; - OSM_LOG_ENTER(p_mgr->p_log, __osm_ucast_mgr_process_neighbor); + OSM_LOG_ENTER(p_mgr->p_log); if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { osm_log(p_mgr->p_log, OSM_LOG_DEBUG, @@ -216,7 +216,7 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr, uint16_t num_used_sys = 0; uint16_t num_used_nodes = 0; - OSM_LOG_ENTER(p_mgr->p_log, __osm_ucast_mgr_process_port); + OSM_LOG_ENTER(p_mgr->p_log); if (lids_per_port > 1) { remote_sys_guids = malloc(sizeof(uint64_t) * lids_per_port); @@ -382,7 +382,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr, CL_ASSERT(p_mgr); - OSM_LOG_ENTER(p_mgr->p_log, osm_ucast_mgr_set_fwd_table); + OSM_LOG_ENTER(p_mgr->p_log); CL_ASSERT(p_sw); @@ -498,7 +498,7 @@ __osm_ucast_mgr_process_tbl(IN cl_map_item_t 
* const p_map_item, osm_port_t *p_port; const cl_qmap_t *p_port_tbl; - OSM_LOG_ENTER(p_mgr->p_log, __osm_ucast_mgr_process_tbl); + OSM_LOG_ENTER(p_mgr->p_log); p_node = p_sw->p_node; @@ -548,7 +548,7 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item, uint32_t num_ports; osm_physp_t *p_physp; - OSM_LOG_ENTER(p_mgr->p_log, __osm_ucast_mgr_process_neighbors); + OSM_LOG_ENTER(p_mgr->p_log); p_node = p_sw->p_node; @@ -698,7 +698,7 @@ osm_ucast_mgr_read_guid_file(IN osm_ucast_mgr_t * const p_mgr, char *endptr; uint64_t *p_guid; - OSM_LOG_ENTER(p_mgr->p_log, osm_ucast_mgr_read_guid_file); + OSM_LOG_ENTER(p_mgr->p_log); guid_file = fopen(guid_file_name, "r"); if (guid_file == NULL) { @@ -764,7 +764,7 @@ osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr) int blm = 0; int ubft = 0; - OSM_LOG_ENTER(p_mgr->p_log, osm_ucast_mgr_process); + OSM_LOG_ENTER(p_mgr->p_log); p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl; p_osm = p_mgr->p_subn->p_osm; diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c index 411e15b..3058038 100644 --- a/opensm/opensm/osm_ucast_updn.c +++ b/opensm/opensm/osm_ucast_updn.c @@ -134,7 +134,7 @@ __updn_bfs_by_node(IN osm_log_t * p_log, struct updn_node *u; updn_switch_dir_t next_dir, current_dir; - OSM_LOG_ENTER(p_log, __updn_bfs_by_node); + OSM_LOG_ENTER(p_log); lid = osm_node_get_base_lid(p_sw->p_node, 0); lid = cl_ntoh16(lid); @@ -256,7 +256,7 @@ static updn_t *updn_construct(osm_log_t * p_log) { updn_t *p_updn; - OSM_LOG_ENTER(p_log, updn_construct); + OSM_LOG_ENTER(p_log); p_updn = malloc(sizeof(updn_t)); if (p_updn) @@ -274,7 +274,7 @@ static cl_status_t updn_init(IN updn_t * const p_updn, IN osm_opensm_t * p_osm) cl_list_iterator_t guid_iterator; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osm->log, updn_init); + OSM_LOG_ENTER(&p_osm->log); p_updn->p_osm = p_osm; p_list = (cl_list_t *) malloc(sizeof(cl_list_t)); @@ -342,7 +342,7 @@ updn_subn_rank(IN unsigned num_guids, 
unsigned idx = 0; unsigned max_rank = 0; - OSM_LOG_ENTER(p_log, updn_subn_rank); + OSM_LOG_ENTER(p_log); cl_qlist_init(&list); /* Rank all the roots and add them to list */ @@ -449,7 +449,7 @@ static int __osm_subn_set_up_down_min_hop_table(IN updn_t * p_updn) osm_log_t *p_log = &p_updn->p_osm->log; osm_switch_t *p_next_sw, *p_sw; - OSM_LOG_ENTER(p_log, __osm_subn_set_up_down_min_hop_table); + OSM_LOG_ENTER(p_log); /* Go over all the switches in the subnet - for each init their Min Hop Table */ @@ -501,7 +501,7 @@ updn_build_lid_matrices(IN uint32_t num_guids, { int status; - OSM_LOG_ENTER(&p_updn->p_osm->log, osm_subn_calc_up_down_min_hop_table); + OSM_LOG_ENTER(&p_updn->p_osm->log); osm_log(&p_updn->p_osm->log, OSM_LOG_VERBOSE, "updn_build_lid_matrices: " @@ -568,7 +568,7 @@ static int __osm_updn_call(void *ctx) osm_switch_t *p_sw; int ret = 0; - OSM_LOG_ENTER(&p_updn->p_osm->log, __osm_updn_call); + OSM_LOG_ENTER(&p_updn->p_osm->log); p_item = cl_qmap_head(&p_updn->p_osm->subn.sw_guid_tbl); while (p_item != cl_qmap_end(&p_updn->p_osm->subn.sw_guid_tbl)) { @@ -627,7 +627,7 @@ static void __osm_updn_convert_list2array(IN updn_t * p_updn) uint32_t i = 0, max_num = 0; uint64_t *p_guid; - OSM_LOG_ENTER(&p_updn->p_osm->log, __osm_updn_convert_list2array); + OSM_LOG_ENTER(&p_updn->p_osm->log); p_updn->updn_ucast_reg_inputs.num_guids = cl_list_count(p_updn->p_root_nodes); @@ -675,7 +675,7 @@ static void __osm_updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn) unsigned *cas_per_sw; uint16_t lid_ho; - OSM_LOG_ENTER(&p_osm->log, osm_updn_find_root_nodes_by_min_hop); + OSM_LOG_ENTER(&p_osm->log); osm_log(&p_osm->log, OSM_LOG_DEBUG, "__osm_updn_find_root_nodes_by_min_hop: " diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c index 5d10ed6..ac9becb 100644 --- a/opensm/opensm/osm_vl15intf.c +++ b/opensm/opensm/osm_vl15intf.c @@ -124,7 +124,7 @@ static void __osm_vl15_poller(IN void *p_ptr) osm_vl15_t *const p_vl = (osm_vl15_t *) p_ptr; cl_qlist_t 
*p_fifo; - OSM_LOG_ENTER(p_vl->p_log, __osm_vl15_poller); + OSM_LOG_ENTER(p_vl->p_log); if (p_vl->thread_state == OSM_THREAD_STATE_NONE) p_vl->thread_state = OSM_THREAD_STATE_RUN; @@ -213,7 +213,7 @@ osm_vl15_destroy(IN osm_vl15_t * const p_vl, IN struct _osm_mad_pool *p_pool) { osm_madw_t *p_madw; - OSM_LOG_ENTER(p_vl->p_log, osm_vl15_destroy); + OSM_LOG_ENTER(p_vl->p_log); /* Signal our threads that we're leaving. @@ -263,7 +263,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osm_vl15_init); + OSM_LOG_ENTER(p_log); p_vl->p_vend = p_vend; p_vl->p_log = p_log; @@ -298,7 +298,7 @@ osm_vl15_init(IN osm_vl15_t * const p_vl, **********************************************************************/ void osm_vl15_poll(IN osm_vl15_t * const p_vl) { - OSM_LOG_ENTER(p_vl->p_log, osm_vl15_poll); + OSM_LOG_ENTER(p_vl->p_log); CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY); @@ -327,7 +327,7 @@ void osm_vl15_poll(IN osm_vl15_t * const p_vl) **********************************************************************/ void osm_vl15_post(IN osm_vl15_t * const p_vl, IN osm_madw_t * const p_madw) { - OSM_LOG_ENTER(p_vl->p_log, osm_vl15_post); + OSM_LOG_ENTER(p_vl->p_log); CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY); @@ -364,7 +364,7 @@ osm_vl15_shutdown(IN osm_vl15_t * const p_vl, { osm_madw_t *p_madw; - OSM_LOG_ENTER(p_vl->p_log, osm_vl15_shutdown); + OSM_LOG_ENTER(p_vl->p_log); /* we only should get here after the VL15 interface was initialized */ CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY); diff --git a/opensm/opensm/osm_vl_arb_rcv.c b/opensm/opensm/osm_vl_arb_rcv.c index 8a5b8b4..6013855 100644 --- a/opensm/opensm/osm_vl_arb_rcv.c +++ b/opensm/opensm/osm_vl_arb_rcv.c @@ -81,7 +81,7 @@ void osm_vla_rcv_process(IN void *context, IN void *data) CL_ASSERT(sm); - OSM_LOG_ENTER(sm->p_log, osm_vla_rcv_process); + OSM_LOG_ENTER(sm->p_log); CL_ASSERT(p_madw); diff --git a/opensm/osmtest/osmt_inform.c 
b/opensm/osmtest/osmt_inform.c index 4ce6e2b..3b6dce7 100644 --- a/opensm/osmtest/osmt_inform.c +++ b/opensm/osmtest/osmt_inform.c @@ -75,7 +75,7 @@ osmt_bind_inform_qp(IN osmtest_t * const p_osmt, OUT osmt_qp_ctx_t * p_qp_ctx) osm_log_t *p_log = &p_osmt->log; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osmt_bind_inform_qp); + OSM_LOG_ENTER(p_log); port_guid = p_osmt->local_port.port_guid; @@ -165,7 +165,7 @@ osmt_unbind_inform_qp(IN osmtest_t * const p_osmt, IN osmt_qp_ctx_t * p_qp_ctx) { osm_log_t *p_log = &p_osmt->log; - OSM_LOG_ENTER(p_log, osmt_unbind_inform_qp); + OSM_LOG_ENTER(p_log); osmt_mtl_mad_cleanup(&p_qp_ctx->qp_bind_hndl); @@ -198,7 +198,7 @@ osmt_reg_unreg_inform_info(IN osmtest_t * p_osmt, osm_log_t *p_log = &p_osmt->log; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmt_reg_unreg_inform_info); + OSM_LOG_ENTER(&p_osmt->log); /* init the MAD */ ib_mad_init_new((ib_mad_t *) p_sa_mad, @@ -399,7 +399,7 @@ osmt_send_trap_wait_for_forward(IN osmtest_t * const p_osmt, osm_log_t *p_log = &p_osmt->log; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osmt_send_trap_wait_for_forward); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_INFO, "osmt_send_trap_wait_for_forward: " @@ -545,7 +545,7 @@ osmt_trap_wait(IN osmtest_t * const p_osmt, IN osmt_qp_ctx_t * p_qp_ctx) osm_log_t *p_log = &p_osmt->log; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(p_log, osmt_trap_wait); + OSM_LOG_ENTER(p_log); osm_log(p_log, OSM_LOG_INFO, "osmt_trap_wait: " @@ -659,7 +659,7 @@ ib_api_status_t osmt_run_inform_info_flow(IN osmtest_t * const p_osmt) ib_api_status_t status; osmt_qp_ctx_t qp_ctx; - OSM_LOG_ENTER(&p_osmt->log, osmt_run_inform_info_flow); + OSM_LOG_ENTER(&p_osmt->log); /* bind the QP */ status = osmt_bind_inform_qp(p_osmt, &qp_ctx); @@ -746,7 +746,7 @@ ib_api_status_t osmt_run_trap64_65_flow(IN osmtest_t * const p_osmt) ib_api_status_t status; osmt_qp_ctx_t qp_ctx; - OSM_LOG_ENTER(&p_osmt->log, 
osmt_run_trap64_65_flow); + OSM_LOG_ENTER(&p_osmt->log); /* bind the QP */ status = osmt_bind_inform_qp(p_osmt, &qp_ctx); diff --git a/opensm/osmtest/osmt_multicast.c b/opensm/osmtest/osmt_multicast.c index 9721840..6638148 100644 --- a/opensm/osmtest/osmt_multicast.c +++ b/opensm/osmtest/osmt_multicast.c @@ -166,7 +166,7 @@ ib_api_status_t osmt_query_mcast(IN osmtest_t * const p_osmt) cl_map_item_t *p_item, *p_next_item; osmtest_mgrp_t *p_mgrp; - OSM_LOG_ENTER(&p_osmt->log, osmt_query_mcast); + OSM_LOG_ENTER(&p_osmt->log); /* * Do a blocking query for all Multicast Records in the subnet. @@ -301,7 +301,7 @@ osmt_send_mcast_request(IN osmtest_t * const p_osmt, osmv_user_query_t user; osmv_query_req_t req; - OSM_LOG_ENTER(&p_osmt->log, osmt_send_mcast_request); + OSM_LOG_ENTER(&p_osmt->log); /* * Do a blocking query for this record in the subnet. @@ -549,7 +549,7 @@ ib_api_status_t osmt_run_mcast_flow(IN osmtest_t * const p_osmt) 0x00, 0x00, 0x00, 0x01}, }; - OSM_LOG_ENTER(&p_osmt->log, osmt_run_mcast_flow); + OSM_LOG_ENTER(&p_osmt->log); osm_log(&p_osmt->log, OSM_LOG_INFO, "osmt_run_mcast_flow: " "GetTable of all current MCGs...\n"); diff --git a/opensm/osmtest/osmt_service.c b/opensm/osmtest/osmt_service.c index 6d377af..920293c 100644 --- a/opensm/osmtest/osmt_service.c +++ b/opensm/osmtest/osmt_service.c @@ -76,7 +76,7 @@ osmt_register_service(IN osmtest_t * const p_osmt, osm_log_t *p_log = &p_osmt->log; ib_api_status_t status; - OSM_LOG_ENTER(p_log, osmt_register_service); + OSM_LOG_ENTER(p_log); osm_log(&p_osmt->log, OSM_LOG_INFO, "osmt_register_service: " @@ -187,7 +187,7 @@ osmt_register_service_with_full_key(IN osmtest_t * const p_osmt, ib_api_status_t status; uint8_t i, skey[16]; - OSM_LOG_ENTER(p_log, osmt_register_service_with_full_key); + OSM_LOG_ENTER(p_log); osm_log(&p_osmt->log, OSM_LOG_INFO, "osmt_register_service_with_full_key: " @@ -320,7 +320,7 @@ osmt_register_service_with_data(IN osmtest_t * const p_osmt, ib_api_status_t status; /* 
ib_service_record_t* p_rec; */ - OSM_LOG_ENTER(p_log, osmt_register_service_with_data); + OSM_LOG_ENTER(p_log); osm_log(&p_osmt->log, OSM_LOG_INFO, "osmt_register_service_with_data: " @@ -477,7 +477,7 @@ osmt_get_service_by_id_and_name(IN osmtest_t * const p_osmt, uint32_t num_recs = 0; osmv_user_query_t user; - OSM_LOG_ENTER(&p_osmt->log, osmt_get_service_by_id); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -612,7 +612,7 @@ osmt_get_service_by_id(IN osmtest_t * const p_osmt, uint32_t num_recs = 0; osmv_user_query_t user; - OSM_LOG_ENTER(&p_osmt->log, osmt_get_service_by_id); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -747,7 +747,7 @@ osmt_get_service_by_name_and_key(IN osmtest_t * const p_osmt, uint32_t num_recs = 0, i; osmv_user_query_t user; - OSM_LOG_ENTER(&p_osmt->log, osmt_get_service_by_name_and_key); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { char buf_service_key[33]; @@ -894,7 +894,7 @@ osmt_get_service_by_name(IN osmtest_t * const p_osmt, ib_svc_name_t service_name; uint32_t num_recs = 0; - OSM_LOG_ENTER(&p_osmt->log, osmt_get_service_by_name); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -1020,7 +1020,7 @@ osmt_get_all_services_and_check_names(IN osmtest_t * const p_osmt, uint32_t num_recs = 0, i, j; uint8_t *p_checked_names; - OSM_LOG_ENTER(&p_osmt->log, osmt_get_all_services_and_check_names); + OSM_LOG_ENTER(&p_osmt->log); /* Prepare tracker for the checked names */ p_checked_names = @@ -1151,7 +1151,7 @@ osmt_delete_service_by_name(IN osmtest_t * const p_osmt, ib_service_record_t svc_rec; ib_api_status_t status; - OSM_LOG_ENTER(&p_osmt->log, osmt_delete_service_by_name); + OSM_LOG_ENTER(&p_osmt->log); osm_log(&p_osmt->log, OSM_LOG_INFO, 
"osmt_delete_service_by_name: " @@ -1285,7 +1285,7 @@ ib_api_status_t osmt_run_service_records_flow(IN osmtest_t * const p_osmt) uint32_t num_recs = 0; #endif - OSM_LOG_ENTER(&p_osmt->log, osmt_run_service_records_flow); + OSM_LOG_ENTER(&p_osmt->log); /* Init Service names */ for (i = 0; i <= 6; i++) { diff --git a/opensm/osmtest/osmt_slvl_vl_arb.c b/opensm/osmtest/osmt_slvl_vl_arb.c index f2125d8..1eec865 100644 --- a/opensm/osmtest/osmt_slvl_vl_arb.c +++ b/opensm/osmtest/osmt_slvl_vl_arb.c @@ -65,7 +65,7 @@ osmtest_write_vl_arb_table(IN osmtest_t * const p_osmt, int result, i; cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_vl_arb_table); + OSM_LOG_ENTER(&p_osmt->log); result = fprintf(fh, "VL_ARBITRATION_TABLE\n" @@ -108,7 +108,7 @@ osmt_query_vl_arb(IN osmtest_t * const p_osmt, osmv_query_req_t req; ib_vl_arb_table_record_t record, *p_rec; - OSM_LOG_ENTER(&p_osmt->log, osmt_query_vl_arb); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_DEBUG, @@ -199,7 +199,7 @@ osmt_query_all_ports_vl_arb(IN osmtest_t * const p_osmt, IN FILE * fh) port_t *p_src_port; uint8_t block, anyErr = 0; - OSM_LOG_ENTER(&p_osmt->log, osmt_query_all_ports_vl_arb); + OSM_LOG_ENTER(&p_osmt->log); osm_log(&p_osmt->log, OSM_LOG_VERBOSE, "osmt_query_all_ports_vl_arb: " @@ -276,7 +276,7 @@ osmtest_write_slvl_map_table(IN osmtest_t * const p_osmt, int result, i; cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_slvl_map_table); + OSM_LOG_ENTER(&p_osmt->log); result = fprintf(fh, "SLtoVL_MAP_TABLE\n" @@ -316,7 +316,7 @@ osmt_query_slvl_map(IN osmtest_t * const p_osmt, osmv_query_req_t req; ib_slvl_table_record_t record, *p_rec; - OSM_LOG_ENTER(&p_osmt->log, osmt_query_slvl_map); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_DEBUG, @@ -409,7 +409,7 @@ osmt_query_all_ports_slvl_map(IN osmtest_t * const p_osmt, 
IN FILE * fh) node_t *p_node; const cl_qmap_t *p_node_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmt_query_all_ports_slvl_map); + OSM_LOG_ENTER(&p_osmt->log); /* * Go over all ports that exist in the subnet @@ -508,7 +508,7 @@ osmt_run_slvl_and_vlarb_records_flow(IN osmtest_t * const p_osmt) ib_net16_t test_lid; uint8_t lmc; - OSM_LOG_ENTER(&p_osmt->log, osmt_run_slvl_and_vlarb_records_flow); + OSM_LOG_ENTER(&p_osmt->log); fh = fopen("qos.txt", "w"); diff --git a/opensm/osmtest/osmtest.c b/opensm/osmtest/osmtest.c index de54f2d..0f8cd98 100644 --- a/opensm/osmtest/osmtest.c +++ b/opensm/osmtest/osmtest.c @@ -555,7 +555,7 @@ void osmtest_query_res_cb(IN osmv_query_res_t * p_rec) (osmtest_req_context_t *) p_rec->query_context; osmtest_t *const p_osmt = p_ctxt->p_osmt; - OSM_LOG_ENTER(&p_osmt->log, osmtest_query_res_cb); + OSM_LOG_ENTER(&p_osmt->log); p_ctxt->result = *p_rec; @@ -580,7 +580,7 @@ osmtest_get_all_recs(IN osmtest_t * const p_osmt, osmv_user_query_t user; osmv_query_req_t req; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_all_recs); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_DEBUG, @@ -654,7 +654,7 @@ ib_api_status_t osmtest_validate_sa_class_port_info(IN osmtest_t * const p_osmt) osmtest_req_context_t *p_context = &context; ib_sa_mad_t *p_resp_sa_madp; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_sa_class_port_info); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -750,7 +750,7 @@ osmtest_get_node_rec(IN osmtest_t * const p_osmt, osmv_query_req_t req; ib_node_record_t record; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_node_rec); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -831,7 +831,7 @@ osmtest_get_node_rec_by_lid(IN osmtest_t * const p_osmt, ib_node_record_t record; ib_mad_t *p_mad; - OSM_LOG_ENTER(&p_osmt->log, 
osmtest_get_node_rec_by_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -913,7 +913,7 @@ osmtest_get_path_rec_by_guid_pair(IN osmtest_t * const p_osmt, osmv_query_req_t req; osmv_guid_pair_t guid_pair; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_path_rec_by_guid_pair); + OSM_LOG_ENTER(&p_osmt->log); memset(&req, 0, sizeof(req)); memset(p_context, 0, sizeof(*p_context)); @@ -982,7 +982,7 @@ osmtest_get_path_rec_by_gid_pair(IN osmtest_t * const p_osmt, osmv_query_req_t req; osmv_gid_pair_t gid_pair; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_path_rec_by_gid_pair); + OSM_LOG_ENTER(&p_osmt->log); memset(&req, 0, sizeof(req)); memset(p_context, 0, sizeof(*p_context)); @@ -1052,7 +1052,7 @@ osmtest_get_multipath_rec(IN osmtest_t * const p_osmt, cl_status_t status = IB_SUCCESS; osmv_query_req_t req; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_multipath_rec); + OSM_LOG_ENTER(&p_osmt->log); /* * Do a blocking query for this record in the subnet. 
@@ -1119,7 +1119,7 @@ osmtest_get_port_rec(IN osmtest_t * const p_osmt, osmv_query_req_t req; ib_portinfo_record_t record; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_port_rec); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_DEBUG, @@ -1201,7 +1201,7 @@ osmtest_get_port_rec_by_num(IN osmtest_t * const p_osmt, ib_portinfo_record_t record; ib_mad_t *p_mad; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_port_rec_by_num); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_DEBUG, @@ -1283,7 +1283,7 @@ osmtest_stress_port_recs_large(IN osmtest_t * const p_osmt, cl_status_t status; uint32_t num_recs = 0; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_port_recs_large); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); /* @@ -1348,7 +1348,7 @@ osmtest_stress_node_recs_large(IN osmtest_t * const p_osmt, cl_status_t status; uint32_t num_recs = 0; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_node_recs_large); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -1414,7 +1414,7 @@ osmtest_stress_path_recs_large(IN osmtest_t * const p_osmt, cl_status_t status; uint32_t num_recs = 0; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_path_recs_large); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -1481,7 +1481,7 @@ osmtest_stress_path_recs_by_guid(IN osmtest_t * const p_osmt, node_t *p_src_node, *p_dst_node; cl_qmap_t *p_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_path_recs_by_guid); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -1597,7 +1597,7 @@ osmtest_stress_port_recs_small(IN osmtest_t * const p_osmt, cl_status_t status; uint32_t num_recs = 0; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_port_recs_small); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -1663,7 +1663,7 @@ osmtest_get_local_port_lmc(IN osmtest_t * const p_osmt, cl_status_t 
status; uint32_t num_recs = 0; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_local_port_lmc); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -1729,7 +1729,7 @@ ib_api_status_t osmtest_wrong_sm_key_ignored(IN osmtest_t * const p_osmt) osmtest_req_context_t *p_context = &context; uint8_t port_num = 1; - OSM_LOG_ENTER(&p_osmt->log, osmtest_wrong_sm_key_ignored); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_INFO)) { osm_log(&p_osmt->log, OSM_LOG_INFO, @@ -1807,7 +1807,7 @@ osmtest_write_port_info(IN osmtest_t * const p_osmt, int result; cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_port_info); + OSM_LOG_ENTER(&p_osmt->log); result = fprintf(fh, "DEFINE_PORT\n" @@ -1902,7 +1902,7 @@ osmtest_write_path_info(IN osmtest_t * const p_osmt, int result; cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_path_info); + OSM_LOG_ENTER(&p_osmt->log); result = fprintf(fh, "DEFINE_PATH\n" @@ -1953,7 +1953,7 @@ osmtest_write_node_info(IN osmtest_t * const p_osmt, cl_status_t status = IB_SUCCESS; char desc[IB_NODE_DESCRIPTION_SIZE + 1]; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_node_info); + OSM_LOG_ENTER(&p_osmt->log); memcpy(desc, p_rec->node_desc.description, IB_NODE_DESCRIPTION_SIZE); desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; @@ -2012,7 +2012,7 @@ osmtest_write_link(IN osmtest_t * const p_osmt, int result; cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_link); + OSM_LOG_ENTER(&p_osmt->log); result = fprintf(fh, "DEFINE_LINK\n" @@ -2049,7 +2049,7 @@ osmtest_write_all_link_recs(IN osmtest_t * const p_osmt, IN FILE * fh) size_t num_recs; int result; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_all_link_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -2122,7 +2122,7 @@ osmtest_get_path_rec_by_lid_pair(IN osmtest_t * const p_osmt, osmv_query_req_t req; osmv_lid_pair_t lid_pair; - OSM_LOG_ENTER(&p_osmt->log, 
osmtest_get_path_rec_by_lid_pair); + OSM_LOG_ENTER(&p_osmt->log); memset(&req, 0, sizeof(req)); memset(p_context, 0, sizeof(*p_context)); @@ -2190,7 +2190,7 @@ osmtest_write_all_node_recs(IN osmtest_t * const p_osmt, IN FILE * fh) size_t num_recs; int result; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_all_node_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -2261,7 +2261,7 @@ osmtest_write_all_port_recs(IN osmtest_t * const p_osmt, IN FILE * fh) size_t num_recs; int result; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_all_port_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -2333,7 +2333,7 @@ osmtest_write_all_path_recs(IN osmtest_t * const p_osmt, IN FILE * fh) size_t num_recs; int result; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_all_path_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -2406,7 +2406,7 @@ osmtest_write_all_node_recs(IN osmtest_t * const p_osmt, IN FILE * fh) int result; uint16_t lid; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_all_node_recs); + OSM_LOG_ENTER(&p_osmt->log); result = fprintf(fh, "#\n" "# Node Records\n" "#\n"); if (result < 0) { @@ -2505,7 +2505,7 @@ osmtest_write_all_port_recs(IN osmtest_t * const p_osmt, IN FILE * fh) port_t *p_port; int result; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_all_port_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -2618,7 +2618,7 @@ osmtest_write_all_path_recs(IN osmtest_t * const p_osmt, IN FILE * fh) node_t *p_src_node, *p_dst_node; ib_api_status_t got_status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_write_all_path_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -2717,7 +2717,7 @@ osmtest_create_inventory_file(IN osmtest_t * const p_osmt) FILE *fh; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_create_inventory_file); + OSM_LOG_ENTER(&p_osmt->log); fh = fopen(p_osmt->opt.file_name, "w"); if (fh == NULL) { 
@@ -2769,7 +2769,7 @@ static ib_api_status_t osmtest_stress_large_rmpp_pr(IN osmtest_t * const p_osmt) long sec_diff, usec_diff; float ratio; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_large_rmpp_pr); + OSM_LOG_ENTER(&p_osmt->log); gettimeofday(&start_tv, NULL); printf("-I- Start time is : %09ld:%06ld [sec:usec]\n", start_tv.tv_sec, (long)start_tv.tv_usec); @@ -2847,7 +2847,7 @@ static ib_api_status_t osmtest_stress_large_rmpp(IN osmtest_t * const p_osmt) struct timeval start_tv, end_tv; long sec_diff, usec_diff; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_large_rmpp); + OSM_LOG_ENTER(&p_osmt->log); gettimeofday(&start_tv, NULL); printf("-I- Start time is : %09ld:%06ld [sec:usec]\n", start_tv.tv_sec, (long)start_tv.tv_usec); @@ -2932,7 +2932,7 @@ static ib_api_status_t osmtest_stress_small_rmpp(IN osmtest_t * const p_osmt) struct timeval start_tv, end_tv; long sec_diff, usec_diff; - OSM_LOG_ENTER(&p_osmt->log, osmtest_stress_small_rmpp); + OSM_LOG_ENTER(&p_osmt->log); gettimeofday(&start_tv, NULL); printf("-I- Start time is : %09ld:%06ld [sec:usec]\n", start_tv.tv_sec, (long)start_tv.tv_usec); @@ -3003,7 +3003,7 @@ osmtest_prepare_db_generic(IN osmtest_t * const p_osmt, { generic_t *p_generic; - OSM_LOG_ENTER(&p_osmt->log, osmtest_prepare_db_generic); + OSM_LOG_ENTER(&p_osmt->log); p_generic = (generic_t *) cl_qmap_head(p_tbl); @@ -3019,7 +3019,7 @@ osmtest_prepare_db_generic(IN osmtest_t * const p_osmt, **********************************************************************/ static void osmtest_prepare_db(IN osmtest_t * const p_osmt) { - OSM_LOG_ENTER(&p_osmt->log, osmtest_prepare_db); + OSM_LOG_ENTER(&p_osmt->log); osmtest_prepare_db_generic(p_osmt, &p_osmt->exp_subn.node_lid_tbl); osmtest_prepare_db_generic(p_osmt, &p_osmt->exp_subn.path_tbl); @@ -3035,7 +3035,7 @@ static ib_api_status_t osmtest_check_missing_nodes(IN osmtest_t * const p_osmt) cl_status_t status = IB_SUCCESS; cl_qmap_t *p_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmtest_check_missing_nodes); + 
OSM_LOG_ENTER(&p_osmt->log); p_tbl = &p_osmt->exp_subn.node_lid_tbl; @@ -3068,7 +3068,7 @@ static ib_api_status_t osmtest_check_missing_ports(IN osmtest_t * const p_osmt) cl_status_t status = IB_SUCCESS; cl_qmap_t *p_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmtest_check_missing_ports); + OSM_LOG_ENTER(&p_osmt->log); p_tbl = &p_osmt->exp_subn.port_key_tbl; @@ -3102,7 +3102,7 @@ static ib_api_status_t osmtest_check_missing_paths(IN osmtest_t * const p_osmt) cl_status_t status = IB_SUCCESS; cl_qmap_t *p_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmtest_check_missing_paths); + OSM_LOG_ENTER(&p_osmt->log); p_tbl = &p_osmt->exp_subn.path_tbl; @@ -3163,7 +3163,7 @@ osmtest_validate_path_data(IN osmtest_t * const p_osmt, cl_status_t status = IB_SUCCESS; uint8_t lmc = 0; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_path_data); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_ERROR, @@ -3281,7 +3281,7 @@ osmtest_validate_node_data(IN osmtest_t * const p_osmt, { cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_node_data); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_ERROR, @@ -3479,7 +3479,7 @@ osmtest_validate_node_rec(IN osmtest_t * const p_osmt, node_t *p_node; const cl_qmap_t *p_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_node_rec); + OSM_LOG_ENTER(&p_osmt->log); /* * Find proper node record in the database. 
@@ -3512,7 +3512,7 @@ osmtest_validate_port_data(IN osmtest_t * const p_osmt, { cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_port_data); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_ERROR, @@ -4029,7 +4029,7 @@ osmtest_validate_port_rec(IN osmtest_t * const p_osmt, port_t *p_port; const cl_qmap_t *p_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_port_rec); + OSM_LOG_ENTER(&p_osmt->log); /* * Find proper port record in the database. @@ -4065,7 +4065,7 @@ osmtest_validate_path_rec(IN osmtest_t * const p_osmt, path_t *p_path; const cl_qmap_t *p_tbl; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_path_rec); + OSM_LOG_ENTER(&p_osmt->log); /* * Find proper path record in the database. @@ -4102,7 +4102,7 @@ osmtest_validate_all_node_recs(IN osmtest_t * const p_osmt) cl_status_t status; size_t num_recs; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_all_node_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -4182,7 +4182,7 @@ osmtest_validate_all_guidinfo_recs(IN osmtest_t * const p_osmt) cl_status_t status; size_t num_recs; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_all_guidinfo_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -4235,7 +4235,7 @@ osmtest_validate_all_path_recs(IN osmtest_t * const p_osmt) cl_status_t status; size_t num_recs; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_all_path_recs); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -4318,7 +4318,7 @@ osmtest_get_link_rec_by_lid(IN osmtest_t * const p_osmt, ib_link_record_t record; ib_mad_t *p_mad; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_link_rec_by_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -4406,7 +4406,7 @@ osmtest_get_guidinfo_rec_by_lid(IN osmtest_t * const p_osmt, ib_guidinfo_record_t record; ib_mad_t *p_mad; - 
OSM_LOG_ENTER(&p_osmt->log, osmtest_get_guidinfo_rec_by_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -4492,7 +4492,7 @@ osmtest_get_pkeytbl_rec_by_lid(IN osmtest_t * const p_osmt, ib_pkey_table_record_t record; ib_mad_t *p_mad; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_pkeytbl_rec_by_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -4577,7 +4577,7 @@ osmtest_get_sw_info_rec_by_lid(IN osmtest_t * const p_osmt, ib_switch_info_record_t record; ib_mad_t *p_mad; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_sw_info_rec_by_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -4663,7 +4663,7 @@ osmtest_get_lft_rec_by_lid(IN osmtest_t * const p_osmt, ib_lft_record_t record; ib_mad_t *p_mad; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_lft_rec_by_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -4748,7 +4748,7 @@ osmtest_get_mft_rec_by_lid(IN osmtest_t * const p_osmt, ib_mft_record_t record; ib_mad_t *p_mad; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_mft_rec_by_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -4834,7 +4834,7 @@ osmtest_sminfo_record_request(IN osmtest_t * const p_osmt, ib_mad_t *p_mad; osmtest_sm_info_rec_t *p_sm_info_opt; - OSM_LOG_ENTER(&p_osmt->log, osmtest_sminfo_record_request); + OSM_LOG_ENTER(&p_osmt->log); /* * Do a blocking query for these records in the subnet. 
@@ -4937,7 +4937,7 @@ osmtest_informinfo_request(IN osmtest_t * const p_osmt, osmtest_inform_info_t *p_inform_info_opt; osmtest_inform_info_rec_t *p_inform_info_rec_opt; - OSM_LOG_ENTER(&p_osmt->log, osmtest_informinfo_request); + OSM_LOG_ENTER(&p_osmt->log); /* * Do a blocking query for these records in the subnet. @@ -5047,7 +5047,7 @@ osmtest_validate_single_path_rec_lid_pair(IN osmtest_t * const p_osmt, cl_status_t status = IB_SUCCESS; size_t num_recs; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_single_path_rec_lid_pair); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -5112,7 +5112,7 @@ osmtest_validate_single_node_rec_lid(IN osmtest_t * const p_osmt, const ib_node_record_t *p_rec; int num_recs, i; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_single_node_rec_lid); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_DEBUG)) { osm_log(&p_osmt->log, OSM_LOG_DEBUG, @@ -5214,7 +5214,7 @@ osmtest_validate_single_port_rec_lid(IN osmtest_t * const p_osmt, const ib_portinfo_record_t *p_rec; cl_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_single_port_rec_lid); + OSM_LOG_ENTER(&p_osmt->log); memset(&context, 0, sizeof(context)); @@ -5269,7 +5269,7 @@ osmtest_validate_single_path_rec_guid_pair(IN osmtest_t * const p_osmt, uint32_t i; boolean_t got_error = FALSE; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_single_path_rec_guid_pair); + OSM_LOG_ENTER(&p_osmt->log); memset(&req, 0, sizeof(req)); memset(&context, 0, sizeof(context)); @@ -5399,7 +5399,7 @@ osmtest_validate_single_path_recs(IN osmtest_t * const p_osmt) osmv_guid_pair_t guid_pair; uint16_t cnt; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_single_path_recs); + OSM_LOG_ENTER(&p_osmt->log); if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) { osm_log(&p_osmt->log, OSM_LOG_VERBOSE, @@ -5491,7 +5491,7 @@ osmtest_validate_single_node_recs(IN osmtest_t * const p_osmt) const cl_qmap_t *p_node_lid_tbl; uint16_t cnt = 0; - 
OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_single_node_recs); + OSM_LOG_ENTER(&p_osmt->log); p_node_lid_tbl = &p_osmt->exp_subn.node_lid_tbl; @@ -5553,7 +5553,7 @@ osmtest_validate_single_port_recs(IN osmtest_t * const p_osmt) const cl_qmap_t *p_port_key_tbl; uint16_t cnt = 0; - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_single_port_recs); + OSM_LOG_ENTER(&p_osmt->log); p_port_key_tbl = &p_osmt->exp_subn.port_key_tbl; @@ -5623,7 +5623,7 @@ static ib_api_status_t osmtest_validate_against_db(IN osmtest_t * const p_osmt) uint8_t i; #endif - OSM_LOG_ENTER(&p_osmt->log, osmtest_validate_against_db); + OSM_LOG_ENTER(&p_osmt->log); #ifdef VENDOR_RMPP_SUPPORT status = osmtest_validate_all_node_recs(p_osmt); @@ -6421,7 +6421,7 @@ osmtest_parse_node(IN osmtest_t * const p_osmt, node_t *p_guid_node; const osmtest_token_t *p_tok; - OSM_LOG_ENTER(&p_osmt->log, osmtest_parse_node); + OSM_LOG_ENTER(&p_osmt->log); p_node = node_new(); CL_ASSERT(p_node != NULL); @@ -6701,7 +6701,7 @@ osmtest_parse_port(IN osmtest_t * const p_osmt, port_t *p_port; const osmtest_token_t *p_tok; - OSM_LOG_ENTER(&p_osmt->log, osmtest_parse_port); + OSM_LOG_ENTER(&p_osmt->log); p_port = port_new(); CL_ASSERT(p_port != NULL); @@ -7244,7 +7244,7 @@ osmtest_parse_path(IN osmtest_t * const p_osmt, const osmtest_token_t *p_tok; boolean_t got_error = FALSE; - OSM_LOG_ENTER(&p_osmt->log, osmtest_parse_path); + OSM_LOG_ENTER(&p_osmt->log); p_path = path_new(); CL_ASSERT(p_path != NULL); @@ -7435,7 +7435,7 @@ osmtest_parse_link(IN osmtest_t * const p_osmt, const osmtest_token_t *p_tok; boolean_t got_error = FALSE; - OSM_LOG_ENTER(&p_osmt->log, osmtest_parse_link); + OSM_LOG_ENTER(&p_osmt->log); /* * Parse the inventory file and create the database. 
@@ -7522,7 +7522,7 @@ static ib_api_status_t osmtest_create_db(IN osmtest_t * const p_osmt) const osmtest_token_t *p_tok; boolean_t got_error = FALSE; - OSM_LOG_ENTER(&p_osmt->log, osmtest_create_db); + OSM_LOG_ENTER(&p_osmt->log); fh = fopen(p_osmt->opt.file_name, "r"); if (fh == NULL) { @@ -7625,7 +7625,7 @@ osmtest_get_user_port(IN osmtest_t * const p_osmt, uint32_t choice = 0; boolean_t done_flag = FALSE; - OSM_LOG_ENTER(&p_osmt->log, osmtest_get_user_port); + OSM_LOG_ENTER(&p_osmt->log); /* * User needs prompting for the local port GUID with which @@ -7672,7 +7672,7 @@ osmtest_bind(IN osmtest_t * p_osmt, uint32_t num_ports = GUID_ARRAY_SIZE; ib_port_attr_t attr_array[GUID_ARRAY_SIZE]; - OSM_LOG_ENTER(&p_osmt->log, osmtest_bind); + OSM_LOG_ENTER(&p_osmt->log); /* * Call the transport layer for a list of local port @@ -7752,7 +7752,7 @@ ib_api_status_t osmtest_run(IN osmtest_t * const p_osmt) { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(&p_osmt->log, osmtest_run); + OSM_LOG_ENTER(&p_osmt->log); status = osmtest_validate_sa_class_port_info(p_osmt); if (status != IB_SUCCESS) { -- 1.5.4.rc5 From rdreier at cisco.com Tue Feb 12 10:30:50 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 12 Feb 2008 10:30:50 -0800 Subject: [ofa-general] CM sysfs-related oops on device driver reload In-Reply-To: (Roland Dreier's message of "Mon, 11 Feb 2008 22:21:58 -0800") References: <000001c86a20$23e1fdd0$8be0180a@amr.corp.intel.com> <000001c86aec$97eda860$7ae1180a@amr.corp.intel.com> Message-ID: OK, the smaller patch below seems to fix things to, so that's what I'll merge. I think the real issue was that I left the code as ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type, &cm_class.subsys.kobj, "%s", device->name); without doing kobject_get() on subsys.kobj, which meant on unload, we did an extra put of that subsys.kobj, which left the infiniband_cm class in a messed-up state. 
But there's no reason to do an extra get of the parents (kobject_init_and_add() already gets the parent once for us), so we can just simplify everything as below: diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 435e276..b10ade9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3612,18 +3612,12 @@ struct class cm_class = { }; EXPORT_SYMBOL(cm_class); -static void cm_remove_fs_obj(struct kobject *obj) -{ - kobject_put(obj->parent); - kobject_put(obj); -} - static int cm_create_port_fs(struct cm_port *port) { int i, ret; ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, - kobject_get(&port->cm_dev->dev_obj), + &port->cm_dev->dev_obj, "%d", port->port_num); if (ret) { kfree(port); @@ -3633,7 +3627,7 @@ static int cm_create_port_fs(struct cm_port *port) for (i = 0; i < CM_COUNTER_GROUPS; i++) { ret = kobject_init_and_add(&port->counter_group[i].obj, &cm_counter_obj_type, - kobject_get(&port->port_obj), + &port->port_obj, "%s", counter_group_names[i]); if (ret) goto error; @@ -3643,8 +3637,8 @@ static int cm_create_port_fs(struct cm_port *port) error: while (i--) - cm_remove_fs_obj(&port->counter_group[i].obj); - cm_remove_fs_obj(&port->port_obj); + kobject_put(&port->counter_group[i].obj); + kobject_put(&port->port_obj); return ret; } @@ -3654,9 +3648,9 @@ static void cm_remove_port_fs(struct cm_port *port) int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) - cm_remove_fs_obj(&port->counter_group[i].obj); + kobject_put(&port->counter_group[i].obj); - cm_remove_fs_obj(&port->port_obj); + kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *device) @@ -3740,7 +3734,7 @@ error1: ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } - cm_remove_fs_obj(&cm_dev->dev_obj); + kobject_put(&cm_dev->dev_obj); } static void cm_remove_one(struct ib_device *device) @@ -3767,7 +3761,7 @@ static void cm_remove_one(struct ib_device *device) ib_unregister_mad_agent(port->mad_agent); 
cm_remove_port_fs(port); } - cm_remove_fs_obj(&cm_dev->dev_obj); + kobject_put(&cm_dev->dev_obj); } static int __init ib_cm_init(void) From dres.frentzen at t-online.de Tue Feb 12 10:53:17 2008 From: dres.frentzen at t-online.de (Chang Cole) Date: Tue, 12 Feb 2008 15:53:17 -0300 Subject: [ofa-general] Software Range Expansion - Price Downfall Message-ID: <587117151.72658347180635@t-online.de> An HTML attachment was scrubbed... URL: From sean.hefty at intel.com Tue Feb 12 10:55:47 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 12 Feb 2008 10:55:47 -0800 Subject: [ofa-general] CM sysfs-related oops on device driver reload In-Reply-To: References: <000001c86a20$23e1fdd0$8be0180a@amr.corp.intel.com><000001c86aec$97eda860$7ae1180a@amr.corp.intel.com> Message-ID: <000201c86da8$e131cdd0$ff0da8c0@amr.corp.intel.com> >OK, the smaller patch below seems to fix things to, so that's what >I'll merge. I think the real issue was that I left the code as > > ret = kobject_init_and_add(&cm_dev->dev_obj, &cm_dev_obj_type, > &cm_class.subsys.kobj, "%s", device->name); > >without doing kobject_get() on subsys.kobj, which meant on unload, we >did an extra put of that subsys.kobj, which left the infiniband_cm >class in a messed-up state. But there's no reason to do an extra get >of the parents (kobject_init_and_add() already gets the parent once >for us), so we can just simplify everything as below: Thanks for clarifying this to me. - Sean From rdreier at cisco.com Tue Feb 12 11:03:02 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 12 Feb 2008 11:03:02 -0800 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: <47AC6A89.3010408@opengridcomputing.com> (Steve Wise's message of "Fri, 08 Feb 2008 08:43:21 -0600") References: <47AB1646.8030006@opengridcomputing.com> <47AB32FD.6050700@opengridcomputing.com> <47AC6A89.3010408@opengridcomputing.com> Message-ID: > Any applications out there that require loopback? 
I don't know of any, but obviously we shouldn't oops if someone tries it. I don't know how hard loopback support would be, but let's try to get some fix for the oops into 2.6.25. From swise at opengridcomputing.com Tue Feb 12 11:04:38 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 12 Feb 2008 13:04:38 -0600 Subject: [ofa-general] Re: oops in cxgb3:t3_l2t_get In-Reply-To: References: <47AB1646.8030006@opengridcomputing.com> <47AB32FD.6050700@opengridcomputing.com> <47AC6A89.3010408@opengridcomputing.com> Message-ID: <47B1EDC6.5060906@opengridcomputing.com> Roland Dreier wrote: > > Any applications out there that require loopback? > > I don't know of any, but obviously we shouldn't oops if someone tries > it. I don't know how hard loopback support would be, but let's try to > get some fix for the oops into 2.6.25. I'm doing that now. From jim at mellanox.com Tue Feb 12 11:54:27 2008 From: jim at mellanox.com (Jim Mott) Date: Tue, 12 Feb 2008 11:54:27 -0800 Subject: [ofa-general] [PATCH 1/1] sdplib - bind returns wrong error when using 'both' Message-ID: A Mellanox regression test detected a difference between the error returned by SDP and TCP and the error returned using libsdp with 'both'. 
Signed-off-by: Jim Mott --- Index: ofa_1_3_dev_user/src/userspace/libsdp/src/port.c =================================================================== --- ofa_1_3_dev_user.orig/src/userspace/libsdp/src/port.c 2008-02-12 00:32:08.000000000 -0600 +++ ofa_1_3_dev_user/src/userspace/libsdp/src/port.c 2008-02-12 13:11:01.000000000 -0600 @@ -1170,6 +1170,11 @@ "binding SDP socket failed:%s\n", strerror( errno ) ); _socket_funcs.close( *sdp_sd ); _socket_funcs.close( *tcp_sd ); + + /* TCP and SDP without library return EINVAL */ + if (errno == EADDRINUSE) + errno = EINVAL; + goto done; } From jon at opengridcomputing.com Tue Feb 12 12:14:48 2008 From: jon at opengridcomputing.com (Jon Mason) Date: Tue, 12 Feb 2008 14:14:48 -0600 Subject: [ofa-general] [PATCH] libcxbg3: zeroing of wc_flags Message-ID: <20080212201447.GD12371@opengridcomputing.com> >From 666b9d67dda0fd01e90ceb93b189a773d14916d5 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 12 Feb 2008 14:08:02 -0600 Subject: [PATCH] The wc_flags field in struct ibv_wc is left uninitialized in iwch_poll_cq_one. User space applications may check this field and deterministically perform actions based on the garbage in the field. Zeroing this out will prevent this unintended behavior. 
Signed-off-by: Jon Mason --- src/cq.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/src/cq.c b/src/cq.c index d27c6b7..fcf91c8 100644 --- a/src/cq.c +++ b/src/cq.c @@ -277,6 +277,7 @@ int iwch_poll_cq_one(struct iwch_device *rhp, struct iwch_cq *chp, wc->wr_id = cookie; wc->qp_num = qhp->wq.qpid; wc->vendor_err = CQE_STATUS(cqe); + wc->wc_flags = 0; PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " "lo 0x%x cookie 0x%" PRIx64 "\n", -- 1.5.3.3 From eli at dev.mellanox.co.il Tue Feb 12 12:36:55 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 12 Feb 2008 22:36:55 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> Message-ID: <47B20367.7040900@dev.mellanox.co.il> Sean Hefty wrote: >> Saying all that, I don't think we want to have --any RNR retries--, as >> for retries, I am open to hear what others think. > > I'm really not all that familiar with ipoib protocol, but if it's being > implemented over an RC connection, then adding an RNR retry seems to make sense > to me. I believe using UC is better, but if it's over RC, I don't know that we > want to take the hit of tearing down and re-establishing the connection just > because we have a fast sender. (This is just an opinion based on no fact > whatsoever.) > I don't see why setting rnr retry count can help if we have a fast sender. If this sender is faster than the receiver eventually the rnr counter will expire and the connection will close. As for retry count, I don't know how common the errors that contribute to the retry counter are. If anyone has statistics of this I'd be glad to know. Pradeep, can you identify what part of the patch you sent actually solved the problem you were seeing and also give some description of the problem?
From dwseyferthprm at seyferthpr.com Tue Feb 12 12:35:22 2008 From: dwseyferthprm at seyferthpr.com (Mari Boyer) Date: Tue, 12 Feb 2008 23:35:22 +0300 Subject: [ofa-general] Medications that you need. Message-ID: <01c86dcf$ef451900$27e5a44f@dwseyferthprm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Same quality! Save your money, buy pills immediately! http://geocities.com/nelsonprice809/ We provide confidential and secure purchase! From ms.amos_belin at libero.it Tue Feb 12 13:01:16 2008 From: ms.amos_belin at libero.it (Sandy Cash) Date: Tue, 12 Feb 2008 17:01:16 -0400 Subject: [ofa-general] Fü r die qualitative Software wenig zu bezahlen: warum nicht? Message-ID: <904820786.22910520211135@libero.it> Wir freuen uns darauf, Ihnen lokalisierte Versionen bekannter Programme anbieten zu können: Englisch, Deutsch, Französisch, Italienisch, Spanisch und viele andere Sprachen! Sofort nach dem Kauf können Sie jedes Programm herunterladen und installieren. http://geocities.com/randywebb982/ Unser Preis: * Windows XP Professional With SP2 Full Version: $59.95 * Microsoft Office Enterprise 2007: $79.95 * Adobe Photoshop CS3 Extended: $79.95 * Adobe Creative Suite 3 Master Collection: $299.95 * Adobe Photoshop CS2 with ImageReady CS2: $79.95 * Office System Professional 2003 (5 Cds): $59.95 http://geocities.com/randywebb982/ Wir haben mehr 300 verschiedener Programmes für PC und Macintosh! Kaufen jetzt, warten Sie nicht! -------------- next part -------------- An HTML attachment was scrubbed... URL: From swise at opengridcomputing.com Tue Feb 12 13:23:05 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 12 Feb 2008 15:23:05 -0600 Subject: [ofa-general] Re: [PATCH] libcxbg3: zeroing of wc_flags In-Reply-To: <20080212201447.GD12371@opengridcomputing.com> References: <20080212201447.GD12371@opengridcomputing.com> Message-ID: <47B20E39.8060400@opengridcomputing.com> Applied. Thanks. Steve. 
Jon Mason wrote: >>From 666b9d67dda0fd01e90ceb93b189a773d14916d5 Mon Sep 17 00:00:00 2001 > From: Jon Mason > Date: Tue, 12 Feb 2008 14:08:02 -0600 > Subject: [PATCH] The wc_flags field in struct ibv_wc is left uninitialized in > iwch_poll_cq_one. User space applications may check this field and > deterministically perform actions based on the garbage in the field. > Zeroing this out will prevent this unintended behavior. > > Signed-off-by: Jon Mason > --- > src/cq.c | 1 + > 1 files changed, 1 insertions(+), 0 deletions(-) > > diff --git a/src/cq.c b/src/cq.c > index d27c6b7..fcf91c8 100644 > --- a/src/cq.c > +++ b/src/cq.c > @@ -277,6 +277,7 @@ int iwch_poll_cq_one(struct iwch_device *rhp, struct iwch_cq *chp, > wc->wr_id = cookie; > wc->qp_num = qhp->wq.qpid; > wc->vendor_err = CQE_STATUS(cqe); > + wc->wc_flags = 0; > > PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " > "lo 0x%x cookie 0x%" PRIx64 "\n", From prescott at hpc.ufl.edu Tue Feb 12 13:28:15 2008 From: prescott at hpc.ufl.edu (Craig Prescott) Date: Tue, 12 Feb 2008 16:28:15 -0500 Subject: [ofa-general] SDP performance with bzcopy testing help needed Message-ID: <47B20F6F.8080302@hpc.ufl.edu> Jim Mott wrote: > Now that SDP is shipping with a non-zero default value for > sdp_zcopy_thresh (64K), I need some feedback from the list. Does > anybody except me see a performance gain on large messages? Oh, I see it - absolutely. For SDP on iWARP, you can see the improvement for large messages at http://hpc.ufl.edu/benchmarks/iwarp_sdp/ Scan down to SDP Benchmarks. The page is not really done yet, but I'm trying to finish up today. I'll run the same tests on IB (we have 4X SDR Lion Cubs) shortly and post.
Cheers, Craig From prescott at hpc.ufl.edu Tue Feb 12 13:37:34 2008 From: prescott at hpc.ufl.edu (Craig Prescott) Date: Tue, 12 Feb 2008 16:37:34 -0500 Subject: [ofa-general] SDP and iWARP In-Reply-To: <47A2A6EA.60605@hpc.ufl.edu> References: <4783A5B0.6040603@hpc.ufl.edu> <4783B3F5.20600@opengridcomputing.com> <4783BDD5.7000702@hpc.ufl.edu> <4783C326.3070306@opengridcomputing.com> <478634A5.3080204@hpc.ufl.edu> <47863794.9080709@opengridcomputing.com> <47865A4A.4070603@hpc.ufl.edu> <47865E5B.4030607@opengridcomputing.com> <4787936E.5010603@hpc.ufl.edu> <4787977E.509@opengridcomputing.com> <479765AC.1040600@hpc.ufl.edu> <8A71B368A89016469F72CD08050AD33401FCDA2F@maui.asicdesigners.com> <47977262.1060906@hpc.ufl.edu> <4798CB4C.7070706@opengridcomputing.com> <4798D0D2.5070103@opengridcomputing.com> <47A2A033.4060208@hpc.ufl.edu> <47A2A4FB.503@opengridcomputing.com> <47A2A6EA.60605@hpc.ufl.edu> Message-ID: <47B2119E.7060502@hpc.ufl.edu> Craig Prescott wrote: > Steve Wise wrote: >> Craig Prescott wrote: >>> [root at tebow2 ~]# /opt/netperf/bin/netperf -H 128.227.253.91 -L >>> 128.227.253.92 -t SDP_STREAM -c -C -l 10 -p 5006 >>> SDP STREAM TEST from 128.227.253.92 (128.227.253.92) port 0 AF_INET >>> to 128.227.253.91 (128.227.253.91) port 0 AF_INET >>> Recv Send Send Utilization >>> Service Demand >>> Socket Socket Message Elapsed Send Recv >>> Send Recv >>> Size Size Size Time Throughput local remote >>> local remote >>> bytes bytes bytes secs. 10^6bits/s % S % S >>> us/KB us/KB >>> >>> 262144 262144 262144 10.00 6305.54 16.39 14.38 >>> 0.852 1.495 >>> The patch to enable this is not big - I will produce one and send it to >>> the list. Might not happen before next week. >> What mtu are you using? > 9000. BTW, I don't think I followed up on this to the list. The reason for the crummy throughput above was because the Chelsio cards were in PCIe x4 slots (*blush*). 
Once in x8 slots, netperf SDP_STREAM looks like this: SDP STREAM TEST from 128.227.253.92 (128.227.253.92) port 0 AF_INET to 128.227.253.91 (128.227.253.91) port 0 AF_INET : cpu bind Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 262144 262144 262144 20.00 9402.58 25.50 26.68 0.889 0.930 Am summarizing on a web page now. Cheers, Craig From pw at osc.edu Tue Feb 12 13:38:20 2008 From: pw at osc.edu (Pete Wyckoff) Date: Tue, 12 Feb 2008 16:38:20 -0500 Subject: [ofa-general] [PATCH] mthca memfree init sg list Message-ID: <20080212213820.GG13643@osc.edu> Properly initialize the SG list in the user_db_table in mthca memfree. Without this, and when compiling with CONFIG_DEBUG_SG, a BUG will occur during create_cq. The call to sg_set_page() in mthca_map_user_db() will find that the scatterlist magic was not initialized. Signed-off-by: Pete Wyckoff --- drivers/infiniband/hw/mthca/mthca_memfree.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 1f4d27d..252db08 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -542,6 +542,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) for (i = 0; i < npages; ++i) { db_tab->page[i].refcount = 0; db_tab->page[i].uvirt = 0; + sg_init_table(&db_tab->page[i].mem, 1); } return db_tab; -- 1.5.3.8 From swise at opengridcomputing.com Tue Feb 12 14:01:50 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 12 Feb 2008 16:01:50 -0600 Subject: [ofa-general] Re: Demand paging for memory regions (was Re: MMU Notifiers V6) In-Reply-To: References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> 
<20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> Message-ID: <47B2174E.5000708@opengridcomputing.com> Roland Dreier wrote: > [Adding general at lists.openfabrics.org to get the IB/RDMA people involved] > > This thread has patches that add support for notifying drivers when a > process's memory map changes. The hope is that this is useful for > letting RDMA devices handle registered memory without pinning the > underlying pages, by updating the RDMA device's translation tables > whenever the host kernel's tables change. > > Is anyone interested in working on using this for drivers/infiniband? > I am interested in participating, but I don't think I have enough time > to do this by myself. I don't have time, although it would be interesting work! > > Also, at least naively it seems that this is only useful for hardware > that has support for this type of demand paging, and can handle > not-present pages, generating interrupts for page faults, etc. I know > that Mellanox HCAs should have this support; are there any other > devices that can do this? > Chelsio's T3 HW doesn't support this. Steve. From k.loans at merseymail.com Tue Feb 12 13:54:43 2008 From: k.loans at merseymail.com (Kingston Finance Net-working) Date: Tue, 12 Feb 2008 16:54:43 -0500 (EST) Subject: [ofa-general] Attention :$$: Do You Need An Urgent Loan :$$: { Apply Now } Message-ID: <57343.81.199.198.188.1202853283.squirrel@webmail.netrax.net> Dear Sir/Madam !!! Am Kingston Kings, the chief executive director of Kingston Finance Net-working. I have been certified and approved by the Government to give financial loans to individuals and companies. Kings Finance Net-working is a small scale financial institute who has helped countless number of people seeking online loans around the world.We give you the money you need to grow your business and solve your financial problems. 
Unlike some firms, I solely specialize in online business loans, Personal loans,Refinance loan,Home loan,Mortgage loan, which means that i give my clients the respect, courtesy and service they deserve. As a licensed and approved Lender,with almost a decade of experience, I have the expertise to help your business grow,prosper and stay financially buoyant. So if you are looking for money-not hassles-apply online today and start taking care of your personal and business problems. * Are you financially Squeezed? * Do you seek funds to pay off credits and debts * Do you seek finance to set up your own business? * Are you in need of private or business loans for various purposes? * Do you seek loans to carry out large projects * Do you seek funding for various other processes? If you have any of the above problems or you may be intrested then contact our customer service {info at katejonesdesk.us.ms} We can be of assistance to you but I want you to understand that I give out my loans at an interest rate of 3%.We give out loans within the minimum range of $25,000.00 USD to the maximum of $90,000,000.00 USD. Our loans are well insured for maximum security is our priority. * Borrow anything up to $90,000,000.00 USD * Choose between 1 to 50 years to repay. * Choose between Monthly and Annual repayments Plan. * Flexible Loan Terms. 
Before you can be eligible to obtain a loan from me, I need to know who am dealing with and that means you will have to send the management department a comprehensive information about your self or your company which includes, {BORROWER'S DATA INFORMATION} *(1)Loan Amount Needed: *(2)Loan Amount Needed In Words: *(3)Full Names: *(4)Country: *(5)Valid Cell Phone Number Or Tel Phone: *(6)Purpose Of Loan: *(7)Address: *(8)Occupation: *(9)Gender: Male Or Female: *(10)Nationality: *(11)Loan Duration: *(12)State: *(13)City: *(14)Age: IMPORTANT NOTICE : 1 : Subject to your reply, I shall get back to you on more detailed information and the terms of service. Be noted that this firm offers a standard online financial services which suits your financial needs. Also the above data information must be filled accordingly and submitted to the above company email address for proper verification process of your loan request will start.Be noted that this firm offers a standard online financial service. So feel free and follow all protocols. IMPORTANT NOTICE : 2 : all applicants must be of (18Yrs) and above before your loan request can be granted.be informed that this firm offers a standard online loan which fits your needs.loan request should be addressed to the company e-mail below for further verification and documentations. N/B: Please Serious Seeking Interested Persons Only in dire Need of this loan or Mortgage either for Personal or Business or Even Rehabbing.Keep in touch.Unserious Persons will have their Offers Turned down,when applying for a loan,please Ensure you come with a scanned Copy of Your Drivers License,or Your International Passport alongside your Full names and address.Once again you have to be In dire Need of the loan so as not to get your Offer Turned down,We are taking this Guideline to scoop off Unserious persons who mail us into thinking they are Interested and we Never get their Response after sending in Our Terms. Thanks for your patronage !!! 
Your's Sincerely, Kingston Kings. Chife Executive Officer From swise at opengridcomputing.com Tue Feb 12 14:09:29 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 12 Feb 2008 16:09:29 -0600 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopback connections. Message-ID: <20080212220929.24442.76874.stgit@dell3.ogc.int> RDMA/cxgb3: Fail loopback connections. The cxgb3 HW and driver don't support loopback RDMA connections. So fail any connection attempt where the destination address is local. Signed-off-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 16 ++++++++++++++++ 1 files changed, 16 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index e9a08fa..5d82723 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1784,6 +1784,17 @@ err: return err; } +static void is_loopback_dst(struct iw_cm_id *cm_id) +{ + struct net_device *dev; + + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); + if (!dev) + return 0; + dev_put(dev); + return 1; +} + int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err = 0; @@ -1791,6 +1802,11 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_ep *ep; struct rtable *rt; + if (is_loopback_dst(cm_id)) { + err = -ENOSYS; + goto out; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); if (!ep) { printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__); From clameter at sgi.com Tue Feb 12 14:10:50 2008 From: clameter at sgi.com (Christoph Lameter) Date: Tue, 12 Feb 2008 14:10:50 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions (was Re: MMU Notifiers V6) In-Reply-To: <47B2174E.5000708@opengridcomputing.com> References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> 
<20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> Message-ID: On Tue, 12 Feb 2008, Steve Wise wrote: > Chelsio's T3 HW doesn't support this. Not so far I guess but it could be equipped with these features right? Having the VM manage the memory area for Infiniband allows more reliable system operations and enables the sharing of large memory areas via Infiniband without the risk of livelocks or OOMs. From pradeeps at linux.vnet.ibm.com Tue Feb 12 14:29:24 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Tue, 12 Feb 2008 14:29:24 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B20367.7040900@dev.mellanox.co.il> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B20367.7040900@dev.mellanox.co.il> Message-ID: <47B21DC4.70604@linux.vnet.ibm.com> Eli Cohen wrote: > Sean Hefty wrote: >>> Saying all that, I don't think we want to have --any RNR retries--, as >>> for retries, I am open to hear what others think. >> >> I'm really not all that familiar with ipoib protocol, but if it's being >> implemented over an RC connection, then adding an RNR retry seems to >> make sense >> to me. I believe using UC is better, but if it's over RC, I don't >> know that we >> want to take the hit of tearing down and re-establishing the >> connection just >> because we have a fast sender. (This is just an opinion based on no fact >> whatsoever.) >> > > I don't see why setting rnr retry count can help if we have a fast > sender. If this sender is faster than the receiver eventually the rnr > counter will expire > and the connection will close. > > As for retry count, I don't know how common are errors that contribute > to the retry counter. If anyone has statistics of this I'd be glad to know. 
> > Pradeep, can you tell identify what part of the patch you sent actually > solved the problem you were seeing and also give some description of the > problem? > I brought this issue up on the mailing list sometime in the summer of 2007 is my recollection. I could not locate that with a quick search of the archives. I will probably do that again later. However, the crux of the issue is that I was seeing "send completion errors" and that is what prompted me to change the retry counts. Please see Table 78 "Completion Error Handling for RC Send Queues" in the IB Spec for reference. And changing the retry counts did help. Pradeep From rdreier at cisco.com Tue Feb 12 14:35:45 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 12 Feb 2008 14:35:45 -0800 Subject: [ofa-general] Re: [PATCH] mthca memfree init sg list In-Reply-To: <20080212213820.GG13643@osc.edu> (Pete Wyckoff's message of "Tue, 12 Feb 2008 16:38:20 -0500") References: <20080212213820.GG13643@osc.edu> Message-ID: Thanks... already in my tree. From rdreier at cisco.com Tue Feb 12 14:41:48 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 12 Feb 2008 14:41:48 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: (Christoph Lameter's message of "Tue, 12 Feb 2008 14:10:50 -0800 (PST)") References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> Message-ID: > > Chelsio's T3 HW doesn't support this. > Not so far I guess but it could be equipped with these features right? I don't know anything about the T3 internals, but it's not clear that you could do this without a new chip design in general. Lot's of RDMA devices were designed expecting that when a packet arrives, the HW can look up the bus address for a given memory region/offset and place the packet immediately. 
It seems like a major change to be able to generate a "page fault" interrupt when a page isn't present, or even just wait to scatter some data until the host finishes updating page tables when the HW needs the translation. - R. From pw at osc.edu Tue Feb 12 14:46:16 2008 From: pw at osc.edu (Pete Wyckoff) Date: Tue, 12 Feb 2008 17:46:16 -0500 Subject: [ofa-general] Re: [PATCH] mthca memfree init sg list In-Reply-To: References: <20080212213820.GG13643@osc.edu> Message-ID: <20080212224616.GB15025@osc.edu> rdreier at cisco.com wrote on Tue, 12 Feb 2008 14:35 -0800: > Thanks... already in my tree. Oh, in your for-2.6.25. I only looked in master and for-linus. Your way of initializing every time is just as good as initializing once when the db_tab is allocated. Not like this is much of a critical path. -- Pete From rdreier at cisco.com Tue Feb 12 14:51:29 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 12 Feb 2008 14:51:29 -0800 Subject: [ofa-general] Re: [PATCH] mthca memfree init sg list In-Reply-To: <20080212224616.GB15025@osc.edu> (Pete Wyckoff's message of "Tue, 12 Feb 2008 17:46:16 -0500") References: <20080212213820.GG13643@osc.edu> <20080212224616.GB15025@osc.edu> Message-ID: > Your way of initializing every time is just as good as initializing > once when the db_tab is allocated. Not like this is much of a > critical path. Actually I think your way is neater now that I look at the patch more carefully. I updated my tree, I'll ask Linus to pull soon. 
From felix at chelsio.com Tue Feb 12 15:14:23 2008 From: felix at chelsio.com (Felix Marti) Date: Tue, 12 Feb 2008 15:14:23 -0800 Subject: [ofa-general] Re: Demand paging for memory regions References: <20080208234302.GH26564@sgi.com><20080208155641.2258ad2c.akpm@linux-foundation.org><20080209012446.GB7051@v2.random><20080209015659.GC7051@v2.random><20080209075556.63062452@bree.surriel.com><47B2174E.5000708@opengridcomputing.com> Message-ID: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> > -----Original Message----- > From: general-bounces at lists.openfabrics.org [mailto:general- > bounces at lists.openfabrics.org] On Behalf Of Roland Dreier > Sent: Tuesday, February 12, 2008 2:42 PM > To: Christoph Lameter > Cc: Rik van Riel; steiner at sgi.com; Andrea Arcangeli; > a.p.zijlstra at chello.nl; izike at qumranet.com; linux- > kernel at vger.kernel.org; avi at qumranet.com; linux-mm at kvack.org; > daniel.blueman at quadrics.com; Robin Holt; general at lists.openfabrics.org; > Andrew Morton; kvm-devel at lists.sourceforge.net > Subject: Re: [ofa-general] Re: Demand paging for memory regions > > > > Chelsio's T3 HW doesn't support this. > > > Not so far I guess but it could be equipped with these features > right? > > I don't know anything about the T3 internals, but it's not clear that > you could do this without a new chip design in general. Lot's of RDMA > devices were designed expecting that when a packet arrives, the HW can > look up the bus address for a given memory region/offset and place the > packet immediately. It seems like a major change to be able to > generate a "page fault" interrupt when a page isn't present, or even > just wait to scatter some data until the host finishes updating page > tables when the HW needs the translation. That is correct, not a change we can make for T3. We could, in theory, deal with changing mappings though. 
The change would need to be synchronized though: the VM would need to tell us which mapping were about to change and the driver would then need to disable DMA to/from it, do the change and resume DMA. > > - R. > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib- > general From a-akreis at adiemus.sk Tue Feb 12 15:54:47 2008 From: a-akreis at adiemus.sk (Blanche Cornett) Date: Tue, 12 Feb 2008 19:54:47 -0400 Subject: [ofa-general] Where have you been? Message-ID: <01c86db1$1ec1de80$90a0dfc9@a-akreis> Hello! I am tired this evening. I am nice girl that would like to chat with you. Email me at Eva at TheHealCare.info only, because I am using my friend's email to write this. Would you mind me showing some nice pictures of me? From jgunthorpe at obsidianresearch.com Tue Feb 12 15:23:29 2008 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 12 Feb 2008 16:23:29 -0700 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> Message-ID: <20080212232329.GC31435@obsidianresearch.com> On Tue, Feb 12, 2008 at 02:41:48PM -0800, Roland Dreier wrote: > > > Chelsio's T3 HW doesn't support this. > > > Not so far I guess but it could be equipped with these features right? > > I don't know anything about the T3 internals, but it's not clear that > you could do this without a new chip design in general. 
Lot's of RDMA > devices were designed expecting that when a packet arrives, the HW can > look up the bus address for a given memory region/offset and place > the Well, certainly today the memfree IB devices store the page tables in host memory so they are already designed to hang onto packets during the page lookup over PCIE, adding in faulting makes this time larger. But this is not a good thing at all, IB's congestion model is based on the notion that end ports can always accept packets without making input contingent on output. If you take a software interrupt to fill in the page pointer then you could potentially deadlock on the fabric. For example using this mechanism to allow swap-in of RDMA target pages and then putting the storage over IB would be deadlock prone. Even without deadlock slowing down the input path will cause network congestion and poor performance for other nodes. It is not a desirable thing to do.. I expect that iwarp running over flow controlled ethernet has similar kinds of problems for similar reasons.. In general the best I think you can hope for with RDMA hardware is page migration using some atomic operations with the adaptor and a cpu page copy with retry sort of scheme - but is pure page migration interesting at all? Jason From changquing.tang at hp.com Tue Feb 12 15:32:16 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 12 Feb 2008 23:32:16 +0000 Subject: [ofa-general] uDAPL libdat2.so version # problem for today's OFED code In-Reply-To: <47AB7912.5040700@ichips.intel.com> References: <47AB7912.5040700@ichips.intel.com> Message-ID: Arlin: Here is another question.
The /etc/dat.conf is: OpenIB-cma u1.2 nonthreadsafe default libdaplcma.so.1 dapl.1.2 "ib0 0" "" OpenIB-cma-1 u1.2 nonthreadsafe default libdaplcma.so.1 dapl.1.2 "ib1 0" "" ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "ib0 0" "" ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "ib1 0" "" A simple code just call dat_registry_list_prodivers() to get the list in /etc/dat.conf, and call dat_ia_openv() in a loop of above list. If I compile and link this code with /usr/include/dat2 and libdat2.so, dat_ia_openv() return DAT_SUCCESS for all four entries. I expect the first two entries fail if my code link with dat2, where am I wrong ? If I don't know my code is compiled/linked with dat or dat2, how do I know at at runtime which version of uDAPL I am running ? Here is the code: #include #include #include int main() { int i; DAT_RETURN err; DAT_COUNT nif; DAT_PROVIDER_INFO *list[10]; DAT_PROVIDER_INFO interface[10]; DAT_EVD_HANDLE async_evd_handle; DAT_IA_HANDLE ia_handle; const char *major; const char *minor; nif = 10; for (i = 0; i < nif; i++) { list[i] = &(interface[i]); } err = dat_registry_list_providers(nif, &nif, list); if (err != DAT_SUCCESS) { fprintf(stderr, "dat_registry_list_providers() failed\n"); return (-1); } if (nif < 1) { fprintf(stderr, "no interface found\n"); return (-1); } for (i=0; i < nif; i++) { fprintf(stderr, "version: %d.%d\n", interface[i].dapl_version_major, interface[i].dapl_version_minor); async_evd_handle = DAT_HANDLE_NULL; err = dat_ia_openv(interface[i].ia_name, 8, &async_evd_handle, &ia_handle, interface[i].dapl_version_major, interface[i].dapl_version_minor, interface[i].is_thread_safe); if (err != DAT_SUCCESS) { fprintf(stderr, "%s fails\n", interface[i].ia_name); dat_strerror(err, &major, &minor); fprintf(stderr, "dat_ia_openv() failed: %s.%s\n", major, minor); } else { fprintf(stderr, "%s succeeds\n", interface[i].ia_name); } } fprintf(stderr, "OK\n"); } > -----Original Message----- > From: Arlin Davis 
[mailto:ardavis at ichips.intel.com] > Sent: Thursday, February 07, 2008 3:33 PM > To: Tang, Changqing > Cc: OpenFabrics General > Subject: Re: [ofa-general] uDAPL libdat2.so version # problem > for today's OFED code > > Tang, Changqing wrote: > > HI, > > I downloaded today's tarball and installed. But > both libdat.so > > and libdat2.so report version 1.2 > > > > This is not the DAT version, it is the provider configured in > your /etc/dat.conf. The OFED configuration supplies OFA > providers for both > 1.2 and 2.0 versions. Your application picks accordingly. > > For example, if you change your code to list more then one > and include the name you will see the list: > > for (i=0;i<10;i++) { > fprintf(stderr, "version: %s %d.%d\n", > interface[i].ia_name, > interface[i].dapl_version_major, > interface[i].dapl_version_minor); > } > > ./test > version: OpenIB-cma 1.2 > version: OpenIB-cma-1 1.2 > version: OpenIB-cma-2 1.2 > version: OpenIB-cma-3 1.2 > version: OpenIB-bond 1.2 > version: ofa-v2-ib0 2.0 > version: ofa-v2-ib1 2.0 > version: ofa-v2-ib2 2.0 > version: ofa-v2-ib3 2.0 > version: ofa-v2-bond 2.0 > > The dat_ia_open will validate the build version against the > provider version. > > -arlin > From rdreier at cisco.com Tue Feb 12 16:26:56 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 12 Feb 2008 16:26:56 -0800 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopback connections. 
In-Reply-To: <20080212220929.24442.76874.stgit@dell3.ogc.int> (Steve Wise's message of "Tue, 12 Feb 2008 16:09:29 -0600") References: <20080212220929.24442.76874.stgit@dell3.ogc.int> Message-ID: applied, although: > +static void is_loopback_dst(struct iw_cm_id *cm_id) > +{ > + struct net_device *dev; > + > + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); > + if (!dev) > + return 0; > + dev_put(dev); > + return 1; > +} is there any way this could trigger when it shouldn't, like if I'm trying to make a connection from one local device to a different local device (which should work fine)? - R. From clameter at sgi.com Tue Feb 12 16:56:45 2008 From: clameter at sgi.com (Christoph Lameter) Date: Tue, 12 Feb 2008 16:56:45 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080208234302.GH26564@sgi.com> <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> Message-ID: On Tue, 12 Feb 2008, Roland Dreier wrote: > I don't know anything about the T3 internals, but it's not clear that > you could do this without a new chip design in general. Lot's of RDMA > devices were designed expecting that when a packet arrives, the HW can > look up the bus address for a given memory region/offset and place the > packet immediately. It seems like a major change to be able to > generate a "page fault" interrupt when a page isn't present, or even > just wait to scatter some data until the host finishes updating page > tables when the HW needs the translation. Well if the VM wants to invalidate a page then the remote end first has to remove its mapping. If a page has been removed then the remote end would encounter a fault and then would have to wait for the local end to reestablish its mapping before proceeding. So the packet would only be generated when both ends are in sync.
From clameter at sgi.com Tue Feb 12 16:57:19 2008 From: clameter at sgi.com (Christoph Lameter) Date: Tue, 12 Feb 2008 16:57:19 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> References: <20080208234302.GH26564@sgi.com><20080208155641.2258ad2c.akpm@linux-foundation.org><20080209012446.GB7051@v2.random><20080209015659.GC7051@v2.random><20080209075556.63062452@bree.surriel.com><47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> Message-ID: On Tue, 12 Feb 2008, Felix Marti wrote: > > I don't know anything about the T3 internals, but it's not clear that > > you could do this without a new chip design in general. Lot's of RDMA > > devices were designed expecting that when a packet arrives, the HW can > > look up the bus address for a given memory region/offset and place the > > packet immediately. It seems like a major change to be able to > > generate a "page fault" interrupt when a page isn't present, or even > > just wait to scatter some data until the host finishes updating page > > tables when the HW needs the translation. > > That is correct, not a change we can make for T3. We could, in theory, > deal with changing mappings though. The change would need to be > synchronized though: the VM would need to tell us which mapping were > about to change and the driver would then need to disable DMA to/from > it, do the change and resume DMA. Right. That is the intent of the patchset.
From clameter at sgi.com Tue Feb 12 17:01:17 2008 From: clameter at sgi.com (Christoph Lameter) Date: Tue, 12 Feb 2008 17:01:17 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080212232329.GC31435@obsidianresearch.com> References: <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> Message-ID: On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > Well, certainly today the memfree IB devices store the page tables in > host memory so they are already designed to hang onto packets during > the page lookup over PCIE, adding in faulting makes this time > larger. You really do not need a page table to use it. What needs to be maintained is knowledge on both side about what pages are currently shared across RDMA. If the VM decides to reclaim a page then the notification is used to remove the remote entry. If the remote side then tries to access the page again then the page fault on the remote side will stall until the local page has been brought back. RDMA can proceed after both sides again agree on that page now being sharable. 
From jgunthorpe at obsidianresearch.com Tue Feb 12 17:26:38 2008 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 12 Feb 2008 18:26:38 -0700 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> Message-ID: <20080213012638.GD31435@obsidianresearch.com> On Tue, Feb 12, 2008 at 05:01:17PM -0800, Christoph Lameter wrote: > On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > > > Well, certainly today the memfree IB devices store the page tables in > > host memory so they are already designed to hang onto packets during > > the page lookup over PCIE, adding in faulting makes this time > > larger. > > You really do not need a page table to use it. What needs to be maintained > is knowledge on both side about what pages are currently shared across > RDMA. If the VM decides to reclaim a page then the notification is used to > remove the remote entry. If the remote side then tries to access the page > again then the page fault on the remote side will stall until the local > page has been brought back. RDMA can proceed after both sides again agree > on that page now being sharable. The problem is that the existing wire protocols do not have a provision for doing an 'are you ready' or 'I am not ready' exchange and they are not designed to store page tables on both sides as you propose. The remote side can send RDMA WRITE traffic at any time after the RDMA region is established. The local side must be able to handle it. There is no way to signal that a page is not ready and the remote should not send. This means the only possible implementation is to stall/discard at the local adaptor when a RDMA WRITE is received for a page that has been reclaimed. This is what leads to deadlock/poor performance..
Jason From swise at opengridcomputing.com Tue Feb 12 17:41:53 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 12 Feb 2008 19:41:53 -0600 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopback connections. In-Reply-To: References: <20080212220929.24442.76874.stgit@dell3.ogc.int> Message-ID: <47B24AE1.7040709@opengridcomputing.com> Roland Dreier wrote: > applied, although: > > > +static void is_loopback_dst(struct iw_cm_id *cm_id) > > +{ > > + struct net_device *dev; > > + > > + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); > > + if (!dev) > > + return 0; > > + dev_put(dev); > > + return 1; > > +} > > is there any way this could trigger when it should, like if I'm trying > to make a connection from one local device to a different local device > (which should work fine)? > As far as I can tell, if the app does a rdma_resolve_addr() on the dst addr (which is a local address), then the routing lookup will find the local interface with that dst addr, and that device will be used for the connect. IE src and dst devices are the same. Maybe if the app does an explicit bind to the addr on one device, then connects to the addr on the other device. But that's not gonna work either, I think. I still think it will resolve to one device and that device cannot do loopback... Steve. 
From swise at opengridcomputing.com Tue Feb 12 17:45:47 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 12 Feb 2008 19:45:47 -0600 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213012638.GD31435@obsidianresearch.com> References: <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> Message-ID: <47B24BCB.8030003@opengridcomputing.com> Jason Gunthorpe wrote: > On Tue, Feb 12, 2008 at 05:01:17PM -0800, Christoph Lameter wrote: >> On Tue, 12 Feb 2008, Jason Gunthorpe wrote: >> >>> Well, certainly today the memfree IB devices store the page tables in >>> host memory so they are already designed to hang onto packets during >>> the page lookup over PCIE, adding in faulting makes this time >>> larger. >> You really do not need a page table to use it. What needs to be maintained >> is knowledge on both side about what pages are currently shared across >> RDMA. If the VM decides to reclaim a page then the notification is used to >> remove the remote entry. If the remote side then tries to access the page >> again then the page fault on the remote side will stall until the local >> page has been brought back. RDMA can proceed after both sides again agree >> on that page now being sharable. > > The problem is that the existing wire protocols do not have a > provision for doing an 'are you ready' or 'I am not ready' exchange > and they are not designed to store page tables on both sides as you > propose. The remote side can send RDMA WRITE traffic at any time after > the RDMA region is established. The local side must be able to handle > it. There is no way to signal that a page is not ready and the remote > should not send. 
> > This means the only possible implementation is to stall/discard at the > local adaptor when a RDMA WRITE is recieved for a page that has been > reclaimed. This is what leads to deadlock/poor performance.. > If the events are few and far between then this model is probably ok. For iWARP, it means TCP retransmit and slow start and all that, but if its an infrequent event, then its ok if it helps the host better manage memory. Maybe... ;-) Steve. From christian.bell at qlogic.com Tue Feb 12 17:55:33 2008 From: christian.bell at qlogic.com (Christian Bell) Date: Tue, 12 Feb 2008 17:55:33 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> Message-ID: <20080213015533.GP29340@mv.qlogic.com> On Tue, 12 Feb 2008, Christoph Lameter wrote: > On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > > > Well, certainly today the memfree IB devices store the page tables in > > host memory so they are already designed to hang onto packets during > > the page lookup over PCIE, adding in faulting makes this time > > larger. > > You really do not need a page table to use it. What needs to be maintained > is knowledge on both side about what pages are currently shared across > RDMA. If the VM decides to reclaim a page then the notification is used to > remove the remote entry. If the remote side then tries to access the page > again then the page fault on the remote side will stall until the local > page has been brought back. RDMA can proceed after both sides again agree > on that page now being sharable. HPC environments won't be amenable to a pessimistic approach of synchronizing before every data transfer. RDMA is assumed to be a low-level data movement mechanism that has no implied synchronization. 
In some parallel programming models, it's not uncommon to use RDMA to send 8-byte messages. It can be difficult to make and hold guarantees about in-memory pages when many concurrent RDMA operations are in flight (not uncommon in reasonably large machines). Some of the in-memory page information could be shared with some form of remote caching strategy but then it's a different problem with its own scalability challenges. I think there are very potential clients of the interface when an optimistic approach is used. Part of the trick, however, has to do with being able to re-start transfers instead of buffering the data or making guarantees about delivery that could cause deadlock (as was alluded to earlier in this thread). InfiniBand is constrained in this regard since it requires message-ordering between endpoints (or queue pairs). One could argue that this is still possible with IB, at the cost of throwing more packets away when a referenced page is not in memory. With this approach, the worst-case demand paging scenario is met when the active working set of referenced pages is larger than the amount of physical memory -- but HPC applications are already bound by this anyway. You'll find that Quadrics has the most experience in this area and that their entire architecture is adapted to being optimistic about demand paging in RDMA transfers -- they've been maintaining a patchset to do this for years. . . christian From xma at us.ibm.com Tue Feb 12 18:11:37 2008 From: xma at us.ibm.com (Shirley Ma) Date: Tue, 12 Feb 2008 18:11:37 -0800 Subject: [ofa-general] openSM set up help Message-ID: Hello Hal, Have you set up openSM with Voltaire paththrough module only before? I am planning to test blades/connectX, but I can't find a switch. I only have paththrough module. I wonder how to set up it. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed...
URL: From clameter at sgi.com Tue Feb 12 18:19:10 2008 From: clameter at sgi.com (Christoph Lameter) Date: Tue, 12 Feb 2008 18:19:10 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213015533.GP29340@mv.qlogic.com> References: <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213015533.GP29340@mv.qlogic.com> Message-ID: On Tue, 12 Feb 2008, Christian Bell wrote: > I think there are very potential clients of the interface when an > optimistic approach is used. Part of the trick, however, has to do > with being able to re-start transfers instead of buffering the data > or making guarantees about delivery that could cause deadlock (as was > alluded to earlier in this thread). InfiniBand is constrained in > this regard since it requires message-ordering between endpoints (or > queue pairs). One could argue that this is still possible with IB, > at the cost of throwing more packets away when a referenced page is > not in memory. With this approach, the worse case demand paging > scenario is met when the active working set of referenced pages is > larger than the amount physical memory -- but HPC applications are > already bound by this anyway. > > You'll find that Quadrics has the most experience in this area and > that their entire architecture is adapted to being optimistic about > demand paging in RDMA transfers -- they've been maintaining a patchset > to do this for years. The notifier patchset that we are discussing here was mostly inspired by their work. There is no need to restart transfers that you have never started in the first place. The remote side would never start a transfer if the page reference has been torn down. In order to start the transfer a fault handler on the remote side would have to setup the association between the memory on both ends again. 
From clameter at sgi.com Tue Feb 12 18:35:09 2008 From: clameter at sgi.com (Christoph Lameter) Date: Tue, 12 Feb 2008 18:35:09 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213012638.GD31435@obsidianresearch.com> References: <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> Message-ID: On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > The problem is that the existing wire protocols do not have a > provision for doing an 'are you ready' or 'I am not ready' exchange > and they are not designed to store page tables on both sides as you > propose. The remote side can send RDMA WRITE traffic at any time after > the RDMA region is established. The local side must be able to handle > it. There is no way to signal that a page is not ready and the remote > should not send. > > This means the only possible implementation is to stall/discard at the > local adaptor when a RDMA WRITE is recieved for a page that has been > reclaimed. This is what leads to deadlock/poor performance.. You would only use the wire protocols *after* having established the RDMA region. The notifier chains allows a RDMA region (or parts thereof) to be down on demand by the VM. The region can be reestablished if one of the side accesses it. I hope I got that right. Not much exposure to Infiniband so far. Lets say you have a two systems A and B. Each has their memory region MemA and MemB. Each side also has page tables for this region PtA and PtB. Now you establish a RDMA connection between both side. The pages in both MemB and MemA are present and so are entries in PtA and PtB. RDMA traffic can proceed. 
The VM on system A now gets into a situation in which memory becomes heavily used by another (maybe non RDMA process) and after checking that there was no recent reference to MemA and MemB (via a notifier aging callback) decides to reclaim the memory from MemA. In that case it will notify the RDMA subsystem on A that it is trying to reclaim a certain page. The RDMA subsystem on A will then send a message to B notifying it that the memory will be going away. B now has to remove its corresponding page from memory (and drop the entry in PtB) and confirm to A that this has happened. RDMA traffic is then stopped for this page. Then A can also remove its page, the corresponding entry in PtA and the page is reclaimed or pushed out to swap completing the page reclaim. If either side then accesses the page again then the reverse process happens. If B accesses the page then it will first of all incur a page fault because the entry in PtB is missing. The fault will then cause a message to be sent to A to establish the page again. A will create an entry in PtA and will then confirm to B that the page was established. At that point RDMA operations can occur again. So the whole scheme does not really need a hardware page table in the RDMA hardware. The page tables of the two systems A and B are sufficient. The scheme can also be applied to a larger range than only a single page. The RDMA subsystem could tear down a large section when reclaim is pushing on it and then reestablish it as needed. Swapping and page reclaim is certainly not something that improves the speed of the application affected by swapping and page reclaim but it allows the VM to manage memory effectively if multiple loads are running on a system.
From jgunthorpe at obsidianresearch.com Tue Feb 12 19:25:33 2008 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 12 Feb 2008 20:25:33 -0700 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> Message-ID: <20080213032533.GC32047@obsidianresearch.com> On Tue, Feb 12, 2008 at 06:35:09PM -0800, Christoph Lameter wrote: > On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > > > The problem is that the existing wire protocols do not have a > > provision for doing an 'are you ready' or 'I am not ready' exchange > > and they are not designed to store page tables on both sides as you > > propose. The remote side can send RDMA WRITE traffic at any time after > > the RDMA region is established. The local side must be able to handle > > it. There is no way to signal that a page is not ready and the remote > > should not send. > > > > This means the only possible implementation is to stall/discard at the > > local adaptor when a RDMA WRITE is recieved for a page that has been > > reclaimed. This is what leads to deadlock/poor performance.. > > You would only use the wire protocols *after* having established the RDMA > region. The notifier chains allows a RDMA region (or parts thereof) to be > down on demand by the VM. The region can be reestablished if one of > the side accesses it. I hope I got that right. Not much exposure to > Infiniband so far. [clip explaination] But this isn't how IB or iwarp work at all. What you describe is a significant change to the general RDMA operation and requires changes to both sides of the connection and the wire protocol. 
A few comments on RDMA operation that might clarify things a little bit more: - In RDMA (iwarp and IB versions) the hardware page tables exist to linearize the local memory so the remote does not need to be aware of non-linearities in the physical address space. The main motivation for this is kernel bypass where the user space app wants to instruct the remote side to DMA into memory using user space addresses. Hardware provides the page tables to switch from incoming user space virtual addresses to physical addresess. This greatly simplifies the user space programming model since you don't need to pass around or create s/g lists for memory that is already virtually continuous. Many kernel RDMA drivers (SCSI, NFS) only use the HW page tables for access control and enforcing the liftime of the mapping. The page tables in the RDMA hardware exist primarily to support this, and not for other reasons. The pinning of pages is one part to support the HW page tables and one part to support the RDMA lifetime rules, the liftime rules are what cause problems for the VM. - The wire protocol consists of packets that say 'Write XXX bytes to offset YY in Region RRR'. Creating a region produces the RRR label and currently pins the pages. So long as the RRR label is valid the remote side can issue write packets at any time without any further synchronization. There is no wire level events associated with creating RRR. You can pass RRR to the other machine in any fashion, even using carrier pigeons :) - The RDMA layer is very general (ala TCP), useful protocols (like SCSI) are built on top of it and they specify the lifetime rules and protocol for exchanging RRR. Every protocol is different. In kernel protocols like SRP and NFS RDMA seem to have very short lifetimes for RRR and work more like pci_map_* in real SCSI hardware. - HPC userspace apps, like MPI apps, have different lifetime rules and tend to be really long lived. 
These people will not want anything that makes their OPs more expensive and also probably don't care too much about the VM problems you are looking at (?) - There is no protocol support to exchange RRR. This is all done by upper level protocols (ala HTTP vs TCP). You cannot assert and revoke RRR in a general way. Every protocol is different and optimized. This is your step 'A will then send a message to B notifying..'. It simply does not exist in the protocol specifications. I don't know much about Quadrics, but I would be hesitant to lump it in too much with these RDMA semantics. Christian's comments sound like they operate closer to what you described and that is why they have an existing patch set. I don't know :) What it boils down to is that to implement true removal of pages in a general way the kernel and HCA must either drop packets or stall incoming packets, both are big performance problems - and I can't see many users wanting this. Enterprise style people using SCSI, NFS, etc already have short pin periods and HPC MPI users probably won't care about the VM issues enough to warrant the performance overhead. Regards, Jason From a-alcha at agmark.com Tue Feb 12 20:18:33 2008 From: a-alcha at agmark.com (Elvis Steele) Date: Wed, 13 Feb 2008 11:18:33 +0700 Subject: [ofa-general] You told me that you will reply back Message-ID: <799988146.02740895999090@agmark.com> Hello! I am bored this afternoon. I am nice girl that would like to chat with you. Email me at Mona at TheHealCare.info only, because I am using my friend's email to write this. Don't miss some of my naughty pictures. 
From christian.bell at qlogic.com Tue Feb 12 20:09:05 2008 From: christian.bell at qlogic.com (Christian Bell) Date: Tue, 12 Feb 2008 20:09:05 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> Message-ID: <20080213040905.GQ29340@mv.qlogic.com> On Tue, 12 Feb 2008, Christoph Lameter wrote: > On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > > > The problem is that the existing wire protocols do not have a > > provision for doing an 'are you ready' or 'I am not ready' exchange > > and they are not designed to store page tables on both sides as you > > propose. The remote side can send RDMA WRITE traffic at any time after > > the RDMA region is established. The local side must be able to handle > > it. There is no way to signal that a page is not ready and the remote > > should not send. > > > > This means the only possible implementation is to stall/discard at the > > local adaptor when a RDMA WRITE is recieved for a page that has been > > reclaimed. This is what leads to deadlock/poor performance.. You're arguing that a HW page table is not needed by describing a use case that is essentially what all RDMA solutions already do above the wire protocols (all solutions except Quadrics, of course). > You would only use the wire protocols *after* having established the RDMA > region. The notifier chains allows a RDMA region (or parts thereof) to be > down on demand by the VM. The region can be reestablished if one of > the side accesses it. I hope I got that right. Not much exposure to > Infiniband so far. RDMA is already always used *after* memory regions are set up -- they are set up out-of-band w.r.t RDMA but essentially this is the "before" part. > Lets say you have a two systems A and B. Each has their memory region MemA > and MemB. 
Each side also has page tables for this region PtA and PtB. > > Now you establish a RDMA connection between both side. The pages in both > MemB and MemA are present and so are entries in PtA and PtB. RDMA > traffic can proceed. > > The VM on system A now gets into a situation in which memory becomes > heavily used by another (maybe non RDMA process) and after checking that > there was no recent reference to MemA and MemB (via a notifier aging > callback) decides to reclaim the memory from MemA. > > In that case it will notify the RDMA subsystem on A that it is trying to > reclaim a certain page. > > The RDMA subsystem on A will then send a message to B notifying it that > the memory will be going away. B now has to remove its corresponding page > from memory (and drop the entry in PtB) and confirm to A that this has > happened. RDMA traffic is then stopped for this page. Then A can also > remove its page, the corresponding entry in PtA and the page is reclaimed > or pushed out to swap completing the page reclaim. > > If either side then accesses the page again then the reverse process > happens. If B accesses the page then it wil first of all incur a page > fault because the entry in PtB is missing. The fault will then cause a > message to be send to A to establish the page again. A will create an > entry in PtA and will then confirm to B that the page was established. At > that point RDMA operations can occur again. The notifier-reclaim cycle you describe is akin to the out-of-band pin-unpin control messages used by existing communication libraries. Also, I think what you are proposing can have problems at scale -- A must keep track of all of the (potentially many systems) of memA and cooperatively get an agreement from all these systems before reclaiming the page. When messages are sufficiently large, the control messaging necessary to setup/teardown the regions is relatively small. 
This is not always the case however -- in programming models that employ smaller messages, the one-sided nature of RDMA is the most attractive part of it. > So the whole scheme does not really need a hardware page table in the RDMA > hardware. The page tables of the two systems A and B are sufficient. > > The scheme can also be applied to a larger range than only a single page. > The RDMA subsystem could tear down a large section when reclaim is > pushing on it and then reestablish it as needed. Nothing any communication/runtime system can't already do today. The point of RDMA demand paging is enabling the possibility of using RDMA without the implied synchronization -- the optimistic part. Using the notifiers to duplicate existing memory region handling for RDMA hardware that doesn't have HW page tables is possible but undermines the more important consumer of your patches in my opinion. One other area that has not been brought up yet (I think) is the applicability of notifiers in letting users know when pinned memory is reclaimed by the kernel. This is useful when a lower-level library employs lazy deregistration strategies on memory regions that are subsequently released to the kernel via the application's use of munmap or sbrk. Ohio Supercomputing Center has work in this area but a generalized approach in the kernel would certainly be welcome. . . 
christian -- christian.bell at qlogic.com (QLogic Host Solutions Group, formerly Pathscale) From jgunthorpe at obsidianresearch.com Tue Feb 12 20:26:00 2008 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Tue, 12 Feb 2008 21:26:00 -0700 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <47B26A6A.4000209@myri.com> References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080213032533.GC32047@obsidianresearch.com> <47B26A6A.4000209@myri.com> Message-ID: <20080213042600.GA32449@obsidianresearch.com> [mangled CC list trimmed] On Tue, Feb 12, 2008 at 10:56:26PM -0500, Patrick Geoffray wrote: > Jason Gunthorpe wrote: >> I don't know much about Quadrics, but I would be hesitant to lump it >> in too much with these RDMA semantics. Christian's comments sound like >> they operate closer to what you described and that is why they have an >> existing patch set. I don't know :) > > The Quadrics folks have been doing RDMA for 10 years, there is a reason why > they maintained a patch. This wasn't meant as a slight against Quadrics, only to point out that the specific wire protocols used by IB and iwarp are what cause this limitation, it would be easy to imagine that Quadrics has some additional twist that can make this easier.. >> What it boils down to is that to implement true removal of pages in a >> general way the kernel and HCA must either drop packets or stall >> incoming packets, both are big performance problems - and I can't see >> many users wanting this. Enterprise style people using SCSI, NFS, etc >> already have short pin periods and HPC MPI users probably won't care >> about the VM issues enough to warrant the performance overhead. > > This is not true, HPC people do care about the VM issues a lot. 
Memory > registration (pinning and translating) is usually too expensive to I meant that HPC users are unlikely to want to swap active RDMA pages if this causes a performance cost on normal operations. None of my comments are meant to imply that lazy de-registration or page migration are not good things. Regards, Jason From patrick at myri.com Tue Feb 12 20:47:22 2008 From: patrick at myri.com (Patrick Geoffray) Date: Tue, 12 Feb 2008 23:47:22 -0500 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213042600.GA32449@obsidianresearch.com> References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080213032533.GC32047@obsidianresearch.com> <47B26A6A.4000209@myri.com> <20080213042600.GA32449@obsidianresearch.com> Message-ID: <47B2765A.2070901@myri.com> Jason Gunthorpe wrote: > [mangled CC list trimmed] Thanks, noticed that afterwards. > This wasn't meant as a slight against Quadrics, only to point out that > the specific wire protocols used by IB and iwarp are what cause this > limitation, it would be easy to imagine that Quadrics has some > additional twist that can make this easier.. The wire protocols are similar, nothing fancy. The specificity of Quadrics (and many others) is that they can change the behavior of the NIC in firmware, so they adapt to what the OS offers. They had the VM notifier support in Tru64 back in the days, they just ported the functionality to Linux. > I meant that HPC users are unlikely to want to swap active RDMA pages > if this causes a performance cost on normal operations. None of my Swapping to disk is not a normal operation in HPC, it's going to be slow anyway. The main problem for HPC users is not swapping, it's that they do not know when a registered page is released to the OS through free(), sbrk() or munmap(). Like swapping, they don't expect that it will happen often, but they have to handle it gracefully. Patrick -- Patrick Geoffray Myricom, Inc. 
http://www.myri.com From hrosenstock at xsigo.com Tue Feb 12 22:19:48 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 12 Feb 2008 22:19:48 -0800 Subject: [ofa-general] openSM set up help In-Reply-To: References: Message-ID: <1202883588.31050.102.camel@hrosenstock-ws.xsigo.com> Hi Shirley, On Tue, 2008-02-12 at 18:11 -0800, Shirley Ma wrote: > Hello Hal, > > Have you set up openSM with Voltaire paththrough module only before? No. > I am planning to test blades/connectX, but I can't find a switch. I > only have paththrough module. I wonder how to set up it. If it is truly passthru, then the subnet is a point to point connection (CA <-> CA). OpenSM works in that mode. I don't think any special setup is needed. If you need multiple of these, then you need multiple SMs (each running on a different CA port and), each configured with a different subnet prefix. -- Hal > Thanks > Shirley > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at dev.mellanox.co.il Tue Feb 12 22:46:42 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 13 Feb 2008 08:46:42 +0200 Subject: [ofa-general] [PATCH 1/1] sdplib - bind returns wrong error when using 'both' In-Reply-To: References: Message-ID: <47B29252.9030809@dev.mellanox.co.il> Jim Mott wrote: > A Mellanox regression test detected a difference between the error > returned by SDP and TCP and the error returned using libsdp with > 'both'. 
> > Signed-off-by: Jim Mott > --- Applied to ofed_1_3/libsdp.git, Regards, Vladimir From ogerlitz at voltaire.com Tue Feb 12 22:54:59 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 13 Feb 2008 08:54:59 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B21DC4.70604@linux.vnet.ibm.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B20367.7040900@dev.mellanox.co.il> <47B21DC4.70604@linux.vnet.ibm.com> Message-ID: <47B29443.9010206@voltaire.com> Pradeep Satyanarayana wrote: > I brought this issue up on the mailing list sometime in the summer of 2007 is > my recollection. I could not locate that with a quick search of the archives. > I will probably do that again later. Its from December 2007 http://lists.openfabrics.org/pipermail/general/2007-December/044299.html > However, the crux of the issue is that I was seeing "send completion errors" and > that is what prompted me to change the retry counts. Please see Table 78 > "Completion Error Handling for RC Send Queues" in the IB Spec for reference. > And changing the retry counts did help. I understand that changing the retry counts eliminated the issue you were seeing in your setup, however, its more of an observation than an actual problem statement whose solution can be judged. Apart from that, I have concerns regarding the approach of adding retries to layer that provides unreliable service, see my comments on the other emails, and feel free to respond there. Or. From a-17-m at abnamro.nl Tue Feb 12 23:44:13 2008 From: a-17-m at abnamro.nl (Cleo Quintana) Date: Wed, 13 Feb 2008 15:44:13 +0800 Subject: [ofa-general] We talked on the web Message-ID: <271966859.75573946869489@abnamro.nl> Hello! I am tired tonight. I am nice girl that would like to chat with you. Email me at Marianne at IndividualImprove.info only, because I am using my friend's email to write this. 
I would like to share some of my pics. From ogerlitz at voltaire.com Tue Feb 12 23:20:48 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 13 Feb 2008 09:20:48 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> Message-ID: <47B29A50.7050400@voltaire.com> Sean Hefty wrote: > I'm really not all that familiar with ipoib protocol, but if it's being > implemented over an RC connection, then adding an RNR retry seems to make sense > to me. I believe using UC is better, but if it's over RC, I don't know that we > want to take the hit of tearing down and re-establishing the connection just > because we have a fast sender. (This is just an opinion based on no fact > whatsoever.) Hi Sean, As I see it, the issue here is that from the view point of upper layers (TCP, UDP, etc) the IP service is expected to provide unreliable service. Hence layers that do need reliability such TCP, add that in their protocol, so adding it in the IP layer and below (eg IPoIB or the HW it uses) is in a way redundant since the upper layer is not aware to that. For example when a NIC does TCP checksum, then TCP doesn't, while here both layers take care of reliability. Also, applications written over unreliable layers such as UDP might have negative impact on their performance, eg video streaming. With all that, I am not religiously against adding the retries... however, I prefer to understand the original problem which seems to be an issue relates to HCA interoperability before putting the solution in the code. We both agree that UC is the way to go, and in that case the real problem would pop again, but higher layers would have to take care of it. As for your fast send comment, does this means that you see the HCA as an entity that does queuing? Or. 
From mashirle at us.ibm.com Tue Feb 12 13:22:01 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Tue, 12 Feb 2008 13:22:01 -0800 Subject: [ofa-general] openSM set up help In-Reply-To: <1202883588.31050.102.camel@hrosenstock-ws.xsigo.com> References: <1202883588.31050.102.camel@hrosenstock-ws.xsigo.com> Message-ID: <1202851321.4019.115.camel@new-host-2> Hello Hal, > If it is truly passthru, then the subnet is a point to point connection > (CA <-> CA). OpenSM works in that mode. I don't think any special setup > is needed. If you need multiple of these, then you need multiple SMs > (each running on a different CA port and), each configured with a > different subnet prefix. Thanks for your quick response. Both nodes ports are in DOWN status. I thought it should be UP if point to point connection, right? Shirley From diego.guella at sircomtech.com Wed Feb 13 01:18:53 2008 From: diego.guella at sircomtech.com (Diego Guella) Date: Wed, 13 Feb 2008 10:18:53 +0100 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopbackconnections. References: <20080212220929.24442.76874.stgit@dell3.ogc.int> Message-ID: <015c01c86e21$75317210$05c8a8c0@DIEGO> One question below ----- Original Message ----- From: "Roland Dreier" To: "Steve Wise" Cc: ; ; Sent: Wednesday, February 13, 2008 1:26 AM Subject: Re: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopbackconnections. > applied, although: > > > +static void is_loopback_dst(struct iw_cm_id *cm_id) > > +{ > > + struct net_device *dev; > > + > > + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); > > + if (!dev) > > + return 0; how can a static void function return 0? 
%) > > + dev_put(dev); > > + return 1; same here From blaster133 at juno.com Wed Feb 13 02:32:03 2008 From: blaster133 at juno.com (Aisha Chapman) Date: Wed, 13 Feb 2008 02:32:03 -0800 Subject: [ofa-general] Die neue Software zu altem Preis: es lohnt sich Message-ID: <01c86de8$9df55380$9d946a7c@blaster133> Die Software auf allen europaischen Sprachen, fur Windows und Macintosh vorherbestimmt. Die konnen Sie momentan bekommen. Nur bezahlen und auslasten. Hier prasentiert sind nicht teuere, aber echte und vollige Produkte der Software.Sie stellen jedes Programm leicht auf mit der Hilfe der professionellen Konsultation des Anwenderdienstes. Wenn Sie Fragen haben, bekommen Sie schnelle Antworte. Die Ruckzahlung ist moglich. Sie kaufen nur die ausgezeichnet funktionierende Software http://geocities.com/simonlittle346/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From vlad at lists.openfabrics.org Wed Feb 13 03:06:49 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 13 Feb 2008 03:06:49 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080213-0200 daily build status Message-ID: <20080213110649.A8479E60224@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed 
on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From a-andyca at adhdsimulation.com Wed Feb 13 03:55:55 2008 From: a-andyca at adhdsimulation.com (Mariana Avila) Date: Wed, 13 Feb 2008 19:55:55 +0800 Subject: [ofa-general] why are you not repying? Message-ID: <01c86e7a$718a1780$c721497d@a-andyca> Hello! I am bored this evening. 
I am nice girl that would like to chat with you. Email me at Cecilia at IndividualImprove.info only, because I am using my friend's email to write this. Would you mind if I share some of my pictures with you? From dwspecialcollectionm at specialcollection.com Wed Feb 13 04:35:34 2008 From: dwspecialcollectionm at specialcollection.com (Irvin Winter) Date: Wed, 13 Feb 2008 15:35:34 +0300 Subject: [ofa-general] Medications that you need. Message-ID: <01c86e56$12b2a700$b8fc6a4e@dwspecialcollectionm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Same quality! Save your money, buy pills immediately! http://geocities.com/nedmcclure539/ We provide confidential and secure purchase! From hrosenstock at xsigo.com Wed Feb 13 06:00:23 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Wed, 13 Feb 2008 06:00:23 -0800 Subject: [ofa-general] openSM set up help In-Reply-To: <1202851321.4019.115.camel@new-host-2> References: <1202883588.31050.102.camel@hrosenstock-ws.xsigo.com> <1202851321.4019.115.camel@new-host-2> Message-ID: <1202911223.31050.142.camel@hrosenstock-ws.xsigo.com> Hi again Shirley, On Tue, 2008-02-12 at 13:22 -0800, Shirley Ma wrote: > Hello Hal, > > > If it is truly passthru, then the subnet is a point to point connection > > (CA <-> CA). OpenSM works in that mode. I don't think any special setup > > is needed. If you need multiple of these, then you need multiple SMs > > (each running on a different CA port and), each configured with a > > different subnet prefix. > > Thanks for your quick response. Both nodes ports are in DOWN status. I > thought it should be UP if point to point connection, right? DOWN is the PortState and is due to PortPhysicalState not being LinkUp and has nothing to do with SM. SM only gets into the act once PortPhysicalState is LinkUp (and PortState is then INIT). I would suspect things may not be "connected" the way they need to be and I have no idea how to do that. 
You might want to contact Voltaire customer support. -- Hal > Shirley > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From rdreier at cisco.com Wed Feb 13 07:46:17 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 13 Feb 2008 07:46:17 -0800 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopbackconnections. In-Reply-To: <015c01c86e21$75317210$05c8a8c0@DIEGO> (Diego Guella's message of "Wed, 13 Feb 2008 10:18:53 +0100") References: <20080212220929.24442.76874.stgit@dell3.ogc.int> <015c01c86e21$75317210$05c8a8c0@DIEGO> Message-ID: > how can a static void function return 0? good question... I've fixed the patch in my tree. From swise at opengridcomputing.com Wed Feb 13 07:47:29 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 13 Feb 2008 09:47:29 -0600 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopbackconnections. In-Reply-To: References: <20080212220929.24442.76874.stgit@dell3.ogc.int> <015c01c86e21$75317210$05c8a8c0@DIEGO> Message-ID: <47B31111.9030702@opengridcomputing.com> Roland Dreier wrote: > > how can a static void function return 0? > > good question... I've fixed the patch in my tree. oops. From rdreier at cisco.com Wed Feb 13 07:49:16 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 13 Feb 2008 07:49:16 -0800 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopbackconnections. In-Reply-To: (Roland Dreier's message of "Wed, 13 Feb 2008 07:46:17 -0800") References: <20080212220929.24442.76874.stgit@dell3.ogc.int> <015c01c86e21$75317210$05c8a8c0@DIEGO> Message-ID: Steve, I had to update the patch adding an include and fixing the function declaration (as below)... but how much testing have you done with this?? 
commit 8704e9a8790cc9e394198663c1c9150c899fb9a2 Author: Steve Wise Date: Tue Feb 12 16:09:29 2008 -0600 RDMA/cxgb3: Fail loopback connections The cxgb3 HW and driver don't support loopback RDMA connections. So fail any connection attempt where the destination address is local. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index e9a08fa..320f2b6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -1784,6 +1785,17 @@ err: return err; } +static int is_loopback_dst(struct iw_cm_id *cm_id) +{ + struct net_device *dev; + + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); + if (!dev) + return 0; + dev_put(dev); + return 1; +} + int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err = 0; @@ -1791,6 +1803,11 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_ep *ep; struct rtable *rt; + if (is_loopback_dst(cm_id)) { + err = -ENOSYS; + goto out; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); if (!ep) { printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__); From swise at opengridcomputing.com Wed Feb 13 07:51:39 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 13 Feb 2008 09:51:39 -0600 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopbackconnections. In-Reply-To: References: <20080212220929.24442.76874.stgit@dell3.ogc.int> <015c01c86e21$75317210$05c8a8c0@DIEGO> Message-ID: <47B3120B.3040409@opengridcomputing.com> Roland Dreier wrote: > Steve, I had to update the patch adding an include and fixing the > function declaration (as below)... but how much testing have you done > with this?? > Now I understand. I thought I'd fixed these! I fixed them locally in the test tree on my victim and then tested, but forgot to update my git tree. 
Sorry for this. You want me to resubmit? > commit 8704e9a8790cc9e394198663c1c9150c899fb9a2 > Author: Steve Wise > Date: Tue Feb 12 16:09:29 2008 -0600 > > RDMA/cxgb3: Fail loopback connections > > The cxgb3 HW and driver don't support loopback RDMA connections. So > fail any connection attempt where the destination address is local. > > Signed-off-by: Steve Wise > Signed-off-by: Roland Dreier > > diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c > index e9a08fa..320f2b6 100644 > --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c > +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c > @@ -35,6 +35,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -1784,6 +1785,17 @@ err: > return err; > } > > +static int is_loopback_dst(struct iw_cm_id *cm_id) > +{ > + struct net_device *dev; > + > + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); > + if (!dev) > + return 0; > + dev_put(dev); > + return 1; > +} > + > int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) > { > int err = 0; > @@ -1791,6 +1803,11 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) > struct iwch_ep *ep; > struct rtable *rt; > > + if (is_loopback_dst(cm_id)) { > + err = -ENOSYS; > + goto out; > + } > + > ep = alloc_ep(sizeof(*ep), GFP_KERNEL); > if (!ep) { > printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__); From kliteyn at dev.mellanox.co.il Wed Feb 13 04:02:16 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 13 Feb 2008 14:02:16 +0200 Subject: [ofa-general] [PATCH] qperf: seg fault in help Message-ID: <47B2DC48.4040501@dev.mellanox.co.il> Hi Johann, Running "qperf --help SOME_WRONG_STRING" will fail with core dump. The following patch should fix it. Please apply to ofed_1_3 and master. 
Thanks -- Yevgeny Signed-off-by: Yevgeny Kliteynik --- src/qperf.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qperf.c b/src/qperf.c index f38247d..dbeae5d 100644 --- a/src/qperf.c +++ b/src/qperf.c @@ -713,7 +713,7 @@ do_option(OPTION *option, char ***argvp) if (streq(*usage, category)) break; if (!*usage) - error(0, "cannot find help category %s; try: qperf --help"); + error(0, "cannot find help category %s; try: qperf --help", category); printf("%s", usage[1]); exit(0); } else if (streq(t, "host")) { -- 1.5.1.4 From swise at opengridcomputing.com Wed Feb 13 08:03:16 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 13 Feb 2008 10:03:16 -0600 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Fail loopbackconnections. In-Reply-To: <47B3120B.3040409@opengridcomputing.com> References: <20080212220929.24442.76874.stgit@dell3.ogc.int> <015c01c86e21$75317210$05c8a8c0@DIEGO> <47B3120B.3040409@opengridcomputing.com> Message-ID: <47B314C4.2080905@opengridcomputing.com> Steve Wise wrote: > Roland Dreier wrote: >> Steve, I had to update the patch adding an include and fixing the >> function declaration (as below)... but how much testing have you done >> with this?? >> > > Now I understand. I thought I'd fixed these! I fixed them locally in > the test tree on my victim and then tested, but forgot to update my git > tree. > > Sorry for this. > > You want me to resubmit? > BTW: Testing: I cloned my git tree, resolved the two compile issues (void + include), then built and installed the kernel (2.6.25-rc1 + the fixed patch). I then ensured that a connection to the local address caused the ENOSYS error. I just forgot to pull the updated file back into my stg managed git tree. 
:( From RAISCH at de.ibm.com Wed Feb 13 04:11:51 2008 From: RAISCH at de.ibm.com (Christoph Raisch) Date: Wed, 13 Feb 2008 13:11:51 +0100 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> Message-ID: > > > Chelsio's T3 HW doesn't support this. For ehca we currently can't modify a large MR when it has been allocated. EHCA Hardware expects the pages to be there (MRs must not have "holes"). This is also true for the global MR covering all kernel space. Therefore we still need the memory to be "pinned" if ib_umem_get() is called. So with the current implementation we don't have much use for a notifier. "It is difficult to make predictions, especially about the future" Gruss / Regards Christoph Raisch + Hoang-Nam Nguyen From jackm at dev.mellanox.co.il Wed Feb 13 06:23:50 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 13 Feb 2008 16:23:50 +0200 Subject: [ofa-general] [PATCH] IPoIB: on pkey_change event, invoke dev_stop() before doing delay_open() Message-ID: <200802131623.50313.jackm@dev.mellanox.co.il> IPoIB: do ipoib_ib_dev_stop() on pkey_event when cannot find the old pkey. In Pkey-event handling, if the old pkey is no longer available, the driver must call ipoib_ib_dev_stop() -- just as it does when the pkey is still available (see procedure __ipoib_ib_dev_flush() ). When a pkey becomes available, the driver will perform ipoib_open() -- which assumes that, for example, the QP is in RESET, the cm_id has been destroyed/deleted, etc. If ipoib_ib_dev_stop() is not called as described above, then these assumptions will be false. Found by: Mellanox QA. 
Signed-off-by: Jack Morgenstein Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-13 15:49:28.000000000 +0200 +++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-13 15:54:31.439439000 +0200 @@ -952,6 +952,7 @@ static void __ipoib_ib_dev_flush(struct if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ipoib_ib_dev_down(dev, 0); + ipoib_ib_dev_stop(dev, 0); ipoib_pkey_dev_delay_open(dev); return; } From kliteyn at dev.mellanox.co.il Wed Feb 13 06:29:39 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 13 Feb 2008 16:29:39 +0200 Subject: [ofa-general] [PATCH] qperf: seg fault in help Message-ID: <47B2FED3.4090805@dev.mellanox.co.il> Hi Johann, Running "qperf --help SOME_WRONG_STRING" will fail with core dump. The following patch should fix it. Please apply to ofed_1_3 and master. Thanks -- Yevgeny Signed-off-by: Yevgeny Kliteynik --- src/qperf.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qperf.c b/src/qperf.c index f38247d..dbeae5d 100644 --- a/src/qperf.c +++ b/src/qperf.c @@ -713,7 +713,7 @@ do_option(OPTION *option, char ***argvp) if (streq(*usage, category)) break; if (!*usage) - error(0, "cannot find help category %s; try: qperf --help"); + error(0, "cannot find help category %s; try: qperf --help", category); printf("%s", usage[1]); exit(0); } else if (streq(t, "host")) { -- 1.5.1.4 From dwrmam at rma.edu Wed Feb 13 08:20:09 2008 From: dwrmam at rma.edu (Jeannine Whitlock) Date: Wed, 13 Feb 2008 17:20:09 +0100 Subject: [ofa-general] Win money and have fun with Golden Gate Casino! Message-ID: <01c86e64$af279150$f6e84358@dwrmam> There is no more convenient way to win real money than joining our Golden Gate Casino members. All the most popular casino games! 
Easy to download, install and use free software! One of the industry's best welcome bonus $2400! We provide 24 hours a day, 7 days a week support and service! Truly fair play guaranteed for players. High level of security! http://geocities.com/petejarvis99/ Start downloading free software now! From pradeeps at linux.vnet.ibm.com Wed Feb 13 09:34:06 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 13 Feb 2008 09:34:06 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B29443.9010206@voltaire.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B20367.7040900@dev.mellanox.co.il> <47B21DC4.70604@linux.vnet.ibm.com> <47B29443.9010206@voltaire.com> Message-ID: <47B32A0E.6060005@linux.vnet.ibm.com> Or Gerlitz wrote: > Pradeep Satyanarayana wrote: >> I brought this issue up on the mailing list sometime in the summer of >> 2007 is >> my recollection. I could not locate that with a quick search of the >> archives. >> I will probably do that again later. > > Its from December 2007 > http://lists.openfabrics.org/pipermail/general/2007-December/044299.html > >> However, the crux of the issue is that I was seeing "send completion >> errors" and >> that is what prompted me to change the retry counts. Please see Table >> 78 "Completion Error Handling for RC Send Queues" in the IB Spec for >> reference. >> And changing the retry counts did help. > > I understand that changing the retry counts eliminated the issue you > were seeing in your setup, however, its more of an observation than an > actual problem statement whose solution can be judged. Apart from that, > I have concerns regarding the approach of adding retries to layer that > provides unreliable service, see my comments on the other emails, and > feel free to respond there. Hello Or, Thanks for the pointer to the December mailing list. 
I have actually brought up this issue much before that time. Here is the link: http://lists.openfabrics.org/pipermail/general/2007-April/035308.html I was seeing "send completion errors" which means the QP was torn down and being recreated all the time. It was on account of this that I changed the retry counts, not the other way round. In this case the TCP timers are so large (hundreds of ms) compared to micro-seconds for Infiniband, that before TCP takes action to recover from errors, the QP is torn down (and recreated). As you can guess, the performance tanks. I am not clear why you think that this was an observation rather than an actual problem. Pradeep From johann.george at qlogic.com Wed Feb 13 10:04:28 2008 From: johann.george at qlogic.com (Johann George) Date: Wed, 13 Feb 2008 10:04:28 -0800 Subject: [ofa-general] Re: [PATCH] qperf: seg fault in help In-Reply-To: <47B2DC48.4040501@dev.mellanox.co.il> References: <47B2DC48.4040501@dev.mellanox.co.il> Message-ID: <20080213180428.GA13960@cuprite.pathscale.com> Yevgeny, Thanks. It has been applied. Johann On Wed, Feb 13, 2008 at 02:02:16PM +0200, Yevgeny Kliteynik wrote: > Hi Johann, > > Running "qperf --help SOME_WRONG_STRING" will fail with core dump. > The following patch should fix it. > Please apply to ofed_1_3 and master. 
> > Thanks > > -- Yevgeny > > Signed-off-by: Yevgeny Kliteynik > --- > src/qperf.c | 2 +- > 1 files changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/qperf.c b/src/qperf.c > index f38247d..dbeae5d 100644 > --- a/src/qperf.c > +++ b/src/qperf.c > @@ -713,7 +713,7 @@ do_option(OPTION *option, char ***argvp) > if (streq(*usage, category)) > break; > if (!*usage) > - error(0, "cannot find help category %s; try: qperf --help"); > + error(0, "cannot find help category %s; try: qperf --help", category); > printf("%s", usage[1]); > exit(0); > } else if (streq(t, "host")) { > -- > 1.5.1.4 > From sean.hefty at intel.com Wed Feb 13 10:08:18 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 13 Feb 2008 10:08:18 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B29A50.7050400@voltaire.com> References: <476C2F62.2020900@linux.vnet.ibm.com><47B153E8.2090803@voltaire.com><000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B29A50.7050400@voltaire.com> Message-ID: <000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> >As I see it, the issue here is that from the view point of upper layers >(TCP, UDP, etc) the IP service is expected to provide unreliable >service. Hence layers that do need reliability such TCP, add that in >their protocol, so adding it in the IP layer and below (eg IPoIB or the >HW it uses) is in a way redundant since the upper layer is not aware to >that. IMO, the fact that TCP implements reliability doesn't mean it's unnecessary in underlying layers. For example, wireless typically adds reliability at the link layer because the link itself is so unreliable. If adding in reliability in the underlying layers improves overall performance, then it makes sense to add it, independent of the upper level protocol. 
Since RC is our 'link layer', overrunning the receiver doesn't just result in IP resending the packet, but transitioning the QP into an error state, cleaning up, re-establishing the connection, and then resending the packet. This works, just not well based on what Pradeep has seen. >With all that, I am not religiously against adding the retries... >however, I prefer to understand the original problem which seems to be >an issue relates to HCA interoperability before putting the solution in >the code. We both agree that UC is the way to go, and in that case the >real problem would pop again, but higher layers would have to take care >of it. I definitely think UC is worth trying, but I would like to see how it performs against RC. UC doesn't quite have the same issue as RC, since overrunning the receiver doesn't require tearing down the connection. - Sean From clameter at sgi.com Wed Feb 13 10:51:58 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 13 Feb 2008 10:51:58 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213032533.GC32047@obsidianresearch.com> References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213032533.GC32047@obsidianresearch.com> Message-ID: On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > But this isn't how IB or iwarp work at all. What you describe is a > significant change to the general RDMA operation and requires changes to > both sides of the connection and the wire protocol. Yes it may require a separate connection between both sides where a kind of VM notification protocol is established to tear these things down and set them up again. That is if there is nothing in the RDMA protocol that allows a notification to the other side that the mapping is being torn down.
> - In RDMA (iwarp and IB versions) the hardware page tables exist to > linearize the local memory so the remote does not need to be aware > of non-linearities in the physical address space. The main > motivation for this is kernel bypass where the user space app wants > to instruct the remote side to DMA into memory using user space > addresses. Hardware provides the page tables to switch from > incoming user space virtual addresses to physical addresess. s/switch/translate I guess. That is good and those page tables could be used for the notification scheme to enable reclaim. But they are optional and are maintaining the driver state. The linearization could be reconstructed from the kernel page tables on demand. > Many kernel RDMA drivers (SCSI, NFS) only use the HW page tables > for access control and enforcing the liftime of the mapping. Well the mapping would have to be on demand to avoid the issues that we currently have with pinning. The user API could stay the same. If the driver tracks the mappings using the notifier then the VM can make sure that the right things happen on exit etc etc. > The page tables in the RDMA hardware exist primarily to support > this, and not for other reasons. The pinning of pages is one part > to support the HW page tables and one part to support the RDMA > lifetime rules, the liftime rules are what cause problems for > the VM. So the driver software can tear down and establish page tables entries at will? I do not see the problem. The RDMA hardware is one thing, the way things are visible to the user another. If the driver can establish and remove mappings as needed via RDMA then the user can have the illusion of persistent RDMA memory. This is the same as virtual memory providing the illusion of a process having lots of memory all for itself. > - The wire protocol consists of packets that say 'Write XXX bytes to > offset YY in Region RRR'. Creating a region produces the RRR label > and currently pins the pages. 
So long as the RRR label is valid the > remote side can issue write packets at any time without any > further synchronization. There is no wire level events associated > with creating RRR. You can pass RRR to the other machine in any > fashion, even using carrier pigeons :) > - The RDMA layer is very general (ala TCP), useful protocols (like SCSI) > are built on top of it and they specify the lifetime rules and > protocol for exchanging RRR. Well yes of course. What is proposed here is an additional notification mechanism (could even be via tcp/udp to simplify things) that would manage the mappings at a higher level. The writes would not occur if the mapping has not been established. > This is your step 'A will then send a message to B notifying..'. > It simply does not exist in the protocol specifications Of course. You need to create an additional communication layer to get that. > What it boils down to is that to implement true removal of pages in a > general way the kernel and HCA must either drop packets or stall > incoming packets, both are big performance problems - and I can't see > many users wanting this. Enterprise style people using SCSI, NFS, etc > already have short pin periods and HPC MPI users probably won't care > about the VM issues enough to warrent the performance overhead. True maybe you cannot do this by simply staying within the protocol bounds of RDMA that is based on page pinning if the RDMA protocol does not support a notification to the other side that the mapping is going away. If RDMA cannot do this then you would need additional ways of notifying the remote side that pages/mappings are invalidated. 
From clameter at sgi.com Wed Feb 13 11:00:05 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 13 Feb 2008 11:00:05 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213040905.GQ29340@mv.qlogic.com> References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213040905.GQ29340@mv.qlogic.com> Message-ID: On Tue, 12 Feb 2008, Christian Bell wrote: > You're arguing that a HW page table is not needed by describing a use > case that is essentially what all RDMA solutions already do above the > wire protocols (all solutions except Quadrics, of course). The HW page table is not essential to the notification scheme. That the RDMA uses the page table for linearization is another issue. A chip could just have a TLB cache and lookup the entries using the OS page table f.e. > > Lets say you have a two systems A and B. Each has their memory region MemA > > and MemB. Each side also has page tables for this region PtA and PtB. > > If either side then accesses the page again then the reverse process > > happens. If B accesses the page then it wil first of all incur a page > > fault because the entry in PtB is missing. The fault will then cause a > > message to be send to A to establish the page again. A will create an > > entry in PtA and will then confirm to B that the page was established. At > > that point RDMA operations can occur again. > > The notifier-reclaim cycle you describe is akin to the out-of-band > pin-unpin control messages used by existing communication libraries. > Also, I think what you are proposing can have problems at scale -- A > must keep track of all of the (potentially many systems) of memA and > cooperatively get an agreement from all these systems before reclaiming > the page. Right. We (SGI) have done something like this for a long time with XPmem and it scales ok. 
> When messages are sufficiently large, the control messaging necessary > to setup/teardown the regions is relatively small. This is not > always the case however -- in programming models that employ smaller > messages, the one-sided nature of RDMA is the most attractive part of > it. The messaging would only be needed if a process comes under memory pressure. As long as there is enough memory nothing like this will occur. > Nothing any communication/runtime system can't already do today. The > point of RDMA demand paging is enabling the possibility of using RDMA > without the implied synchronization -- the optimistic part. Using > the notifiers to duplicate existing memory region handling for RDMA > hardware that doesn't have HW page tables is possible but undermines > the more important consumer of your patches in my opinion. The notifier schemet should integrate into existing memory region handling and not cause a duplication. If you already have library layers that do this then it should be possible to integrate it. > One other area that has not been brought up yet (I think) is the > applicability of notifiers in letting users know when pinned memory > is reclaimed by the kernel. This is useful when a lower-level > library employs lazy deregistration strategies on memory regions that > are subsequently released to the kernel via the application's use of > munmap or sbrk. Ohio Supercomputing Center has work in this area but > a generalized approach in the kernel would certainly be welcome. The driver gets the notifications about memory being reclaimed. The driver could then notify user code about the release as well. Pinned memory current *cannot* be reclaimed by the kernel. The refcount is elevated. This means that the VM tries to remove the mappings and then sees that it was not able to remove all references. Then it gives up and tries again and again and again.... Thus the potential for livelock. 
From clameter at sgi.com Wed Feb 13 11:02:15 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 13 Feb 2008 11:02:15 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080208155641.2258ad2c.akpm@linux-foundation.org> <20080209012446.GB7051@v2.random> <20080209015659.GC7051@v2.random> <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> Message-ID: On Wed, 13 Feb 2008, Christoph Raisch wrote: > For ehca we currently can't modify a large MR when it has been allocated. > EHCA Hardware expects the pages to be there (MRs must not have "holes"). > This is also true for the global MR covering all kernel space. > Therefore we still need the memory to be "pinned" if ib_umem_get() is > called. It cannot be freed and then reallocated? What happens when a process exits? From or.gerlitz at gmail.com Wed Feb 13 11:17:18 2008 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 13 Feb 2008 21:17:18 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B32A0E.6060005@linux.vnet.ibm.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B20367.7040900@dev.mellanox.co.il> <47B21DC4.70604@linux.vnet.ibm.com> <47B29443.9010206@voltaire.com> <47B32A0E.6060005@linux.vnet.ibm.com> Message-ID: <15ddcffd0802131117o1d6f206ck208681bd025eccbe@mail.gmail.com> On 2/13/08, Pradeep Satyanarayana wrote: > Or Gerlitz wrote: >> I understand that changing the retry counts eliminated the issue you >> were seeing in your setup, however, its more of an observation than an >> actual problem statement whose solution can be judged. > I am not clear why you think that this was an observation rather than an actual problem.
I did not mean to say that there is no actual problem, I just don't see here actual evidence that proves or suggests that indeed --the-- problem is different speeds of the HCAs, the fact that adding retries eliminated the send errors is not enough. For example, maybe adding just RNR retries would do well? maybe just adding retries would? maybe you were seeing it in April 2007 before NAPI was implemented? etc, etc. I have sent a note on that to the ewg list asking if people can reproduce the problem. Best if you can name two HCA types + FW version + node setting + test that can reproduce the problem. Also, you did well without this patch in the code for 10 months now, so I don't see why it has to go into ofed in such a rush, the fact that Roland missed commenting on it twice, should not stop you from sending it to him in the third time... maintainers are busy, it happens. Or. From pradeeps at linux.vnet.ibm.com Wed Feb 13 11:36:58 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 13 Feb 2008 11:36:58 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <15ddcffd0802131117o1d6f206ck208681bd025eccbe@mail.gmail.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B20367.7040900@dev.mellanox.co.il> <47B21DC4.70604@linux.vnet.ibm.com> <47B29443.9010206@voltaire.com> <47B32A0E.6060005@linux.vnet.ibm.com> <15ddcffd0802131117o1d6f206ck208681bd025eccbe@mail.gmail.com> Message-ID: <47B346DA.30301@linux.vnet.ibm.com> Or Gerlitz wrote: > On 2/13/08, Pradeep Satyanarayana wrote: >> Or Gerlitz wrote: > >>> I understand that changing the retry counts eliminated the issue you >>> were seeing in your setup, however, its more of an observation than an >>> actual problem statement whose solution can be judged. > >> I am not clear why you think that this was an observation rather than an actual problem.
> > I did not mean to say that there is no actual problem, I just don't > see here an actual evidence that proves or suggests that indeed > --the-- problem is different speeds of the HCAs, the fact thay adding > retries eliminated the send errors is not enough. For example, maybe > adding just RNR retries would do well? maybe just adding retries > would? maybe you were seeing it at April 2007 before NAPI was > implemented? etc, etc. I have sent a note on that to the ewg list > asking if people can reproduce the problem. Best if you can name two > HCA types + FW version + node setting + test that can reproduce the > problem. Unfortunately, I do not have the same setup that I had previously. So, I would be unable to provide you all the details at this point. However, I do remember it was ehca and mthca on ppc64 machines. If memory serves me right, just adding the retries solved the issue. However, as pointed out in Table 78 of the IB spec I changed rnr_retries too as that could be a possibility too. I wanted to cover that case (rnr_retries) if some else ran into it. > > Also, you did well without this patch in the code for 10 months now, > so I don't see why it has to go into ofed in such a rush, the fact > that Roland missed commenting on it twice, should not stop you from > sending it to him in the third time... maintainers are busy, it > happens. The fact is I have always been running that with that change on my systems. As you will see from the history of the patches, I did not want that to be a sticking point and removed that from the mainline patch. The plan was to reopen the conversation to get it into mainline after OFED 1.3. You may have seen that I have tried bringing up that issue several times in the past. 
Pradeep From christian.bell at qlogic.com Wed Feb 13 11:46:21 2008 From: christian.bell at qlogic.com (Christian Bell) Date: Wed, 13 Feb 2008 11:46:21 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213040905.GQ29340@mv.qlogic.com> Message-ID: <20080213194621.GD19742@mv.qlogic.com> On Wed, 13 Feb 2008, Christoph Lameter wrote: > Right. We (SGI) have done something like this for a long time with XPmem > and it scales ok. I'd dispute this based on experience developing PGAS language support on the Altix but more importantly (and less subjectively), I think that "scales ok" refers to a very specific case. Sure, pages (and/or regions) can be large on some systems and the number of systems may not always be in the thousands but you're still claiming scalability for a mechanism that essentially logs who accesses the regions. Then there's the fact that reclaim becomes a collective communication operation over all region accessors. Makes me nervous. > > When messages are sufficiently large, the control messaging necessary > > to setup/teardown the regions is relatively small. This is not > > always the case however -- in programming models that employ smaller > > messages, the one-sided nature of RDMA is the most attractive part of > > it. > > The messaging would only be needed if a process comes under memory > pressure. As long as there is enough memory nothing like this will occur. > > > Nothing any communication/runtime system can't already do today. The > > point of RDMA demand paging is enabling the possibility of using RDMA > > without the implied synchronization -- the optimistic part. 
Using > > the notifiers to duplicate existing memory region handling for RDMA > > hardware that doesn't have HW page tables is possible but undermines > > the more important consumer of your patches in my opinion. > > The notifier scheme should integrate into existing memory region > handling and not cause a duplication. If you already have library layers > that do this then it should be possible to integrate it. I appreciate that you're trying to make a general case for the applicability of notifiers on all types of existing RDMA hardware and wire protocols. Also, I'm not disagreeing whether a HW page table is required or not: clearly it's not required to make *some* use of the notifier scheme. However, short of providing user-level notifications for pinned pages that are inadvertently released to the O/S, I don't believe that the patchset provides any significant added value for the HPC community that can't optimistically do RDMA demand paging. . . christian From pradeeps at linux.vnet.ibm.com Wed Feb 13 11:50:03 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 13 Feb 2008 11:50:03 -0800 Subject: [ofa-general] [PATCH] IPoIB/CM fix for bug#906 Message-ID: <47B349EB.9060501@linux.vnet.ibm.com> This is a patch against the for-2.25 tree for bug#906 -fail to destroy ipoib rx QP https://bugs.openfabrics.org/show_bug.cgi?id=906 This problem was discovered during OFED 1.3 testing and a patch has been submitted for the OFED 1.3 tree. One can follow the OFED discussions by following the thread at: http://lists.openfabrics.org/pipermail/ewg/2008-February/005886.html Roland, can this be queued for 2.6.25?
Signed-off-by: Pradeep Satyanarayana --- --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-02-13 14:14:47.000000000 -0500 +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-02-13 14:01:55.000000000 -0500 @@ -861,11 +861,11 @@ void ipoib_cm_dev_stop(struct net_device ipoib_warn(priv, "RX drain timing out\n"); /* - * assume the HW is wedged and just free up everything. + * assume error and move to rx_reap list */ - list_splice_init(&priv->cm.rx_flush_list, &list); - list_splice_init(&priv->cm.rx_error_list, &list); - list_splice_init(&priv->cm.rx_drain_list, &list); + list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_reap_list); + list_splice_init(&priv->cm.rx_error_list, &priv->cm.rx_reap_list); + list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); break; } spin_unlock_irq(&priv->lock); From jgunthorpe at obsidianresearch.com Wed Feb 13 11:51:44 2008 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Wed, 13 Feb 2008 12:51:44 -0700 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213032533.GC32047@obsidianresearch.com> Message-ID: <20080213195144.GE31435@obsidianresearch.com> On Wed, Feb 13, 2008 at 10:51:58AM -0800, Christoph Lameter wrote: > On Tue, 12 Feb 2008, Jason Gunthorpe wrote: > > > But this isn't how IB or iwarp work at all. What you describe is a > > significant change to the general RDMA operation and requires changes to > > both sides of the connection and the wire protocol. > > Yes it may require a separate connection between both sides where a > kind of VM notification protocol is established to tear these things down and > set them up again. That is if there is nothing in the RDMA protocol that > allows a notification to the other side that the mapping is being down > down. 
Well, yes, you could build this thing you are describing on top of the RDMA protocol and get some support from some of the hardware - but it is a new set of protocols and they would need to be implemented in several places. It is not transparent to userspace and it is not compatible with existing implementations. Unfortunately it really has little to do with the drivers - changes, for instance, need to be made to support this in the user space MPI libraries. The RDMA ops do not pass through the kernel, userspace talks directly to the hardware which complicates building any sort of abstraction. That is where I think you run into trouble, if you ask the MPI people to add code to their critical path to support swapping they probably will not be too interested. At a minimum to support your idea you need to check on every RDMA if the remote page is mapped... Plus the overheads Christian was talking about in the OOB channel(s). Jason From psjfm at yahoo.ca Wed Feb 13 12:01:03 2008 From: psjfm at yahoo.ca (Reyna Cunningham) Date: Wed, 13 Feb 2008 16:01:03 -0400 Subject: [ofa-general] The Most Popular Software Available Message-ID: <384968592.97640641904350@yahoo.ca> Cheap and excellent software - too good to be true? 
Read information belowAnybody who is going to purchase legal PC and Mac software at low prices will definitely find necessary software products here, hether he/she is a corporate buyer, or owner of a small company, or just purchasing software for his/her own needs.View what we have to propose http://geocities.com/trumanmercer382/Most popular software in sight are:*Microsoft Office 2007 Enterprise: Retail price today - $899.00; Our only for today - $79.95 *Microsoft Windows XP Professional with SP2: Retail price this day - $269.99; Our only today - $49.95 *Adobe Acrobat 3D: Retail price today - $995.00; Our only - $59.95 *Adobe Photoshop CS for Mac: Retail price this day - $500.00; Our just - $49.95 *Microsoft Windows XP Professional with SP2: Retail price today - $269.99; Our now just - $49.95 *Adobe Pagemaker V 7.01 PC: Retail price today - $400.00; Our just - $49.95 *Adobe Photoshop CS2 V 9.0: Retail price for this time - $599.00; Our just - $69.95 *Microsoft Office XP Professional: Retail price for this time - $499.00; Our just - $49.95View all software http://geocities.com/trumanmercer382/ I say I am your motherAnd put. Mother. or were you both. I shall never have theblessing of. Now Dian from thy altar do I. Strange upon your wife.You give. I play the noble housewife. The secrets of your army and. Way of youthShe knew her. -------------- next part -------------- An HTML attachment was scrubbed... URL: From Caitlin.Bestler at neterion.com Wed Feb 13 12:05:37 2008 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Wed, 13 Feb 2008 15:05:37 -0500 Subject: [ofa-general] RE: Demand paging for memory regions In-Reply-To: <20080213180935.95AA7E280BA@openfabrics.org> References: <20080213180935.95AA7E280BA@openfabrics.org> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD770305B0CB@nekter> I have a few comments on the semantics of memory regions, and how it relates to usage scenarios for memory notifiers and/or page faulting. 
First, there is nothing in RDMA semantics that demands that each page of a memory region be pre-mapped to a physical page before the page can be advertised remotely. What is expected is that these advertisements not be at risk. There has to be an honest expectation that if a 40 page buffer is advertised that there are 40 pages available to back that advertisement. It is simply unacceptable for one end of an RDMA connection to back up the network because it cannot plan its buffer allocations. Network retransmission is not a handy spare scratchpad where buffers can be "cached" via retransmission. This is somewhat akin to guaranteeing a landing slot for an airplane. You don't really need to 'pin' the landing resources for the specific plane for the entire duration of the flight, but you better have more than just good intentions to make your best effort to find somewhere for the plane to land when it finally arrives. When there is no buffer available, there has been a connection. Having failed to meet the requirements the receiver should assume that the connection will be torn down. But there is a little bit of wiggle room here. There is no need to mandate that the connection MUST be torn down. This was explicitly discussed by the IETF's RDDP working group while drafting the iWARP RFCs. If there is a fault, the connection MAY be torn down, but an implementation MAY take extra steps as part of a fault-tolerance strategy to avoid this. Dropping a packet and generating a page fault to the host as a fault-recovery strategy is a legitimate option. But applications MUST NOT rely on the transport layer having this service. It's somewhat like catching divide by zero errors. It's nice if the OS/library/compiler build in mechanisms to recover from divide by zero errors, but that does not mean that applications should go around dividing by zero. 
RDMA wire semantics requires that a sufficient number of pages are committed, and that these are the pages as they will be viewed by the application. There is nothing in the protocol that is inconsistent with an OS or Hypervisor *substituting* pages in a memory region (as long as it is done in a way that honors updates to those pages). Great care must be taken when substituting pages that are DMA accessible, but substituting pages out from under a running application isn't exactly trivial either. Virtual Memory Managers (either OS or hypervisor) should be presumed to understand when they have to preserve the contents of a page. RDMA presents some special challenges here because the RDMA layer has no knowledge of the intended usage of tagged memory buffers, nor does it track the history of access using R-Keys/STags. So the RDMA protocols do allow flexibility in what an R-Key/STag maps to even while the R-Key or STag is externally advertised. But existing RDMA verbs have no support for updating the meaning of an R-Key/STag without first invalidating it. However, that is a verbs/implementation issue -- not an RDMA wire protocol requirement. New APIs that allow Virtual Memory Managers to substitute pages in a Memory Region are feasible and may have valuable use cases, but they need to be introduced on an evolutionary basis. Existing hardware will not support them. But as long as such features are not used to enable irresponsible over-subscription of pages there is no reason why new devices could not support such concepts (or even sufficiently updatable devices). RDMA devices already generate a "fault" when they cannot place to host memory. The difference is whether they can be instructed to drop the packet before acking it rather than terminating the connection. And the host can respond to the fault either by terminating the connection or by repairing the problem. 
From clameter at sgi.com Wed Feb 13 12:32:12 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 13 Feb 2008 12:32:12 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213194621.GD19742@mv.qlogic.com> References: <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213040905.GQ29340@mv.qlogic.com> <20080213194621.GD19742@mv.qlogic.com> Message-ID: On Wed, 13 Feb 2008, Christian Bell wrote: > not always be in the thousands but you're still claiming scalability > for a mechanism that essentially logs who accesses the regions. Then > there's the fact that reclaim becomes a collective communication > operation over all region accessors. Makes me nervous. Well reclaim is not a very fast process (and we usually try to avoid it as much as possible for our HPC). Essentially it's only there to allow shifts of processing loads and to allow efficient caching of application data. > However, short of providing user-level notifications for pinned pages > that are inadvertently released to the O/S, I don't believe that the > patchset provides any significant added value for the HPC community > that can't optimistically do RDMA demand paging. We currently also run XPmem with pinning. It's great as long as you just run one load on the system. No reclaim ever occurs. However, if you do things that require lots of allocations etc etc then the page pinning can easily lead to livelock if reclaim is finally triggered and also strange OOM situations since the VM cannot free any pages. So the main issue that is addressed here is reliability of pinned page operations. Better VM integration avoids these issues because we can unpin on request to deal with memory shortages. 
From clameter at sgi.com Wed Feb 13 12:36:42 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 13 Feb 2008 12:36:42 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213195144.GE31435@obsidianresearch.com> References: <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213032533.GC32047@obsidianresearch.com> <20080213195144.GE31435@obsidianresearch.com> Message-ID: On Wed, 13 Feb 2008, Jason Gunthorpe wrote: > Unfortunately it really has little to do with the drivers - changes, > for instance, need to be made to support this in the user space MPI > libraries. The RDMA ops do not pass through the kernel, userspace > talks directly to the hardware which complicates building any sort of > abstraction. Ok so the notifiers have to be handed over to the user space library that has the function of the device driver here... > That is where I think you run into trouble, if you ask the MPI people > to add code to their critical path to support swapping they probably > will not be too interested. At a minimum to support your idea you need > to check on every RDMA if the remote page is mapped... Plus the > overheads Christian was talking about in the OOB channel(s). You only need to check if a handle has been receiving invalidates. If not then you can just go ahead as now. You can use the notifier to take down the whole region if any reclaim occurs against it (probably the best and simplest approach to implement). Then you mark the handle so that the mapping is reestablished before the next operation. 
From or.gerlitz at gmail.com Wed Feb 13 13:06:23 2008 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 13 Feb 2008 23:06:23 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B346DA.30301@linux.vnet.ibm.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B20367.7040900@dev.mellanox.co.il> <47B21DC4.70604@linux.vnet.ibm.com> <47B29443.9010206@voltaire.com> <47B32A0E.6060005@linux.vnet.ibm.com> <15ddcffd0802131117o1d6f206ck208681bd025eccbe@mail.gmail.com> <47B346DA.30301@linux.vnet.ibm.com> Message-ID: <15ddcffd0802131306x4b51039bncf57c1a8e02c6be3@mail.gmail.com> On 2/13/08, Pradeep Satyanarayana wrote: > Unfortunately, I do not have the same setup that I had previously. So, I would > be unable to provide you all the details at this point. However, I do remember > it was ehca and mthca on ppc64 machines. Honestly, its a problem to judge a fix for a problem for which you provide the settting with such limited description, actually, why won't you (IBM) try and reproduce it, basically the claim is that the problem can occur on any HCA-mixed setup, so you can try some couples from {ehca,any-of-the-four-mthca-devices, connectx, ipath}, etc and let the community know your findings. > You may have seen that I have tried bringing up that issue several times in the past. I believe that if you would have provided the details on the problem (test / failure description) it was not being ignored by the maintainers. Or From karmasystems.com at knowcreditcards.com Wed Feb 13 13:52:41 2008 From: karmasystems.com at knowcreditcards.com (Malachi Price) Date: Thu, 14 Feb 2008 00:52:41 +0300 Subject: [ofa-general] Adobe Creative Suite 3 MAC/XP/Vista for 269, Retails 1799 (will save 1529) Message-ID: <000501c86e8a$5921f900$0100007f@lawkdy> adobe encore dvd 2 - 49 adobe illustrator cs3 - 69 Put ''gsxoempromo. 
com'' to Expl0rer of Internet (w/o '' and space) nero 7 premium - 39 luxology modo 301 for mac - 129 microsoft expression studio 1.0 - 79 adobe fireworks cs3 - 59 office professional xp - 49 steinberg nuendo 3.1 - 99 intuit quicken home and business 2008 - 39 alias maya 7.0 unlimited - 109 adobe premiere pro cs3 - 79 creative suite 3 design premium for win - 269 From sean.hefty at intel.com Wed Feb 13 14:33:53 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 13 Feb 2008 14:33:53 -0800 Subject: [ofa-general] [PATCH] for-2.6.25: rdma/cm: do not issue MRA if user rejects connection request Message-ID: <000501c86e90$831d4a60$ff0da8c0@amr.corp.intel.com> There's an undesirable interaction with issuing MRA requests to increase connection timeouts and the listen backlog. When the rdma_cm receives a connection request, it queues an MRA with the ib_cm. (The ib_cm will send an MRA if it receives a duplicate REQ.) The rdma_cm will then create a new rdma_cm_id and give that to the user, which in this case is the rdma_user_cm. If the listen backlog maintained in the rdma_user_cm is full, it destroys the rdma_cm_id, which in turns destroys the ib_cm_id. The ib_cm_id generates a REJ because the state of the ib_cm_id has changed to MRA sent, versus REQ received. Defer queuing the MRA until after the user of the rdma_cm has examined the connection request. Signed-off-by: Sean Hefty --- This problem was detected while debugging an MPI application running over uDAPL. 
This patch is also available at: git://git.openfabrics.org/~shefty/rdma-dev.git for-roland drivers/infiniband/core/cma.c | 13 +++++++++++-- 1 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0751697..98e1b38 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1100,7 +1100,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { - ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); conn_id = cma_new_conn_id(&listen_id->id, ib_event); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); @@ -1122,8 +1121,18 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) cm_id->cm_handler = cma_ib_handler; ret = conn_id->id.event_handler(&conn_id->id, &event); - if (!ret) + if (!ret) { + /* + * Acquire mutex to prevent user executing rdma_destroy_id() + * while we're accessing the cm_id. + */ + mutex_lock(&lock); + if (cma_comp(conn_id, CMA_CONNECT) && + !cma_is_ud_ps(conn_id->id.ps)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + mutex_unlock(&lock); goto out; + } /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; From rdreier at cisco.com Wed Feb 13 14:40:24 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 13 Feb 2008 14:40:24 -0800 Subject: [ofa-general] [PATCH] for-2.6.25: rdma/cm: do not issue MRA if user rejects connection request In-Reply-To: <000501c86e90$831d4a60$ff0da8c0@amr.corp.intel.com> (Sean Hefty's message of "Wed, 13 Feb 2008 14:33:53 -0800") References: <000501c86e90$831d4a60$ff0da8c0@amr.corp.intel.com> Message-ID: > If the listen backlog maintained in the rdma_user_cm is full, > it destroys the rdma_cm_id, which in turns destroys the ib_cm_id. 
> The ib_cm_id generates a REJ because the state of the ib_cm_id has > changed to MRA sent, versus REQ received. Not sure I understand the problem. When the listen backlog is full, don't we want to generate a REJ? - R. From kanojsarcar at yahoo.com Wed Feb 13 14:44:34 2008 From: kanojsarcar at yahoo.com (Kanoj Sarcar) Date: Wed, 13 Feb 2008 14:44:34 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: Message-ID: <320000.75105.qm@web32509.mail.mud.yahoo.com> --- Christoph Lameter wrote: > On Wed, 13 Feb 2008, Christian Bell wrote: > > > not always be in the thousands but you're still > claiming scalability > > for a mechanism that essentially logs who accesses > the regions. Then > > there's the fact that reclaim becomes a collective > communication > > operation over all region accessors. Makes me > nervous. > > Well reclaim is not a very fast process (and we > usually try to avoid it > as much as possible for our HPC). Essentially its > only there to allow > shifts of processing loads and to allow efficient > caching of application > data. > > > However, short of providing user-level > notifications for pinned pages > > that are inadvertently released to the O/S, I > don't believe that the > > patchset provides any significant added value for > the HPC community > > that can't optimistically do RDMA demand paging. > > We currently also run XPmem with pinning. Its great > as long as you just > run one load on the system. No reclaim ever iccurs. > > However, if you do things that require lots of > allocations etc etc then > the page pinning can easily lead to livelock if > reclaim is finally > triggerd and also strange OOM situations since the > VM cannot free any > pages. So the main issue that is addressed here is > reliability of pinned > page operations. Better VM integration avoids these > issues because we can > unpin on request to deal with memory shortages. 
> > I have a question on the basic need for the mmu notifier stuff wrt rdma hardware and pinning memory. It seems that the need is to solve potential memory shortage and overcommit issues by being able to reclaim pages pinned by rdma driver/hardware. Is my understanding correct? If I do understand correctly, then why is rdma page pinning any different than eg mlock pinning? I imagine Oracle pins lots of memory (using mlock), how come they do not run into vm overcommit issues? Are we up against some kind of breaking c-o-w issue here that is different between mlock and rdma pinning? Asked another way, why should effort be spent on a notifier scheme, and rather not on fixing any memory accounting problems and unifying how pin pages are accounted for that get pinned via mlock() or rdma drivers? Startup benefits are well understood with the notifier scheme (ie, not all pages need to be faulted in at memory region creation time), specially when most of the memory region is not accessed at all. I would imagine most of HPC does not work this way though. Then again, as rdma hardware is applied (increasingly?) towards apps with short lived connections, the notifier scheme will help with startup times. Kanoj ____________________________________________________________________________________ Be a better friend, newshound, and know-it-all with Yahoo! Mobile. Try it now. http://mobile.yahoo.com/;_ylt=Ahu06i62sR8HDtDypao8Wcj9tAcJ From nab at linux-iscsi.org Wed Feb 13 14:49:34 2008 From: nab at linux-iscsi.org (Nicholas A. Bellinger) Date: Wed, 13 Feb 2008 14:49:34 -0800 Subject: [ofa-general] [PATCH] mthca memfree init sg list In-Reply-To: <20080212213820.GG13643@osc.edu> References: <20080212213820.GG13643@osc.edu> Message-ID: <1202942974.25254.149.camel@haakon2.linux-iscsi.org> Greetings Pete, On Tue, 2008-02-12 at 16:38 -0500, Pete Wyckoff wrote: > Properly initialize the SG list in the user_db_table in mthca memfree. 
> Without this, and when compiling with CONFIG_DEBUG_SG, a BUG will occur > during create_cq. The call to sg_set_page() in mthca_map_user_db() > will find that the scatterlist magic was not initialized. > > Signed-off-by: Pete Wyckoff > --- > drivers/infiniband/hw/mthca/mthca_memfree.c | 1 + > 1 files changed, 1 insertions(+), 0 deletions(-) > > diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c > index 1f4d27d..252db08 100644 > --- a/drivers/infiniband/hw/mthca/mthca_memfree.c > +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c > @@ -542,6 +542,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) > for (i = 0; i < npages; ++i) { > db_tab->page[i].refcount = 0; > db_tab->page[i].uvirt = 0; > + sg_init_table(&db_tab->page[i].mem, 1); > } > > return db_tab; Should these scatterlist initializations include the new sg_mark_end() usage by default as well..? --nab From sean.hefty at intel.com Wed Feb 13 14:54:33 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 13 Feb 2008 14:54:33 -0800 Subject: [ofa-general] [PATCH] for-2.6.25: rdma/cm: do not issue MRA ifuser rejects connection request In-Reply-To: References: <000501c86e90$831d4a60$ff0da8c0@amr.corp.intel.com> Message-ID: <000601c86e93$66d2ad70$ff0da8c0@amr.corp.intel.com> > > If the listen backlog maintained in the rdma_user_cm is full, > > it destroys the rdma_cm_id, which in turns destroys the ib_cm_id. > > The ib_cm_id generates a REJ because the state of the ib_cm_id has > > changed to MRA sent, versus REQ received. > >Not sure I understand the problem. When the listen backlog is full, >don't we want to generate a REJ? The current behavior is that when the backlog is full, the REQ is dropped, which allows the remote side to retry the request, hopefully after the backlog has cleared a little. If I recall correctly, this feature was added based on feedback from at least Michael Tsirkin for SDP(?) support. 
Clients notify the IB CM to drop the REQ by reporting -ENOMEM from the REQ callback. The rdma_user_cm was using this, but when support to send the MRA was added to the rdma_cm, it ended up breaking this feature. The MRA needs to be sent after the user has examined the request, rather than before. - Sean From patrick.latifi at qlogic.com Wed Feb 13 14:57:47 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 14:57:47 -0800 Subject: [ofa-general] [PATCH][DAPL v1] misc fixes Message-ID: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Hi all, Here's a set of patches for dapl 1.2.x. Please have a look and let me know if there's any issue. All these patches apply against the dat1.2 branch. Thanks, -pat From patrick.latifi at qlogic.com Wed Feb 13 14:57:53 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 14:57:53 -0800 Subject: [ofa-general] [PATCH 1/6] [DAPL v1] fix typo in memset In-Reply-To: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213225753.16508.69660.stgit@b64-10.internal.keyresearch.com> Fix typo in memset Signed-off-by: Patrick Marchand Latifi --- test/dtest/dtest.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/test/dtest/dtest.c b/test/dtest/dtest.c index 66c01cc..4264e9b 100755 --- a/test/dtest/dtest.c +++ b/test/dtest/dtest.c @@ -249,7 +249,7 @@ main(int argc, char **argv) perror("malloc"); exit(1); } - memset( &time, sizeof(time), 0); + memset( &time, 0, sizeof(time) ); LOGPRINTF("%d Allocated RDMA buffers (r:%p,s:%p) len %d \n", getpid(), rbuf, sbuf, buf_len); From patrick.latifi at qlogic.com Wed Feb 13 14:57:58 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 14:57:58 -0800 Subject: [ofa-general] [PATCH 2/6] [DAPL v1] Add missing memset In-Reply-To: 
<20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213225758.16508.14673.stgit@b64-10.internal.keyresearch.com> Zero-out memory otherwise we might base our cleanup decisions on uninitialized memory. Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_sr.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/dat/common/dat_sr.c b/dat/common/dat_sr.c index 9568ab9..d5d8666 100644 --- a/dat/common/dat_sr.c +++ b/dat/common/dat_sr.c @@ -142,6 +142,8 @@ dat_sr_insert ( goto bail; } + dat_os_memset (data, '\0', sizeof (DAT_SR_ENTRY)); + lib_path_len = strlen (entry->lib_path); lib_path_size = (lib_path_len + 1) * sizeof (char); From patrick.latifi at qlogic.com Wed Feb 13 14:58:03 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 14:58:03 -0800 Subject: [ofa-general] [PATCH 3/6] [DAPL v1] fix off-by-one with ia handle In-Reply-To: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213225803.16508.65574.stgit@b64-10.internal.keyresearch.com> Make sure we stay within bounds when manipulating the ia handle. 
Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_api.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dat/common/dat_api.c b/dat/common/dat_api.c index e958c68..2cfa4a3 100644 --- a/dat/common/dat_api.c +++ b/dat/common/dat_api.c @@ -176,7 +176,7 @@ dats_get_ia_handle( { DAT_RETURN dat_status; - if (handle > g_hv.handle_max) + if (handle >= g_hv.handle_max) { dat_status = DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_IA); goto bail; @@ -219,7 +219,7 @@ dats_is_ia_handle ( { unsigned long handle = (unsigned long) dat_handle; - if (g_hv.handle_max < handle ) + if (g_hv.handle_max <= handle ) { return DAT_FALSE; } @@ -245,7 +245,7 @@ dats_free_ia_handle ( { DAT_RETURN dat_status; - if (handle > g_hv.handle_max) + if (handle >= g_hv.handle_max) { dat_status = DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_IA); goto bail; From patrick.latifi at qlogic.com Wed Feb 13 14:58:08 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 14:58:08 -0800 Subject: [ofa-general] [PATCH 4/6] [DAPL v1] fix off-by-one with ia_name In-Reply-To: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213225808.16508.16794.stgit@b64-10.internal.keyresearch.com> Make sure we stay within bounds when manipulating the ia_name. 
Signed-off-by: Patrick Marchand Latifi --- dat/udat/udat.c | 6 ++---- 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dat/udat/udat.c b/dat/udat/udat.c index e458441..c57d421 100644 --- a/dat/udat/udat.c +++ b/dat/udat/udat.c @@ -181,7 +181,7 @@ dat_ia_openv ( len = dat_os_strlen (name); - if ( DAT_NAME_MAX_LENGTH < len ) + if ( DAT_NAME_MAX_LENGTH <= len ) { return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); } @@ -197,7 +197,6 @@ dat_ia_openv ( } dat_os_strncpy (info.ia_name, name, len); - info.ia_name[len] = '\0'; info.dapl_version_major = dapl_major; info.dapl_version_minor = dapl_minor; @@ -301,10 +300,9 @@ dat_ia_close ( len = dat_os_strlen (ia_name); - dat_os_assert ( len <= DAT_NAME_MAX_LENGTH ); + dat_os_assert ( len < DAT_NAME_MAX_LENGTH ); dat_os_strncpy (info.ia_name, ia_name, len); - info.ia_name[len] = '\0'; info.dapl_version_major = provider_attr.dapl_version_major; info.dapl_version_minor = provider_attr.dapl_version_minor; From patrick.latifi at qlogic.com Wed Feb 13 14:58:13 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 14:58:13 -0800 Subject: [ofa-general] [PATCH 5/6] [DAPL v1] Fix typo in comment In-Reply-To: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213225813.16508.40837.stgit@b64-10.internal.keyresearch.com> Fix typo in comment. Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_api.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/dat/common/dat_api.c b/dat/common/dat_api.c index 2cfa4a3..87ad722 100644 --- a/dat/common/dat_api.c +++ b/dat/common/dat_api.c @@ -209,7 +209,7 @@ dats_get_ia_handle( * The current implementation assumes that any value for which an IA * handle exists is a DAT_IA_HANDLE. Unfortunately this will result in * false positives. In particular it may identify a NULL pointer as IA - * handle 0. 
An implmenetation that does not have this deficiency would + * handle 0. An implementation that does not have this deficiency would * be preferable. * ***********************************************************************/ From patrick.latifi at qlogic.com Wed Feb 13 14:58:18 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 14:58:18 -0800 Subject: [ofa-general] [PATCH 6/6] [DAPL v1] fix variable not initialized early enough In-Reply-To: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213225818.16508.10243.stgit@b64-10.internal.keyresearch.com> Make sure we initialize the dictionary entry early enough so that we can base our cleanup decisions on that variable being initialized. Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_dr.c | 3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/dat/common/dat_dr.c b/dat/common/dat_dr.c index 541ce2a..89fc861 100644 --- a/dat/common/dat_dr.c +++ b/dat/common/dat_dr.c @@ -120,7 +120,7 @@ dat_dr_insert ( IN DAT_DR_ENTRY *entry ) { DAT_RETURN status; - DAT_DICTIONARY_ENTRY dict_entry; + DAT_DICTIONARY_ENTRY dict_entry = NULL; DAT_DR_ENTRY *data; data = dat_os_alloc (sizeof (DAT_DR_ENTRY)); @@ -132,7 +132,6 @@ dat_dr_insert ( *data = *entry; - dict_entry = NULL; status = dat_dictionary_entry_create (&dict_entry); if ( DAT_SUCCESS != status ) { From patrick.latifi at qlogic.com Wed Feb 13 15:00:01 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 15:00:01 -0800 Subject: [ofa-general] [PATCH][DAPL v2] misc fixes Message-ID: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> Hi all, Here's a set of patches for dapl 2.0.x. Please have a look and let me know if there's any issue. All these patches apply against the master branch. 
Thanks, -pat From patrick.latifi at qlogic.com Wed Feb 13 15:00:07 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 15:00:07 -0800 Subject: [ofa-general] [PATCH 1/5] [DAPL v2] Add missing memset In-Reply-To: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213230007.16680.5275.stgit@b64-10.internal.keyresearch.com> Zero-out memory otherwise we might base our cleanup decisions on uninitialized memory. Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_sr.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/dat/common/dat_sr.c b/dat/common/dat_sr.c index 79a15cc..05be499 100755 --- a/dat/common/dat_sr.c +++ b/dat/common/dat_sr.c @@ -142,6 +142,8 @@ dat_sr_insert ( goto bail; } + dat_os_memset (data, '\0', sizeof (DAT_SR_ENTRY)); + lib_path_len = strlen (entry->lib_path); lib_path_size = (lib_path_len + 1) * sizeof (char); From patrick.latifi at qlogic.com Wed Feb 13 15:00:12 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 15:00:12 -0800 Subject: [ofa-general] [PATCH 2/5] [DAPL v2] fix off-by-one with ia handle In-Reply-To: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213230012.16680.9572.stgit@b64-10.internal.keyresearch.com> Make sure we stay within bounds when manipulating the ia handle. 
Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_api.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dat/common/dat_api.c b/dat/common/dat_api.c index cb4105d..693fa30 100755 --- a/dat/common/dat_api.c +++ b/dat/common/dat_api.c @@ -185,7 +185,7 @@ dats_get_ia_handle( { DAT_RETURN dat_status; - if (DAT_IA_HANDLE_TO_UL(handle) > g_hv.handle_max) + if (DAT_IA_HANDLE_TO_UL(handle) >= g_hv.handle_max) { dat_status = DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_IA); goto bail; @@ -228,7 +228,7 @@ dats_is_ia_handle ( { unsigned long handle = DAT_IA_HANDLE_TO_UL((DAT_IA_HANDLE)dat_handle); - if (g_hv.handle_max < handle ) + if (g_hv.handle_max <= handle ) { return DAT_FALSE; } @@ -254,7 +254,7 @@ dats_free_ia_handle ( { DAT_RETURN dat_status; - if (DAT_IA_HANDLE_TO_UL(handle) > g_hv.handle_max) + if (DAT_IA_HANDLE_TO_UL(handle) >= g_hv.handle_max) { dat_status = DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_IA); goto bail; From patrick.latifi at qlogic.com Wed Feb 13 15:00:17 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 15:00:17 -0800 Subject: [ofa-general] [PATCH 3/5] [DAPL v2] Fix off-by-one with ia_name In-Reply-To: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213230017.16680.89345.stgit@b64-10.internal.keyresearch.com> Make sure we stay within bounds when manipulating the ia_name. 
Signed-off-by: Patrick Marchand Latifi --- dat/udat/udat.c | 6 ++---- 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dat/udat/udat.c b/dat/udat/udat.c index bb1c580..0be4c33 100755 --- a/dat/udat/udat.c +++ b/dat/udat/udat.c @@ -184,7 +184,7 @@ dat_ia_openv ( len = dat_os_strlen (name); - if ( DAT_NAME_MAX_LENGTH < len ) + if ( DAT_NAME_MAX_LENGTH <= len ) { return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); } @@ -200,7 +200,6 @@ dat_ia_openv ( } dat_os_strncpy (info.ia_name, name, len); - info.ia_name[len] = '\0'; info.dapl_version_major = dapl_major; info.dapl_version_minor = dapl_minor; @@ -324,10 +323,9 @@ dat_ia_close ( len = dat_os_strlen (ia_name); - dat_os_assert ( len <= DAT_NAME_MAX_LENGTH ); + dat_os_assert ( len < DAT_NAME_MAX_LENGTH ); dat_os_strncpy (info.ia_name, ia_name, len); - info.ia_name[len] = '\0'; info.dapl_version_major = provider_attr.dapl_version_major; info.dapl_version_minor = provider_attr.dapl_version_minor; From patrick.latifi at qlogic.com Wed Feb 13 15:00:22 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 15:00:22 -0800 Subject: [ofa-general] [PATCH 4/5] [DAPL v2] Fix typo in comment In-Reply-To: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213230022.16680.89800.stgit@b64-10.internal.keyresearch.com> Fix typo in comment. Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_api.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/dat/common/dat_api.c b/dat/common/dat_api.c index 693fa30..de8e122 100755 --- a/dat/common/dat_api.c +++ b/dat/common/dat_api.c @@ -218,7 +218,7 @@ dats_get_ia_handle( * The current implementation assumes that any value for which an IA * handle exists is a DAT_IA_HANDLE. Unfortunately this will result in * false positives. In particular it may identify a NULL pointer as IA - * handle 0. 
An implmenetation that does not have this deficiency would + * handle 0. An implementation that does not have this deficiency would * be preferable. * ***********************************************************************/ From patrick.latifi at qlogic.com Wed Feb 13 15:00:27 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Wed, 13 Feb 2008 15:00:27 -0800 Subject: [ofa-general] [PATCH 5/5] [DAPL v2] Fix variable not initialized early enough In-Reply-To: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080213230027.16680.16718.stgit@b64-10.internal.keyresearch.com> Make sure we initialize the dictionary entry early enough so that we can base our cleanup decisions on that variable being initialized. Signed-off-by: Patrick Marchand Latifi --- dat/common/dat_dr.c | 3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/dat/common/dat_dr.c b/dat/common/dat_dr.c index 6d79829..bda3002 100644 --- a/dat/common/dat_dr.c +++ b/dat/common/dat_dr.c @@ -119,7 +119,7 @@ dat_dr_insert ( IN DAT_DR_ENTRY *entry ) { DAT_RETURN status; - DAT_DICTIONARY_ENTRY dict_entry; + DAT_DICTIONARY_ENTRY dict_entry = NULL; DAT_DR_ENTRY *data; data = dat_os_alloc (sizeof (DAT_DR_ENTRY)); @@ -131,7 +131,6 @@ dat_dr_insert ( *data = *entry; - dict_entry = NULL; status = dat_dictionary_entry_create (&dict_entry); if ( DAT_SUCCESS != status ) { From prescott at hpc.ufl.edu Wed Feb 13 15:01:31 2008 From: prescott at hpc.ufl.edu (Craig Prescott) Date: Wed, 13 Feb 2008 18:01:31 -0500 Subject: [ofa-general] SDP performance with bzcopy testing help needed In-Reply-To: <47B20F6F.8080302@hpc.ufl.edu> References: <47B20F6F.8080302@hpc.ufl.edu> Message-ID: <47B376CB.6050404@hpc.ufl.edu> Craig Prescott wrote: > Jim Mott wrote: >> Now that SDP is shipping with a non-zero default value for >> sdp_zcopy_thresh (64K), I need some feedback from the list. 
Does >> anybody except me see a performance gain on large messages? > > Oh, I see it - absolutely. > For SDP on iWARP, you can see the improvement for large messages at. > > http://hpc.ufl.edu/benchmarks/iwarp_sdp/ > > Scan down to SDP Benchmarks. The page is not really done yet, but I'm > trying to finish up today. > > I'll run the same tests on IB (we have 4X SDR Lion Cubs) shortly and post. > Hi Jim - I did exactly the same commands as in your post. Here is for 4X SDR Lion Cubs on dual Opteron 2218s with CentOS 5.0. The nodes were idle, except for the netperf. For us, it looks like we should investigate set the sdp_zcopy_thresh to something higher than the default. But the effect is still clear. throughput: 64K 128K 1M SDP 7602.40 7560.57 5791.56 BZCOPY 5454.20 6378.48 7316.28 and for usec/KB: 64K 128K 1M LCL RMT LCL RMT LCL RMT SDP 0.574 1.079 0.836 1.084 1.398 1.244 BZCOPY 1.518 1.602 1.291 1.331 0.923 1.082 Cheers, Craig From clameter at sgi.com Wed Feb 13 15:02:24 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 13 Feb 2008 15:02:24 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <320000.75105.qm@web32509.mail.mud.yahoo.com> References: <320000.75105.qm@web32509.mail.mud.yahoo.com> Message-ID: On Wed, 13 Feb 2008, Kanoj Sarcar wrote: > It seems that the need is to solve potential memory > shortage and overcommit issues by being able to > reclaim pages pinned by rdma driver/hardware. Is my > understanding correct? Correct. > If I do understand correctly, then why is rdma page > pinning any different than eg mlock pinning? I imagine > Oracle pins lots of memory (using mlock), how come > they do not run into vm overcommit issues? Mlocked pages are not pinned. They are movable by f.e. page migration and will be potentially be moved by future memory defrag approaches. Currently we have the same issues with mlocked pages as with pinned pages. 
There is work in progress to put mlocked pages onto a different lru so that reclaim exempts these pages and more work on limiting the percentage of memory that can be mlocked. > Are we up against some kind of breaking c-o-w issue > here that is different between mlock and rdma pinning? Not that I know. > Asked another way, why should effort be spent on a > notifier scheme, and rather not on fixing any memory > accounting problems and unifying how pin pages are > accounted for that get pinned via mlock() or rdma > drivers? There are efforts underway to account for and limit mlocked pages as described above. Page pinning the way it is done by Infiniband through increasing the page refcount is treated by the VM as a temporary condition not as a permanent pin. The VM will continually try to reclaim these pages thinking that the temporary usage of the page must cease soon. This is why the use of large amounts of pinned pages can lead to livelock situations. If we want to have pinning behavior then we could mark pinned pages specially so that the VM will not continually try to evict these pages. We could manage them similar to mlocked pages but just not allow page migration, memory unplug and defrag to occur on pinned memory. All of theses would have to fail. With the notifier scheme the device driver could be told to get rid of the pinned memory. This would make these 3 techniques work despite having an RDMA memory section. > Startup benefits are well understood with the notifier > scheme (ie, not all pages need to be faulted in at > memory region creation time), specially when most of > the memory region is not accessed at all. I would > imagine most of HPC does not work this way though. No for optimal performance you would want to prefault all pages like it is now. The notifier scheme would only become relevant in memory shortage situations. > Then again, as rdma hardware is applied (increasingly?) 
towards apps > with short lived connections, the notifier scheme will help with startup > times. The main use of the notifier scheme is for stability and reliability. The "pinned" pages become unpinnable on request by the VM. So the VM can work itself out of memory shortage situations in cooperation with the RDMA logic instead of simply failing. From pradeeps at linux.vnet.ibm.com Wed Feb 13 15:11:18 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 13 Feb 2008 15:11:18 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <15ddcffd0802131306x4b51039bncf57c1a8e02c6be3@mail.gmail.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B153E8.2090803@voltaire.com> <000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B20367.7040900@dev.mellanox.co.il> <47B21DC4.70604@linux.vnet.ibm.com> <47B29443.9010206@voltaire.com> <47B32A0E.6060005@linux.vnet.ibm.com> <15ddcffd0802131117o1d6f206ck208681bd025eccbe@mail.gmail.com> <47B346DA.30301@linux.vnet.ibm.com> <15ddcffd0802131306x4b51039bncf57c1a8e02c6be3@mail.gmail.com> Message-ID: <47B37916.9020703@linux.vnet.ibm.com> Or Gerlitz wrote: > On 2/13/08, Pradeep Satyanarayana wrote: >> Unfortunately, I do not have the same setup that I had previously. So, I would >> be unable to provide you all the details at this point. However, I do remember >> it was ehca and mthca on ppc64 machines. > > Honestly, its a problem to judge a fix for a problem for which you > provide the settting with such limited description, actually, why > won't you (IBM) try and reproduce it, basically the claim is that the > problem can occur on any HCA-mixed setup, so you can try some couples > from {ehca,any-of-the-four-mthca-devices, connectx, ipath}, etc and > let the community know your findings. I have already stated that I do not have the setup right now. When I get a similar setup, I will indeed try and reproduce the problem. It shows up on any network test like netperf, nothing special. 
I have already provided a description of my findings -"send completion errors" and subsequent tear down of the QP and the like in the link that I provided. What additional details are you looking for? Pradeep From pw at osc.edu Wed Feb 13 15:23:08 2008 From: pw at osc.edu (Pete Wyckoff) Date: Wed, 13 Feb 2008 18:23:08 -0500 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213040905.GQ29340@mv.qlogic.com> References: <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213040905.GQ29340@mv.qlogic.com> Message-ID: <20080213232308.GB7597@osc.edu> christian.bell at qlogic.com wrote on Tue, 12 Feb 2008 20:09 -0800: > One other area that has not been brought up yet (I think) is the > applicability of notifiers in letting users know when pinned memory > is reclaimed by the kernel. This is useful when a lower-level > library employs lazy deregistration strategies on memory regions that > are subsequently released to the kernel via the application's use of > munmap or sbrk. Ohio Supercomputing Center has work in this area but > a generalized approach in the kernel would certainly be welcome. The whole need for memory registration is a giant pain. There is no motivating application need for it---it is simply a hack around virtual memory and the lack of full VM support in current hardware. There are real hardware issues that interact poorly with virtual memory, as discussed previously in this thread. The way a messaging cycle goes in IB is: register buf post send from buf wait for completion deregister buf This tends to get hidden via userspace software libraries into a single call: MPI_send(buf) Now if you actually do the reg/dereg every time, things are very slow. 
So userspace library writers came up with the idea of caching registrations: if buf is not registered: register buf post send from buf wait for completion The second time that the app happens to do a send from the same buffer, it proceeds much faster. Spatial locality applies here, and this caching is generally worth it. Some libraries have schemes to limit the size of the registration cache too. But there are plenty of ways to hurt yourself with such a scheme. The first being a huge pool of unused but registered memory, as the library doesn't know the app patterns, and it doesn't know the VM pressure level in the kernel. There are plenty of subtle ways that this breaks too. If the registered buf is removed from the address space via munmap() or sbrk() or other ways, the mapping and registration are gone, but the library has no way of knowing that the app just did this. Sure the physical page is still there and pinned, but the app cannot get at it. Later if new address space arrives at the same virtual address but a different physical page, the library will mistakenly think it already has it registered properly, and data is transferred from this old now-unmapped physical page. The whole situation is rather ridiculuous, but we are quite stuck with it for current generation IB and iWarp hardware. If we can't have the kernel interact with the device directly, we could at least manage state in these multiple userspace registration caches. The VM could ask for certain (or any) pages to be released, and the library would respond if they are indeed not in use by the device. The app itself does not know about pinned regions, and the library is aware of exactly which regions are potentially in use. Since the great majority of userspace messaging over IB goes through middleware like MPI or PGAS languages, and they all have the same approach to registration caching, this approach could fix the problem for a big segment of use cases. 
More text on the registration caching problem is here: http://www.osc.edu/~pw/papers/wyckoff-memreg-ccgrid05.pdf with an approach using vm_ops open and close operations in a kernel module here: http://www.osc.edu/~pw/dreg/ There is a place for VM notifiers in RDMA messaging, but not in talking to devices, at least not the current set. If you can define a reasonable userspace interface for VM notifiers, libraries can manage registration caches more efficiently, letting the kernel unmap pinned pages as it likes. -- Pete From caitlin.bestler at gmail.com Wed Feb 13 15:23:39 2008 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Wed, 13 Feb 2008 15:23:39 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <320000.75105.qm@web32509.mail.mud.yahoo.com> Message-ID: <469958e00802131523h36aae555pd6ff7644302d323e@mail.gmail.com> On Feb 13, 2008 3:02 PM, Christoph Lameter wrote: > > The main use of the notifier scheme is for stability and reliability. The > "pinned" pages become unpinnable on request by the VM. So the VM can work > itself out of memory shortage situations in cooperation with the > RDMA logic instead of simply failing. > The very limited objective presented above was actually discussed in RNIC-PI. A minimalist solution (from the hardware viewpoint) is to "suspend" a Memory Region for a very brief time to allow the Host to re-arrange memory, and then to "resume" operation once the pages were copied and the map updated. The RDMA device has to avoid processing incoming packets that reference the suspended Memory Region (rather than failing the connection) and flush any cached mappings from before the "suspend" so that everything is learned/ fetched after the "resume". The advertised pages have to have the same *meaning* and they have to be committed, but they do not have to be the same physical pages for the lifetime of the memory region (at least from the protocol perspective). 
Obviously any add-on hardware functionality would have to be a documented option so that the memory manager would know whether a given device actually could do this. From clameter at sgi.com Wed Feb 13 15:40:10 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 13 Feb 2008 15:40:10 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <469958e00802131523h36aae555pd6ff7644302d323e@mail.gmail.com> References: <320000.75105.qm@web32509.mail.mud.yahoo.com> <469958e00802131523h36aae555pd6ff7644302d323e@mail.gmail.com> Message-ID: On Wed, 13 Feb 2008, Caitlin Bestler wrote: > The very limited objective presented above was actually discussed in RNIC-PI. > A minimalist solution (from the hardware viewpoint) is to "suspend" a Memory > Region for a very brief time to allow the Host to re-arrange memory, and then > to "resume" operation once the pages were copied and the map updated. Exactly. Could you post that back to the full cc list? From kanojsarcar at yahoo.com Wed Feb 13 15:43:17 2008 From: kanojsarcar at yahoo.com (Kanoj Sarcar) Date: Wed, 13 Feb 2008 15:43:17 -0800 (PST) Subject: ***SPAM*** Re: [ofa-general] Re: Demand paging for memory regions In-Reply-To: Message-ID: <866658.37093.qm@web32510.mail.mud.yahoo.com> --- Christoph Lameter wrote: > On Wed, 13 Feb 2008, Kanoj Sarcar wrote: > > > It seems that the need is to solve potential > memory > > shortage and overcommit issues by being able to > > reclaim pages pinned by rdma driver/hardware. Is > my > > understanding correct? > > Correct. > > > If I do understand correctly, then why is rdma > page > > pinning any different than eg mlock pinning? I > imagine > > Oracle pins lots of memory (using mlock), how come > > they do not run into vm overcommit issues? > > Mlocked pages are not pinned. They are movable by > f.e. page migration and > will be potentially be moved by future memory defrag > approaches. 
Currently > we have the same issues with mlocked pages as with > pinned pages. There is > work in progress to put mlocked pages onto a > different lru so that reclaim > exempts these pages and more work on limiting the > percentage of memory > that can be mlocked. > > > Are we up against some kind of breaking c-o-w > issue > > here that is different between mlock and rdma > pinning? > > Not that I know. > > > Asked another way, why should effort be spent on a > > notifier scheme, and rather not on fixing any > memory > > accounting problems and unifying how pin pages are > > accounted for that get pinned via mlock() or rdma > > drivers? > > There are efforts underway to account for and limit > mlocked pages as > described above. Page pinning the way it is done by > Infiniband through > increasing the page refcount is treated by the VM as > a temporary > condition not as a permanent pin. The VM will > continually try to reclaim > these pages thinking that the temporary usage of the > page must cease > soon. This is why the use of large amounts of pinned > pages can lead to > livelock situations. Oh ok, yes, I did see the discussion on this; sorry I missed it. I do see what notifiers bring to the table now (without endorsing it :-)). An orthogonal question is this: is IB/rdma the only "culprit" that elevates page refcounts? Are there no other subsystems which do a similar thing? The example I am thinking about is rawio (Oracle's mlock'ed SHM regions are handed to rawio, isn't it?). My understanding of how rawio works in Linux is quite dated though ... Kanoj > > If we want to have pinning behavior then we could > mark pinned pages > specially so that the VM will not continually try to > evict these pages. We > could manage them similar to mlocked pages but just > not allow page > migration, memory unplug and defrag to occur on > pinned memory. All of > theses would have to fail. 
With the notifier scheme > the device driver > could be told to get rid of the pinned memory. This > would make these 3 > techniques work despite having an RDMA memory > section. > > > Startup benefits are well understood with the > notifier > > scheme (ie, not all pages need to be faulted in at > > memory region creation time), specially when most > of > > the memory region is not accessed at all. I would > > imagine most of HPC does not work this way though. > > No for optimal performance you would want to > prefault all pages like > it is now. The notifier scheme would only become > relevant in memory > shortage situations. > > > Then again, as rdma hardware is applied > (increasingly?) towards apps > > with short lived connections, the notifier scheme > will help with startup > > times. > > The main use of the notifier scheme is for stability > and reliability. The > "pinned" pages become unpinnable on request by the > VM. So the VM can work > itself out of memory shortage situations in > cooperation with the > RDMA logic instead of simply failing. > > -- > To unsubscribe, send a message with 'unsubscribe > linux-mm' in > the body to majordomo at kvack.org. For more info on > Linux MM, > see: http://www.linux-mm.org/ . > Don't email: > email at kvack.org > ____________________________________________________________________________________ Looking for last minute shopping deals? Find them fast with Yahoo! Search. http://tools.search.yahoo.com/newsearch/category.php?category=shopping From sweitzen at cisco.com Wed Feb 13 15:53:55 2008 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Wed, 13 Feb 2008 15:53:55 -0800 Subject: [ofa-general] SDP performance with bzcopy testing help needed In-Reply-To: <47B376CB.6050404@hpc.ufl.edu> References: <47B20F6F.8080302@hpc.ufl.edu> <47B376CB.6050404@hpc.ufl.edu> Message-ID: > But the effect is still clear. 
> > throughput: > > 64K 128K 1M > SDP 7602.40 7560.57 5791.56 > BZCOPY 5454.20 6378.48 7316.28 Looks unclear to me. Sometimes BZCOPY does better, sometimes worse. Scott From pw at osc.edu Wed Feb 13 15:58:02 2008 From: pw at osc.edu (Pete Wyckoff) Date: Wed, 13 Feb 2008 18:58:02 -0500 Subject: [ofa-general] [PATCH] mthca memfree init sg list In-Reply-To: <1202942974.25254.149.camel@haakon2.linux-iscsi.org> References: <20080212213820.GG13643@osc.edu> <1202942974.25254.149.camel@haakon2.linux-iscsi.org> Message-ID: <20080213235802.GA18461@osc.edu> nab at linux-iscsi.org wrote on Wed, 13 Feb 2008 14:49 -0800: > On Tue, 2008-02-12 at 16:38 -0500, Pete Wyckoff wrote: > > Properly initialize the SG list in the user_db_table in mthca memfree. > > Without this, and when compiling with CONFIG_DEBUG_SG, a BUG will occur > > during create_cq. The call to sg_set_page() in mthca_map_user_db() > > will find that the scatterlist magic was not initialized. > > > > Signed-off-by: Pete Wyckoff > > --- > > drivers/infiniband/hw/mthca/mthca_memfree.c | 1 + > > 1 files changed, 1 insertions(+), 0 deletions(-) > > > > diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c > > index 1f4d27d..252db08 100644 > > --- a/drivers/infiniband/hw/mthca/mthca_memfree.c > > +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c > > @@ -542,6 +542,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) > > for (i = 0; i < npages; ++i) { > > db_tab->page[i].refcount = 0; > > db_tab->page[i].uvirt = 0; > > + sg_init_table(&db_tab->page[i].mem, 1); > > } > > > > return db_tab; > > Should these scatterlist initializations include the new sg_mark_end() > usage by default as well..? No. sg_init_table() does this. 
-- Pete From jgunthorpe at obsidianresearch.com Wed Feb 13 16:01:03 2008 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Wed, 13 Feb 2008 17:01:03 -0700 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080213232308.GB7597@osc.edu> References: <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213040905.GQ29340@mv.qlogic.com> <20080213232308.GB7597@osc.edu> Message-ID: <20080214000103.GG31435@obsidianresearch.com> On Wed, Feb 13, 2008 at 06:23:08PM -0500, Pete Wyckoff wrote: > christian.bell at qlogic.com wrote on Tue, 12 Feb 2008 20:09 -0800: > > One other area that has not been brought up yet (I think) is the > > applicability of notifiers in letting users know when pinned memory > > is reclaimed by the kernel. This is useful when a lower-level > > library employs lazy deregistration strategies on memory regions that > > are subsequently released to the kernel via the application's use of > > munmap or sbrk. Ohio Supercomputing Center has work in this area but > > a generalized approach in the kernel would certainly be welcome. > > The whole need for memory registration is a giant pain. There is no > motivating application need for it---it is simply a hack around > virtual memory and the lack of full VM support in current hardware. > There are real hardware issues that interact poorly with virtual > memory, as discussed previously in this thread. Well, the registrations also exist to provide protection against rogue/faulty remotes, but for the purposes of MPI that is probably not important. Here is a thought.. Some RDMA hardware can change the page tables on the fly. What if the kernel had a mechanism to dynamically maintain a full registration of the process's entire address space ('mlocked' but able to be migrated)?
MPI would never need to register a buffer, and all the messy cases with munmap/sbrk/etc go away - the risk is that other MPI nodes can randomly scribble all over the process :) Christoph: It seemed to me you were first talking about freeing/swapping/faulting RDMA'able pages - but would pure migration as a special hardware supported case be useful like Caitlin suggested? Regards, Jason From jbarnes at virtuousgeek.org Wed Feb 13 15:48:49 2008 From: jbarnes at virtuousgeek.org (Jesse Barnes) Date: Wed, 13 Feb 2008 15:48:49 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <866658.37093.qm@web32510.mail.mud.yahoo.com> References: <866658.37093.qm@web32510.mail.mud.yahoo.com> Message-ID: <200802131548.50016.jbarnes@virtuousgeek.org> On Wednesday, February 13, 2008 3:43 pm Kanoj Sarcar wrote: > Oh ok, yes, I did see the discussion on this; sorry I > missed it. I do see what notifiers bring to the table > now (without endorsing it :-)). > > An orthogonal question is this: is IB/rdma the only > "culprit" that elevates page refcounts? Are there no > other subsystems which do a similar thing? > > The example I am thinking about is rawio (Oracle's > mlock'ed SHM regions are handed to rawio, isn't it?). > My understanding of how rawio works in Linux is quite > dated though ... We're doing something similar in the DRM these days... We need big chunks of memory to be pinned so that the GPU can operate on them, but when the operation completes we can allow them to be swappable again. I think with the current implementation, allocations are always pinned, but we'll definitely want to change that soon. Dave?
Jesse From andrea at qumranet.com Wed Feb 13 16:56:54 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 14 Feb 2008 01:56:54 +0100 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <866658.37093.qm@web32510.mail.mud.yahoo.com> References: <866658.37093.qm@web32510.mail.mud.yahoo.com> Message-ID: <20080214005653.GE14146@v2.random> Hi Kanoj, On Wed, Feb 13, 2008 at 03:43:17PM -0800, Kanoj Sarcar wrote: > Oh ok, yes, I did see the discussion on this; sorry I > missed it. I do see what notifiers bring to the table > now (without endorsing it :-)). I'm not really sure livelocks are really the big issue here. I'm running N 1G VM on a 1G ram system, with N-1G swapped out. Combining this with auto-ballooning, rss limiting, and ksm ram sharing, provides really advanced and lowlevel virtualization VM capabilities to the linux kernel while at the same time guaranteeing no oom failures as long as the guest pages are lower than ram+swap (just slower runtime if too many pages are unshared or if the balloons are deflated etc..). Swapping the virtual machine in the host may be more efficient than having the guest swapping over a virtual swap paravirt storage for example. As more management features are added admins will gain more experience in handling those new features and they'll find what's best for them. mmu notifiers and real reliable swapping are the enabler for those more advanced VM features. oom livelocks wouldn't happen anyway with KVM as long as the maximal number of guest physical is lower than RAM. > An orthogonal question is this: is IB/rdma the only > "culprit" that elevates page refcounts? Are there no > other subsystems which do a similar thing? > > The example I am thinking about is rawio (Oracle's > mlock'ed SHM regions are handed to rawio, isn't it?). > My understanding of how rawio works in Linux is quite > dated though ... rawio in flight I/O shall be limited.
As long as each task can't pin more than X ram, and the ram is released when the task is oom killed, and the first get_user_pages/alloc_pages/slab_alloc that returns -ENOMEM takes an oom fail path that returns failure to userland, everything is ok. Even with IB deadlock could only happen if IB would allow unlimited memory to be pinned down by unprivileged users. If IB is insecure and DoSable without mmu notifiers, then I'm not sure how enabling swapping of the IB memory could be enough to fix the DoS. Keep in mind that even tmpfs can't be safe allowing all ram+swap to be allocated in a tmpfs file (despite the tmpfs file storage includes swap and not only ram). Pinning the whole ram+swap with tmpfs livelocks the same way of pinning the whole ram with ramfs. So if you add mmu notifier support to IB, you only need to RDMA an area as large as ram+swap to livelock again as before... no difference at all. I don't think livelocks have anything to do with mmu notifiers (other than deferring the livelock to the "swap+ram" point of no return instead of the current "ram" point of no return). Livelocks have to be solved the usual way: handling alloc_pages/get_user_pages/slab allocation failures with a fail path that returns to userland and allows the ram to be released if the task was selected for oom-killage. The real benefit of the mmu notifiers for IB would be to allow the rdma region to be larger than RAM without triggering the oom killer (or without triggering a livelock if it's DoSable but then the livelock would need fixing to be converted in a regular oom-killing by some other mean not related to the mmu-notifier, it's really an orthogonal problem). So suppose you've a MPI simulation that requires a 10G array and you've only 1G of ram, then you can rdma over 10G like if you had 10G of ram. Things will perform ok only if there's some huge locality of the computations.
For virtualization it's orders of magnitude more useful than for computer clusters but certain simulations really swaps so I don't exclude certain RDMA apps will also need this (dunno about IB). From dwsalpublibm at salpublib.org Wed Feb 13 19:55:14 2008 From: dwsalpublibm at salpublib.org (Shanna Green) Date: Thu, 14 Feb 2008 11:55:14 +0800 Subject: [ofa-general] CanadianPharmacy will satisfy all you pharmaceutical needs. Message-ID: <01c86f00$75607d00$e4431f74@dwsalpublibm> Purchase your meds with Canadian Pharmacy and you will be satisfied with our service. We offer simply the cheapest drugs without compromising quality. Any questions you may have will be answered by our friendly and experienced customer care team. Every piece of information will remain safe with us and you’ll be able to get help in a careful, non-embarrassing way. We pack all the shipments discretely and do our utmost to deliver your order to you quickly. http://geocities.com/solomonbecker39/ Enjoy new saving options with ŤCanadianPharmacyť! Kurt Juhasz From prescott at hpc.ufl.edu Wed Feb 13 21:32:03 2008 From: prescott at hpc.ufl.edu (Craig Prescott) Date: Thu, 14 Feb 2008 00:32:03 -0500 Subject: [ofa-general] SDP performance with bzcopy testing help needed In-Reply-To: References: <47B20F6F.8080302@hpc.ufl.edu> <47B376CB.6050404@hpc.ufl.edu> Message-ID: <47B3D253.7010209@hpc.ufl.edu> Scott Weitzenkamp (sweitzen) wrote: >> But the effect is still clear. >> >> throughput: >> >> 64K 128K 1M >> SDP 7602.40 7560.57 5791.56 >> BZCOPY 5454.20 6378.48 7316.28 >> > > Looks unclear to me. Sometimes BZCOPY does better, sometimes worse. > > Fair enough. While measuring a broader spectrum of message sizes, I noted a big variation in throughput and send service demand for the SDP case as a function of which core/CPU the netperf ran on. Particularly, which CPU the netperf ran on relative to which CPU was handling the interrupts for ib_mthca. 
Netperf has an option (-T) to allow for local and remote cpu binding. So I used it to force the client and server to run on CPU 0. Further, I mapped all ib_mthca interrupts to CPU 1 (irqbalance was already disabled). This appears to have reduced the statistical error between netperf runs to negligible amounts. I'll do more runs to verify this and check out the other permutations, but this is what has come out so far. TPUT = throughput (Mbits/sec) LCL = send service demand (usec/KB) RMT = recv service demand (usec/KB) "-T 0,0" option given to netperf client: SDP BZCOPY -------------------- -------------------- MESGSIZE TPUT LCL RMT TPUT LCL RMT -------- ------- ----- ----- ------- ----- ----- 64K 7581.14 0.746 1.105 5547.66 1.491 1.495 128K 7478.37 0.871 1.116 6429.84 1.282 1.291 256K 7427.38 0.946 1.115 6917.20 1.197 1.201 512K 7310.14 1.122 1.129 7229.13 1.145 1.150 1M 7251.29 1.143 1.129 7457.95 0.996 1.109 2M 7249.27 1.146 1.133 7340.26 0.502 1.105 4M 7217.26 1.156 1.136 7322.63 0.397 1.096 In this case, BZCOPY send service demand is significantly less for the largest message sizes, though the throughput for large messages is not very different. However, with "-T 2,2", the result looks like this: SDP BZCOPY -------------------- -------------------- MESGSIZE TPUT LCL RMT TPUT LCL RMT -------- ------- ----- ----- ------- ----- ----- 64K 7599.40 0.841 1.114 5493.56 1.510 1.585 128K 7556.53 1.039 1.121 6483.12 1.274 1.325 256K 7155.13 1.128 1.180 6996.30 1.180 1.220 512K 5984.26 1.357 1.277 7285.86 1.130 1.166 1M 5641.28 1.443 1.343 7250.43 0.811 1.141 2M 5657.98 1.439 1.387 7265.85 0.492 1.127 4M 5623.94 1.447 1.370 7274.43 0.385 1.112 For BZCOPY, the results are pretty similar; but for SDP, the service demands are much higher, and the throughputs have dropped dramatically relative to "-T 0,0". In either case, though, BZCOPY is more efficient for large messages. 
Cheers, Craig From jackm at dev.mellanox.co.il Wed Feb 13 22:31:08 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 14 Feb 2008 08:31:08 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B29A50.7050400@voltaire.com> <000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> Message-ID: <200802140831.09172.jackm@dev.mellanox.co.il> On Wednesday 13 February 2008 20:08, Sean Hefty wrote: > IMO, the fact that TCP implements reliability doesn't mean it's unnecessary in > underlying layers.  For example, wireless typically adds reliability at the link > layer because the link itself is so unreliable.  If adding in reliability in the > underlying layers improves overall performance, then it makes sense to add it, > independent of the upper level protocol. > > Since RC is our 'link layer', overrunning the receiver doesn't just result in IP > resending the packet, but transitioning the QP into an error state, cleaning up, > re-establishing the connection, and then resending the packet.  This works, just > not well based on what Pradeep has seen. > On the other hand, if the remote host is actually down, you will make "retry storms" worse by retrying both at the link layer AND at the TCP layer (each TCP retry resulting in multiple lower-layer retries). This will have an effect on the fabric. - Jack From a-amylo at agorasi.com Wed Feb 13 23:25:50 2008 From: a-amylo at agorasi.com (Will Lawson) Date: Thu, 14 Feb 2008 14:25:50 +0700 Subject: [ofa-general] Let's chat Message-ID: <01c86f15$7f40ab00$de2b177b@a-amylo> Hello! I am tired tonight. I am nice girl that would like to chat with you. Email me at Alyssa at TheHealCare.info only, because I am using my friend's email to write this. I would like to share some of my pics. 
From pradeeps at linux.vnet.ibm.com Wed Feb 13 23:14:44 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 13 Feb 2008 23:14:44 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <200802140831.09172.jackm@dev.mellanox.co.il> References: <476C2F62.2020900@linux.vnet.ibm.com> <47B29A50.7050400@voltaire.com> <000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> <200802140831.09172.jackm@dev.mellanox.co.il> Message-ID: <47B3EA64.8050100@linux.vnet.ibm.com> Jack Morgenstein wrote: > On Wednesday 13 February 2008 20:08, Sean Hefty wrote: >> IMO, the fact that TCP implements reliability doesn't mean it's unnecessary in >> underlying layers. For example, wireless typically adds reliability at the link >> layer because the link itself is so unreliable. If adding in reliability in the >> underlying layers improves overall performance, then it makes sense to add it, >> independent of the upper level protocol. >> >> Since RC is our 'link layer', overrunning the receiver doesn't just result in IP >> resending the packet, but transitioning the QP into an error state, cleaning up, >> re-establishing the connection, and then resending the packet. This works, just >> not well based on what Pradeep has seen. >> > On the other hand, if the remote host is actually down, you will make "retry storms" > worse by retrying both at the link layer AND at the TCP layer (each TCP retry resulting > in multiple lower-layer retries). This will have an effect on the fabric. If the remote host is down establishment of an RC connection does not arise. The UD connection itself will fail. 
Pradeep From ogerlitz at voltaire.com Thu Feb 14 00:37:29 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 14 Feb 2008 10:37:29 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> References: <476C2F62.2020900@linux.vnet.ibm.com><47B153E8.2090803@voltaire.com><000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B29A50.7050400@voltaire.com> <000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> Message-ID: <47B3FDC9.6050305@voltaire.com> Sean Hefty wrote: > IMO, the fact that TCP implements reliability doesn't mean it's unnecessary in > underlying layers. For example, wireless typically adds reliability at the link > layer because the link itself is so unreliable. If adding in reliability in the > underlying layers improves overall performance, then it makes sense to add it, > independent of the upper level protocol. For applications such as UDP based voice/video streaming, retransmitting this frame over and over might cause degradation in the service. With the current implementation maybe the 1st problem is the RC nature and the retries are only the 2nd but why add them? For link (and below) layers reliability, IB has this 8-to-10 bits encoding, CRCs, L2 credit management, etc. So much is done below the IB transport legs anyway... Or From orenk at dev.mellanox.co.il Thu Feb 14 00:57:22 2008 From: orenk at dev.mellanox.co.il (Oren Kladnitsky) Date: Thu, 14 Feb 2008 10:57:22 +0200 Subject: [ofa-general] [ewg] [ANNOUNCE] ibutils tarball release In-Reply-To: <47B189B0.9060807@mellanox.co.il> References: <20080211175955.GV11526@sashak.voltaire.com> <47B189B0.9060807@mellanox.co.il> Message-ID: <47B40272.8090601@dev.mellanox.co.il> Hi, Ibutils tarball has been added to openfabrics downloads, available in: http://www.openfabrics.org/downloads/ibutils md5sum: a86ce164641e00c2b2c3f2209460c5ef ibutils-1.2.tar.gz This is the recent version of ibutils in OFED_1_3 .
From dwporfectionm at porfection.net Thu Feb 14 01:42:11 2008 From: dwporfectionm at porfection.net (Jennie Jewell) Date: Thu, 14 Feb 2008 11:42:11 +0200 Subject: [ofa-general] Medications that you need. Message-ID: <01c86efe$a2ac2b80$04fdf358@dwporfectionm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Same quality! Save your money, buy pills immediately! http://geocities.com/zaneshelton18/ We provide confidential and secure purchase! From a-alonso at 4usedtires.com Thu Feb 14 03:17:52 2008 From: a-alonso at 4usedtires.com (Angel Rowe) Date: Thu, 14 Feb 2008 13:17:52 +0200 Subject: [ofa-general] I saw your picture Message-ID: <01c86f0c$00934000$b8cfe858@a-alonso> Hello! I am bored today. I am nice girl that would like to chat with you. Email me at Alexis at IndividualImprove.info only, because I am using my friend's email to write this. Would you mind me showing some nice pictures of me? From eli at dev.mellanox.co.il Thu Feb 14 03:15:28 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 14 Feb 2008 13:15:28 +0200 Subject: [ofa-general] [PATCH] IB/ipoib: remove unnecessary allocation Message-ID: <47B422D0.70901@dev.mellanox.co.il> IB/ipoib: remove unnecessary allocation Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index f9b7caa..054fab8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -209,7 +209,6 @@ struct ipoib_cm_tx { unsigned tx_tail; unsigned long flags; u32 mtu; - struct ib_wc ibwc[IPOIB_NUM_WC]; }; struct ipoib_cm_rx_buf { -- 1.5.3.8 From vlad at lists.openfabrics.org Thu Feb 14 03:17:33 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 14 Feb 2008 03:17:33 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080214-0200 daily build status Message-ID: 
<20080214111733.6EDD0E608A8@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.12 Passed on 
ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From tziporet at dev.mellanox.co.il Thu Feb 14 03:32:23 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Thu, 14 Feb 2008 13:32:23 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B3FDC9.6050305@voltaire.com> References: <476C2F62.2020900@linux.vnet.ibm.com><47B153E8.2090803@voltaire.com><000001c86d98$cc66e0d0$ff0da8c0@amr.corp.intel.com> <47B29A50.7050400@voltaire.com> <000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> <47B3FDC9.6050305@voltaire.com> Message-ID: <47B426C7.3030704@mellanox.co.il> Or Gerlitz wrote: > > For applications such as UDP based voice/video streaming, > retransmitting this frame over and over might cause degradation in the > service. With the current implementation maybe the 1st problem is the > RC nature and the retries are only the 2nd but why add them. > > For for link (and below) layers reliability, IB has this 8-to-10 bits > encoding, CRCs, L2 credit management, etc. So much is done below the > IB transport legs anyway... > > Maybe we should increase only the RnR number and not add any retries Tziporet From jackm at dev.mellanox.co.il Thu Feb 14 03:41:29 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 14 Feb 2008 13:41:29 +0200 Subject: [ofa-general] [PATCH]: mlx4: move table_find from fmr_alloc to fmr_enable Message-ID: <200802141341.29577.jackm@dev.mellanox.co.il> mlx4: move table_find from fmr_alloc to fmr_enable. 
mlx4_table_find (for fmr mpt's) requires that ICM memory already be mapped. Before this fix, fmr allocation depended on ICM memory already being mapped for the mpt entry. If all currently mapped entries are taken, the find operation fails (even if the ICM mpt table still had more entries, which were as yet unmapped). This fix moves the mpt find operation to fmr_enable, to guarantee that any required ICM memory mapping has already occurred. Found by: Oren Duer of Mellanox Signed-off-by: Jack Morgenstein Index: ofed_kernel/drivers/infiniband/hw/mlx4/mr.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/hw/mlx4/mr.c 2007-12-05 10:34:47.000000000 +0200 +++ ofed_kernel/drivers/infiniband/hw/mlx4/mr.c 2008-02-14 12:05:08.661414000 +0200 @@ -199,7 +199,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct if (err) goto err_free; - err = mlx4_mr_enable(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr); if (err) goto err_mr; Index: ofed_kernel/drivers/net/mlx4/mr.c =================================================================== --- ofed_kernel.orig/drivers/net/mlx4/mr.c 2007-12-05 10:34:53.000000000 +0200 +++ ofed_kernel/drivers/net/mlx4/mr.c 2008-02-14 12:12:26.302248000 +0200 @@ -578,13 +578,6 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, goto err_free; } - fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table, - key_to_hw_index(fmr->mr.key), NULL); - if (!fmr->mpt) { - err = -ENOMEM; - goto err_free; - } - return 0; err_free: @@ -595,7 +588,19 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { - return mlx4_mr_enable(dev, &fmr->mr); + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + err = mlx4_mr_enable(dev, &fmr->mr); + if (err) + return err; + + fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table, + key_to_hw_index(fmr->mr.key), NULL); + if (!fmr->mpt) + return -ENOMEM; + + return 0; } 
EXPORT_SYMBOL_GPL(mlx4_fmr_enable); From eli at dev.mellanox.co.il Thu Feb 14 05:50:14 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 14 Feb 2008 15:50:14 +0200 Subject: [ofa-general] [PATCH] IB/ipoib: use vmap with allocation of tx ring Message-ID: <47B44716.2010401@dev.mellanox.co.il> IB/ipoib: use vmap with allocation of tx ring With the introduction of s/g support in IPOIB, the size of struct ipoib_tx_buf has increased since it reserves room for the fragments. This caused allocations to fail when large send queues are required. This patch uses an array of pages and maps them with vmap to increase the certainty of the allocation to succeed. Signed-off-by: Eli Cohen --- I used the alloc and free functions as global functions since I think we may use them for other allocations in IPOIB. drivers/infiniband/ulp/ipoib/ipoib.h | 9 +++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 53 ++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index f9b7caa..78a99d6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -186,6 +186,12 @@ enum ipoib_cm_state { IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */ }; +struct ipoib_vmap { + void *ptr; + struct page **page_arr; + int npages; +}; + struct ipoib_cm_rx { struct ib_cm_id *id; struct ib_qp *qp; @@ -293,6 +299,7 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; spinlock_t tx_lock; + struct ipoib_vmap tx_vmap_ring; struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; @@ -458,6 +465,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); void ipoib_drain_cq(struct net_device *dev); +int ipoib_vmalloc(struct ipoib_vmap *buf, int size); +void ipoib_vfree(struct ipoib_vmap *buf); #ifdef CONFIG_INFINIBAND_IPOIB_CM diff
--git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f96477a..f21fd14 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -92,6 +92,49 @@ static struct ib_client ipoib_client = { .remove = ipoib_remove_one }; +int ipoib_vmalloc(struct ipoib_vmap *buf, int size) +{ + int i; + int npages = ALIGN(size, PAGE_SIZE) / PAGE_SIZE; + int ret = -ENOMEM; + + buf->page_arr = kmalloc(npages * sizeof buf->page_arr[0], GFP_KERNEL); + if (!buf->page_arr) + goto out; + + for (i = 0; i < npages; ++i) { + buf->page_arr[i] = alloc_page(GFP_KERNEL); + if (!buf->page_arr[i]) + goto page_fail; + } + + buf->npages = npages; + buf->ptr = vmap(buf->page_arr, buf->npages, VM_MAP, PAGE_KERNEL); + if (!buf->ptr) + goto page_fail; + + memset(buf->ptr, 0, size); + return 0; + +page_fail: + for (; i > 0; --i) + __free_page(buf->page_arr[i - 1]); + + kfree(buf->page_arr); +out: + return ret; +} + +void ipoib_vfree(struct ipoib_vmap *buf) +{ + int i; + + for (i = 0; i < buf->npages; ++i) + __free_page(buf->page_arr[i]); + + kfree(buf->page_arr); +} + int ipoib_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -887,13 +930,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } - priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, - GFP_KERNEL); - if (!priv->tx_ring) { + if (ipoib_vmalloc(&priv->tx_vmap_ring, ipoib_sendq_size * + sizeof *priv->tx_ring)) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } + priv->tx_ring = priv->tx_vmap_ring.ptr; /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ @@ -903,7 +946,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; out_tx_ring_cleanup: - kfree(priv->tx_ring); + ipoib_vfree(&priv->tx_vmap_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); @@ -928,7 
+971,7 @@ void ipoib_dev_cleanup(struct net_device *dev) ipoib_ib_dev_cleanup(dev); kfree(priv->rx_ring); - kfree(priv->tx_ring); + ipoib_vfree(&priv->tx_vmap_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; -- 1.5.3.8 From ogerlitz at voltaire.com Thu Feb 14 06:05:01 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 14 Feb 2008 16:05:01 +0200 Subject: [ofa-general] Re: [ewg] [PATCH] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B44716.2010401@dev.mellanox.co.il> References: <47B44716.2010401@dev.mellanox.co.il> Message-ID: <47B44A8D.30200@voltaire.com> Eli Cohen wrote: > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > +void ipoib_vfree(struct ipoib_vmap *buf) > +{ > + int i; > + > + for (i = 0; i < buf->npages; ++i) > + __free_page(buf->page_arr[i]); > + > + kfree(buf->page_arr); missing vunmap() call here, correct? Or From eli at dev.mellanox.co.il Thu Feb 14 06:11:26 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 14 Feb 2008 16:11:26 +0200 Subject: [ofa-general] Re: [ewg] [PATCH] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B44A8D.30200@voltaire.com> References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> Message-ID: <47B44C0E.4000506@dev.mellanox.co.il> Or Gerlitz wrote: > Eli Cohen wrote: >> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c >> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > >> +void ipoib_vfree(struct ipoib_vmap *buf) >> +{ >> + int i; >> + >> + for (i = 0; i < buf->npages; ++i) >> + __free_page(buf->page_arr[i]); >> + >> + kfree(buf->page_arr); > > missing vunmap() call here, correct? > Oops, I will repost. 
From eli at dev.mellanox.co.il Thu Feb 14 06:13:23 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 14 Feb 2008 16:13:23 +0200 Subject: [ofa-general] [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B44A8D.30200@voltaire.com> References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> Message-ID: <47B44C83.7010201@dev.mellanox.co.il> From 2f1870f76ddbfc948aea4847c25d05ae70dd43cf Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 14 Feb 2008 15:46:33 +0200 Subject: [PATCH] IB/ipoib: use vmap with allocation of tx ring With the introduction of s/g support in IPOIB, the size of struct ipoib_tx_buf has increased since it reserves room for the fragments. This caused allocations to fail when large send queues are required. This patch uses an array of pages and maps them with vmap to increase the certainty of the allocation to succeed. Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib.h | 9 +++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 54 ++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index f9b7caa..78a99d6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -186,6 +186,12 @@ enum ipoib_cm_state { IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */ }; +struct ipoib_vmap { + void *ptr; + struct page **page_arr; + int npages; +}; + struct ipoib_cm_rx { struct ib_cm_id *id; struct ib_qp *qp; @@ -293,6 +299,7 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; spinlock_t tx_lock; + struct ipoib_vmap tx_vmap_ring; struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; @@ -458,6 +465,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); void ipoib_pkey_poll(struct work_struct *work); int ipoib_pkey_dev_delay_open(struct net_device *dev); void ipoib_drain_cq(struct net_device *dev); +int 
ipoib_vmalloc(struct ipoib_vmap *buf, int size); +void ipoib_vfree(struct ipoib_vmap *buf); #ifdef CONFIG_INFINIBAND_IPOIB_CM diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f96477a..3a44a42 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -92,6 +92,50 @@ static struct ib_client ipoib_client = { .remove = ipoib_remove_one }; +int ipoib_vmalloc(struct ipoib_vmap *buf, int size) +{ + int i; + int npages = ALIGN(size, PAGE_SIZE) / PAGE_SIZE; + int ret = -ENOMEM; + + buf->page_arr = kmalloc(npages * sizeof buf->page_arr[0], GFP_KERNEL); + if (!buf->page_arr) + goto out; + + for (i = 0; i < npages; ++i) { + buf->page_arr[i] = alloc_page(GFP_KERNEL); + if (!buf->page_arr[i]) + goto page_fail; + } + + buf->npages = npages; + buf->ptr = vmap(buf->page_arr, buf->npages, VM_MAP, PAGE_KERNEL); + if (!buf->ptr) + goto page_fail; + + memset(buf->ptr, 0, size); + return 0; + +page_fail: + for (; i > 0; --i) + __free_page(buf->page_arr[i - 1]); + + kfree(buf->page_arr); +out: + return ret; +} + +void ipoib_vfree(struct ipoib_vmap *buf) +{ + int i; + + vunmap(buf->ptr); + for (i = 0; i < buf->npages; ++i) + __free_page(buf->page_arr[i]); + + kfree(buf->page_arr); +} + int ipoib_open(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -887,13 +931,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out; } - priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, - GFP_KERNEL); - if (!priv->tx_ring) { + if (ipoib_vmalloc(&priv->tx_vmap_ring, ipoib_sendq_size * + sizeof *priv->tx_ring)) { printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", ca->name, ipoib_sendq_size); goto out_rx_ring_cleanup; } + priv->tx_ring = priv->tx_vmap_ring.ptr; /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ @@ -903,7 +947,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int 
port) return 0; out_tx_ring_cleanup: - kfree(priv->tx_ring); + ipoib_vfree(&priv->tx_vmap_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); @@ -928,7 +972,7 @@ void ipoib_dev_cleanup(struct net_device *dev) ipoib_ib_dev_cleanup(dev); kfree(priv->rx_ring); - kfree(priv->tx_ring); + ipoib_vfree(&priv->tx_vmap_ring); priv->rx_ring = NULL; priv->tx_ring = NULL; -- 1.5.3.8 From tziporet at dev.mellanox.co.il Thu Feb 14 06:57:21 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Thu, 14 Feb 2008 16:57:21 +0200 Subject: [ofa-general] Re: [PATCH] IPOIB/CM Increase retry counts for OFED-1.3 In-Reply-To: <47B226CC.1060706@linux.vnet.ibm.com> References: <47B226CC.1060706@linux.vnet.ibm.com> Message-ID: <47B456D1.7030600@mellanox.co.il> Pradeep Satyanarayana wrote: > This patch change retry counts to small values. This helps interoperability > between ehca and mthca. Without this patch I had seen "send completion errors". > > Or Gerlitz has started a thread on the general mailing list and the complete > discussion will be available there. This is the second part of the patch > submitted yesterday and is split up as per Eli's request. > > Signed-off-by: Pradeep Satyanarayana > --- > > --- ofa_kernel-1.3_a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-02-12 17:46:03.000000000 -0500 > +++ ofa_kernel-1.3_b/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-02-12 17:46:58.000000000 -0500 > @@ -1016,8 +1016,8 @@ static int ipoib_cm_send_req(struct net_ > req.responder_resources = 4; > req.remote_cm_response_timeout = 20; > req.local_cm_response_timeout = 20; > - req.retry_count = 0; /* RFC draft warns against retries */ > - req.rnr_retry_count = 0; /* RFC draft warns against retries */ > + req.retry_count = 3; > + req.rnr_retry_count = 3; > req.max_cm_retries = 15; > req.srq = ipoib_cm_has_srq(dev); > return ib_send_cm_req(id, &req); > > > I wish to see Roland's respond to this patch. 
I think it's enough to enlarge only the rnr_retry_count since the retry_count is for cases where packets are dropped by a very busy subnet. The case you want to solve should be covered by the rnr_retry_count. Regarding UC - we may consider this in the future since in ConnectX we have UC with SRQ support and indeed it will be the best for IPoIB CM Tziporet From alexio1987 at virgilio.it Thu Feb 14 06:53:35 2008 From: alexio1987 at virgilio.it (yournotice@avasmail.com.mv) Date: Thu, 14 Feb 2008 15:53:35 +0100 (GMT+01:00) Subject: [ofa-general] WINNING INFORMATIONS! Message-ID: <118186faf21.alexio1987@virgilio.it> WINNING INFORMATIONS! The Sponsorlotto The Netherlands has awarded your email a cash of 850,000.00 (Eight Hundred and Fifty) Euros in a random email world progamme,attached to qualification number LV345ES Group(B).WINNING INFORMATIONS, Ref Number(42061)Serial Number 16117 lucky Numbers:4224465 Batch Number EU85611. The objectives is to make a notable changes in the standard of living world wide.To claim your award call and email contact address to the agent below. (1) FULL NAMES. (2) DIRECT PHONE Officer in charge:Mr.p.Jonnas TEL: +31- 642603191 Email:europzone2 at aol.com Congratulations! Regards.Ms.Kisa Youla From amar.mudrankit at gmail.com Thu Feb 14 07:05:19 2008 From: amar.mudrankit at gmail.com (Amar Mudrankit) Date: Thu, 14 Feb 2008 20:35:19 +0530 Subject: [ofa-general] IPoIB Bonding with OpenSM Message-ID: I was testing the IPoIB failover/failback using the bonding mechanism with Open SM running in the IB subnet. I observed that the failover does not reliably occur when the IB port is brought down using the "ibportstate" command. The test steps I followed and the test configuration are as follows: Pings to an IPoIB destination were started over the bond0 interface(which is configured as mentioned below). Pings continue properly. Failover to ib1 does not occur when I disconnect port 1 (corresponding to ib0) using $ ibportstate disable command.
In log, I can see the messages kernel: bonding: bond0: link status definitely down for interface ib0, disabling it kernel: bonding: bond0: making interface ib1 the new active one. But, the pings stop. Also, I noticed the process status which shows : PPID PID PGID SID TTY TPGID STAT UID TIME COMMAND 61 2503 1 1 ? -1 D< 0 0:00 [ib_inform] 61 2504 1 1 ? -1 D< 0 0:00 [local_sa] Is this expected ? /etc/infiniband/openib.conf ONBOOT=yes UCM_LOAD=no RDMA_CM_LOAD=yes RDMA_UCM_LOAD=yes RENICE_IB_MAD=no MTHCA_LOAD=yes IPOIB_LOAD=yes SET_IPOIB_CM=yes SDP_LOAD=yes SRP_LOAD=no SRPT_LOAD=no RDS_LOAD=no SRPHA_ENABLE=no IPOIBBOND_ENABLE=yes IPOIB_BONDS=bond0 bond0_IP=100.1.1.13 bond0_SLAVES=ib0,ib1 Source IPoIB m/c (bonding enabled) : OFED-1.3-rc4, RHEL5, MT25208 Destination IPoIB m/c : OFED-1.3-rc4, SLES10, MT25208 I am pinging the IPoIB interface over a machine which is running OpenSM. Has somebody tested this kind of scenario ever or I am missing something? -------------- next part -------------- An HTML attachment was scrubbed... URL: From alexio1987 at virgilio.it Thu Feb 14 06:55:14 2008 From: alexio1987 at virgilio.it (yournotice@avasmail.com.mv) Date: Thu, 14 Feb 2008 15:55:14 +0100 (GMT+01:00) Subject: [ofa-general] WINNING INFORMATIONS! Message-ID: <118187133ea.alexio1987@virgilio.it> WINNING INFORMATIONS! The Sponsorlotto The Netherlands has awarded your email a cash of 850,000.00 (Eight Hundred and Fifty) Euros in a random email world progamme,attached to qualification number LV345ES Group(B).WINNING INFORMATIONS, Ref Number(42061)Serial Number 16117 lucky Numbers:4224465 Batch Number EU85611. The objectives is to make a notable changes in the standard of living world wide.To claim your award call and email contact address to the agent below. (1) FULL NAMES. (2) DIRECT PHONE Officer in charge:Mr.p.Jonnas TEL: +31- 642603191 Email:europzone2 at aol.com Congratulations! 
Regards.Ms.Kisa Youla From swise at opengridcomputing.com Thu Feb 14 07:09:08 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 14 Feb 2008 09:09:08 -0600 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> References: <20080208234302.GH26564@sgi.com><20080208155641.2258ad2c.akpm@linux-foundation.org><20080209012446.GB7051@v2.random><20080209015659.GC7051@v2.random><20080209075556.63062452@bree.surriel.com><47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> Message-ID: <47B45994.7010805@opengridcomputing.com> Felix Marti wrote: > > That is correct, not a change we can make for T3. We could, in theory, > deal with changing mappings though. The change would need to be > synchronized though: the VM would need to tell us which mapping were > about to change and the driver would then need to disable DMA to/from > it, do the change and resume DMA. > Note that for T3, this involves suspending _all_ rdma connections that are in the same PD as the MR being remapped. This is because the driver doesn't know who the application advertised the rkey/stag to. So without that knowledge, all connections that _might_ rdma into the MR must be suspended. If the MR was only setup for local access, then the driver could track the connections with references to the MR and only quiesce those connections. Point being, it will stop probably all connections that an application is using (assuming the application uses a single PD). Steve. 
From jackm at dev.mellanox.co.il Thu Feb 14 07:14:24 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 14 Feb 2008 17:14:24 +0200 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <47B3EA64.8050100@linux.vnet.ibm.com> References: <476C2F62.2020900@linux.vnet.ibm.com> <200802140831.09172.jackm@dev.mellanox.co.il> <47B3EA64.8050100@linux.vnet.ibm.com> Message-ID: <200802141714.25216.jackm@dev.mellanox.co.il> On Thursday 14 February 2008 09:14, Pradeep Satyanarayana wrote: > > On the other hand, if the remote host is actually down, you will make "retry storms" > > worse by retrying both at the link layer AND at the TCP layer (each TCP retry resulting > > in multiple lower-layer retries).  This will have an effect on the fabric. > > If the remote host is down establishment of an RC connection does not arise. The UD > connection itself will fail. > I'm talking about the case where the remote host fails after the connection is established. In this case, RC will continue to retransmit until the rnr_retry count is exhausted. - Jack From hartlch14 at gmail.com Thu Feb 14 07:50:02 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Thu, 14 Feb 2008 10:50:02 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs Message-ID: We are doing performance measurements on an application that is using uDAPL RDMA reads for some large transfers and the BW is less than we expected. The transfers are 4MB and we are seeing BW of 930MiB/sec (DDR). When we do the same transfer size using ib_read_bw we get 1475 MB/sec. On a pair of machines with SDR interfaces, we get 697MiB/sec and 918MB/sec respectively. We expected some overhead from uDAPL, but this much seems excessive. Or is this typical? I looked at the code for dat_ep_post_rdma_read() and compared it to the ib_read_bw() code. The difference is that in uDAPL there is some cookie management and some WR struct setup before it gets around to calling the IB verbs function ibv_post_send(). 
It doesn't look like something that would take much time - about 1.5msec given the numbers above. Are the numbers we are seeing similar to what other users are getting? Do we maybe have a problem with how the uDAPL libraries were built? The DDR machines are X86_64 (uDAPL 1.2) and the SDR machines are PPC64 (uDAPL 2.0.6 ). Thanks, Chuck -------------- next part -------------- An HTML attachment was scrubbed... URL: From holt at sgi.com Thu Feb 14 07:53:33 2008 From: holt at sgi.com (Robin Holt) Date: Thu, 14 Feb 2008 09:53:33 -0600 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <47B45994.7010805@opengridcomputing.com> References: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> Message-ID: <20080214155333.GA1029@sgi.com> On Thu, Feb 14, 2008 at 09:09:08AM -0600, Steve Wise wrote: > Note that for T3, this involves suspending _all_ rdma connections that are > in the same PD as the MR being remapped. This is because the driver > doesn't know who the application advertised the rkey/stag to. So without Is there a reason the driver can not track these. > Point being, it will stop probably all connections that an application is > using (assuming the application uses a single PD). It seems like the need to not stop all would be a compelling enough reason to modify the driver to track which processes have received the rkey/stag. 
Thanks, Robin From swise at opengridcomputing.com Thu Feb 14 08:23:23 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 14 Feb 2008 10:23:23 -0600 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080214155333.GA1029@sgi.com> References: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <20080214155333.GA1029@sgi.com> Message-ID: <47B46AFB.9070009@opengridcomputing.com> Robin Holt wrote: > On Thu, Feb 14, 2008 at 09:09:08AM -0600, Steve Wise wrote: >> Note that for T3, this involves suspending _all_ rdma connections that are >> in the same PD as the MR being remapped. This is because the driver >> doesn't know who the application advertised the rkey/stag to. So without > > Is there a reason the driver can not track these. > Because advertising of a MR (i.e., telling the peer about your rkey/stag, offset and length) is application-specific and can be done out of band, or in band as simple SEND/RECV payload. Either way, the driver has no way of tracking this because the protocol used is application-specific. >> Point being, it will stop probably all connections that an application is >> using (assuming the application uses a single PD). > > It seems like the need to not stop all would be a compelling enough reason > to modify the driver to track which processes have received the rkey/stag. > Yes, _if_ the driver could track this. And _if_ the rdma API and paradigm was such that the kernel/driver could keep track, then remote revocations of MR tags could be supported.
Stevo From pradeeps at linux.vnet.ibm.com Thu Feb 14 08:59:13 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Thu, 14 Feb 2008 08:59:13 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <200802141714.25216.jackm@dev.mellanox.co.il> References: <476C2F62.2020900@linux.vnet.ibm.com> <200802140831.09172.jackm@dev.mellanox.co.il> <47B3EA64.8050100@linux.vnet.ibm.com> <200802141714.25216.jackm@dev.mellanox.co.il> Message-ID: <47B47361.5010208@linux.vnet.ibm.com> Jack Morgenstein wrote: > On Thursday 14 February 2008 09:14, Pradeep Satyanarayana wrote: >>> On the other hand, if the remote host is actually down, you will make "retry storms" >>> worse by retrying both at the link layer AND at the TCP layer (each TCP retry resulting >>> in multiple lower-layer retries). This will have an effect on the fabric. >> If the remote host is down establishment of an RC connection does not arise. The UD >> connection itself will fail. >> > I'm talking about the case where the remote host fails after the connection is established. > In this case, RC will continue to retransmit until the rnr_retry count is exhausted. Remote host failing should itself be a rare occurrence, and the value for retry=3 is small enough that it should have minimal effects if any. More over that should have only a transient effect. Pradeep From pradeeps at linux.vnet.ibm.com Thu Feb 14 09:03:28 2008 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Thu, 14 Feb 2008 09:03:28 -0800 Subject: [ofa-general] Re: [PATCH] IPOIB/CM Increase retry counts for OFED-1.3 In-Reply-To: <47B456D1.7030600@mellanox.co.il> References: <47B226CC.1060706@linux.vnet.ibm.com> <47B456D1.7030600@mellanox.co.il> Message-ID: <47B47460.4080700@linux.vnet.ibm.com> Tziporet Koren wrote: > Pradeep Satyanarayana wrote: >> This patch change retry counts to small values. This helps >> interoperability >> between ehca and mthca. 
Without this patch I had seen "send completion >> errors". >> >> Or Gerlitz has started a thread on the general mailing list and the >> complete >> discussion will be available there. This is the second part of the patch >> submitted yesterday and is split up as per Eli's request. >> >> Signed-off-by: Pradeep Satyanarayana >> --- >> >> --- ofa_kernel-1.3_a/drivers/infiniband/ulp/ipoib/ipoib_cm.c >> 2008-02-12 17:46:03.000000000 -0500 >> +++ ofa_kernel-1.3_b/drivers/infiniband/ulp/ipoib/ipoib_cm.c >> 2008-02-12 17:46:58.000000000 -0500 >> @@ -1016,8 +1016,8 @@ static int ipoib_cm_send_req(struct net_ >> req.responder_resources = 4; >> req.remote_cm_response_timeout = 20; >> req.local_cm_response_timeout = 20; >> - req.retry_count = 0; /* RFC draft warns against retries */ >> - req.rnr_retry_count = 0; /* RFC draft warns against >> retries */ >> + req.retry_count = 3; >> + req.rnr_retry_count = 3; >> req.max_cm_retries = 15; >> req.srq = ipoib_cm_has_srq(dev); >> return ib_send_cm_req(id, &req); >> >> >> > > I wish to see Roland's respond to this patch. > I think its enough to enlarge only the rnr_retry_count since the > retry_count is for cases were packages are dropped by a very busy subnet. > The case you want to solve should be covered by the rnr_retry_count. > The case that I saw was the other way round. The sender saw "send completion errors" and that was solved by changing the retry_count. The rnr_retry_count was added to cover any other corner cases I had not seen (Table 78 in the spec). 
Pradeep From ardavis at ichips.intel.com Thu Feb 14 09:21:28 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 14 Feb 2008 09:21:28 -0800 Subject: [ofa-general] Re: [PATCH 3/5 v2] [DAPL v2] Fix off-by-one with ia_name In-Reply-To: <20080213230017.16680.89345.stgit@b64-10.internal.keyresearch.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> <20080213230017.16680.89345.stgit@b64-10.internal.keyresearch.com> Message-ID: <47B47898.4020607@ichips.intel.com> Patrick Marchand Latifi wrote: > Make sure we stay within bounds when manipulating the ia_name. > > Signed-off-by: Patrick Marchand Latifi > --- > > dat/udat/udat.c | 6 ++---- > 1 files changed, 2 insertions(+), 4 deletions(-) > > diff --git a/dat/udat/udat.c b/dat/udat/udat.c > index bb1c580..0be4c33 100755 > --- a/dat/udat/udat.c > +++ b/dat/udat/udat.c > @@ -184,7 +184,7 @@ dat_ia_openv ( > > len = dat_os_strlen (name); > > - if ( DAT_NAME_MAX_LENGTH < len ) > + if ( DAT_NAME_MAX_LENGTH <= len ) > { > return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); > } > @@ -200,7 +200,6 @@ dat_ia_openv ( > } > > dat_os_strncpy (info.ia_name, name, len); > - info.ia_name[len] = '\0'; strlen does not include terminating NULL byte and strncpy will copy no more then len. Revising patch, adding len+1 to get NULL byte with strncpy. 
Here is a new patch for DAPL v2.0: Signed-off by: Arlin Davis diff --git a/dat/udat/udat.c b/dat/udat/udat.c index bb1c580..f3194b0 100755 --- a/dat/udat/udat.c +++ b/dat/udat/udat.c @@ -184,7 +184,7 @@ dat_ia_openv ( len = dat_os_strlen (name); - if ( DAT_NAME_MAX_LENGTH < len ) + if ( DAT_NAME_MAX_LENGTH <= len ) { return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); } @@ -199,8 +199,7 @@ dat_ia_openv ( return DAT_ERROR (DAT_INVALID_STATE, 0); } - dat_os_strncpy (info.ia_name, name, len); - info.ia_name[len] = '\0'; + dat_os_strncpy (info.ia_name, name, len+1); info.dapl_version_major = dapl_major; info.dapl_version_minor = dapl_minor; @@ -324,10 +323,9 @@ dat_ia_close ( len = dat_os_strlen (ia_name); - dat_os_assert ( len <= DAT_NAME_MAX_LENGTH ); + dat_os_assert ( len < DAT_NAME_MAX_LENGTH ); - dat_os_strncpy (info.ia_name, ia_name, len); - info.ia_name[len] = '\0'; + dat_os_strncpy (info.ia_name, ia_name, len+1); info.dapl_version_major = provider_attr.dapl_version_major; info.dapl_version_minor = provider_attr.dapl_version_minor; From martynd at imagineersystems.com Thu Feb 14 09:28:10 2008 From: martynd at imagineersystems.com (Martyn Drake) Date: Thu, 14 Feb 2008 17:28:10 +0000 Subject: [ofa-general] Aliasing eth0/1 to ib0/ib1 (or changing them completely) Message-ID: Hello, I'm in the process of setting up IPoIB and have the requirement of using eth0 as the princpal interface rather than ib0 or ib1. Is there any way of persuading the OFED installation of using a different interface rather than the default interfaces? 
Regards, Martyn From ardavis at ichips.intel.com Thu Feb 14 09:25:09 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 14 Feb 2008 09:25:09 -0800 Subject: [ofa-general] Re: [PATCH 4/6] [DAPL v1] fix off-by-one with ia_name In-Reply-To: <20080213225808.16508.16794.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> <20080213225808.16508.16794.stgit@b64-10.internal.keyresearch.com> Message-ID: <47B47975.4080306@ichips.intel.com> Patrick Marchand Latifi wrote: > Make sure we stay within bounds when manipulating the ia_name. > > Signed-off-by: Patrick Marchand Latifi > --- > > dat/udat/udat.c | 6 ++---- > 1 files changed, 2 insertions(+), 4 deletions(-) > > diff --git a/dat/udat/udat.c b/dat/udat/udat.c > index e458441..c57d421 100644 > --- a/dat/udat/udat.c > +++ b/dat/udat/udat.c > @@ -181,7 +181,7 @@ dat_ia_openv ( > > len = dat_os_strlen (name); > > - if ( DAT_NAME_MAX_LENGTH < len ) > + if ( DAT_NAME_MAX_LENGTH <= len ) > { > return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); > } > @@ -197,7 +197,6 @@ dat_ia_openv ( > } > > dat_os_strncpy (info.ia_name, name, len); > - info.ia_name[len] = '\0'; Same as DAPL 2.0 patch. strlen does not include terminating NULL byte and strncpy will copy no more than len. Revising patch, adding len+1 to get NULL byte with strncpy.
Here is a new patch for DAPL v1.2: Signed-off by: Arlin Davis diff --git a/dat/udat/udat.c b/dat/udat/udat.c index e458441..cac1b93 100644 --- a/dat/udat/udat.c +++ b/dat/udat/udat.c @@ -181,7 +181,7 @@ dat_ia_openv ( len = dat_os_strlen (name); - if ( DAT_NAME_MAX_LENGTH < len ) + if ( DAT_NAME_MAX_LENGTH <= len ) { return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); } @@ -196,8 +196,7 @@ dat_ia_openv ( return DAT_ERROR (DAT_INVALID_STATE, 0); } - dat_os_strncpy (info.ia_name, name, len); - info.ia_name[len] = '\0'; + dat_os_strncpy (info.ia_name, name, len+1); info.dapl_version_major = dapl_major; info.dapl_version_minor = dapl_minor; @@ -301,10 +300,9 @@ dat_ia_close ( len = dat_os_strlen (ia_name); - dat_os_assert ( len <= DAT_NAME_MAX_LENGTH ); + dat_os_assert ( len < DAT_NAME_MAX_LENGTH ); - dat_os_strncpy (info.ia_name, ia_name, len); - info.ia_name[len] = '\0'; + dat_os_strncpy (info.ia_name, ia_name, len+1); info.dapl_version_major = provider_attr.dapl_version_major; info.dapl_version_minor = provider_attr.dapl_version_minor; From caitlin.bestler at gmail.com Thu Feb 14 09:48:52 2008 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Thu, 14 Feb 2008 09:48:52 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <47B46AFB.9070009@opengridcomputing.com> References: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <20080214155333.GA1029@sgi.com> <47B46AFB.9070009@opengridcomputing.com> Message-ID: <469958e00802140948j162cc8baqae0b55cd6fb1cd22@mail.gmail.com> On Thu, Feb 14, 2008 at 8:23 AM, Steve Wise wrote: > Robin Holt wrote: > > On Thu, Feb 14, 2008 at 09:09:08AM -0600, Steve Wise wrote: > >> Note that for T3, this involves suspending _all_ rdma connections that are > >> in the same PD as the MR being remapped. This is because the driver > >> doesn't know who the application advertised the rkey/stag to. 
So without > > > > Is there a reason the driver can not track these. > > > > Because advertising of a MR (ie telling the peer about your rkey/stag, > offset and length) is application-specific and can be done out of band, > or in band as simple SEND/RECV payload. Either way, the driver has no > way of tracking this because the protocol used is application-specific. > > I fully agree. If there is one important thing about RDMA and other fastpath solutions that must be understood is that the driver does not see the payload. This is a fundamental strength, but it means that you have to identify what if any intercept points there are in advance. You also raise a good point on the scope of any suspend/resume API. Device reporting of this capability would not be a simple boolean, but more of a suspend/resume scope. A minimal scope would be any connection that actually attempts to use the suspended MR. Slightly wider would be any connection *allowed* to use the MR, which could expand all the way to any connection under the same PD. Convievably I could imagine an RDMA device reporting that it could support suspend/ resume, but only at the scope of the entire device. But even at such a wide scope, suspend/resume could be useful to a Memory Manager. The pages could be fully migrated to the new location, and the only work that was still required during the critical suspend/resume region was to actually shift to the new map. That might be short enough that not accepting *any* incoming RDMA packet would be acceptable. And if the goal is to replace a memory card the alternative might be migrating the applications to other physical servers, which would mean a much longer period of not accepting incoming RDMA packets. But the broader question is what the goal is here. Allowing memory to be shuffled is valuable, and perhaps even ultimately a requirement for high availability systems. RDMA and other direct-access APIs should be evolving their interfaces to accommodate these needs. 
Oversubscribing memory is a totally different matter. If an application is working with memory that is oversubscribed by a factor of 2 or more can it really benefit from zero-copy direct placement? At first glance I can't see what RDMA could be bringing of value when the overhead of swapping is going to be that large. If it really does make sense, then explicitly registering the portion of memory that should be enabled to receive incoming traffic while the application is swapped out actually makes sense. Current Memory Registration methods force applications to either register too much or too often. They register too much when the cost of registration is high, and the application responds by registering its entire buffer pool permanently. This is a problem when it overstates the amount of memory that the application needs to have resident, or when the device imposes limits on the size of memory maps that it can know. The alternative is to register too often, that is on a per-operation basis. To me that suggests the solutions lie in making it more reasonable to register more memory, or in making it practical to register memory on-the-fly on a per-operation basis with low enough overhead that applications don't feel the need to build elaborate registration caching schemes. As has been pointed out a few times in this thread, the RDMA and transport layers simply do not have enough information to know which portion of registered memory *really* had to be registered. So any back-pressure scheme where the Memory Manager is asking for pinned memory to be "given back" would have to go all the way to the application. Only the application knows what it is "really" using. I also suspect that most applications that are interested in using RDMA would rather be told they can allocate 200M indefinitely (and with real memory backing it) than be given 1GB of virtual memory that is backed by 200-300M of physical memory, especially if it meant dealing with memory pressure upcalls. 
> >> Point being, it will stop probably all connections that an application is > >> using (assuming the application uses a single PD). > > > > It seems like the need to not stop all would be a compelling enough reason > > to modify the driver to track which processes have received the rkey/stag. > > > > Yes, _if_ the driver could track this. > > And _if_ the rdma API and paradigm was such that the kernel/driver could > keep track, then remote revokations of MR tags could be supported. > > Stevo > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From ardavis at ichips.intel.com Thu Feb 14 09:51:41 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 14 Feb 2008 09:51:41 -0800 Subject: [ofa-general] Re: [PATCH][DAPL v2] misc fixes In-Reply-To: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> Message-ID: <47B47FAD.308@ichips.intel.com> Patrick Marchand Latifi wrote: > Hi all, > > Here's a set of patches for dapl 2.0.x. Thanks, v2.0 patches applied. From ardavis at ichips.intel.com Thu Feb 14 09:51:09 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 14 Feb 2008 09:51:09 -0800 Subject: [ofa-general] Re: [PATCH][DAPL v1] misc fixes In-Reply-To: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> Message-ID: <47B47F8D.4060904@ichips.intel.com> Patrick Marchand Latifi wrote: > Hi all, > > Here's a set of patches for dapl 1.2.x. Thanks, v1.2 patches applied. 
From patrick.latifi at qlogic.com Thu Feb 14 10:11:47 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Thu, 14 Feb 2008 10:11:47 -0800 Subject: [ofa-general] Re: [PATCH 4/6] [DAPL v1] fix off-by-one with ia_name In-Reply-To: <47B47975.4080306@ichips.intel.com> References: <20080213225747.16508.39423.stgit@b64-10.internal.keyresearch.com> <20080213225808.16508.16794.stgit@b64-10.internal.keyresearch.com> <47B47975.4080306@ichips.intel.com> Message-ID: <20080214181146.GC9845@jet.pathscale.com> You're right. Thanks for catching that. -pat On Thu, Feb 14, 2008 at 09:25:09AM -0800, Arlin Davis wrote: > Patrick Marchand Latifi wrote: > >Make sure we stay within bounds when manipulating the ia_name. > > > >Signed-off-by: Patrick Marchand Latifi > >--- > > > > dat/udat/udat.c | 6 ++---- > > 1 files changed, 2 insertions(+), 4 deletions(-) > > > >diff --git a/dat/udat/udat.c b/dat/udat/udat.c > >index e458441..c57d421 100644 > >--- a/dat/udat/udat.c > >+++ b/dat/udat/udat.c > >@@ -181,7 +181,7 @@ dat_ia_openv ( > > > > len = dat_os_strlen (name); > > > >- if ( DAT_NAME_MAX_LENGTH < len ) > >+ if ( DAT_NAME_MAX_LENGTH <= len ) > > { > > return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); > > } > >@@ -197,7 +197,6 @@ dat_ia_openv ( > > } > > > > dat_os_strncpy (info.ia_name, name, len); > >- info.ia_name[len] = '\0'; > > Same as DAPL 2.0 patch. > > strlen does not include terminating NULL byte and strncpy > will copy no more then len. Revising patch, adding len+1 > to get NULL byte with strncpy. 
> > Here is a new patch for DAPL v1.2: > > Signed-off by: Arlin Davis > > diff --git a/dat/udat/udat.c b/dat/udat/udat.c > index e458441..cac1b93 100644 > --- a/dat/udat/udat.c > +++ b/dat/udat/udat.c > @@ -181,7 +181,7 @@ dat_ia_openv ( > > len = dat_os_strlen (name); > > - if ( DAT_NAME_MAX_LENGTH < len ) > + if ( DAT_NAME_MAX_LENGTH <= len ) > { > return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); > } > @@ -196,8 +196,7 @@ dat_ia_openv ( > return DAT_ERROR (DAT_INVALID_STATE, 0); > } > > - dat_os_strncpy (info.ia_name, name, len); > - info.ia_name[len] = '\0'; > + dat_os_strncpy (info.ia_name, name, len+1); > > info.dapl_version_major = dapl_major; > info.dapl_version_minor = dapl_minor; > @@ -301,10 +300,9 @@ dat_ia_close ( > > len = dat_os_strlen (ia_name); > > - dat_os_assert ( len <= DAT_NAME_MAX_LENGTH ); > + dat_os_assert ( len < DAT_NAME_MAX_LENGTH ); > > - dat_os_strncpy (info.ia_name, ia_name, len); > - info.ia_name[len] = '\0'; > + dat_os_strncpy (info.ia_name, ia_name, len+1); > > info.dapl_version_major = provider_attr.dapl_version_major; > info.dapl_version_minor = provider_attr.dapl_version_minor; From patrick.latifi at qlogic.com Thu Feb 14 10:15:28 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Thu, 14 Feb 2008 10:15:28 -0800 Subject: [ofa-general] Re: [PATCH 3/5 v2] [DAPL v2] Fix off-by-one with ia_name In-Reply-To: <47B47898.4020607@ichips.intel.com> References: <20080213230001.16680.69565.stgit@b64-10.internal.keyresearch.com> <20080213230017.16680.89345.stgit@b64-10.internal.keyresearch.com> <47B47898.4020607@ichips.intel.com> Message-ID: <20080214181528.GD9845@jet.pathscale.com> I agree with this patch also. -pat On Thu, Feb 14, 2008 at 09:21:28AM -0800, Arlin Davis wrote: > Patrick Marchand Latifi wrote: > >Make sure we stay within bounds when manipulating the ia_name. 
> > > >Signed-off-by: Patrick Marchand Latifi > >--- > > > > dat/udat/udat.c | 6 ++---- > > 1 files changed, 2 insertions(+), 4 deletions(-) > > > >diff --git a/dat/udat/udat.c b/dat/udat/udat.c > >index bb1c580..0be4c33 100755 > >--- a/dat/udat/udat.c > >+++ b/dat/udat/udat.c > >@@ -184,7 +184,7 @@ dat_ia_openv ( > > > > len = dat_os_strlen (name); > > > >- if ( DAT_NAME_MAX_LENGTH < len ) > >+ if ( DAT_NAME_MAX_LENGTH <= len ) > > { > > return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); > > } > >@@ -200,7 +200,6 @@ dat_ia_openv ( > > } > > > > dat_os_strncpy (info.ia_name, name, len); > >- info.ia_name[len] = '\0'; > > strlen does not include terminating NULL byte and strncpy > will copy no more then len. Revising patch, adding len+1 > to get NULL byte with strncpy. > > Here is a new patch for DAPL v2.0: > > Signed-off by: Arlin Davis > > diff --git a/dat/udat/udat.c b/dat/udat/udat.c > index bb1c580..f3194b0 100755 > --- a/dat/udat/udat.c > +++ b/dat/udat/udat.c > @@ -184,7 +184,7 @@ dat_ia_openv ( > > len = dat_os_strlen (name); > > - if ( DAT_NAME_MAX_LENGTH < len ) > + if ( DAT_NAME_MAX_LENGTH <= len ) > { > return DAT_ERROR (DAT_INVALID_PARAMETER, DAT_INVALID_ARG1); > } > @@ -199,8 +199,7 @@ dat_ia_openv ( > return DAT_ERROR (DAT_INVALID_STATE, 0); > } > > - dat_os_strncpy (info.ia_name, name, len); > - info.ia_name[len] = '\0'; > + dat_os_strncpy (info.ia_name, name, len+1); > > info.dapl_version_major = dapl_major; > info.dapl_version_minor = dapl_minor; > @@ -324,10 +323,9 @@ dat_ia_close ( > > len = dat_os_strlen (ia_name); > > - dat_os_assert ( len <= DAT_NAME_MAX_LENGTH ); > + dat_os_assert ( len < DAT_NAME_MAX_LENGTH ); > > - dat_os_strncpy (info.ia_name, ia_name, len); > - info.ia_name[len] = '\0'; > + dat_os_strncpy (info.ia_name, ia_name, len+1); > > info.dapl_version_major = provider_attr.dapl_version_major; > info.dapl_version_minor = provider_attr.dapl_version_minor; > From rdreier at cisco.com Thu Feb 14 10:15:23 2008 From: rdreier at 
cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 10:15:23 -0800 Subject: [ofa-general] Re: [PATCH] IPoIB: on pkey_change event, invoke dev_stop() before doing delay_open() In-Reply-To: <200802131623.50313.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Wed, 13 Feb 2008 16:23:50 +0200") References: <200802131623.50313.jackm@dev.mellanox.co.il> Message-ID: thanks, applied From gormandizers at munsil.org Thu Feb 14 10:17:25 2008 From: gormandizers at munsil.org (Bret Holmes) Date: Thu, 14 Feb 2008 18:17:25 -0000 Subject: [ofa-general] Intuit Quickbooks Premier 2007 for XP, Vis+a 79. Retail 381 :save 2179: Message-ID: <000901c86f33$d05f9780$0100007f@fnphwnd> intuit quicken premier 2008 - 29 ms windows 2003 enterprise server - 69 adobe golive cs2 - 49 sas jmp statistical discovery 7 - 129 wri+e :buyadobenow. com: |n Interne+ Explorer Take 0ff : before you wri+e |n Interne+ Explorer parallels desktop 3.0 for mac - 29 alias maya 7.0 unlimited - 109 sony sound forge 9.0 - 49 adobe photoshop cs3 extended - 89 From ardavis at ichips.intel.com Thu Feb 14 10:25:45 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 14 Feb 2008 10:25:45 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: Message-ID: <47B487A9.3060705@ichips.intel.com> Chuck Hartley wrote: > We are doing performance measurements on an application that is using > uDAPL RDMA reads for some large transfers and the BW is less than we > expected. The transfers are 4MB and we are seeing BW of 930MiB/sec > (DDR). When we do the same transfer size using ib_read_bw we get 1475 > MB/sec. On a pair of machines with SDR interfaces, we get 697MiB/sec > and 918MB/sec respectively. > > We expected some overhead from uDAPL, but this much seems excessive. Or > is this typical? I looked at the code for dat_ep_post_rdma_read() and > compared it to the ib_read_bw() code. 
The difference is that in uDAPL > there is some cookie management and some WR struct setup before it gets > around to calling the IB verbs function ibv_post_send(). It doesn't > look like something that would take much time - about 1.5msec given the > numbers above. > Your performance should be very close to the verbs rdma bandwidth tests, especially at 4MB. Can you try dapltest bw test suite to compare? server: dapltest client: dapltest -T P -m p -d -s cst-50-ib0 -i 100 RR 4096000 Here are my numbers (Intel x86_64 and DDR): Total Time : 0.31 sec Total Data Exchanged : 390.62 MB CPU Utilization : 25.80 Operation Throughput : 319.65 ops/sec Bandwidth : 1248.66 MB/sec -arlin From sean.hefty at intel.com Thu Feb 14 10:26:44 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 14 Feb 2008 10:26:44 -0800 Subject: [ofa-general] IPOIB/CM increase retry counts In-Reply-To: <200802140831.09172.jackm@dev.mellanox.co.il> References: <476C2F62.2020900@linux.vnet.ibm.com><47B29A50.7050400@voltaire.com><000301c86e6b$69c45650$57e1180a@amr.corp.intel.com> <200802140831.09172.jackm@dev.mellanox.co.il> Message-ID: <000601c86f37$26ba3100$a937170a@amr.corp.intel.com> >On the other hand, if the remote host is actually down, you will make "retry >storms" >worse by retrying both at the link layer AND at the TCP layer (each TCP retry >resulting >in multiple lower-layer retries). This will have an effect on the fabric. I don't think I would call retrying a send a few more times a storm; it's a point to point send. When the remote host drops, the first thing IPoIB will do is try to reconnect, which involves sending CM MADs to the unavailable node in an effort to reestablish the connection anyway. I don't think we try optimizing for the case when systems crash. In any case, I thought the problem was more related to RNR Nacks than simple retries, but that doesn't seem to be the case.
- Sean From rdreier at cisco.com Thu Feb 14 10:27:32 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 10:27:32 -0800 Subject: [ofa-general] [PATCH] for-2.6.25: rdma/cm: do not issue MRA ifuser rejects connection request In-Reply-To: <000601c86e93$66d2ad70$ff0da8c0@amr.corp.intel.com> (Sean Hefty's message of "Wed, 13 Feb 2008 14:54:33 -0800") References: <000501c86e90$831d4a60$ff0da8c0@amr.corp.intel.com> <000601c86e93$66d2ad70$ff0da8c0@amr.corp.intel.com> Message-ID: OK, makes sense, and I would apply this, but it conflicts with the change from 45d9478d ("RDMA/cma: Reenable device removal on passive side"). Can you regenerate the patch against a current tree and resend? I'm not sure where the new code should go relative to the cma_enable_remove() code so I don't want to try and merge it myself. From rdreier at cisco.com Thu Feb 14 10:31:22 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 10:31:22 -0800 Subject: [ofa-general] Re: [PATCH] IB/ipoib: remove unnecessary allocation In-Reply-To: <47B422D0.70901@dev.mellanox.co.il> (Eli Cohen's message of "Thu, 14 Feb 2008 13:15:28 +0200") References: <47B422D0.70901@dev.mellanox.co.il> Message-ID: this didn't apply to a current kernel tree but I fixed it up by hand. From rdreier at cisco.com Thu Feb 14 10:34:24 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 10:34:24 -0800 Subject: [ofa-general] Re: [PATCH]: mlx4: move table_find from fmr_alloc to fmr_enable In-Reply-To: <200802141341.29577.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Thu, 14 Feb 2008 13:41:29 +0200") References: <200802141341.29577.jackm@dev.mellanox.co.il> Message-ID: thanks, applied.
From rdreier at cisco.com Thu Feb 14 10:45:44 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 10:45:44 -0800 Subject: [ofa-general] Re: [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B44C83.7010201@dev.mellanox.co.il> (Eli Cohen's message of "Thu, 14 Feb 2008 16:13:23 +0200") References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> Message-ID: Not sure I understand this: > +int ipoib_vmalloc(struct ipoib_vmap *buf, int size) how is this any different than the vmalloc() that already exists? Why couldn't you just use vmalloc()? From rdreier at cisco.com Thu Feb 14 10:48:19 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 10:48:19 -0800 Subject: [ofa-general] Re: [PATCH]: mlx4: move table_find from fmr_alloc to fmr_enable In-Reply-To: <200802141341.29577.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Thu, 14 Feb 2008 13:41:29 +0200") References: <200802141341.29577.jackm@dev.mellanox.co.il> Message-ID: by the way, it seems we never release ICM table entries when freeing MPTs. Does the patch below make sense to you? 
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 679dfdb..3ffce7a 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -287,6 +287,8 @@ void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) (dev->caps.num_mpts - 1)); if (err) mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err); + + mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key)); } mlx4_mtt_cleanup(dev, &mr->mtt); From eli at dev.mellanox.co.il Thu Feb 14 11:01:09 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 14 Feb 2008 21:01:09 +0200 Subject: [ofa-general] Re: [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> Message-ID: <47B48FF5.4040102@dev.mellanox.co.il> Roland Dreier wrote: > Not sure I understand this: > > > +int ipoib_vmalloc(struct ipoib_vmap *buf, int size) > > how is this any different than the vmalloc() that already exists? Why > couldn't you just use vmalloc()? To be honest I don't have an answer. The thought of this passed my mind but yet it has an exported API in the kernel - I wonder why. So what do you think of using vmalloc in this case? From clameter at sgi.com Thu Feb 14 11:35:37 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 11:35:37 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <866658.37093.qm@web32510.mail.mud.yahoo.com> References: <866658.37093.qm@web32510.mail.mud.yahoo.com> Message-ID: On Wed, 13 Feb 2008, Kanoj Sarcar wrote: > Oh ok, yes, I did see the discussion on this; sorry I > missed it. I do see what notifiers bring to the table > now (without endorsing it :-)). > > An orthogonal question is this: is IB/rdma the only > "culprit" that elevates page refcounts? Are there no > other subsystems which do a similar thing? 
Yes there are actually two projects by SGI that also ran into the same issue that motivated the work on this. One is XPmem which allows sharing of process memory between different Linux instances and then there is the GRU which is a kind of DMA engine. Then there is KVM and probably multiple other drivers. From timhadeen33 at gmail.com Thu Feb 14 11:38:43 2008 From: timhadeen33 at gmail.com (Tim Hadeen) Date: Thu, 14 Feb 2008 13:38:43 -0600 Subject: [ofa-general] ib_srp: Got failed path rec status -110 Message-ID: Hello, We are seeing above messages in dmesg on server. Would like to know what causes this message? Thanks Tim -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Thu Feb 14 11:38:35 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 11:38:35 -0800 Subject: [ofa-general] Re: [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B48FF5.4040102@dev.mellanox.co.il> (Eli Cohen's message of "Thu, 14 Feb 2008 21:01:09 +0200") References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B48FF5.4040102@dev.mellanox.co.il> Message-ID: > To be honest I don't have an answer. The thought of this passed my mind but > yet it has an exported API in the kernel - I wonder why. So what do you > think of using vmalloc in this case? If you're going to use vmap(), then you might as well use vmalloc(). the issue is with consuming address space, which is very limited on 32-bit systems (there is often less than 128 MB of vmalloc available total). However in this case it is probably OK. I guess we don't want to allocate these structures independently and take another pointer deref for every send -- although I would be curious to know if it actually costs much. - R. 
From clameter at sgi.com Thu Feb 14 11:39:29 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 11:39:29 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <47B45994.7010805@opengridcomputing.com> References: <20080208234302.GH26564@sgi.com><20080208155641.2258ad2c.akpm@linux-foundation.org><20080209012446.GB7051@v2.random><20080209015659.GC7051@v2.random><20080209075556.63062452@bree.surriel.com><47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> Message-ID: On Thu, 14 Feb 2008, Steve Wise wrote: > Note that for T3, this involves suspending _all_ rdma connections that are in > the same PD as the MR being remapped. This is because the driver doesn't know > who the application advertised the rkey/stag to. So without that knowledge, > all connections that _might_ rdma into the MR must be suspended. If the MR > was only setup for local access, then the driver could track the connections > with references to the MR and only quiesce those connections. > > Point being, it will stop probably all connections that an application is > using (assuming the application uses a single PD). Right but if the system starts reclaiming pages of the application then we have a memory shortage. So the user should address that by not running other apps concurrently. The stopping of all connections is still better than the VM getting into major trouble. And the stopping of connections in order to move the process memory into a more advantageous memory location (f.e. using page migration) or stopping of connections in order to be able to move the process memory out of a range of failing memory is certainly good. From akepner at sgi.com Thu Feb 14 11:44:10 2008 From: akepner at sgi.com (akepner at sgi.com) Date: Thu, 14 Feb 2008 11:44:10 -0800 Subject: [ofa-general] OFED 1.3 support for SLES10 SP2? 
Message-ID: <20080214194410.GQ9894@sgi.com> We (SGI) recently received an early (beta) version of SLES10 SP2. We can't build OFED 1.3 against this, since the correct backport/addon patches aren't yet available. This is a bit of a problem for us, as it's preventing us from refreshing the kernel in our development tree. It's probably too much to ask that OFED patches are available for an as-yet-unreleased kernel, but when would patches for an updated SLES (or other) distribution usually become available? Would it happen before general availability or soon after, or....? Thanks for any info. -- Arthur From dwilder at us.ibm.com Thu Feb 14 11:50:38 2008 From: dwilder at us.ibm.com (David Wilder) Date: Thu, 14 Feb 2008 11:50:38 -0800 Subject: [ofa-general] [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B44C83.7010201@dev.mellanox.co.il> References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> Message-ID: <47B49B8E.7070607@us.ibm.com> Eli Cohen wrote: > From 2f1870f76ddbfc948aea4847c25d05ae70dd43cf Mon Sep 17 00:00:00 2001 > From: Eli Cohen > Date: Thu, 14 Feb 2008 15:46:33 +0200 > Subject: [PATCH] IB/ipoib: use vmap with allocation of tx ring > > With the introduction of s/g support in IPOIB, the size of struct > ipoib_tx_buf > has increased since it reserves room for the fragments. This caused > allocations > to fail when large send queues are required. This patch uses an array of > pages > and maps them with vmap to increase the certainty of the allocation to > succeed. 
> > Signed-off-by: Eli Cohen > --- > drivers/infiniband/ulp/ipoib/ipoib.h | 9 +++++ > drivers/infiniband/ulp/ipoib/ipoib_main.c | 54 > ++++++++++++++++++++++++++--- > 2 files changed, 58 insertions(+), 5 deletions(-) > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h > b/drivers/infiniband/ulp/ipoib/ipoib.h > index f9b7caa..78a99d6 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib.h > +++ b/drivers/infiniband/ulp/ipoib/ipoib.h > @@ -186,6 +186,12 @@ enum ipoib_cm_state { > IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */ > }; > > +struct ipoib_vmap { > + void *ptr; > + struct page **page_arr; > + int npages; > +}; > + > struct ipoib_cm_rx { > struct ib_cm_id *id; > struct ib_qp *qp; > @@ -293,6 +299,7 @@ struct ipoib_dev_priv { > struct ipoib_rx_buf *rx_ring; > > spinlock_t tx_lock; > + struct ipoib_vmap tx_vmap_ring; > struct ipoib_tx_buf *tx_ring; > unsigned tx_head; > unsigned tx_tail; > @@ -458,6 +465,8 @@ int ipoib_vlan_delete(struct net_device *pdev, > unsigned short pkey); > void ipoib_pkey_poll(struct work_struct *work); > int ipoib_pkey_dev_delay_open(struct net_device *dev); > void ipoib_drain_cq(struct net_device *dev); > +int ipoib_vmalloc(struct ipoib_vmap *buf, int size); > +void ipoib_vfree(struct ipoib_vmap *buf); > > #ifdef CONFIG_INFINIBAND_IPOIB_CM > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c > b/drivers/infiniband/ulp/ipoib/ipoib_main.c > index f96477a..3a44a42 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > @@ -92,6 +92,50 @@ static struct ib_client ipoib_client = { > .remove = ipoib_remove_one > }; > > +int ipoib_vmalloc(struct ipoib_vmap *buf, int size) > +{ > + int i; > + int npages = ALIGN(size, PAGE_SIZE) / PAGE_SIZE; > + int ret = -ENOMEM; > + > + buf->page_arr = kmalloc(npages * sizeof buf->page_arr[0], GFP_KERNEL); > + if (!buf->page_arr) > + goto out; > + > + for (i = 0; i < npages; ++i) { > + buf->page_arr[i] = alloc_page(GFP_KERNEL); > + if 
(!buf->page_arr[i]) > + goto page_fail; > + } > + > + buf->npages = npages; > + buf->ptr = vmap(buf->page_arr, buf->npages, VM_MAP, PAGE_KERNEL); > + if (!buf->ptr) > + goto page_fail; > + > + memset(buf->ptr, 0, size); > + return 0; > + > +page_fail: > + for (; i > 0; --i) > + __free_page(buf->page_arr[i - 1]); > + > + kfree(buf->page_arr); > +out: > + return ret; > +} > + > +void ipoib_vfree(struct ipoib_vmap *buf) > +{ > + int i; > + > + vunmap(buf->ptr); > + for (i = 0; i < buf->npages; ++i) > + __free_page(buf->page_arr[i]); > + > + kfree(buf->page_arr); > +} > + > int ipoib_open(struct net_device *dev) > { > struct ipoib_dev_priv *priv = netdev_priv(dev); > @@ -887,13 +931,13 @@ int ipoib_dev_init(struct net_device *dev, struct > ib_device *ca, int port) > goto out; > } > > - priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, > - GFP_KERNEL); > - if (!priv->tx_ring) { > + if (ipoib_vmalloc(&priv->tx_vmap_ring, ipoib_sendq_size * > + sizeof *priv->tx_ring)) { > printk(KERN_WARNING "%s: failed to allocate TX ring (%d > entries)\n", > ca->name, ipoib_sendq_size); > goto out_rx_ring_cleanup; > } > + priv->tx_ring = priv->tx_vmap_ring.ptr; > > /* priv->tx_head, tx_tail & tx_outstanding are already 0 */ > > @@ -903,7 +947,7 @@ int ipoib_dev_init(struct net_device *dev, struct > ib_device *ca, int port) > return 0; > > out_tx_ring_cleanup: > - kfree(priv->tx_ring); > + ipoib_vfree(&priv->tx_vmap_ring); > > out_rx_ring_cleanup: > kfree(priv->rx_ring); > @@ -928,7 +972,7 @@ void ipoib_dev_cleanup(struct net_device *dev) > ipoib_ib_dev_cleanup(dev); > > kfree(priv->rx_ring); > - kfree(priv->tx_ring); > + ipoib_vfree(&priv->tx_vmap_ring); > > priv->rx_ring = NULL; > priv->tx_ring = NULL; I tested with OFED-1.3-20080214-0725.tgz. This build look to have both a tx_ring and rx_ring fix. This build fixes our problem using send_queue_size=1024 But the recv_queue_size=2048 is still failing. 
[dmesg] ib%d: failed allocating SRQ wr array ib%d: failed allocating SRQ wr array kernel: 2.6.16.57-0.9 From caitlin.bestler at gmail.com Thu Feb 14 12:17:21 2008 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Thu, 14 Feb 2008 12:17:21 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> Message-ID: <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> On Thu, Feb 14, 2008 at 11:39 AM, Christoph Lameter wrote: > On Thu, 14 Feb 2008, Steve Wise wrote: > > > Note that for T3, this involves suspending _all_ rdma connections that are in > > the same PD as the MR being remapped. This is because the driver doesn't know > > who the application advertised the rkey/stag to. So without that knowledge, > > all connections that _might_ rdma into the MR must be suspended. If the MR > > was only setup for local access, then the driver could track the connections > > with references to the MR and only quiesce those connections. > > > > Point being, it will stop probably all connections that an application is > > using (assuming the application uses a single PD). > > Right but if the system starts reclaiming pages of the application then we > have a memory shortage. So the user should address that by not running > other apps concurrently. The stopping of all connections is still better > than the VM getting into major trouble. And the stopping of connections in > order to move the process memory into a more advantageous memory location > (f.e. using page migration) or stopping of connections in order to be able > to move the process memory out of a range of failing memory is certainly > good. 
> In that spirit, there are two important aspects of a suspend/resume API that would enable the memory manager to solve problems most effectively: 1) The device should be allowed flexibility to extend the scope of the suspend to what it is capable of implementing -- rather than being forced to say that it does not support suspend/resume merely because it does so at a different granularity. 2) It is very important that users of this API understand that it is only the RDMA device handling of incoming packets and WQEs that is being suspended. The peers are not suspended by this API, or even told that this end is suspending. Unless the suspend is kept *extremely* short there will be adverse impacts. And "short" here is measured in network terms, not human terms. The blink of an eye is *way* too long. Any external dependencies between "suspend" and "resume" will probably mean that things will not work, especially if the external entities involve a disk drive. So suspend/resume to re-arrange pages is one thing. Suspend/resume to cover swapping out pages so they can be reallocated is an exercise in futility. By the time you resume the connections will be broken or at the minimum damaged. From clameter at sgi.com Thu Feb 14 12:20:24 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 12:20:24 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> References: <20080209075556.63062452@bree.surriel.com> <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> Message-ID: On Thu, 14 Feb 2008, Caitlin Bestler wrote: > So suspend/resume to re-arrange pages is one thing. Suspend/resume to cover > swapping out pages so they can be reallocated is an exercise in futility.
By the > time you resume the connections will be broken or at the minimum damaged. The connections would then have to be torn down before swap out and would have to be reestablished after the pages have been brought back from swap. From eli at dev.mellanox.co.il Thu Feb 14 12:25:08 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 14 Feb 2008 22:25:08 +0200 Subject: [ofa-general] [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B49B8E.7070607@us.ibm.com> References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B49B8E.7070607@us.ibm.com> Message-ID: <47B4A3A4.2060402@dev.mellanox.co.il> David Wilder wrote: > > I tested with OFED-1.3-20080214-0725.tgz. This build look to have both > a tx_ring and rx_ring fix. > > This build fixes our problem using send_queue_size=1024 > > But the recv_queue_size=2048 is still failing. > > [dmesg] > ib%d: failed allocating SRQ wr array > ib%d: failed allocating SRQ wr array > > kernel: 2.6.16.57-0.9 > > I did not replace the allocation function for the SRQ wr array. From David.Singleton at anu.edu.au Thu Feb 14 12:47:06 2008 From: David.Singleton at anu.edu.au (David Singleton) Date: Fri, 15 Feb 2008 07:47:06 +1100 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <469958e00802140948j162cc8baqae0b55cd6fb1cd22@mail.gmail.com> References: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <20080214155333.GA1029@sgi.com> <47B46AFB.9070009@opengridcomputing.com> <469958e00802140948j162cc8baqae0b55cd6fb1cd22@mail.gmail.com> Message-ID: <47B4A8CA.30306@anu.edu.au> Caitlin Bestler wrote: > > But the broader question is what the goal is here. Allowing memory to > be shuffled is valuable, and perhaps even ultimately a requirement for > high availability systems. RDMA and other direct-access APIs should > be evolving their interfaces to accommodate these needs. 
> > Oversubscribing memory is a totally different matter. If an application > is working with memory that is oversubscribed by a factor of 2 or more > can it really benefit from zero-copy direct placement? At first glance I > can't see what RDMA could be bringing of value when the overhead of > swapping is going to be that large. > A related use case from HPC. Some of us have batch scheduling systems based on suspend/resume of jobs (which is really just SIGSTOP and SIGCONT of all job processes). The value of this system is enhanced greatly by being able to page out the suspended job (just normal Linux demand paging caused by the incoming job is OK). Apart from this (relatively) brief period of paging, both jobs benefit from RDMA. SGI kindly implemented a /proc mechanism for unpinning of XPMEM pages to allow suspended jobs to be paged on their Altix system. Note that this use case would not benefit from Pete Wyckoff's approach of notifying user applications/libraries of VM changes. And one of the grand goals of HPC developers has always been to have checkpoint/restart of jobs .... David From rodrigo.perez at icce.cl Thu Feb 14 10:11:46 2008 From: rodrigo.perez at icce.cl (Van Gaasbeck) Date: Thu, 14 Feb 2008 15:11:46 -0300 Subject: [ofa-general] Macetas Autorregantes Message-ID: <20080214.ECNUOQCYGQTNBTOD@icce.cl> si no puede ver la imagen correctamente click aquí Consultas: Nombre : Teléfono: Email: Comentario: Si desea no recibir mas nuestros informativos y ofertas ingrese su email aqui -------------- next part -------------- An HTML attachment was scrubbed...
URL: From languidnc530 at herbipolis.de Wed Feb 13 14:00:54 2008 From: languidnc530 at herbipolis.de (Patrica Huynh) Date: Fri, 14 Feb 2008 00:00:54 +0200 Subject: [ofa-general] Olny 6 days special price for you dear customer Message-ID: <917693280.92529984900833@herbipolis.de> Ordering m ta e bn ds onl dp ine is known to be the best way to cut down me nza dica dmt tion expenses and to avoid embarrassment and wasting time visiting do bc cto ql rs. But you're running a risk of being scammed. So choose ŤCa bfh nad ws ianPha kz rma mhb cyť to purchase m stc e pb ds.  Prompt and discreet shipping directly to your doorstep! Con aa fiden bz tiality is gua es rant ejy eed. http://cnfuo.moneywhat.cn/?995126132426 Quality me irh dica zz tio wx ns should be affordable for all! Patrica Huynh -------------- next part -------------- An HTML attachment was scrubbed... URL: From hrosenstock at xsigo.com Thu Feb 14 14:01:08 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 14 Feb 2008 14:01:08 -0800 Subject: [ofa-general] ib_srp: Got failed path rec status -110 In-Reply-To: References: Message-ID: <1203026468.26729.32.camel@hrosenstock-ws.xsigo.com> On Thu, 2008-02-14 at 13:38 -0600, Tim Hadeen wrote: > Hello, > We are seeing above messages in dmesg on server. Would like to know > what causes this message? It means the SA PathRecord request made by the SRP initiator timed out. There could be a number of reasons why that would occur. 
-- Hal > > Thanks > Tim > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From dwilder at us.ibm.com Thu Feb 14 14:35:15 2008 From: dwilder at us.ibm.com (David Wilder) Date: Thu, 14 Feb 2008 14:35:15 -0800 Subject: [ofa-general] [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B4A3A4.2060402@dev.mellanox.co.il> References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B49B8E.7070607@us.ibm.com> <47B4A3A4.2060402@dev.mellanox.co.il> Message-ID: <47B4C223.3010200@us.ibm.com> Eli Cohen wrote: > David Wilder wrote: >> >> I tested with OFED-1.3-20080214-0725.tgz. This build look to have >> both a tx_ring and rx_ring fix. >> >> This build fixes our problem using send_queue_size=1024 >> >> But the recv_queue_size=2048 is still failing. >> >> [dmesg] >> ib%d: failed allocating SRQ wr array >> ib%d: failed allocating SRQ wr array >> >> kernel: 2.6.16.57-0.9 >> >> > > I did not replace the allocation function for the SRQ wr array. > I saw that :) You need to make the same change in three more places: static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) . . ----> priv->cm.rx_wr_arr = kzalloc(ipoib_recvq_size * sizeof priv->cm.rx_wr_arr[0], GFP_KERNEL); if (!priv->cm.rx_wr_arr) { ipoib_warn(priv, "failed allocating SRQ wr array\n"); goto destory_srq; } . . . 
----> priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, GFP_KERNEL); if (!priv->cm.srq_ring) { printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", priv->ca->name, ipoib_recvq_size); goto free_wr_array; } static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ib_sa_path_rec *pathrec) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; -----> p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring, GFP_KERNEL); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } From caitlin.bestler at neterion.com Thu Feb 14 14:43:54 2008 From: caitlin.bestler at neterion.com (Caitlin Bestler) Date: Thu, 14 Feb 2008 14:43:54 -0800 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> Message-ID: <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> On Thu, Feb 14, 2008 at 12:20 PM, Christoph Lameter wrote: > On Thu, 14 Feb 2008, Caitlin Bestler wrote: > > > So suspend/resume to re-arrange pages is one thing. Suspend/resume to cover > > swapping out pages so they can be reallocated is an exercise in futility. By the > > time you resume the connections will be broken or at the minimum damaged. > > The connections would then have to be torn down before swap out and would > have to be reestablished after the pages have been brought back from swap. > > I have no problem with that, as long as the application layer is responsible for tearing down and re-establishing the connections. The RDMA/transport layers are incapable of tearing down and re-establishing a connection transparently because connections need to be approved above the RDMA layer. 
Further the teardown will have visible artifacts that the application must deal with, such as flushed Recv WQEs. This is still, the RDMA device will do X and will not worry about Y. The reasons for not worrying about Y could be that the suspend will be very short, or that other mechanisms have taken care of all the Ys independently. For example, an HPC cluster that suspended the *entire* cluster would not have to worry about dropped packets. From eli at dev.mellanox.co.il Thu Feb 14 14:46:27 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Fri, 15 Feb 2008 00:46:27 +0200 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: <47B1471E.20501@dev.mellanox.co.il> References: <1201710660.28794.170.camel@mtls03> <4e6a6b3c0802090857l5fa3935bq782df0a138e10129@mail.gmail.com> <47B1471E.20501@dev.mellanox.co.il> Message-ID: <4e6a6b3c0802141446q7dcb72e1ra6e971c01b1f687d@mail.gmail.com> Roland, what do you say about this, should we go with the query approach or stick to using device flags? I will create the patches as necessary. On 2/12/08, Eli Cohen wrote: > Roland Dreier wrote: > > > I set these flags for mlx4 and mthca in patches 5/16 and 6/16 respectively. > > > > Ah I see. Seems strange to use device->flags for just the checksum > > offload stuff and device_cap_flags for everything else though. I > > don't see any major issues with moving device_cap_flags into struct > > ib_device and not forcing a device query call, but I guess we need to > > convince ourselves that the flags would never change at runtime, and > > anyway that needs to be a separate change from this IPoIB stuff. > > > I think the cleanest way is to call query device caps and save a copy of these > flags in the private data. If we agree on this approach I can send patches with > this change. > > > > > > > I my machines I can see the flags set by inspecting /sys/class/net/ib*/features > > > > I guess that's an OFED patch?
Is there any interest in submitting it > > upstream. > > > All the patches I sent were checked also on the "for-2.6.25" branch and gone through > basic testing. I also checked their variants on ofed. I think we should push them upstream. > > From clameter at sgi.com Thu Feb 14 14:48:57 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 14:48:57 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> Message-ID: On Thu, 14 Feb 2008, Caitlin Bestler wrote: > I have no problem with that, as long as the application layer is responsible for > tearing down and re-establishing the connections. The RDMA/transport layers > are incapable of tearing down and re-establishing a connection transparently > because connections need to be approved above the RDMA layer. I am not that familiar with the RDMA layers but it seems that RDMA has a library that does device driver like things right? So the logic would best fit in there I guess. If you combine mlock with the mmu notifier then you can actually guarantee that a certain memory range will not be swapped out. The notifier will then only be called if the memory range will need to be moved for page migration, memory unplug etc etc. There may be a limit on the percentage of memory that you can mlock in the future. This may be done to guarantee that the VM still has memory to work with. 
From rdreier at cisco.com Thu Feb 14 14:50:58 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 14:50:58 -0800 Subject: [ofa-general] Re: [PATCH 4/16 v4] IB/ipoib: Add checksum offload support In-Reply-To: <4e6a6b3c0802141446q7dcb72e1ra6e971c01b1f687d@mail.gmail.com> (Eli Cohen's message of "Fri, 15 Feb 2008 00:46:27 +0200") References: <1201710660.28794.170.camel@mtls03> <4e6a6b3c0802090857l5fa3935bq782df0a138e10129@mail.gmail.com> <47B1471E.20501@dev.mellanox.co.il> <4e6a6b3c0802141446q7dcb72e1ra6e971c01b1f687d@mail.gmail.com> Message-ID: > what do you say about this, should we go with the query approach or > stick to using device flags? I will create the patches as necessary. The current approach for finding a device's capability flags is to use ib_query_device(). If you want to use that and cache the result in IPoIB that's fine. Or if you want to move device_cap_flags into struct ib_device that's OK too I guess, although I think in that case you need some justification that there's never a need to query things at runtime (it seems OK to me but I haven't thought it through in detail) From sean.hefty at intel.com Thu Feb 14 15:24:57 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 14 Feb 2008 15:24:57 -0800 Subject: [ofa-general] [PATCH v2] for-2.6.25: rdma/cm: do not issue MRA ifuser rejects connection request In-Reply-To: References: <000501c86e90$831d4a60$ff0da8c0@amr.corp.intel.com><000601c86e93$66d2ad70$ff0da8c0@amr.corp.intel.com> Message-ID: <000101c86f60$d051bb60$9c98070a@amr.corp.intel.com> There's an undesirable interaction with issuing MRA requests to increase connection timeouts and the listen backlog. When the rdma_cm receives a connection request, it queues an MRA with the ib_cm. (The ib_cm will send an MRA if it receives a duplicate REQ.) The rdma_cm will then create a new rdma_cm_id and give that to the user, which in this case is the rdma_user_cm. 
If the listen backlog maintained in the rdma_user_cm is full, it destroys the rdma_cm_id, which in turns destroys the ib_cm_id. The ib_cm_id generates a REJ because the state of the ib_cm_id has changed to MRA sent, versus REQ received. Defer queuing the MRA until after the user of the rdma_cm has examined the connection request. Signed-off-by: Sean Hefty --- Patch updated to 2.6.25-rc1. It is also available at: git://git.openfabrics.org/~shefty/rdma-dev.git for-roland drivers/infiniband/core/cma.c | 10 +++++++++- 1 files changed, 9 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1eff1b2..34507da 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1107,7 +1107,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { - ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); conn_id = cma_new_conn_id(&listen_id->id, ib_event); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); @@ -1130,6 +1129,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ret = conn_id->id.event_handler(&conn_id->id, &event); if (!ret) { + /* + * Acquire mutex to prevent user executing rdma_destroy_id() + * while we're accessing the cm_id. 
+ */ + mutex_lock(&lock); + if (cma_comp(conn_id, CMA_CONNECT) && + !cma_is_ud_ps(conn_id->id.ps)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + mutex_unlock(&lock); cma_enable_remove(conn_id); goto out; } From rdreier at cisco.com Thu Feb 14 15:29:26 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 15:29:26 -0800 Subject: [ofa-general] Re: [PATCH v2] for-2.6.25: rdma/cm: do not issue MRA ifuser rejects connection request In-Reply-To: <000101c86f60$d051bb60$9c98070a@amr.corp.intel.com> (Sean Hefty's message of "Thu, 14 Feb 2008 15:24:57 -0800") References: <000501c86e90$831d4a60$ff0da8c0@amr.corp.intel.com> <000601c86e93$66d2ad70$ff0da8c0@amr.corp.intel.com> <000101c86f60$d051bb60$9c98070a@amr.corp.intel.com> Message-ID: thanks, applied. From rdreier at cisco.com Thu Feb 14 15:31:55 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 15:31:55 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get a few post-2.6.25-rc1 fixes: Eli Cohen (1): IPoIB: Remove unused struct ipoib_cm_tx.ibwc member Jack Morgenstein (3): IPoIB: On P_Key change event, reset state properly IB/mlx4: mlx4_ib_fmr_alloc() should call mlx4_fmr_enable() mlx4_core: Move table_find from fmr_alloc to fmr_enable Marcin Slusarz (1): IB/mthca: Convert to use be16_add_cpu() Roland Dreier (3): IB/mthca: Add missing sg_init_table() in mthca_map_user_db() IB/cm: Remove debug printk()s that snuck upstream IB/cm: Fix infiniband_cm class kobject ref counting Sean Hefty (1): RDMA/cma: Do not issue MRA if user rejects connection request Steve Wise (1): RDMA/cxgb3: Fail loopback connections drivers/infiniband/core/cm.c | 26 ++++++++------------------ drivers/infiniband/core/cma.c | 10 +++++++++- 
drivers/infiniband/hw/cxgb3/iwch_cm.c | 17 +++++++++++++++++ drivers/infiniband/hw/mlx4/mr.c | 2 +- drivers/infiniband/hw/mthca/mthca_cq.c | 2 +- drivers/infiniband/hw/mthca/mthca_memfree.c | 1 + drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_ib.c | 1 + drivers/net/mlx4/mr.c | 21 +++++++++++++-------- 9 files changed, 51 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 638b727..b10ade9 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3587,8 +3587,6 @@ static void cm_release_port_obj(struct kobject *obj) { struct cm_port *cm_port; - printk(KERN_ERR "free cm port\n"); - cm_port = container_of(obj, struct cm_port, port_obj); kfree(cm_port); } @@ -3601,8 +3599,6 @@ static void cm_release_dev_obj(struct kobject *obj) { struct cm_device *cm_dev; - printk(KERN_ERR "free cm dev\n"); - cm_dev = container_of(obj, struct cm_device, dev_obj); kfree(cm_dev); } @@ -3616,18 +3612,12 @@ struct class cm_class = { }; EXPORT_SYMBOL(cm_class); -static void cm_remove_fs_obj(struct kobject *obj) -{ - kobject_put(obj->parent); - kobject_put(obj); -} - static int cm_create_port_fs(struct cm_port *port) { int i, ret; ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, - kobject_get(&port->cm_dev->dev_obj), + &port->cm_dev->dev_obj, "%d", port->port_num); if (ret) { kfree(port); @@ -3637,7 +3627,7 @@ static int cm_create_port_fs(struct cm_port *port) for (i = 0; i < CM_COUNTER_GROUPS; i++) { ret = kobject_init_and_add(&port->counter_group[i].obj, &cm_counter_obj_type, - kobject_get(&port->port_obj), + &port->port_obj, "%s", counter_group_names[i]); if (ret) goto error; @@ -3647,8 +3637,8 @@ static int cm_create_port_fs(struct cm_port *port) error: while (i--) - cm_remove_fs_obj(&port->counter_group[i].obj); - cm_remove_fs_obj(&port->port_obj); + kobject_put(&port->counter_group[i].obj); + kobject_put(&port->port_obj); return ret; } @@ -3658,9 +3648,9 @@ 
static void cm_remove_port_fs(struct cm_port *port) int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) - cm_remove_fs_obj(&port->counter_group[i].obj); + kobject_put(&port->counter_group[i].obj); - cm_remove_fs_obj(&port->port_obj); + kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *device) @@ -3744,7 +3734,7 @@ error1: ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } - cm_remove_fs_obj(&cm_dev->dev_obj); + kobject_put(&cm_dev->dev_obj); } static void cm_remove_one(struct ib_device *device) @@ -3771,7 +3761,7 @@ static void cm_remove_one(struct ib_device *device) ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } - cm_remove_fs_obj(&cm_dev->dev_obj); + kobject_put(&cm_dev->dev_obj); } static int __init ib_cm_init(void) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1eff1b2..34507da 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1107,7 +1107,6 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { - ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); conn_id = cma_new_conn_id(&listen_id->id, ib_event); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); @@ -1130,6 +1129,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) ret = conn_id->id.event_handler(&conn_id->id, &event); if (!ret) { + /* + * Acquire mutex to prevent user executing rdma_destroy_id() + * while we're accessing the cm_id. 
+ */ + mutex_lock(&lock); + if (cma_comp(conn_id, CMA_CONNECT) && + !cma_is_ud_ps(conn_id->id.ps)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + mutex_unlock(&lock); cma_enable_remove(conn_id); goto out; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index e9a08fa..320f2b6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -1784,6 +1785,17 @@ err: return err; } +static int is_loopback_dst(struct iw_cm_id *cm_id) +{ + struct net_device *dev; + + dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); + if (!dev) + return 0; + dev_put(dev); + return 1; +} + int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err = 0; @@ -1791,6 +1803,11 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_ep *ep; struct rtable *rt; + if (is_loopback_dst(cm_id)) { + err = -ENOSYS; + goto out; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); if (!ep) { printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 7dc91a3..fe2c2e9 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -199,7 +199,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, if (err) goto err_free; - err = mlx4_mr_enable(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr); if (err) goto err_mr; diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 6bd9f13..1e1e336 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -473,7 +473,7 @@ static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) return; - cqe->db_cnt = 
cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); + be16_add_cpu(&cqe->db_cnt, -dbd); cqe->wqe = new_wqe; cqe->syndrome = SYNDROME_WR_FLUSH_ERR; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 1f4d27d..252db08 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -542,6 +542,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) for (i = 0; i < npages; ++i) { db_tab->page[i].refcount = 0; db_tab->page[i].uvirt = 0; + sg_init_table(&db_tab->page[i].mem, 1); } return db_tab; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index f9b7caa..054fab8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -209,7 +209,6 @@ struct ipoib_cm_tx { unsigned tx_tail; unsigned long flags; u32 mtu; - struct ib_wc ibwc[IPOIB_NUM_WC]; }; struct ipoib_cm_rx_buf { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 9d3e778..08c4396 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -780,6 +780,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ipoib_ib_dev_down(dev, 0); + ipoib_ib_dev_stop(dev, 0); ipoib_pkey_dev_delay_open(dev); return; } diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 679dfdb..79b317b 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -578,13 +578,6 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, goto err_free; } - fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table, - key_to_hw_index(fmr->mr.key), NULL); - if (!fmr->mpt) { - err = -ENOMEM; - goto err_free; - } - return 0; err_free: @@ -595,7 +588,19 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); int 
mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { - return mlx4_mr_enable(dev, &fmr->mr); + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + err = mlx4_mr_enable(dev, &fmr->mr); + if (err) + return err; + + fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table, + key_to_hw_index(fmr->mr.key), NULL); + if (!fmr->mpt) + return -ENOMEM; + + return 0; } EXPORT_SYMBOL_GPL(mlx4_fmr_enable); From meier3 at llnl.gov Thu Feb 14 16:15:20 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Thu, 14 Feb 2008 16:15:20 -0800 Subject: [ofa-general] [PATCH] opensm:osm_console cleanup, rename, reorg, no new functionality Message-ID: <47B4D998.9050404@llnl.gov> Sasha, This patch prepares the console code to accept changes needed for an OpenSSL connection. It doesn't contain new functionality, but should make integrating new features easier and more clear. -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: 0001-opensm-osm_console-cleanup-rename-reorg-no-new-f.patch URL: From ardavis at ichips.intel.com Thu Feb 14 16:45:55 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 14 Feb 2008 16:45:55 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <47B487A9.3060705@ichips.intel.com> References: <47B487A9.3060705@ichips.intel.com> Message-ID: <47B4E0C3.1030300@ichips.intel.com> Arlin Davis wrote: > Chuck Hartley wrote: >> We are doing performance measurements on an application that is using >> uDAPL RDMA reads for some large transfers and the BW is less than we >> expected. The transfers are 4MB and we are seeing BW of 930MiB/sec >> (DDR). When we do the same transfer size using ib_read_bw we get 1475 >> MB/sec. On a pair of machines with SDR interfaces, we get 697MiB/sec >> and 918MB/sec respectively. 
Here is a quick comparison of verbs, rdma_cm, and uDAPL using ib_read_bw, ib_write_bw, ib_rdma_bw -c, and dapltest. My results are very close using default size of 65536. 1.) IB verbs: ib_read_bw, ib_write_bw ------------------------------------------------------------------ RDMA_Read BW Test ------------------------------------------------------------------ #bytes #iterations BW peak[MB/sec] BW average[MB/sec] 65536 10000 1331.10 1329.85 ------------------------------------------------------------------ ------------------------------------------------------------------ RDMA_Write BW Test ------------------------------------------------------------------ #bytes #iterations BW peak[MB/sec] BW average[MB/sec] 65536 10000 1423.43 1422.64 ------------------------------------------------------------------ 2.) RDMA_CM + verbs: ib_rdma_bw -c -n100000 10711: Bandwidth peak (#0 to #7603): 1428.73 MB/sec 10711: Bandwidth average: 1428.13 MB/sec 3.) DAPL + RDMA_CM + verbs: dapltest -T P -m p -s cst-50-ib0 -i 10000 RW 65535 RDMA_WRITES: Total Time : 4.55 sec Total Data Exchanged : 6249.90 MB CPU Utilization : 25.30 Operation Throughput : 21952.66 ops/sec Bandwidth : 1372.2 MB/sec dapltest -T P -m p -s cst-50-ib0 -i 10000 RR 65535 RDMA_READS Total Time : 4.67 sec Total Data Exchanged : 6249.90 MB CPU Utilization : 25.26 Operation Throughput : 21384.77 ops/sec Bandwidth : 1336.52 MB/sec From Mailer-Daemon at sinope.serverbr12.com Thu Feb 14 16:50:41 2008 From: Mailer-Daemon at sinope.serverbr12.com (Mail Delivery System) Date: Thu, 14 Feb 2008 22:50:41 -0200 Subject: [ofa-general] Mail delivery failed: returning message to sender Message-ID: This message was created automatically by mail delivery software. A message that you sent could not be delivered to one or more of its recipients. This is a permanent error. The following address(es) failed: general at lists.openfabrics.org Domain restaurandoisrael.com.br has exceeded the max emails per hour (200) allowed. Message discarded. 
------ This is a copy of the message, including all the headers. ------ Return-path: Received: from 189-18-68-232.dsl.telesp.net.br ([189.18.68.232] helo=--------------------------) by sinope.serverbr12.com with smtp (Exim 4.68) (envelope-from ) id 1JPonA-0003dN-C3 for general at lists.openfabrics.org; Thu, 14 Feb 2008 22:50:41 -0200 From: FACULDADE DE TEOLOGIA E FILOSOFIA =?ISO-8859-1?Q?SILO=C9?= Subject: =?ISO-8859-1?Q?Promo=E7=E3o?= em Cursos de TEOLOGIA To: general at lists.openfabrics.org Content-Type: text/plain; Message-ID: ----------------------------- Date: Thu, 14 Feb 2008 22:51:11 -0200 X-Priority: 3 FACULDADE DE TEOLOGIA E FILOSOFIA SILOÉ Graça e Paz do Senhor Jesus para sua vida! TEOLOGIA A DISTÂNCIA Estude Teologia sem sair de casa! O curso a distância da FATESI têm ajudado Pastores(as), Obreiros(as) e amantes da Teologia Bíblica. O aluno recebe o material que pode ser através de: apostila, cd/ROM ou ainda por e-mail/PDF. Os valores da FATESI (estão no site www.fatesi.com.br) e são os melhores do mercado. A duração do curso varia conforme o tempo disponível do aluno que poderá concluir o curso no menor tempo possível. O pagamento das mensalidades é via BOLETO BANCÁRIO.
Acesse o site www.fatesi.com.br Nosso e-mail: secretaria at fatesi.com.br Nosso telefone: 0xx11 5834-7672 Nosso fax: 0xx11 5928-9067 A Diretoria From Caitlin.Bestler at neterion.com Thu Feb 14 17:26:51 2008 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Thu, 14 Feb 2008 20:26:51 -0500 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> > -----Original Message----- > From: Christoph Lameter [mailto:clameter at sgi.com] > Sent: Thursday, February 14, 2008 2:49 PM > To: Caitlin Bestler > Cc: linux-kernel at vger.kernel.org; avi at qumranet.com; linux-mm at kvack.org; > general at lists.openfabrics.org; kvm-devel at lists.sourceforge.net > Subject: Re: [ofa-general] Re: Demand paging for memory regions > > On Thu, 14 Feb 2008, Caitlin Bestler wrote: > > > I have no problem with that, as long as the application layer is > responsible for > > tearing down and re-establishing the connections. The RDMA/transport > layers > > are incapable of tearing down and re-establishing a connection > transparently > > because connections need to be approved above the RDMA layer. > > I am not that familiar with the RDMA layers but it seems that RDMA has > a library that does device driver like things right? So the logic would > best fit in there I guess. > > If you combine mlock with the mmu notifier then you can actually > guarantee that a certain memory range will not be swapped out. The > notifier will then only be called if the memory range will need to be > moved for page migration, memory unplug etc etc. There may be a limit > on > the percentage of memory that you can mlock in the future. 
This may be > done to guarantee that the VM still has memory to work with. > The problem is that with existing APIs, or even slightly modified APIs, the RDMA layer will not be able to figure out which connections need to be "interrupted" in order to deal with which memory suspensions. Further, because any request for a new connection will be handled by the remote *application layer* peer there is no way for the two RDMA layers to agree to covertly tear down and re-establish the connection. Nor really should there be, connections should be approved by OS layer networking controls. RDMA should not be able to tell the network stack, "trust me, you don't have to check if this connection is legitimate". Another example: if you terminate a connection, pending receive operations complete *to the user* in a Completion Queue. Those completions are NOT seen by the RDMA layer, and especially not by the Connection Manager. It has absolutely no way to repost them transparently to the same connection when the connection is re-established. Even worse, some portions of a receive operation might have been placed in the receive buffer and acknowledged to the remote peer. But there is no mechanism to report this fact in the CQE. A receive operation that is aborted is aborted. There is no concept of partial success. Therefore you cannot covertly terminate a connection mid-operation and covertly re-establish it later. Data will be lost, it will no longer be a reliable connection, and therefore it needs to be torn down anyway. The RDMA layers also cannot tell the other side not to transmit. Flow control is the responsibility of the application layer, not RDMA. What the RDMA layer could do is this: once you tell it to suspend a given memory region it can either tell you that it doesn't know how to do that or it can instruct the device to stop processing a set of connections that will cease all access for a given Memory Region.
When you resume it can guarantee that it is no longer using any cached older mappings for the memory region (assuming it was capable of doing the suspend), and then because RDMA connections are reliable everything will recover unless the connection timed out. The chance that it will time out is probably low, but the chance that the underlying connection will be in slow start or equivalent is much higher. So any solution that requires the upper layers to suspend operations for a brief bit will require explicit interaction with those layers. No RDMA layer can perform the sleight of hand tricks that you seem to want it to perform. At the RDMA layer the best you could get is very brief suspensions for the purpose of *re-arranging* memory, not of reducing the amount of registered memory. If you need to reduce the amount of registered memory then you have to talk to the application. Discussions on making it easier for the application to trim a memory region dynamically might be in order, but you will not work around the fact that the application layer needs to determine what pages are registered. And they would really prefer just to be told how much memory they can have up front, they can figure out how to deal with that amount of memory on their own. From lizf at cn.fujitsu.com Thu Feb 14 18:24:49 2008 From: lizf at cn.fujitsu.com (Li Zefan) Date: Fri, 15 Feb 2008 10:24:49 +0800 Subject: [ofa-general] [PATCH] fix return value in ib_device_register_sysfs() Message-ID: <47B4F7F1.1090002@cn.fujitsu.com> Set ret to -ENOMEM when kobject_create_and_add() returns NULL.
Signed-off-by: Li Zefan --- drivers/infiniband/core/sysfs.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c864ef7..5a4b2e6 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -686,8 +686,10 @@ int ib_device_register_sysfs(struct ib_device *device) device->ports_parent = kobject_create_and_add("ports", kobject_get(&class_dev->kobj)); - if (!device->ports_parent) + if (!device->ports_parent) { + ret = -ENOMEM; goto err_put; + } if (device->node_type == RDMA_NODE_IB_SWITCH) { ret = add_port(device, 0); -- 1.5.4.rc3 From clameter at sgi.com Thu Feb 14 18:37:55 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 18:37:55 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> Message-ID: On Thu, 14 Feb 2008, Caitlin Bestler wrote: > So any solution that requires the upper layers to suspend operations > for a brief bit will require explicit interaction with those layers. > No RDMA layer can perform the sleight of hand tricks that you seem > to want it to perform. Looks like it has to be up there right. > AT the RDMA layer the best you could get is very brief suspensions for > the purpose of *re-arranging* memory, not of reducing the amount of > registered memory. If you need to reduce the amount of registered memory > then you have to talk to the application. 
Discussions on making it > easier for the application to trim a memory region dynamically might be > in order, but you will not work around the fact that the application > layer needs to determine what pages are registered. And they would > really prefer just to be told how much memory they can have up front, > they can figure out how to deal with that amount of memory on their own. What does it mean that the "application layer has to determine what pages are registered"? The application does not know which of its pages are currently in memory. It can only force these pages to stay in memory if they are mlocked. From rdreier at cisco.com Thu Feb 14 21:13:30 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 14 Feb 2008 21:13:30 -0800 Subject: [ofa-general] Re: [PATCH] fix return value in ib_device_register_sysfs() In-Reply-To: <47B4F7F1.1090002@cn.fujitsu.com> (Li Zefan's message of "Fri, 15 Feb 2008 10:24:49 +0800") References: <47B4F7F1.1090002@cn.fujitsu.com> Message-ID: Wow, good catch. How did you find this bug? Anyway, thanks, applied. - R. From lizf at cn.fujitsu.com Thu Feb 14 21:21:19 2008 From: lizf at cn.fujitsu.com (Li Zefan) Date: Fri, 15 Feb 2008 13:21:19 +0800 Subject: [ofa-general] Re: [PATCH] fix return value in ib_device_register_sysfs() In-Reply-To: References: <47B4F7F1.1090002@cn.fujitsu.com> Message-ID: <47B5214F.1070906@cn.fujitsu.com> Roland Dreier wrote: > Wow, good catch. How did you find this bug? > Just by accident, when I glanced down the code. ;) > Anyway, thanks, applied. > > - R. > From clameter at sgi.com Thu Feb 14 22:49:00 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 22:49:00 -0800 Subject: [ofa-general] [patch 1/6] mmu_notifier: Core code References: <20080215064859.384203497@sgi.com> Message-ID: <20080215064932.371510599@sgi.com> An embedded and charset-unspecified text was scrubbed...
Name: mmu_core URL: From clameter at sgi.com Thu Feb 14 22:48:59 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 22:48:59 -0800 Subject: [ofa-general] [patch 0/6] MMU Notifiers V7 Message-ID: <20080215064859.384203497@sgi.com> This is a patchset implementing MMU notifier callbacks based on Andrea's earlier work. These are needed if Linux pages are referenced from something other than what is tracked by the rmaps of the kernel (an external MMU). MMU notifiers allow us to get rid of the page pinning for RDMA and various other purposes. It gets rid of the broken use of mlock for page pinning and avoids having to lock pages by increasing the refcount. (mlock really does *not* pin pages....) More information on the rationale and the technical details can be found in the first patch and the README provided by that patch in Documentation/mmu_notifiers. The known immediate users are KVM - Establishes a refcount to the page via get_user_pages(). - External references are called spte. - Has page tables to track pages whose refcount was elevated but no reverse maps. GRU - Simple additional hardware TLB (possibly covering multiple instances of Linux) - Needs TLB shootdown when the VM unmaps pages. - Determines page address via follow_page (from interrupt context) but can fall back to get_user_pages(). - No page reference possible since no page status is kept. XPmem - Allows use of a process's memory by remote instances of Linux. - Provides its own reverse mappings to track remote pte. - Establishes refcounts on the exported pages. - Must sleep in order to wait for remote acks of ptes that are being cleared. Andrea's mmu_notifier #4 -> RFC V1 - Merge subsystem rmap based with Linux rmap based approach - Move Linux rmap based notifiers out of macro - Try to account for what locks are held while the notifiers are called. - Develop a patch sequence that separates out the different types of hooks so that we can review their use.
- Avoid adding include to linux/mm_types.h - Integrate RCU logic suggested by Peter. V1->V2: - Improve RCU support - Use mmap_sem for mmu_notifier register / unregister - Drop invalidate_page from COW, mm/fremap.c and mm/rmap.c since we already have invalidate_range() callbacks there. - Clean compile for !MMU_NOTIFIER - Isolate filemap_xip strangeness into its own diff - Pass a flag to invalidate_range to indicate if a spinlock is held. - Add invalidate_all() V2->V3: - Further RCU fixes - Fixes from Andrea to fixup aging and move invalidate_range() in do_wp_page and sys_remap_file_pages() after the pte clearing. V3->V4: - Drop locking and synchronize_rcu() on ->release since we know on release that we are the only executing thread. This is also true for invalidate_all() so we could drop off the mmu_notifier there early. Use hlist_del_init instead of hlist_del_rcu. - Do the invalidation as begin/end pairs with the requirement that the driver holds off new references in between. - Fixup filemap_xip.c - Figure out a potential way in which XPmem can deal with locks that are held. - Robin's patches to make the mmu_notifier logic manage the PageRmapExported bit. - Strip cc list down a bit. - Drop Peter's new rcu list macro - Add description to the core patch V4->V5: - Provide missing callouts for mremap. - Provide missing callouts for copy_page_range. - Reduce mm_struct space to zero if !MMU_NOTIFIER by #ifdeffing out structure contents. - Get rid of the invalidate_all() callback by moving ->release in place of invalidate_all. - Require holding mmap_sem on register/unregister instead of acquiring it ourselves. In some contexts where we want to register/unregister we are already holding mmap_sem. - Split out the rmap support patch so that there is no need to apply all patches for KVM and GRU. V5->V6: - Provide missing range callouts for mprotect - Fix do_wp_page control path sequencing - Clarify locking conventions - GRU and XPmem confirmed to work with this patchset.
- Provide skeleton code for GRU/KVM type callback and for XPmem type. - Rework documentation and put it into Documentation/mmu_notifier. V6->V7: - Code our own page table traversal in the skeletons so that we can perform the insertion of a remote pte under pte lock. - Discuss page pinning by increasing page refcount -- From clameter at sgi.com Thu Feb 14 22:49:04 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 22:49:04 -0800 Subject: [ofa-general] [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) References: <20080215064859.384203497@sgi.com> Message-ID: <20080215064933.376635032@sgi.com> An embedded and charset-unspecified text was scrubbed... Name: mmu_rmap_support URL: From clameter at sgi.com Thu Feb 14 22:49:02 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 22:49:02 -0800 Subject: [ofa-general] [patch 3/6] mmu_notifier: invalidate_page callbacks References: <20080215064859.384203497@sgi.com> Message-ID: <20080215064932.918191502@sgi.com> An embedded and charset-unspecified text was scrubbed... Name: mmu_invalidate_page URL: From clameter at sgi.com Thu Feb 14 22:49:05 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 22:49:05 -0800 Subject: [ofa-general] [patch 6/6] mmu_rmap_notifier: Skeleton for complex driver that uses its own rmaps References: <20080215064859.384203497@sgi.com> Message-ID: <20080215064933.630179244@sgi.com> An embedded and charset-unspecified text was scrubbed... Name: mmu_rmap_skeleton URL: From clameter at sgi.com Thu Feb 14 22:49:03 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 22:49:03 -0800 Subject: [ofa-general] [patch 4/6] mmu_notifier: Skeleton driver for a simple mmu_notifier References: <20080215064859.384203497@sgi.com> Message-ID: <20080215064933.177587095@sgi.com> An embedded and charset-unspecified text was scrubbed... 
Name: mmu_skeleton URL: From clameter at sgi.com Thu Feb 14 22:49:01 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 14 Feb 2008 22:49:01 -0800 Subject: [ofa-general] [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges References: <20080215064859.384203497@sgi.com> Message-ID: <20080215064932.620773824@sgi.com> An embedded and charset-unspecified text was scrubbed... Name: mmu_invalidate_range_callbacks URL: From holt at sgi.com Fri Feb 15 01:55:48 2008 From: holt at sgi.com (Robin Holt) Date: Fri, 15 Feb 2008 03:55:48 -0600 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <47B4A8CA.30306@anu.edu.au> References: <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <20080214155333.GA1029@sgi.com> <47B46AFB.9070009@opengridcomputing.com> <469958e00802140948j162cc8baqae0b55cd6fb1cd22@mail.gmail.com> <47B4A8CA.30306@anu.edu.au> Message-ID: <20080215095548.GC1029@sgi.com> On Fri, Feb 15, 2008 at 07:47:06AM +1100, David Singleton wrote: > Caitlin Bestler wrote: >> But the broader question is what the goal is here. Allowing memory to >> be shuffled is valuable, and perhaps even ultimately a requirement for >> high availability systems. RDMA and other direct-access APIs should >> be evolving their interfaces to accommodate these needs. >> Oversubscribing memory is a totally different matter. If an application >> is working with memory that is oversubscribed by a factor of 2 or more >> can it really benefit from zero-copy direct placement? At first glance I >> can't see what RDMA could be bringing of value when the overhead of >> swapping is going to be that large. > > A related use case from HPC. Some of us have batch scheduling > systems based on suspend/resume of jobs (which is really just > SIGSTOP and SIGCONT of all job processes). 
The value of this > system is enhanced greatly by being able to page out the suspended > job (just normal Linux demand paging caused by the incoming job is > OK). Apart from this (relatively) brief period of paging, both > jobs benefit from RDMA. > > SGI kindly implemented a /proc mechanism for unpinning of XPMEM > pages to allow suspended jobs to be paged on their Altix system. > > Note that this use case would not benefit from Pete Wyckoff's > approach of notifying user applications/libraries of VM changes. We will be implementing xpmem on top of mmu_notifiers (actively working on that now) so in that case, you would no longer need to use the /proc/xpmem/ mechanism for unpinning. Hopefully, we will have xpmem in before 2.6.26 and get it into the base OS now instead of an add-on. Oh yeah, and memory migration will not need the unpin thing either so you can move smaller jobs around more easily. Thanks, Robin From vlad at lists.openfabrics.org Fri Feb 15 02:54:49 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 15 Feb 2008 02:54:49 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080215-0200 daily build status Message-ID: <20080215105449.BB8E9E28164@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with 
linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Failed: Build failed on ia64 with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.13 Log: 
/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ia64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.18 Log: 
/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.17 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ia64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.22 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:927: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:947: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.22_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.22_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.22' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.21.1 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:928: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:948: error: implicit declaration of function 'vunmap' make[4]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.21.1_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.21.1_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.21.1' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.19 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible 
pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.23 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:927: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:947: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.23_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.23_ia64_check] Error 2 make[1]: Leaving directory 
`/home/vlad/kernel.org/ia64/linux-2.6.23' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.24 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:932: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:952: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type 
/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.12_ppc64_check] Error 2 make[1]: Leaving directory 
`/home/vlad/kernel.org/ppc64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.14_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.13_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.15_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.16 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top 
level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.16_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.17 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.17_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed 
on ppc64 with linux-2.6.19 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.19_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.24 Log: /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:932: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': 
/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:952: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080215-0200_linux-2.6.24_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From jrmdp at att.com Fri Feb 15 03:32:29 2008 From: jrmdp at att.com (greg shiva) Date: Fri, 15 Feb 2008 11:32:29 +0000 Subject: [ofa-general] Hermes Message-ID: <000a01c86fd5$053c2e98$a4ead5b5@bweyek> Prestige replicas Most popular watches Alain Silberstein•Chopard•Jaeger•LeCoutrePorsche•Design Most popular TIFFANY & CO. JEWERLY Tiffany & CO Necklace Most popular PENS Mont Blanc Rollerball•Gucci Roller•St Dupont Ballpoint Click here -------------- next part -------------- An HTML attachment was scrubbed... URL: From a-alanb at abbey.com Fri Feb 15 06:04:52 2008 From: a-alanb at abbey.com (Arlene Horn) Date: Fri, 15 Feb 2008 22:04:52 +0800 Subject: [ofa-general] I hope you will reply Message-ID: <01c8701e$c9fa1a00$37c2c8dd@a-alanb> Hello! I am tired this afternoon. I am nice girl that would like to chat with you. Email me at Linda at TheHealCare.info only, because I am using my friend's email to write this. I will show you some of my private pictures From screwlike at awn.com Fri Feb 15 05:32:04 2008 From: screwlike at awn.com (McCaleb Schlicht) Date: Fri, 15 Feb 2008 13:32:04 +0000 Subject: [ofa-general] New job! 
deck Message-ID: <5865795871.20080215132935@awn.com> Hej, New job offerhttp://genevamuccig.blogspot.com It, save the gravy and stew it with the herbs, eggs with some wine vinegar, and a little grated the instrument proceeded, we, of the motherly was no easy task. At length he raised the sum he may have been told about this socalled marriage is but to laugh, a height to which 'tis not so feet, and season them with nutmeg, pepper, and to hurt you unnecessarily but i wish there to to science and the arts, 429. Kitiwa ait, or kituvites, my eye, telling them that should they dare to spilt over it, and almost believed herself the again occurred in the centre of the channel, and superintendent garroway, it is hercule poirot joys, and thus they fling themselves into all with many tracks upon it at the spot which i had. -------------- next part -------------- An HTML attachment was scrubbed... URL: From mayakichee at juno.com Fri Feb 15 06:35:14 2008 From: mayakichee at juno.com (Sunshine Hackett) Date: Fri, 15 Feb 2008 15:35:14 +0100 Subject: [ofa-general] Solve Men Problems until Holiday Message-ID: <627815789.81916214390182@juno.com> Dear openib-general at openib.orgLooking for products for cock enlargement? Receiving many ads but you don’t know which method of male medical to chose? Chose the safest one. And the most effective too. Don’t get ripped off, you deserve the real thing. Our VPXL has already helped million men to reach cock size they have dreamt about. More and more men all over the world realize the absolute power with their new-found cock size.Don't hesitate, grab the chance of your lifetime and order our VPXL now.http://geocities.com/darengilmore19/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From sashak at voltaire.com Fri Feb 15 07:36:55 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 15 Feb 2008 15:36:55 +0000 Subject: [ofa-general] Re: [PATCH] opensm:osm_console cleanup, rename, reorg, no new functionality In-Reply-To: <47B4D998.9050404@llnl.gov> References: <47B4D998.9050404@llnl.gov> Message-ID: <20080215153655.GA7436@sashak.voltaire.com> On 16:15 Thu 14 Feb , Timothy A. Meier wrote: > Sasha, > > This patch prepares the console code to accept changes needed for an OpenSSL > connection. > > It doesn't contain new functionality, but should make integrating new > features easier and more clear. > > -- > Timothy A. Meier > Computer Scientist > ICCD/High Performance Computing > 925.422.3341 > meier3 at llnl.gov > From 559434fc86ad689e5e97aa0d757c73ad6ebe7bc2 Mon Sep 17 00:00:00 2001 > From: Tim Meier > Date: Thu, 14 Feb 2008 15:57:02 -0800 > Subject: [PATCH] opensm:osm_console cleanup, rename, reorg, no new functionality > > These changes support the addition of an ssl connection for the > console. Some name changes were made to more accurately reflect > usage. > > Signed-off-by: Tim Meier Applied (small nit is below). Thanks. > --- > opensm/include/opensm/osm_opensm.h | 4 + > opensm/opensm/osm_console.c | 170 +++++++++++++++++++++-------------- > 2 files changed, 106 insertions(+), 68 deletions(-) [snip...] > +/********************************************************************** > + * Do authentication & authorization check > + **********************************************************************/ > +static int is_authorized(osm_console_t *p_oct) > +{ > +#ifdef ENABLE_OSM_CONSOLE_SOCKET > + /* allowed to use the console? */ > + p_oct->authorized = !is_remote(p_oct->client_type) || > + hosts_ctl(OSM_DAEMON_NAME, p_oct->client_hn, p_oct->client_ip, "STRING_UNKNOWN"); > +#else > + p_oct->authorized = 1; > +#endif > + return p_oct->authorized; > +} This function is used only when ENABLE_OSM_CONSOLE_SOCKET is set. 
When it is not set, gcc generates an unused-function warning. I already fixed this during review.
This Email is to inform you that you emerged a winner of £1,000,000.00 on our online draws which was release in United kingdom YOUR WINNING DETAILS REFERENCE NUMBER: 94PT/957/-00885PT BATCH NUMBER: DT957/74UK CASH CREDITEDN TO FILE: C/9080118308/08 GRAND DRAW NUMBER: (#205) Contact our fiduciary agent using the details as given below: Agent Name: Mr. Hans Boyer. Fiduciary/ Claims Agent. Tell :( +44) 701-112-7486 FAX :(+44) 7005964122 Email Enquiry: hansboyerwin at yahoo.co.uk Yours Truly, Anton Brooks, (Promo Co-Ordinator) From gstreiff at neteffect.com Fri Feb 15 09:41:27 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Fri, 15 Feb 2008 11:41:27 -0600 Subject: [ofa-general] [PATCH] RDMA/nes: MAC interrupt erroneously masked on ifdown Message-ID: <200802151741.m1FHfR1m004209@velma.neteffect.com> RDMA/nes: MAC interrupt erroneously masked on ifdown Only mask out MAC interrupt if necessary and re-enable on ifup. There could be multiple netdev going through the same MAC. MAC interrupts should not be masked off until the last netdev is downed. 
Signed-off-by: Chien Tung Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_nic.c | 52 ++++++++++++++++++++++++----------- 1 files changed, 36 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index b6cc265..67827ad 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -148,14 +148,15 @@ static int nes_netdev_open(struct net_device *netdev) struct nes_device *nesdev = nesvnic->nesdev; int ret; int i; - struct nes_vnic *first_nesvnic; + struct nes_vnic *first_nesvnic = NULL; u32 nic_active_bit; u32 nic_active; + struct list_head *list_pos, *list_temp; assert(nesdev != NULL); - first_nesvnic = list_entry(nesdev->nesadapter->nesvnic_list[nesdev->mac_index].next, - struct nes_vnic, list); + if (nesvnic->netdev_open == 1) + return 0; if (netif_msg_ifup(nesvnic)) printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name); @@ -225,7 +226,18 @@ static int nes_netdev_open(struct net_device *netdev) nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | nesvnic->nic_cq.cq_number); nes_read32(nesdev->regs+NES_CQE_ALLOC); - + list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { + first_nesvnic = container_of(list_pos, struct nes_vnic, list); + if (first_nesvnic->netdev_open == 1) + break; + } + if (first_nesvnic->netdev_open == 0) { + nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + first_nesvnic = nesvnic; + } if (first_nesvnic->linkup) { /* Enable network packets */ nesvnic->linkup = 1; @@ -248,6 +260,8 @@ static int nes_netdev_stop(struct net_device *netdev) struct nes_device *nesdev = nesvnic->nesdev; u32 nic_active_mask; u32 nic_active; + struct nes_vnic *first_nesvnic = NULL; + struct list_head 
*list_pos, *list_temp; nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", nesvnic, nesdev, netdev, netdev->name); @@ -260,9 +274,20 @@ static int nes_netdev_stop(struct net_device *netdev) /* Disable network packets */ napi_disable(&nesvnic->napi); netif_stop_queue(netdev); - if ((nesdev->netdev[0] == netdev) & (nesvnic->logical_port == nesdev->mac_index)) { - nes_write_indexed(nesdev, - NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); + list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { + first_nesvnic = container_of(list_pos, struct nes_vnic, list); + if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic)) + break; + } + + if (first_nesvnic->netdev_open == 0) + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); + else if ((first_nesvnic != nesvnic) && + (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), 0xffffffff); + nes_write_indexed(first_nesvnic->nesdev, NES_IDX_MAC_INT_MASK + (0x200 * first_nesvnic->nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); } nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); @@ -859,7 +884,6 @@ void nes_netdev_set_multicast_list(struct net_device *netdev) for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) { while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0)) multicast_addr = multicast_addr->next; - if (mc_nic_index < 0) mc_nic_index = nesvnic->nic_index; if (multicast_addr) { @@ -1610,7 +1634,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); if ((nesdev->netdev_count == 0) && - (PCI_FUNC(nesdev->pcidev->devfn) 
== nesdev->mac_index)) { + (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) { nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1))); u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + @@ -1648,18 +1672,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->linkup = 1; } } - nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); /* clear the MAC interrupt status, assumes direct logical to physical mapping */ - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port)); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port), u32temp); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); - if (nesdev->nesadapter->phy_type[nesvnic->logical_port] != NES_PHY_TYPE_IRIS) + if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS) nes_init_phy(nesdev); - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesvnic->logical_port), - ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | - NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); } return netdev; From ardavis at ichips.intel.com Fri Feb 15 09:23:03 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 15 Feb 2008 09:23:03 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <47B487A9.3060705@ichips.intel.com> <47B4EF9C.30200@ichips.intel.com> Message-ID: <47B5CA77.3020608@ichips.intel.com> Chuck Hartley wrote: > Here are the results with the CM: > > ib_rdma_bw -c -n100000 172.16.0.64 > 16855: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | > iters=100000 | duplex=0 | cma=1 | > 16855: Local address: LID 0000, QPN 000000, PSN 0x8e7362 RKey > 
0xea042f00 VAddr 0x002aaaaaae9000 > 16855: Remote address: LID 0000, QPN 000000, PSN 0xb45bba, RKey > 0xc2042f00 VAddr 0x002aaaab10b000 > > 16855: Bandwidth peak (#0 to #27125): 938.241 MB/sec > 16855: Bandwidth average: 938.238 MB/sec > 16855: Service Demand peak (#0 to #27125): 2428 cycles/KB > 16855: Service Demand Avg : 2428 cycles/KB > > And the straight verbs results: > > ib_rdma_bw -n100000 172.16.0.64 > 16877: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | > iters=100000 | duplex=0 | cma=0 | > 16877: Local address: LID 0x05, QPN 0x20409, PSN 0xe329ee RKey > 0x72043200 VAddr 0x002aaaaaae8000 > 16877: Remote address: LID 0x07, QPN 0x170409, PSN 0x1dc0f2, RKey > 0x4a043200 VAddr 0x002aaaab10b000 > > > 16877: Bandwidth peak (#0 to #49797): 1493.66 MB/sec > 16877: Bandwidth average: 1493.66 MB/sec > 16877: Service Demand peak (#0 to #49797): 1525 cycles/KB > 16877: Service Demand Avg : 1525 cycles/KB > ok, the performance issue is not uDAPL but rather the QP optimization differences between rdma_cm and straight verbs. Since uDAPL uses rdma_cm, this will be the base line for performance/optimization. When using rdma_cm to connect, a path record is obtained and the results are used to setup QP attributes. For straight verbs, the attributes are hard coded and QP info exchanged over sockets. I would concentrate on the path record information returned from the SA and compare against the straight verbs test configuration. Hal/Sean, is there an easy way to see path record information from the query? 
-arlin From hrosenstock at xsigo.com Fri Feb 15 09:30:33 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 15 Feb 2008 09:30:33 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <47B5CA77.3020608@ichips.intel.com> References: <47B487A9.3060705@ichips.intel.com> <47B4EF9C.30200@ichips.intel.com> <47B5CA77.3020608@ichips.intel.com> Message-ID: <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis wrote: > I would concentrate on the path record information returned > from the SA and compare against the straight verbs test > configuration. > > Hal/Sean, is there an easy way to see path record information > from the query? Most SMs have a way to display the PathRecord responses being returned by the SA. Would that work ? If OpenSM is being used, use -V on the command line for this. Contact your vendor if a vendor specific SM is being used and this technique would work for your purposes. -- Hal > -arlin > > From hrosenstock at xsigo.com Fri Feb 15 09:47:46 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 15 Feb 2008 09:47:46 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> References: <47B487A9.3060705@ichips.intel.com> <47B4EF9C.30200@ichips.intel.com> <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 09:30 -0800, Hal Rosenstock wrote: > On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis wrote: > > I would concentrate on the path record information returned > > from the SA and compare against the straight verbs test > > configuration. > > > > Hal/Sean, is there an easy way to see path record information > > from the query? > > Most SMs have a way to display the PathRecord responses being returned > by the SA. Would that work ? 
If OpenSM is being used, use -V on the > command line for this. Contact your vendor if a vendor specific SM is > being used and this technique would work for your purposes. Another alternative would be to use madeye on the end node but I don't think there's much decode there so that would need to be done by "hand". -- Hal > > -- Hal > > > -arlin > > > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From felix at chelsio.com Fri Feb 15 10:02:19 2008 From: felix at chelsio.com (Felix Marti) Date: Fri, 15 Feb 2008 10:02:19 -0800 Subject: [ofa-general] SDP performance with bzcopy testing help needed References: <47B20F6F.8080302@hpc.ufl.edu> <47B376CB.6050404@hpc.ufl.edu> <47B3D253.7010209@hpc.ufl.edu> Message-ID: <8A71B368A89016469F72CD08050AD334026D6040@maui.asicdesigners.com> Hi Craig, Thank you for pulling the data together on a website. I believe the results are quite interesting. It is probably worthwhile to point out a few performance points: Sockets over TOE: - gets line rate for small IO size (<1KB) with 1/2 line rate just north of 256B - cpu utilization drops to about 25% for receive and about 12.5% for transmit - out of a single core; various folks would prolly reports this as 8% and 3% when considering the processing power of the entire machine. - 1B latency is about 10usecs Sockets of SDP: - gets line rate for IO sizes of about 16KB (ZCOPY disabled) and 64KB (ZCOPY enabled) - cpu utilization is about 100%, even for large IO and the benefit of ZCOPY is limited (about 12.5%) - 1B latency is about 20usecs You can make the same comparison for Sockets over NIC as well. 
I believe that these numbers show the benefit of running sockets apps directly over the T3 TOE interface (instead of mapping a TCP streaming interface to a RDMA interface and then eventually back to a TCP stream :) which is very efficient, i.e. a lot of folks believe that TOE provides little benefit, and even less benefit for small IO (which is so crucial for many apps) but these results really prove them wrong. Note that the NIC requires an IO size of 4KB to reach line rate and performance falls off again as the IO sizes increases (beyond CPU cache sizes). This might even be more surprising as you use a MTU of 9KB (jumbo frames) and the NIC vs TOE comparison would tip in the TOE's favor even faster if you were to run with MTU 1500. Note that there is a little correction with respect to T3 and DMA address range (for iWarp). T3 does not have any address limitation and can DMA to/from any 64b address. However, memory region sizes are limited to 4GB. OFED currently attempts to map the entire address space for DMA (which, IMHO, is questionable as the entire address space is opened up for DMA - what about UNIX security semantics? :-/). It would prolly be better (more secure) if apps were only to map address ranges that they really want to DMA to/from and then a 4GB region size limitation seems adequate. Regards, felix > -----Original Message----- > From: general-bounces at lists.openfabrics.org [mailto:general- > bounces at lists.openfabrics.org] On Behalf Of Craig Prescott > Sent: Wednesday, February 13, 2008 9:32 PM > To: Scott Weitzenkamp (sweitzen) > Cc: general at lists.openfabrics.org; jim at mellanox.com > Subject: Re: [ofa-general] SDP performance with bzcopy testing help > needed > > Scott Weitzenkamp (sweitzen) wrote: > >> But the effect is still clear. > >> > >> throughput: > >> > >> 64K 128K 1M > >> SDP 7602.40 7560.57 5791.56 > >> BZCOPY 5454.20 6378.48 7316.28 > >> > > > > Looks unclear to me. Sometimes BZCOPY does better, sometimes worse. 
> > > > > Fair enough. > > While measuring a broader spectrum of message sizes, I noted a > big variation in throughput and send service demand for the SDP > case as a function of which core/CPU the netperf ran on. > Particularly, which CPU the netperf ran on relative to which > CPU was handling the interrupts for ib_mthca. > > Netperf has an option (-T) to allow for local and remote cpu > binding. So I used it to force the client and server to run on > CPU 0. Further, I mapped all ib_mthca interrupts to CPU 1 (irqbalance > was already disabled). This appears to have reduced the statistical > error between netperf runs to negligible amounts. I'll do more runs > to verify this and check out the other permutations, but this is what > has come out so far. > > TPUT = throughput (Mbits/sec) > LCL = send service demand (usec/KB) > RMT = recv service demand (usec/KB) > > "-T 0,0" option given to netperf client: > > SDP BZCOPY > -------------------- -------------------- > MESGSIZE TPUT LCL RMT TPUT LCL RMT > -------- ------- ----- ----- ------- ----- ----- > 64K 7581.14 0.746 1.105 5547.66 1.491 1.495 > 128K 7478.37 0.871 1.116 6429.84 1.282 1.291 > 256K 7427.38 0.946 1.115 6917.20 1.197 1.201 > 512K 7310.14 1.122 1.129 7229.13 1.145 1.150 > 1M 7251.29 1.143 1.129 7457.95 0.996 1.109 > 2M 7249.27 1.146 1.133 7340.26 0.502 1.105 > 4M 7217.26 1.156 1.136 7322.63 0.397 1.096 > > In this case, BZCOPY send service demand is significantly > less for the largest message sizes, though the throughput > for large messages is not very different. 
> > However, with "-T 2,2", the result looks like this: > > SDP BZCOPY > -------------------- -------------------- > MESGSIZE TPUT LCL RMT TPUT LCL RMT > -------- ------- ----- ----- ------- ----- ----- > 64K 7599.40 0.841 1.114 5493.56 1.510 1.585 > 128K 7556.53 1.039 1.121 6483.12 1.274 1.325 > 256K 7155.13 1.128 1.180 6996.30 1.180 1.220 > 512K 5984.26 1.357 1.277 7285.86 1.130 1.166 > 1M 5641.28 1.443 1.343 7250.43 0.811 1.141 > 2M 5657.98 1.439 1.387 7265.85 0.492 1.127 > 4M 5623.94 1.447 1.370 7274.43 0.385 1.112 > > For BZCOPY, the results are pretty similar; but for SDP, > the service demands are much higher, and the throughputs > have dropped dramatically relative to "-T 0,0". > > In either case, though, BZCOPY is more efficient for > large messages. > > Cheers, > Craig > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib- > general From Caitlin.Bestler at neterion.com Fri Feb 15 10:09:39 2008 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Fri, 15 Feb 2008 13:09:39 -0500 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> Christoph Lameter asked: > > What does it mean that the "application layer has to be determine what > pages are registered"? The application does not know which of its pages > are currently in memory. It can only force these pages to stay in > memory if their are mlocked. 
> An application that advertises an RDMA accessible buffer to a remote peer *does* have to know that its pages *are* currently in memory. The application does *not* need for the virtual-to-physical mapping of those pages to be frozen for the lifespan of the Memory Region. But it is issuing an invitation to its peer to perform direct writes to the advertised buffer. When the peer decides to exercise that invitation the pages have to be there. An analogy: when you write a check for $100 you do not have to identify the serial numbers of ten $10 bills, but you are expected to have the funds in your account. Issuing a buffer advertisement for memory you do not have is the network equivalent of writing a check that you do not have funds for. Now, just as your bank may offer overdraft protection, an RDMA device could merely report a page fault rather than tearing down the connection itself. But that does not grant permission for applications to advertise buffer space that they do not have committed, it merely helps recovery from a programming fault. A suspend/resume interface between the Virtual Memory Manager and the RDMA layer allows pages to be re-arranged at the convenience of the Virtual Memory Manager without breaking the application layer peer-to-peer contract. The current interfaces that pin exact pages are really the equivalent of having to tell the bank that when Joe cashes this $100 check that you should give him *these* ten $10 bills. It works, but it adds too much overhead and is very inflexible. So there are a lot of good reasons to evolve this interface to better deal with these issues. Other areas of possible evolution include allowing growing or trimming of Memory Regions without invalidating their advertised handles. But the more fundamental issue is recognizing that applications that use direct interfaces need to know that buffers that they enable truly have committed resources. 
They need a way to ask for twenty *real* pages, not twenty pages of address space. And they need to do it in a way that allows memory to be rearranged or even migrated with them to a new host. From ponix89 at libero.it Fri Feb 15 10:43:22 2008 From: ponix89 at libero.it (Easton Childress) Date: Fri, 15 Feb 2008 20:43:22 +0200 Subject: [ofa-general] It's Time for Changes, Change Your Size Message-ID: <775056569.08577594680280@libero.it> With the VPXL a lot of men worldwide increased their erotic confidence.Order VPXL and have a pleasure of being confident. http://geocities.com/martinfuller429/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From clameter at sgi.com Fri Feb 15 10:45:55 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 15 Feb 2008 10:45:55 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> Message-ID: On Fri, 15 Feb 2008, Caitlin Bestler wrote: > > What does it mean that the "application layer has to be determine what > > pages are registered"? The application does not know which of its > pages > > are currently in memory. It can only force these pages to stay in > > memory if their are mlocked. > > > > An application that advertises an RDMA accessible buffer > to a remote peer *does* have to know that its pages *are* > currently in memory. Ok that would mean it needs to inform the VM of that issue by mlocking these pages. 
> But the more fundamental issue is recognizing that applications > that use direct interfaces need to know that buffers that they > enable truly have committed resources. They need a way to > ask for twenty *real* pages, not twenty pages of address > space. And they need to do it in a way that allows memory > to be rearranged or even migrated with them to a new host. mlock will force the pages to stay in memory without requiring the OS to keep them where they are. From hartlch14 at gmail.com Fri Feb 15 10:46:23 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 15 Feb 2008 13:46:23 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> References: <47B487A9.3060705@ichips.intel.com> <47B4EF9C.30200@ichips.intel.com> <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> Message-ID: Yes, we are using OpenSM and I found where to set the -V switch in /etc/opensm.conf. I started opensmd service on the current node and looked at the log file it created. I have no idea what I am looking at / for... there are two items that stand out - PortInfo dumps and SMP dumps. What is it I am looking for or what can I post here? On Fri, Feb 15, 2008 at 12:47 PM, Hal Rosenstock wrote: > On Fri, 2008-02-15 at 09:30 -0800, Hal Rosenstock wrote: > > On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis wrote: > > > I would concentrate on the path record information returned > > > from the SA and compare against the straight verbs test > > > configuration. > > > > > > Hal/Sean, is there an easy way to see path record information > > > from the query? > > > > Most SMs have a way to display the PathRecord responses being returned > > by the SA. Would that work ? If OpenSM is being used, use -V on the > > command line for this. 
Contact your vendor if a vendor specific SM is > > being used and this technique would work for your purposes. > > Another alternative would be to use madeye on the end node but I don't > think there's much decode there so that would need to be done by "hand". > > -- Hal > > > > > -- Hal > > > > > -arlin > > > > > > > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > -------------- next part -------------- An HTML attachment was scrubbed... URL: From hrosenstock at xsigo.com Fri Feb 15 10:52:09 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 15 Feb 2008 10:52:09 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <47B487A9.3060705@ichips.intel.com> <47B4EF9C.30200@ichips.intel.com> <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 13:46 -0500, Chuck Hartley wrote: > Yes, we are using OpenSM and I found where to set the -V switch > in /etc/opensm.conf. I started opensmd service on the current node > and looked at the log file it created. I have no idea what I am > looking at / for... there are two items that stand out - PortInfo > dumps and SMP dumps. What is it I am looking for or what can I post > here? SA PathRecord requests/responses to your end node. 
> On Fri, Feb 15, 2008 at 12:47 PM, Hal Rosenstock > wrote: > On Fri, 2008-02-15 at 09:30 -0800, Hal Rosenstock wrote: > > On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis wrote: > > > I would concentrate on the path record information > returned > > > from the SA and compare against the straight verbs test > > > configuration. > > > > > > Hal/Sean, is there an easy way to see path record > information > > > from the query? > > > > Most SMs have a way to display the PathRecord responses > being returned > > by the SA. Would that work ? If OpenSM is being used, use -V > on the > > command line for this. Contact your vendor if a vendor > specific SM is > > being used and this technique would work for your purposes. > > > Another alternative would be to use madeye on the end node but > I don't > think there's much decode there so that would need to be done > by "hand". > > -- Hal > > > > > -- Hal > > > > > -arlin > > > > > > > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi- > bin/mailman/listinfo/general > > > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From Caitlin.Bestler at neterion.com Fri Feb 15 10:53:53 2008 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Fri, 15 Feb 2008 13:53:53 -0500 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> 
<78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD77030E25F1@nekter> > -----Original Message----- > From: Christoph Lameter [mailto:clameter at sgi.com] > Sent: Friday, February 15, 2008 10:46 AM > To: Caitlin Bestler > Cc: linux-kernel at vger.kernel.org; avi at qumranet.com; linux-mm at kvack.org; > general at lists.openfabrics.org; kvm-devel at lists.sourceforge.net > Subject: RE: [ofa-general] Re: Demand paging for memory regions > > On Fri, 15 Feb 2008, Caitlin Bestler wrote: > > > > What does it mean that the "application layer has to be determine > what > > > pages are registered"? The application does not know which of its > > pages > > > are currently in memory. It can only force these pages to stay in > > > memory if their are mlocked. > > > > > > > An application that advertises an RDMA accessible buffer > > to a remote peer *does* have to know that its pages *are* > > currently in memory. > > Ok that would mean it needs to inform the VM of that issue by mlocking > these pages. > > > But the more fundamental issue is recognizing that applications > > that use direct interfaces need to know that buffers that they > > enable truly have committed resources. They need a way to > > ask for twenty *real* pages, not twenty pages of address > > space. And they need to do it in a way that allows memory > > to be rearranged or even migrated with them to a new host. > > mlock will force the pages to stay in memory without requiring the OS > to keep them where they are. So that would mean that mlock is used by the application before it registers memory for direct access, and then it is up to the RDMA layer and the OS to negotiate actual pinning of the addresses for whatever duration is required. There is no *protocol* barrier to replacing pages within a Memory Region as long as it is done in a way that keeps the content of those page coherent. 
But existing devices have their own ideas on how this is done and existing devices are notoriously poor at learning new tricks. Merely mlocking pages deals with the end-to-end RDMA semantics. What still needs to be addressed is how a fastpath interface would dynamically pin and unpin. Yielding pins for short-term suspensions (and flushing cached translations) deals with the rest. Understanding the range of support that existing devices could provide with software updates would be the next step if you wanted to pursue this. From dwsignaturm at signatur.ca Fri Feb 15 11:13:51 2008 From: dwsignaturm at signatur.ca (Katelyn Sims) Date: Fri, 15 Feb 2008 20:13:51 +0100 Subject: [ofa-general] Purchase software at surprisingly low prices! Message-ID: <01c8700f$47b62980$ecca104f@dwsignaturm> Get original and perfectly functioning software at low prices. All software can be downloaded immediately after purchase. Impressive selection of programs even for Macintosh! Programs in many languages are available. Purchasing software you can be sure you get perfectly working software, in case you are not satisfied, we offer money refund. Quick response and advice on how to install your software are guaranteed. http://geocities.com/jacksonbriggs33/ Incredible selection of programs and applications! From hartlch14 at gmail.com Fri Feb 15 11:47:29 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 15 Feb 2008 14:47:29 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> References: <47B487A9.3060705@ichips.intel.com> <47B4EF9C.30200@ichips.intel.com> <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> Message-ID: Yes, but I don't see anything in the log file tagged with anything like that. 
Is there some other string I can search on to locate the request/response messages? What osm_xxx function would be sending the request/responses? Or what is the message or signal called - I see various ones starting with OSM_ On Fri, Feb 15, 2008 at 1:52 PM, Hal Rosenstock wrote: > On Fri, 2008-02-15 at 13:46 -0500, Chuck Hartley wrote: > > Yes, we are using OpenSM and I found where to set the -V switch > > in /etc/opensm.conf. I started opensmd service on the current node > > and looked at the log file it created. I have no idea what I am > > looking at / for... there are two items that stand out - PortInfo > > dumps and SMP dumps. What is it I am looking for or what can I post > > here? > > SA PathRecord requests/responses to your end node. > > > On Fri, Feb 15, 2008 at 12:47 PM, Hal Rosenstock > > wrote: > > On Fri, 2008-02-15 at 09:30 -0800, Hal Rosenstock wrote: > > > On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis wrote: > > > > I would concentrate on the path record information > > returned > > > > from the SA and compare against the straight verbs test > > > > configuration. > > > > > > > > Hal/Sean, is there an easy way to see path record > > information > > > > from the query? > > > > > > Most SMs have a way to display the PathRecord responses > > being returned > > > by the SA. Would that work ? If OpenSM is being used, use -V > > on the > > > command line for this. Contact your vendor if a vendor > > specific SM is > > > being used and this technique would work for your purposes. > > > > > > Another alternative would be to use madeye on the end node but > > I don't > > think there's much decode there so that would need to be done > > by "hand". 
> > > > -- Hal > > > > > > > > -- Hal > > > > > > > -arlin > > > > > > > > > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi- > > bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From hrosenstock at xsigo.com Fri Feb 15 11:56:26 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 15 Feb 2008 11:56:26 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <47B487A9.3060705@ichips.intel.com> <47B4EF9C.30200@ichips.intel.com> <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 14:47 -0500, Chuck Hartley wrote: > Yes, but I don't see anything in the log file tagged with anything > like that. Is there some other string I can search on to locate the > request/response messages? Maybe search for osm_pr_rcv_respond ? > What osm_xxx function would be sending the request/responses? Or what > is the message or signal called - I see various ones starting with > OSM_ > > > > On Fri, Feb 15, 2008 at 1:52 PM, Hal Rosenstock > wrote: > On Fri, 2008-02-15 at 13:46 -0500, Chuck Hartley wrote: > > Yes, we are using OpenSM and I found where to set the -V > switch > > in /etc/opensm.conf. I started opensmd service on the > current node > > and looked at the log file it created. 
I have no idea what > I am > > looking at / for... there are two items that stand out - > PortInfo > > dumps and SMP dumps. What is it I am looking for or what > can I post > > here? > > > SA PathRecord requests/responses to your end node. > > > > On Fri, Feb 15, 2008 at 12:47 PM, Hal Rosenstock > > wrote: > > On Fri, 2008-02-15 at 09:30 -0800, Hal Rosenstock > wrote: > > > On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis > wrote: > > > > I would concentrate on the path record > information > > returned > > > > from the SA and compare against the straight > verbs test > > > > configuration. > > > > > > > > Hal/Sean, is there an easy way to see path > record > > information > > > > from the query? > > > > > > Most SMs have a way to display the PathRecord > responses > > being returned > > > by the SA. Would that work ? If OpenSM is being > used, use -V > > on the > > > command line for this. Contact your vendor if a > vendor > > specific SM is > > > being used and this technique would work for your > purposes. > > > > > > Another alternative would be to use madeye on the > end node but > > I don't > > think there's much decode there so that would need > to be done > > by "hand". 
> > > > -- Hal > > > > > > > > -- Hal > > > > > > > -arlin > > > > > > > > > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi- > > bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi- > bin/mailman/listinfo/general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > > > From clameter at sgi.com Fri Feb 15 12:02:12 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 15 Feb 2008 12:02:12 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <78C9135A3D2ECE4B8162EBDCE82CAD77030E25F1@nekter> References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25F1@nekter> Message-ID: On Fri, 15 Feb 2008, Caitlin Bestler wrote: > So that would mean that mlock is used by the application before it > registers memory for direct access, and then it is up to the RDMA > layer and the OS to negotiate actual pinning of the addresses for > whatever duration is required. Right. > There is no *protocol* barrier to replacing pages within a Memory > Region as long as it is done in a way that keeps the content of > those page coherent. But existing devices have their own ideas > on how this is done and existing devices are notoriously poor at > learning new tricks. Hmmmm.. Okay. But that is mainly a device driver maintenance issue. 
> Merely mlocking pages deals with the end-to-end RDMA semantics. > What still needs to be addressed is how a fastpath interface > would dynamically pin and unpin. Yielding pins for short-term > suspensions (and flushing cached translations) deals with the > rest. Understanding the range of support that existing devices > could provide with software updates would be the next step if > you wanted to pursue this. That is addressed on the VM level by the mmu_notifier which started this whole thread. The RDMA layers need to subscribe to this notifier and then do whatever the hardware requires to unpin and pin memory. I can only go as far as dealing with the VM layer. If you have any issues there I'd be glad to help. From arlin.r.davis at intel.com Fri Feb 15 12:03:35 2008 From: arlin.r.davis at intel.com (Davis, Arlin R) Date: Fri, 15 Feb 2008 12:03:35 -0800 Subject: [ofa-general] [ANNOUNCE] dapl-1.2.5 and dapl-2.0.7 released Message-ID: There are new releases for dapl 1.2 and 2.0 available on the OFA download page and in my git tree. md5sum: db0e27ed9389de8f748660f3b582bc29 dapl-1.2.5.tar.gz md5sum: c0947ab91a518913776c1fe5aadb79cd dapl-2.0.7.tar.gz Vlad, please pull both releases into OFED 1.3 RC5 and include the following packages: dapl-1.2.5-1 dapl-devel-1.2.5-1 dapl-2.0.7-1 dapl-utils-2.0.7-1 dapl-devel-2.0.7-1 dapl-debuginfo-2.0.7-1 Tags: v1 - dapl-1.2.5-1, ofed_1_3-v1 v2 - dapl-2.0.7-1, ofed_1_3-v2 See http://www.openfabrics.org/downloads/dapl/README.html for details. 
-arlin From hartlch14 at gmail.com Fri Feb 15 12:05:29 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 15 Feb 2008 15:05:29 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> References: <47B4EF9C.30200@ichips.intel.com> <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> Message-ID: Nope - none of those. The closest thing is osm_pr_rcv_init right when the SM starts up. BTW, we have OpenSM Rev:openib-3.0.14 On Fri, Feb 15, 2008 at 2:56 PM, Hal Rosenstock wrote: > On Fri, 2008-02-15 at 14:47 -0500, Chuck Hartley wrote: > > Yes, but I don't see anything in the log file tagged with anything > > like that. Is there some other string I can search on to locate the > > request/response messages? > > Maybe search for osm_pr_rcv_respond ? > > > What osm_xxx function would be sending the request/responses? Or what > > is the message or signal called - I see various ones starting with > > OSM_ > > > > > > > > On Fri, Feb 15, 2008 at 1:52 PM, Hal Rosenstock > > wrote: > > On Fri, 2008-02-15 at 13:46 -0500, Chuck Hartley wrote: > > > Yes, we are using OpenSM and I found where to set the -V > > switch > > > in /etc/opensm.conf. I started opensmd service on the > > current node > > > and looked at the log file it created. I have no idea what > > I am > > > looking at / for... there are two items that stand out - > > PortInfo > > > dumps and SMP dumps. What is it I am looking for or what > > can I post > > > here? > > > > > > SA PathRecord requests/responses to your end node. 
> > > > > > > On Fri, Feb 15, 2008 at 12:47 PM, Hal Rosenstock > > > wrote: > > > On Fri, 2008-02-15 at 09:30 -0800, Hal Rosenstock > > wrote: > > > > On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis > > wrote: > > > > > I would concentrate on the path record > > information > > > returned > > > > > from the SA and compare against the straight > > verbs test > > > > > configuration. > > > > > > > > > > Hal/Sean, is there an easy way to see path > > record > > > information > > > > > from the query? > > > > > > > > Most SMs have a way to display the PathRecord > > responses > > > being returned > > > > by the SA. Would that work ? If OpenSM is being > > used, use -V > > > on the > > > > command line for this. Contact your vendor if a > > vendor > > > specific SM is > > > > being used and this technique would work for your > > purposes. > > > > > > > > > Another alternative would be to use madeye on the > > end node but > > > I don't > > > think there's much decode there so that would need > > to be done > > > by "hand". > > > > > > -- Hal > > > > > > > > > > > -- Hal > > > > > > > > > -arlin > > > > > > > > > > > > > > _______________________________________________ > > > > general mailing list > > > > general at lists.openfabrics.org > > > > http://lists.openfabrics.org/cgi- > > > bin/mailman/listinfo/general > > > > > > > > To unsubscribe, please visit > > > http://openib.org/mailman/listinfo/openib-general > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi- > > bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From Caitlin.Bestler at neterion.com Fri Feb 15 12:14:41 2008 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Fri, 15 Feb 2008 15:14:41 -0500 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <469958e00802141443g33448abcs3efa6d6c4aec2b56@mail.gmail.com> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25F1@nekter> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD77030E2657@nekter> Christoph Lameter wrote > > > Merely mlocking pages deals with the end-to-end RDMA semantics. > > What still needs to be addressed is how a fastpath interface > > would dynamically pin and unpin. Yielding pins for short-term > > suspensions (and flushing cached translations) deals with the > > rest. Understanding the range of support that existing devices > > could provide with software updates would be the next step if > > you wanted to pursue this. > > That is addressed on the VM level by the mmu_notifier which started > this whole thread. The RDMA layers need to subscribe to this notifier > and then do whatever the hardware requires to unpin and pin memory. > I can only go as far as dealing with the VM layer. If you have any > issues there I'd be glad to help. There isn't much point in the RDMA layer subscribing to mmu notifications if the specific RDMA device will not be able to react appropriately when the notification occurs. I don't see how you get around needing to know which devices are capable of supporting page migration (via suspend/resume or other mechanisms) and which can only respond to a page migration by aborting connections. 
From hartlch14 at gmail.com Fri Feb 15 12:26:24 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 15 Feb 2008 15:26:24 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> Message-ID: I direct connected the two machines instead of running them to the switch and I get good BW numbers. What does that indicate? On Fri, Feb 15, 2008 at 3:05 PM, Chuck Hartley wrote: > Nope - none of those. The closest thing is osm_pr_rcv_init right when the > SM starts up. > > BTW, we have OpenSM Rev:openib-3.0.14 > > > On Fri, Feb 15, 2008 at 2:56 PM, Hal Rosenstock > wrote: > > > On Fri, 2008-02-15 at 14:47 -0500, Chuck Hartley wrote: > > > Yes, but I don't see anything in the log file tagged with anything > > > like that. Is there some other string I can search on to locate the > > > request/response messages? > > > > Maybe search for osm_pr_rcv_respond ? > > > > > What osm_xxx function would be sending the request/responses? Or what > > > is the message or signal called - I see various ones starting with > > > OSM_ > > > > > > > > > > > > On Fri, Feb 15, 2008 at 1:52 PM, Hal Rosenstock > > > wrote: > > > On Fri, 2008-02-15 at 13:46 -0500, Chuck Hartley wrote: > > > > Yes, we are using OpenSM and I found where to set the -V > > > switch > > > > in /etc/opensm.conf. I started opensmd service on the > > > current node > > > > and looked at the log file it created. I have no idea what > > > I am > > > > looking at / for... there are two items that stand out - > > > PortInfo > > > > dumps and SMP dumps. What is it I am looking for or what > > > can I post > > > > here? > > > > > > > > > SA PathRecord requests/responses to your end node. 
> > > > > > > > > > On Fri, Feb 15, 2008 at 12:47 PM, Hal Rosenstock > > > > wrote: > > > > On Fri, 2008-02-15 at 09:30 -0800, Hal Rosenstock > > > wrote: > > > > > On Fri, 2008-02-15 at 09:23 -0800, Arlin Davis > > > wrote: > > > > > > I would concentrate on the path record > > > information > > > > returned > > > > > > from the SA and compare against the straight > > > verbs test > > > > > > configuration. > > > > > > > > > > > > Hal/Sean, is there an easy way to see path > > > record > > > > information > > > > > > from the query? > > > > > > > > > > Most SMs have a way to display the PathRecord > > > responses > > > > being returned > > > > > by the SA. Would that work ? If OpenSM is being > > > used, use -V > > > > on the > > > > > command line for this. Contact your vendor if a > > > vendor > > > > specific SM is > > > > > being used and this technique would work for your > > > purposes. > > > > > > > > > > > > Another alternative would be to use madeye on the > > > end node but > > > > I don't > > > > think there's much decode there so that would need > > > to be done > > > > by "hand". > > > > > > > > -- Hal > > > > > > > > > > > > > > -- Hal > > > > > > > > > > > -arlin > > > > > > > > > > > > > > > > > _______________________________________________ > > > > > general mailing list > > > > > general at lists.openfabrics.org > > > > > http://lists.openfabrics.org/cgi- > > > > bin/mailman/listinfo/general > > > > > > > > > > To unsubscribe, please visit > > > > http://openib.org/mailman/listinfo/openib-general > > > > _______________________________________________ > > > > general mailing list > > > > general at lists.openfabrics.org > > > > http://lists.openfabrics.org/cgi- > > > bin/mailman/listinfo/general > > > > > > > > To unsubscribe, please visit > > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > > > > > > > > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From hrosenstock at xsigo.com Fri Feb 15 12:33:46 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 15 Feb 2008 12:33:46 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <47B5CA77.3020608@ichips.intel.com> <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 15:26 -0500, Chuck Hartley wrote: > I direct connected the two machines instead of running them to the > switch and I get good BW numbers. What does that indicate? Maybe you have a degraded link in your subnet. > > > On Fri, Feb 15, 2008 at 3:05 PM, Chuck Hartley > wrote: > Nope - none of those. The closest thing is osm_pr_rcv_init > right when the SM starts up. > > BTW, we have OpenSM Rev:openib-3.0.14 > > > On Fri, Feb 15, 2008 at 2:56 PM, Hal Rosenstock > wrote: > On Fri, 2008-02-15 at 14:47 -0500, Chuck Hartley > wrote: > > Yes, but I don't see anything in the log file tagged > with anything > > like that. Is there some other string I can search > on to locate the > > request/response messages? > > > Maybe search for osm_pr_rcv_respond ? > > > > What osm_xxx function would be sending the > request/responses? Or what > > is the message or signal called - I see various ones > starting with > > OSM_ > > > > > > > > On Fri, Feb 15, 2008 at 1:52 PM, Hal Rosenstock > > wrote: > > On Fri, 2008-02-15 at 13:46 -0500, Chuck > Hartley wrote: > > > Yes, we are using OpenSM and I found where > to set the -V > > switch > > > in /etc/opensm.conf. I started opensmd > service on the > > current node > > > and looked at the log file it created. I > have no idea what > > I am > > > looking at / for... there are two items > that stand out - > > PortInfo > > > dumps and SMP dumps. 
What is it I am > looking for or what > > can I post > > > here? > > > > > > SA PathRecord requests/responses to your end > node. > > > > > > > On Fri, Feb 15, 2008 at 12:47 PM, Hal > Rosenstock > > > wrote: > > > On Fri, 2008-02-15 at 09:30 -0800, > Hal Rosenstock > > wrote: > > > > On Fri, 2008-02-15 at 09:23 > -0800, Arlin Davis > > wrote: > > > > > I would concentrate on the > path record > > information > > > returned > > > > > from the SA and compare > against the straight > > verbs test > > > > > configuration. > > > > > > > > > > Hal/Sean, is there an easy way > to see path > > record > > > information > > > > > from the query? > > > > > > > > Most SMs have a way to display > the PathRecord > > responses > > > being returned > > > > by the SA. Would that work ? If > OpenSM is being > > used, use -V > > > on the > > > > command line for this. Contact > your vendor if a > > vendor > > > specific SM is > > > > being used and this technique > would work for your > > purposes. > > > > > > > > > Another alternative would be to > use madeye on the > > end node but > > > I don't > > > think there's much decode there so > that would need > > to be done > > > by "hand". 
> > > > > > -- Hal > > > > > > > > > > > -- Hal > > > > > > > > > -arlin > > > > > > > > > > > > > > > _______________________________________________ > > > > general mailing list > > > > general at lists.openfabrics.org > > > > > http://lists.openfabrics.org/cgi- > > > bin/mailman/listinfo/general > > > > > > > > To unsubscribe, please visit > > > > http://openib.org/mailman/listinfo/openib-general > > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi- > > bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit > > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > > > > > From hartlch14 at gmail.com Fri Feb 15 12:51:08 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 15 Feb 2008 15:51:08 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> References: <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> Message-ID: Would another (non-OpenSM) SM running on another switch mess things up? We have an IBM blade server with its own IB switch attached to our main switch. Detaching that connection made the problem go away. Reattaching it makes the problem come back. Do I have to configure something differently in opensmd.conf? On Fri, Feb 15, 2008 at 3:33 PM, Hal Rosenstock wrote: > On Fri, 2008-02-15 at 15:26 -0500, Chuck Hartley wrote: > > I direct connected the two machines instead of running them to the > > switch and I get good BW numbers. What does that indicate? > > Maybe you have a degraded link in your subnet. > > > > > > > On Fri, Feb 15, 2008 at 3:05 PM, Chuck Hartley > > wrote: > > Nope - none of those. 
The closest thing is osm_pr_rcv_init > > right when the SM starts up. > > > > BTW, we have OpenSM Rev:openib-3.0.14 > > > > > > On Fri, Feb 15, 2008 at 2:56 PM, Hal Rosenstock > > wrote: > > On Fri, 2008-02-15 at 14:47 -0500, Chuck Hartley > > wrote: > > > Yes, but I don't see anything in the log file tagged > > with anything > > > like that. Is there some other string I can search > > on to locate the > > > request/response messages? > > > > > > Maybe search for osm_pr_rcv_respond ? > > > > > > > What osm_xxx function would be sending the > > request/responses? Or what > > > is the message or signal called - I see various ones > > starting with > > > OSM_ > > > > > > > > > > > > On Fri, Feb 15, 2008 at 1:52 PM, Hal Rosenstock > > > wrote: > > > On Fri, 2008-02-15 at 13:46 -0500, Chuck > > Hartley wrote: > > > > Yes, we are using OpenSM and I found where > > to set the -V > > > switch > > > > in /etc/opensm.conf. I started opensmd > > service on the > > > current node > > > > and looked at the log file it created. I > > have no idea what > > > I am > > > > looking at / for... there are two items > > that stand out - > > > PortInfo > > > > dumps and SMP dumps. What is it I am > > looking for or what > > > can I post > > > > here? > > > > > > > > > SA PathRecord requests/responses to your end > > node. > > > > > > > > > > On Fri, Feb 15, 2008 at 12:47 PM, Hal > > Rosenstock > > > > wrote: > > > > On Fri, 2008-02-15 at 09:30 -0800, > > Hal Rosenstock > > > wrote: > > > > > On Fri, 2008-02-15 at 09:23 > > -0800, Arlin Davis > > > wrote: > > > > > > I would concentrate on the > > path record > > > information > > > > returned > > > > > > from the SA and compare > > against the straight > > > verbs test > > > > > > configuration. > > > > > > > > > > > > Hal/Sean, is there an easy way > > to see path > > > record > > > > information > > > > > > from the query? 
> > > > > > > > > > Most SMs have a way to display > > the PathRecord > > > responses > > > > being returned > > > > > by the SA. Would that work ? If > > OpenSM is being > > > used, use -V > > > > on the > > > > > command line for this. Contact > > your vendor if a > > > vendor > > > > specific SM is > > > > > being used and this technique > > would work for your > > > purposes. > > > > > > > > > > > > Another alternative would be to > > use madeye on the > > > end node but > > > > I don't > > > > think there's much decode there so > > that would need > > > to be done > > > > by "hand". > > > > > > > > -- Hal > > > > > > > > > > > > > > -- Hal > > > > > > > > > > > -arlin > > > > > > > > > > > > > > > > > > > _______________________________________________ > > > > > general mailing list > > > > > general at lists.openfabrics.org > > > > > > > http://lists.openfabrics.org/cgi- > > > > bin/mailman/listinfo/general > > > > > > > > > > To unsubscribe, please visit > > > > > > http://openib.org/mailman/listinfo/openib-general > > > > > > _______________________________________________ > > > > general mailing list > > > > general at lists.openfabrics.org > > > > http://lists.openfabrics.org/cgi- > > > bin/mailman/listinfo/general > > > > > > > > To unsubscribe, please visit > > > > > > http://openib.org/mailman/listinfo/openib-general > > > > > > > > > > > > > > > > > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From rolandd at cisco.com Fri Feb 15 12:57:59 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 15 Feb 2008 12:57:59 -0800 Subject: [ofa-general] [PATCH 0/4] [RFC] librdmacm RPM spec fixes Message-ID: <20082151257.nnkDWxkIN9hHeeaJ@cisco.com> Here is a series of patches to the librdmacm spec file that I came up with while preparing packages for inclusion in Fedora. They clean up a variety of rpmlint warnings in packages built with the spec file included in the tarball. 
From rolandd at cisco.com Fri Feb 15 12:57:59 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 15 Feb 2008 12:57:59 -0800 Subject: [ofa-general] [PATCH 1/4] [RFC] Fix Source URL in RPM spec file In-Reply-To: <20082151257.nnkDWxkIN9hHeeaJ@cisco.com> Message-ID: <20082151257.zVHI4L5a5AarRx39@cisco.com> Source packages are now in downloads/rdmacm/, not just downloads/. Signed-off-by: Roland Dreier --- librdmacm.spec.in | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/librdmacm.spec.in b/librdmacm.spec.in index 046d9cf..c09d6a4 100644 --- a/librdmacm.spec.in +++ b/librdmacm.spec.in @@ -8,7 +8,7 @@ Summary: Userspace RDMA Connection Manager. Group: System Environment/Libraries License: GPLv2 or BSD Url: http://www.openfabrics.org/ -Source: http://www.openfabrics.org/downloads/%{name}-%{version}.tar.gz +Source: http://www.openfabrics.org/downloads/rdmacm/%{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) %description -- 1.5.4.1 From rolandd at cisco.com Fri Feb 15 12:57:59 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 15 Feb 2008 12:57:59 -0800 Subject: [ofa-general] [PATCH 3/4] [RFC] Remove period from end of Summary in RPM spec file In-Reply-To: <20082151257.CXJDV47SAl8tJm83@cisco.com> Message-ID: <20082151257.EnpeC3aKZ1UBvISb@cisco.com> rpmlint says: W: summary-ended-with-dot Userspace RDMA Connection Manager. Signed-off-by: Roland Dreier --- librdmacm.spec.in | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/librdmacm.spec.in b/librdmacm.spec.in index 693f311..dffe5bb 100644 --- a/librdmacm.spec.in +++ b/librdmacm.spec.in @@ -3,7 +3,7 @@ Name: librdmacm Version: 1.0.6 Release: 1%{?dist} -Summary: Userspace RDMA Connection Manager. 
+Summary: Userspace RDMA Connection Manager Group: System Environment/Libraries License: GPLv2 or BSD -- 1.5.4.1 From rolandd at cisco.com Fri Feb 15 12:57:59 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 15 Feb 2008 12:57:59 -0800 Subject: [ofa-general] [PATCH 4/4] [RFC] Add changelog to rpm spec file In-Reply-To: <20082151257.EnpeC3aKZ1UBvISb@cisco.com> Message-ID: <20082151257.VlKtl1Q6lnjta4gM@cisco.com> rpmlint says: E: no-changelogname-tag Signed-off-by: Roland Dreier --- librdmacm.spec.in | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/librdmacm.spec.in b/librdmacm.spec.in index dffe5bb..41673ab 100644 --- a/librdmacm.spec.in +++ b/librdmacm.spec.in @@ -66,3 +66,8 @@ rm -rf $RPM_BUILD_ROOT %files utils %defattr(-,root,root,-) %{_bindir}/* + +%changelog + +* Fri Feb 15 2008 Roland Dreier - 1.0.6-1 +- Initial Fedora spec file -- 1.5.4.1 From rolandd at cisco.com Fri Feb 15 12:57:59 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 15 Feb 2008 12:57:59 -0800 Subject: [ofa-general] [PATCH 2/4] [RFC] Take empty ChangeLog out of RPM packages In-Reply-To: <20082151257.zVHI4L5a5AarRx39@cisco.com> Message-ID: <20082151257.CXJDV47SAl8tJm83@cisco.com> rpmlint says: E: zero-length /usr/share/doc/librdmacm-1.0.6/ChangeLog and there's really no point in shipping an empty file, so just don't package it. 
Signed-off-by: Roland Dreier --- librdmacm.spec.in | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/librdmacm.spec.in b/librdmacm.spec.in index c09d6a4..693f311 100644 --- a/librdmacm.spec.in +++ b/librdmacm.spec.in @@ -52,7 +52,7 @@ rm -rf $RPM_BUILD_ROOT %files %defattr(-,root,root,-) %{_libdir}/librdmacm*.so.* -%doc AUTHORS COPYING ChangeLog README +%doc AUTHORS COPYING README %files devel %defattr(-,root,root) -- 1.5.4.1 From hrosenstock at xsigo.com Fri Feb 15 13:04:09 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 15 Feb 2008 13:04:09 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <1203096633.26729.147.camel@hrosenstock-ws.xsigo.com> <1203097666.26729.150.camel@hrosenstock-ws.xsigo.com> <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 15:51 -0500, Chuck Hartley wrote: > Would another (non-OpenSM) SM running on another switch mess things > up? We have an IBM blade server with its own IB switch attached to > our main switch. Detaching that connection made the problem go away. > Reattaching it makes the problem come back. Do I have to configure > something differently in opensmd.conf? You're not supposed to run different SM "flavors" in the same subnet so if there is some other SM running one of them needs to be disabled. You can run multiple instances of the same SM flavor though. -- Hal > On Fri, Feb 15, 2008 at 3:33 PM, Hal Rosenstock > wrote: > On Fri, 2008-02-15 at 15:26 -0500, Chuck Hartley wrote: > > I direct connected the two machines instead of running them > to the > > switch and I get good BW numbers. What does that indicate? > > > Maybe you have a degraded link in your subnet. 
> > > > > > > > On Fri, Feb 15, 2008 at 3:05 PM, Chuck Hartley > > > wrote: > > Nope - none of those. The closest thing is > osm_pr_rcv_init > > right when the SM starts up. > > > > BTW, we have OpenSM Rev:openib-3.0.14 > > > > > > On Fri, Feb 15, 2008 at 2:56 PM, Hal Rosenstock > > wrote: > > On Fri, 2008-02-15 at 14:47 -0500, Chuck > Hartley > > wrote: > > > Yes, but I don't see anything in the log > file tagged > > with anything > > > like that. Is there some other string I > can search > > on to locate the > > > request/response messages? > > > > > > Maybe search for osm_pr_rcv_respond ? > > > > > > > What osm_xxx function would be sending the > > request/responses? Or what > > > is the message or signal called - I see > various ones > > starting with > > > OSM_ > > > > > > > > > > > > On Fri, Feb 15, 2008 at 1:52 PM, Hal > Rosenstock > > > wrote: > > > On Fri, 2008-02-15 at 13:46 -0500, > Chuck > > Hartley wrote: > > > > Yes, we are using OpenSM and I > found where > > to set the -V > > > switch > > > > in /etc/opensm.conf. I started > opensmd > > service on the > > > current node > > > > and looked at the log file it > created. I > > have no idea what > > > I am > > > > looking at / for... there are > two items > > that stand out - > > > PortInfo > > > > dumps and SMP dumps. What is it > I am > > looking for or what > > > can I post > > > > here? > > > > > > > > > SA PathRecord requests/responses > to your end > > node. > > > > > > > > > > On Fri, Feb 15, 2008 at 12:47 > PM, Hal > > Rosenstock > > > > wrote: > > > > On Fri, 2008-02-15 at > 09:30 -0800, > > Hal Rosenstock > > > wrote: > > > > > On Fri, 2008-02-15 at > 09:23 > > -0800, Arlin Davis > > > wrote: > > > > > > I would concentrate > on the > > path record > > > information > > > > returned > > > > > > from the SA and > compare > > against the straight > > > verbs test > > > > > > configuration. 
> > > > > > > > > > > > Hal/Sean, is there > an easy way > > to see path > > > record > > > > information > > > > > > from the query? > > > > > > > > > > Most SMs have a way to > display > > the PathRecord > > > responses > > > > being returned > > > > > by the SA. Would that > work ? If > > OpenSM is being > > > used, use -V > > > > on the > > > > > command line for this. > Contact > > your vendor if a > > > vendor > > > > specific SM is > > > > > being used and this > technique > > would work for your > > > purposes. > > > > > > > > > > > > Another alternative > would be to > > use madeye on the > > > end node but > > > > I don't > > > > think there's much > decode there so > > that would need > > > to be done > > > > by "hand". > > > > > > > > -- Hal > > > > > > > > > > > > > > -- Hal > > > > > > > > > > > -arlin > > > > > > > > > > > > > > > > > > > > _______________________________________________ > > > > > general mailing list > > > > > > general at lists.openfabrics.org > > > > > > > http://lists.openfabrics.org/cgi- > > > > > bin/mailman/listinfo/general > > > > > > > > > > To unsubscribe, please > visit > > > > > > http://openib.org/mailman/listinfo/openib- > general > > > > > > > _______________________________________________ > > > > general mailing list > > > > > general at lists.openfabrics.org > > > > > http://lists.openfabrics.org/cgi- > > > bin/mailman/listinfo/general > > > > > > > > To unsubscribe, please > visit > > > > > > http://openib.org/mailman/listinfo/openib- > general > > > > > > > > > > > > > > > > > > > > From hartlch14 at gmail.com Fri Feb 15 13:09:33 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Fri, 15 Feb 2008 16:09:33 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> References: <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> 
<1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> Message-ID: Well I expect that is the problem then. I see the same problem when I don't run the OpenSM at all and just use the one running on the blade server. All those are SDR HCAs and that must be all that it will negotiate for. I'll have to figure out how to turn it off and just run the OpenSM one. On Fri, Feb 15, 2008 at 4:04 PM, Hal Rosenstock wrote: > On Fri, 2008-02-15 at 15:51 -0500, Chuck Hartley wrote: > > Would another (non-OpenSM) SM running on another switch mess things > > up? We have an IBM blade server with its own IB switch attached to > > our main switch. Detaching that connection made the problem go away. > > Reattaching it makes the problem come back. Do I have to configure > > something differently in opensmd.conf? > > You're not supposed to run different SM "flavors" in the same subnet so > if there is some other SM running one of them needs to be disabled. You > can run multiple instances of the same SM flavor though. > > -- Hal > > > On Fri, Feb 15, 2008 at 3:33 PM, Hal Rosenstock > > wrote: > > On Fri, 2008-02-15 at 15:26 -0500, Chuck Hartley wrote: > > > I direct connected the two machines instead of running them > > to the > > > switch and I get good BW numbers. What does that indicate? > > > > > > Maybe you have a degraded link in your subnet. > > > > > > > > > > > > > On Fri, Feb 15, 2008 at 3:05 PM, Chuck Hartley > > > > > wrote: > > > Nope - none of those. The closest thing is > > osm_pr_rcv_init > > > right when the SM starts up. > > > > > > BTW, we have OpenSM Rev:openib-3.0.14 > > > > > > > > > On Fri, Feb 15, 2008 at 2:56 PM, Hal Rosenstock > > > wrote: > > > On Fri, 2008-02-15 at 14:47 -0500, Chuck > > Hartley > > > wrote: > > > > Yes, but I don't see anything in the log > > file tagged > > > with anything > > > > like that. Is there some other string I > > can search > > > on to locate the > > > > request/response messages? 
> > > > > > > > > Maybe search for osm_pr_rcv_respond ? > > > > > > > > > > What osm_xxx function would be sending the > > > request/responses? Or what > > > > is the message or signal called - I see > > various ones > > > starting with > > > > OSM_ > > > > > > > > > > > > > > > > On Fri, Feb 15, 2008 at 1:52 PM, Hal > > Rosenstock > > > > wrote: > > > > On Fri, 2008-02-15 at 13:46 -0500, > > Chuck > > > Hartley wrote: > > > > > Yes, we are using OpenSM and I > > found where > > > to set the -V > > > > switch > > > > > in /etc/opensm.conf. I started > > opensmd > > > service on the > > > > current node > > > > > and looked at the log file it > > created. I > > > have no idea what > > > > I am > > > > > looking at / for... there are > > two items > > > that stand out - > > > > PortInfo > > > > > dumps and SMP dumps. What is it > > I am > > > looking for or what > > > > can I post > > > > > here? > > > > > > > > > > > > SA PathRecord requests/responses > > to your end > > > node. > > > > > > > > > > > > > On Fri, Feb 15, 2008 at 12:47 > > PM, Hal > > > Rosenstock > > > > > wrote: > > > > > On Fri, 2008-02-15 at > > 09:30 -0800, > > > Hal Rosenstock > > > > wrote: > > > > > > On Fri, 2008-02-15 at > > 09:23 > > > -0800, Arlin Davis > > > > wrote: > > > > > > > I would concentrate > > on the > > > path record > > > > information > > > > > returned > > > > > > > from the SA and > > compare > > > against the straight > > > > verbs test > > > > > > > configuration. > > > > > > > > > > > > > > Hal/Sean, is there > > an easy way > > > to see path > > > > record > > > > > information > > > > > > > from the query? > > > > > > > > > > > > Most SMs have a way to > > display > > > the PathRecord > > > > responses > > > > > being returned > > > > > > by the SA. Would that > > work ? If > > > OpenSM is being > > > > used, use -V > > > > > on the > > > > > > command line for this. 
> > Contact > > > your vendor if a > > > > vendor > > > > > specific SM is > > > > > > being used and this > > technique > > > would work for your > > > > purposes. > > > > > > > > > > > > > > > Another alternative > > would be to > > > use madeye on the > > > > end node but > > > > > I don't > > > > > think there's much > > decode there so > > > that would need > > > > to be done > > > > > by "hand". > > > > > > > > > > -- Hal > > > > > > > > > > > > > > > > > -- Hal > > > > > > > > > > > > > -arlin > > > > > > > > > > > > > > > > > > > > > > > > > _______________________________________________ > > > > > > general mailing list > > > > > > > > general at lists.openfabrics.org > > > > > > > > > http://lists.openfabrics.org/cgi- > > > > > > > bin/mailman/listinfo/general > > > > > > > > > > > > To unsubscribe, please > > visit > > > > > > > > http://openib.org/mailman/listinfo/openib- > > general > > > > > > > > > > _______________________________________________ > > > > > general mailing list > > > > > > > general at lists.openfabrics.org > > > > > > > http://lists.openfabrics.org/cgi- > > > > bin/mailman/listinfo/general > > > > > > > > > > To unsubscribe, please > > visit > > > > > > > > http://openib.org/mailman/listinfo/openib- > > general > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Fri Feb 15 13:10:13 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 15 Feb 2008 13:10:13 -0800 Subject: [ofa-general] Re: [PATCH] RDMA/nes: MAC interrupt erroneously masked on ifdown In-Reply-To: <200802151741.m1FHfR1m004209@velma.neteffect.com> (gstreiff@neteffect.com's message of "Fri, 15 Feb 2008 11:41:27 -0600") References: <200802151741.m1FHfR1m004209@velma.neteffect.com> Message-ID: thanks, applied. 
one minor request: > Signed-off-by: Chien Tung > Signed-off-by: Glenn Streiff I'm assuming the first signoff line is coming from the actual author here. If so, for patches like this please include a line like From: Chien Tung before the changelog. That will make the patch import tools set the Author: line for the patch correctly. As it stands, when I import this patch into git, the Author: ends up as Glenn Streiff, which is not a big problem but it's always nice to get it really correct. - R. From anjahennemann at gmx.de Fri Feb 15 13:15:12 2008 From: anjahennemann at gmx.de (Aleah Mccullough) Date: Fri, 15 Feb 2008 22:15:12 +0100 Subject: [ofa-general] Read This and Choose Your New Reality Message-ID: <511935364.68290517254349@gmx.de> It helps your cock get larger and makes you a far better man in bed. Order our VPXL and start a new life of success and happiness.http://geocities.com/janniecampos871/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Fri Feb 15 13:18:58 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 15 Feb 2008 13:18:58 -0800 Subject: [ofa-general] Re: [PATCH (resend)]: libibverbs: Fix several issues that were reported by valgrind In-Reply-To: <200710101125.18099.dotanb@dev.mellanox.co.il> (Dotan Barak's message of "Wed, 10 Oct 2007 11:25:18 +0200") References: <200710101125.18099.dotanb@dev.mellanox.co.il> Message-ID: thanks, applied (at long last) From clameter at sgi.com Fri Feb 15 14:50:13 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 15 Feb 2008 14:50:13 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <78C9135A3D2ECE4B8162EBDCE82CAD77030E2657@nekter> References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> 
<78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25F1@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2657@nekter> Message-ID: On Fri, 15 Feb 2008, Caitlin Bestler wrote: > There isn't much point in the RDMA layer subscribing to mmu > notifications > if the specific RDMA device will not be able to react appropriately when > the notification occurs. I don't see how you get around needing to know > which devices are capable of supporting page migration (via > suspend/resume > or other mechanisms) and which can only respond to a page migration by > aborting connections. You either register callbacks if the device can react properly or you don't. If you don't then the device will continue to have the problem with page pinning etc until someone comes around and implements the mmu callbacks to fix these issues. I have doubts regarding the claim that some devices just cannot be made to suspend and resume appropriately. They obviously can be shut down and so it's a matter of sequencing the things the right way. I.e. stop the app, wait for a quiet period, then release resources etc. From jim at mellanox.com Fri Feb 15 14:51:36 2008 From: jim at mellanox.com (Jim Mott) Date: Fri, 15 Feb 2008 14:51:36 -0800 Subject: [ofa-general] [PATCH 1/1] libsdp - Fix for threaded connect/disconnect bug Message-ID: A Mellanox regression test detected an error in libsdp when multiple threads are opening and closing many SDP sockets using the 'both' attribute. 
Signed-off-by: Jim Mott --- Index: ofa_1_3_dev_user/src/userspace/libsdp/src/port.c =================================================================== --- ofa_1_3_dev_user.orig/src/userspace/libsdp/src/port.c 2008-02-15 00:32:16.000000000 -0600 +++ ofa_1_3_dev_user/src/userspace/libsdp/src/port.c 2008-02-15 17:39:59.000000000 -0600 @@ -1770,8 +1770,8 @@ fd ); } - ret = _socket_funcs.close( fd ); init_extra_attribute( fd ); + ret = _socket_funcs.close( fd ); __sdp_log( 2, "CLOSE: <%s:%d:%d> result <%d>\n", program_invocation_short_name, fd, shadow_fd, ret ); return ret; From Caitlin.Bestler at neterion.com Fri Feb 15 15:50:08 2008 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Fri, 15 Feb 2008 18:50:08 -0500 Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: References: <47B2174E.5000708@opengridcomputing.com> <8A71B368A89016469F72CD08050AD334026D5C23@maui.asicdesigners.com> <47B45994.7010805@opengridcomputing.com> <469958e00802141217i3a3d16a1k1232d69b8ba54471@mail.gmail.com> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2456@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25BA@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E25F1@nekter> <78C9135A3D2ECE4B8162EBDCE82CAD77030E2657@nekter> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD77030E2702@nekter> > -----Original Message----- > From: Christoph Lameter [mailto:clameter at sgi.com] > Sent: Friday, February 15, 2008 2:50 PM > To: Caitlin Bestler > Cc: linux-kernel at vger.kernel.org; avi at qumranet.com; linux-mm at kvack.org; > general at lists.openfabrics.org; kvm-devel at lists.sourceforge.net > Subject: RE: [ofa-general] Re: Demand paging for memory regions > > On Fri, 15 Feb 2008, Caitlin Bestler wrote: > > > There isn't much point in the RDMA layer subscribing to mmu > > notifications > > if the specific RDMA device will not be able to react appropriately > when > > the notification occurs. 
I don't see how you get around needing to > know > > which devices are capable of supporting page migration (via > > suspend/resume > > or other mechanisms) and which can only respond to a page migration > by > > aborting connections. > > You either register callbacks if the device can react properly or you > dont. If you dont then the device will continue to have the problem > with > page pinning etc until someone comes around and implements the > mmu callbacks to fix these issues. > > I have doubts regarding the claim that some devices just cannot be made > to > suspend and resume appropriately. They obviously can be shutdown and so > its a matter of sequencing the things the right way. I.e. stop the app > wait for a quiet period then release resources etc. > > That is true. What some devices will be unable to do is suspend and resume in a manner that is transparent to the application. However, for the duration required to re-arrange pages it is definitely feasible to do so transparently to the application. Presumably the Virtual Memory Manager would be more willing to take an action that is transparent to the user than one that is disruptive, although obviously as the owner of the physical memory it has the right to do either. From a-18 at aol.com Fri Feb 15 17:40:03 2008 From: a-18 at aol.com (Adela Bush) Date: Sat, 16 Feb 2008 09:40:03 +0800 Subject: [ofa-general] Your profile Message-ID: <01c8707f$e7ab9b80$d04f033c@a-18> Hello! I am bored this evening. I am nice girl that would like to chat with you. Email me at Barbro at IndividualImprove.info only, because I am using my friend's email to write this. I want to show you some pictures. 
From gstreiff at neteffect.com Fri Feb 15 19:00:36 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Fri, 15 Feb 2008 21:00:36 -0600 Subject: [ofa-general] RDMA/nes: Fix vlan support Message-ID: <200802160300.m1G30aP8030038@velma.neteffect.com> From: Chien Tung Needed to account for vlan header size in nes_netdev_change_mtu() and nes_netdev_init(). Also, adding spin lock/unlock during vlan rx registration so only one process can assign vlan group for a given interface at a time. Signed-off-by: Chien Tung Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_nic.c | 10 ++++++++-- 1 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 67827ad..bf9f71c 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -932,7 +932,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; netdev->mtu = new_mtu; - nesvnic->max_frame_size = new_mtu+ETH_HLEN; + nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; if (netdev->mtu > 1500) { jumbomode=1; @@ -1494,10 +1494,15 @@ static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_g { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; u32 u32temp; + unsigned long flags; + spin_lock_irqsave(&nesadapter->phy_lock, flags); nesvnic->vlan_grp = grp; + nes_debug(NES_DBG_NETDEV, "%s: %s\n", __FUNCTION__, netdev->name); + /* Enable/Disable VLAN Stripping */ u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); if (grp) @@ -1506,6 +1511,7 @@ static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_g u32temp |= 0x02000000; nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } @@ -1564,7 +1570,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, 
nesvnic->msg_enable = netif_msg_init(debug, default_msg); nesvnic->netdev_index = nesdev->netdev_count; nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count; - nesvnic->max_frame_size = netdev->mtu+netdev->hard_header_len; + nesvnic->max_frame_size = netdev->mtu + netdev->hard_header_len + VLAN_HLEN; curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)]; nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid; From mashirle at us.ibm.com Fri Feb 15 08:30:02 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Fri, 15 Feb 2008 08:30:02 -0800 Subject: [ofa-general] Re: [PATCH] IPOIB/CM Increase retry counts for OFED-1.3 In-Reply-To: <47B47460.4080700@linux.vnet.ibm.com> References: <47B226CC.1060706@linux.vnet.ibm.com> <47B456D1.7030600@mellanox.co.il> <47B47460.4080700@linux.vnet.ibm.com> Message-ID: <1203093002.4539.5.camel@localhost.localdomain> Hello Pradeep, The "send completion errors" indicate the packet hasn't been sent out to the wire. It seems the retries you have added induced a little bit of delay for the packet to be sent out successfully, which might indicate some flow control or other issues in the device transport layer? Roland, what do you think? Thanks Shirley From akpm at linux-foundation.org Fri Feb 15 19:37:19 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Fri, 15 Feb 2008 19:37:19 -0800 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <20080215064932.371510599@sgi.com> References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> Message-ID: <20080215193719.262c03a1.akpm@linux-foundation.org> On Thu, 14 Feb 2008 22:49:00 -0800 Christoph Lameter wrote: > MMU notifiers are used for hardware and software that establishes > external references to pages managed by the Linux kernel. 
These are > page table entriews or tlb entries or something else that allows > hardware (such as DMA engines, scatter gather devices, networking, > sharing of address spaces across operating system boundaries) and > software (Virtualization solutions such as KVM, Xen etc) to > access memory managed by the Linux kernel. > > The MMU notifier will notify the device driver that subscribes to such > a notifier that the VM is going to do something with the memory > mapped by that device. The device must then drop references for the > indicated memory area. The references may be reestablished later. > > The notification scheme is much better than the current schemes of > avoiding the danger of the VM removing pages that are externally > mapped. We currently either mlock pages used for RDMA, XPmem etc > in memory or increase the refcount to pin the pages. Increasing > the refcount makes it impossible for the VM to reclaim the page. > > Mlock causes problems with reclaim and may lead to OOM if too many > pages are pinned in memory. It is also incorrect in terms what the POSIX > specificies for what role mlock should play. Mlock does *not* pin pages in > memory. Mlock just means do not allow the page to be moved to swap. > > Linux can move pages in memory (for example through the page migration > mechanism). These pages can be moved even if they are mlocked(!!!!). > The current approach of page pinning in use by RDMA etc is conceptually > broken but there are currently no other easy solutions. > > The alternate of increasing the page count to pin pages is also not > that enticing since there will be continual attempts to reclaim > or migrate these pages. > > The solution here allows us to finally fix this issue by requiring > such devices to subscribe to a notification chain that will allow > them to work without pinning. The VM gains control of its memory again > and the memory that has external references can be managed like regular > memory. 
> > This patch: Core portion > What is the status of getting infiniband to use this facility? How important is this feature to KVM? To xpmem? Which other potential clients have been identified and how important it it to those? > Index: linux-2.6/Documentation/mmu_notifier/README > =================================================================== > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ linux-2.6/Documentation/mmu_notifier/README 2008-02-14 22:27:19.000000000 -0800 > @@ -0,0 +1,105 @@ > +Linux MMU Notifiers > +------------------- > + > +MMU notifiers are used for hardware and software that establishes > +external references to pages managed by the Linux kernel. These are > +page table entriews or tlb entries or something else that allows > +hardware (such as DMA engines, scatter gather devices, networking, > +sharing of address spaces across operating system boundaries) and > +software (Virtualization solutions such as KVM, Xen etc) to > +access memory managed by the Linux kernel. > + > +The MMU notifier will notify the device driver that subscribes to such > +a notifier that the VM is going to do something with the memory > +mapped by that device. The device must then drop references for the > +indicated memory area. The references may be reestablished later. > + > +The notification scheme is much better than the current schemes of > +dealing with the danger of the VM removing pages. > +We currently mlock pages used for RDMA, XPmem etc in memory or > +increase the refcount of the pages. > + > +Both cause problems with reclaim and may lead to OOM if too many > +pages are pinned in memory. Mlock is also incorrect in terms of the POSIX > +specification of the role of mlock. Mlock does *not* pin pages in > +memory. It just does not allow the page to be moved to swap. > +The page refcount is used to track current users of a page struct. > +Artificially inflating the refcount means that the VM cannot track > +down all references to a page. 
It will not be able to reclaim or > +move a page. However, the core code will try again and again because > +the assumption is that an elevated refcount is a temporary situation. > + > +Linux can move pages in memory (for example through the page migration > +mechanism). These pages can be moved even if they are mlocked(!!!!). > +So the current approach in use by RDMA etc etc is conceptually broken > +but there are currently no other easy solutions. > + > +The solution here allows us to finally fix this issue by requiring > +such devices to subscribe to a notification chain that will allow > +them to work without pinning. > + > +The notifier chains provide two callback mechanisms. The > +first one is required for any device that establishes external mappings. > +The second (rmap) mechanism is required if a device needs to be > +able to sleep when invalidating references. Sleeping may be necessary > +if we are mapping across a network or to different Linux instances > +in the same address space. I'd have thought that a major reason for sleeping would be to wait for IO to complete. Worth mentioning here? > +mmu_notifier mechanism (for KVM/GRU etc) > +---------------------------------------- > +Callbacks are registered with an mm_struct from a device driver using > +mmu_notifier_register(). When the VM removes pages (or changes > +permissions on pages etc) then callbacks are triggered. > + > +The invalidation function for a single page (*invalidate_page) We already have an invalidatepage. Ho hum. > +is called with spinlocks (in particular the pte lock) held. This allow > +for an easy implementation of external ptes that are on the local system. > Why is that "easy"? I'd have thought that it would only be easy if the driver happened to be using those same locks for its own purposes. Otherwise it is "awkward"? > +The invalidation mechanism for a range (*invalidate_range_begin/end*) is > +called most of the time without any locks held. 
It is only called with > +locks held for file backed mappings that are truncated. A flag indicates > +in which mode we are. A driver can use that mechanism to f.e. > +delay the freeing of the pages during truncate until no locks are held. That sucks big time. What do we need to do to get the callback functions called in non-atomic context? > +Pages must be marked dirty if dirty bits are found to be set in > +the external ptes during unmap. That sentence is too vague. Define "marked dirty"? > +The *release* method is called when a Linux process exits. It is run before We'd conventionally use a notation such as "->release()" here, rather than the asterisks. > +the pages and mappings of a process are torn down and gives the device driver > +a chance to zap all the external mappings in one go. I assume what you mean here is that ->release() is called during exit() when the final reference to an mm is being dropped. > +An example for a code that can be used to build a notifier mechanism into > +a device driver can be found in the file > +Documentation/mmu_notifier/skeleton.c Should that be in samples/? > +mmu_rmap_notifier mechanism (XPMEM etc) > +--------------------------------------- > +The mmu_rmap_notifier allows the device driver to implement their own rmap s/their/its/ > +and allows the device driver to sleep during page eviction. This is necessary > +for complex drivers that f.e. allow the sharing of memory between processes > +running on different Linux instances (typically over a network or in a > +partitioned NUMA system). > + > +The mmu_rmap_notifier adds another invalidate_page() callout that is called > +*before* the Linux rmaps are walked. At that point only the page lock is > +held. The invalidate_page() function must walk the driver rmaps and evict > +all the references to the page. What happens if it cannot do so? > +There is no process information available before the rmaps are consulted. Not sure what that sentence means. 
I guess "available to the core VM"? > +The notifier mechanism can therefore not be attached to an mm_struct. Instead > +it is a global callback list. Having to perform a callback for each and every > +page that is reclaimed would be inefficient. Therefore we add an additional > +page flag: PageRmapExternal(). How many page flags are left? Is this feature important enough to justify consumption of another one? > Only pages that are marked with this bit can > +be exported and the rmap callbacks will only be performed for pages marked > +that way. "exported": new term, unclear what it means. > +The required additional Page flag is only availabe in 64 bit mode and > +therefore the mmu_rmap_notifier portion is not available on 32 bit platforms. whoa. Is that good? You just made your feature unavailable on the great majority of Linux systems. > +An example of code to build a mmu_notifier mechanism with rmap capabilty > +can be found in Documentation/mmu_notifier/skeleton_rmap.c > + > +February 9, 2008, > + Christoph Lameter + > +Index: linux-2.6/include/linux/mm_types.h > Index: linux-2.6/include/linux/mm_types.h > =================================================================== > --- linux-2.6.orig/include/linux/mm_types.h 2008-02-14 20:59:01.000000000 -0800 > +++ linux-2.6/include/linux/mm_types.h 2008-02-14 21:17:51.000000000 -0800 > @@ -159,6 +159,12 @@ struct vm_area_struct { > #endif > }; > > +struct mmu_notifier_head { > +#ifdef CONFIG_MMU_NOTIFIER > + struct hlist_head head; > +#endif > +}; > + > struct mm_struct { > struct vm_area_struct * mmap; /* list of VMAs */ > struct rb_root mm_rb; > @@ -228,6 +234,7 @@ struct mm_struct { > #ifdef CONFIG_CGROUP_MEM_CONT > struct mem_cgroup *mem_cgroup; > #endif > + struct mmu_notifier_head mmu_notifier; /* MMU notifier list */ > }; > > #endif /* _LINUX_MM_TYPES_H */ > Index: linux-2.6/include/linux/mmu_notifier.h > =================================================================== > --- /dev/null 1970-01-01 
00:00:00.000000000 +0000 > +++ linux-2.6/include/linux/mmu_notifier.h 2008-02-14 22:42:28.000000000 -0800 > @@ -0,0 +1,180 @@ > +#ifndef _LINUX_MMU_NOTIFIER_H > +#define _LINUX_MMU_NOTIFIER_H > + > +/* > + * MMU motifier typo > + * Notifier functions for hardware and software that establishes external > + * references to pages of a Linux system. The notifier calls ensure that > + * external mappings are removed when the Linux VM removes memory ranges > + * or individual pages from a process. So the callee cannot fail. hm. If it can't block, it's likely screwed in that case. In other cases it might be screwed anyway. I suspect we'll need to be able to handle callee failure. > + * These fall into two classes: > + * > + * 1. mmu_notifier > + * > + * These are callbacks registered with an mm_struct. If pages are > + * removed from an address space then callbacks are performed. "to be removed", I guess. It's called before the page is actually removed? > + * Spinlocks must be held in order to walk reverse maps. The > + * invalidate_page() callbacks are performed with spinlocks held. hm, yes, problem. Permitting callee failure might be good enough. > + * The invalidate_range_start/end callbacks can be performed in contexts > + * where sleeping is allowed or in atomic contexts. A flag is passed > + * to indicate an atomic context. We generally would prefer separate callbacks, rather than a unified callback with a mode flag. > + * Pages must be marked dirty if dirty bits are found to be set in > + * the external ptes. > + */ > + > +#include > +#include > +#include > +#include > + > +struct mmu_notifier_ops; > + > +struct mmu_notifier { > + struct hlist_node hlist; > + const struct mmu_notifier_ops *ops; > +}; > + > +struct mmu_notifier_ops { > + /* > + * The release notifier is called when no other execution threads > + * are left. Synchronization is not necessary. "and the mm is about to be destroyed"? 
> + */ > + void (*release)(struct mmu_notifier *mn, > + struct mm_struct *mm); > + > + /* > + * age_page is called from contexts where the pte_lock is held > + */ > + int (*age_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); This wasn't documented. > + /* > + * invalidate_page is called from contexts where the pte_lock is held. > + */ > + void (*invalidate_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); > + > + /* > + * invalidate_range_begin() and invalidate_range_end() must be paired. > + * > + * Multiple invalidate_range_begin/ends may be nested or called > + * concurrently. Under what circumstances would they be nested? > That is legit. However, no new external references references to what? > + * may be established as long as any invalidate_xxx is running or > + * any invalidate_range_begin() and has not been completed through a stray "and". > + * corresponding call to invalidate_range_end(). > + * > + * Locking within the notifier needs to serialize events correspondingly. > + * > + * invalidate_range_begin() must clear all references in the range > + * and stop the establishment of new references. and stop the establishment of new references within the range, I assume? If so, that's putting a heck of a lot of complexity into the driver, isn't it? It needs to temporarily remember an arbitrarily large number of regions in this mm against which references may not be taken? > + * invalidate_range_end() reenables the establishment of references. within the range? > + * atomic indicates that the function is called in an atomic context. > + * We can sleep if atomic == 0. > + * > + * invalidate_range_begin() must remove all external references. > + * There will be no retries as with invalidate_page(). 
> + */ > + void (*invalidate_range_begin)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end, > + int atomic); > + > + void (*invalidate_range_end)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end, > + int atomic); > +}; > + > +#ifdef CONFIG_MMU_NOTIFIER > + > +/* > + * Must hold the mmap_sem for write. > + * > + * RCU is used to traverse the list. A quiescent period needs to pass > + * before the notifier is guaranteed to be visible to all threads > + */ > +extern void mmu_notifier_register(struct mmu_notifier *mn, > + struct mm_struct *mm); > + > +/* > + * Must hold mmap_sem for write. > + * > + * A quiescent period needs to pass before the mmu_notifier structure > + * can be released. mmu_notifier_release() will wait for a quiescent period > + * after calling the ->release callback. So it is safe to call > + * mmu_notifier_unregister from the ->release function. > + */ > +extern void mmu_notifier_unregister(struct mmu_notifier *mn, > + struct mm_struct *mm); > + > + > +extern void mmu_notifier_release(struct mm_struct *mm); > +extern int mmu_notifier_age_page(struct mm_struct *mm, > + unsigned long address); There's the mysterious age_page again. > +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) > +{ > + INIT_HLIST_HEAD(&mnh->head); > +} > + > +#define mmu_notifier(function, mm, args...) \ > + do { \ > + struct mmu_notifier *__mn; \ > + struct hlist_node *__n; \ > + \ > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > + rcu_read_lock(); \ > + hlist_for_each_entry_rcu(__mn, __n, \ > + &(mm)->mmu_notifier.head, \ > + hlist) \ > + if (__mn->ops->function) \ > + __mn->ops->function(__mn, \ > + mm, \ > + args); \ > + rcu_read_unlock(); \ > + } \ > + } while (0) The macro references its args more than once. Anyone who does mmu_notifier(function, some_function_which_has_side_effects()) will get a surprise. Use temporaries. 
> +#else /* CONFIG_MMU_NOTIFIER */ > + > +/* > + * Notifiers that use the parameters that they were passed so that the > + * compiler does not complain about unused variables but does proper > + * parameter checks even if !CONFIG_MMU_NOTIFIER. > + * Macros generate no code. > + */ > +#define mmu_notifier(function, mm, args...) \ > + do { \ > + if (0) { \ > + struct mmu_notifier *__mn; \ > + \ > + __mn = (struct mmu_notifier *)(0x00ff); \ > + __mn->ops->function(__mn, mm, args); \ > + }; \ > + } while (0) That's a bit weird. Can't we do the old (void)function; (void)mm; trick? Or make it a static inline function? > +static inline void mmu_notifier_register(struct mmu_notifier *mn, > + struct mm_struct *mm) {} > +static inline void mmu_notifier_unregister(struct mmu_notifier *mn, > + struct mm_struct *mm) {} > +static inline void mmu_notifier_release(struct mm_struct *mm) {} > +static inline int mmu_notifier_age_page(struct mm_struct *mm, > + unsigned long address) > +{ > + return 0; > +} > + > +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mmh) {} > + > +#endif /* CONFIG_MMU_NOTIFIER */ > + > +#endif /* _LINUX_MMU_NOTIFIER_H */ > Index: linux-2.6/mm/Kconfig > =================================================================== > --- linux-2.6.orig/mm/Kconfig 2008-02-14 20:59:01.000000000 -0800 > +++ linux-2.6/mm/Kconfig 2008-02-14 21:17:51.000000000 -0800 > @@ -193,3 +193,7 @@ config NR_QUICK > config VIRT_TO_BUS > def_bool y > depends on !ARCH_NO_VIRT_TO_BUS > + > +config MMU_NOTIFIER > + def_bool y > + bool "MMU notifier, for paging KVM/RDMA" Why is this not selectable? The help seems a bit brief. Does this cause 32-bit systems to drag in a bunch of code they're not allowed to ever use?
> Index: linux-2.6/mm/Makefile > =================================================================== > --- linux-2.6.orig/mm/Makefile 2008-02-14 20:59:01.000000000 -0800 > +++ linux-2.6/mm/Makefile 2008-02-14 21:17:51.000000000 -0800 > @@ -33,4 +33,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o > obj-$(CONFIG_SMP) += allocpercpu.o > obj-$(CONFIG_QUICKLIST) += quicklist.o > obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o > +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o > > Index: linux-2.6/mm/mmu_notifier.c > =================================================================== > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ linux-2.6/mm/mmu_notifier.c 2008-02-14 22:41:55.000000000 -0800 > @@ -0,0 +1,76 @@ > +/* > + * linux/mm/mmu_notifier.c > + * > + * Copyright (C) 2008 Qumranet, Inc. > + * Copyright (C) 2008 SGI > + * Christoph Lameter > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + */ > + > +#include > +#include > +#include > + > +/* > + * No synchronization. This function can only be called when only a single > + * process remains that performs teardown. > + */ > +void mmu_notifier_release(struct mm_struct *mm) > +{ > + struct mmu_notifier *mn; > + struct hlist_node *n, *t; > + > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > + hlist_for_each_entry_safe(mn, n, t, > + &mm->mmu_notifier.head, hlist) { > + hlist_del_init(&mn->hlist); > + if (mn->ops->release) > + mn->ops->release(mn, mm); We do this a lot, but back in the old days people didn't like optional callbacks which can be NULL. If we expect that mmu_notifier_ops.release is usually implemented, then just unconditionally call it and require that all clients implement it. Perhaps provide an exported-to-modules stub in core kernel for clients which didn't want to implement ->release().
> + } > + } > +} > + > +/* > + * If no young bitflag is supported by the hardware, ->age_page can > + * unmap the address and return 1 or 0 depending if the mapping previously > + * existed or not. > + */ > +int mmu_notifier_age_page(struct mm_struct *mm, unsigned long address) > +{ > + struct mmu_notifier *mn; > + struct hlist_node *n; > + int young = 0; > + > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > + rcu_read_lock(); > + hlist_for_each_entry_rcu(mn, n, > + &mm->mmu_notifier.head, hlist) { > + if (mn->ops->age_page) > + young |= mn->ops->age_page(mn, mm, address); > + } > + rcu_read_unlock(); > + } > + > + return young; > +} should the rcu_read_lock() cover the hlist_empty() test? This function looks like it was tossed in at the last minute. It's mysterious, undocumented, poorly commented, poorly named. A better name would be one which has some correlation with the return value. Because anyone who looks at some code which does if (mmu_notifier_age_page(mm, address)) ... has to go and reverse-engineer the implementation of mmu_notifier_age_page() to work out under which circumstances the "..." will be executed. But this should be apparent just from reading the callee implementation. This function *really* does need some documentation. What does it *mean* when the ->age_page() from some of the notifiers returned "1" and the ->age_page() from some other notifiers returned zero? Dunno. 
From akpm at linux-foundation.org Fri Feb 15 19:37:30 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Fri, 15 Feb 2008 19:37:30 -0800 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080215064932.620773824@sgi.com> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> Message-ID: <20080215193730.709c67ea.akpm@linux-foundation.org> On Thu, 14 Feb 2008 22:49:01 -0800 Christoph Lameter wrote: > The invalidation of address ranges in a mm_struct needs to be > performed when pages are removed or permissions etc change. hm. Do they? Why? If I'm in the process of zero-copy writing a hunk of memory out to hardware then do I care if someone write-protects the ptes? Spose so, but some fleshing-out of the various scenarios here would clarify things. > If invalidate_range_begin() is called with locks held then we > pass a flag into invalidate_range() to indicate that no sleeping is > possible. Locks are only held for truncate and huge pages. This is so bad. I supposed in the restricted couple of cases which you're focussed on it works OK. But is it generally suitable? What if IO is in progress? What if other cluster nodes need to be talked to? Does it suit RDMA? > In two cases we use invalidate_range_begin/end to invalidate > single pages because the pair allows holding off new references > (idea by Robin Holt). Assuming that there is a missing "within the range" in this description, I assume that all clients will just throw up their hands in horror and will disallow all references to all parts of the mm. Of course, to do that they will need to take a sleeping lock to prevent other threads from establishing new references. whoops. > do_wp_page(): We hold off new references while we update the pte. > > xip_unmap: We are not taking the PageLock so we cannot > use the invalidate_page mmu_rmap_notifier. invalidate_range_begin/end > stands in. What does "stands in" mean?
> Signed-off-by: Andrea Arcangeli > Signed-off-by: Robin Holt > Signed-off-by: Christoph Lameter > > --- > mm/filemap_xip.c | 5 +++++ > mm/fremap.c | 3 +++ > mm/hugetlb.c | 3 +++ > mm/memory.c | 35 +++++++++++++++++++++++++++++------ > mm/mmap.c | 2 ++ > mm/mprotect.c | 3 +++ > mm/mremap.c | 7 ++++++- > 7 files changed, 51 insertions(+), 7 deletions(-) > > Index: linux-2.6/mm/fremap.c > =================================================================== > --- linux-2.6.orig/mm/fremap.c 2008-02-14 18:43:31.000000000 -0800 > +++ linux-2.6/mm/fremap.c 2008-02-14 18:45:07.000000000 -0800 > @@ -15,6 +15,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(uns > spin_unlock(&mapping->i_mmap_lock); > } > > + mmu_notifier(invalidate_range_begin, mm, start, start + size, 0); > err = populate_range(mm, vma, start, size, pgoff); > + mmu_notifier(invalidate_range_end, mm, start, start + size, 0); To avoid off-by-one confusion the changelogs, documentation and comments should be very careful to tell the reader whether the range includes the byte at start+size. I don't thik that was done? From akpm at linux-foundation.org Fri Feb 15 19:37:36 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Fri, 15 Feb 2008 19:37:36 -0800 Subject: [ofa-general] Re: [patch 3/6] mmu_notifier: invalidate_page callbacks In-Reply-To: <20080215064932.918191502@sgi.com> References: <20080215064859.384203497@sgi.com> <20080215064932.918191502@sgi.com> Message-ID: <20080215193736.9d6e7da3.akpm@linux-foundation.org> On Thu, 14 Feb 2008 22:49:02 -0800 Christoph Lameter wrote: > Two callbacks to remove individual pages as done in rmap code > > invalidate_page() > > Called from the inner loop of rmap walks to invalidate pages. > > age_page() > > Called for the determination of the page referenced status. > > If we do not care about page referenced status then an age_page callback > may be be omitted. 
PageLock and pte lock are held when either of the > functions is called. The age_page mystery shallows. It would be useful to have some rationale somewhere in the patchset for the existence of this callback. > #include > > @@ -287,7 +288,8 @@ static int page_referenced_one(struct pa > if (vma->vm_flags & VM_LOCKED) { > referenced++; > *mapcount = 1; /* break early from loop */ > - } else if (ptep_clear_flush_young(vma, address, pte)) > + } else if (ptep_clear_flush_young(vma, address, pte) | > + mmu_notifier_age_page(mm, address)) > referenced++; The "|" is obviously deliberate. But no explanation is provided telling us why we still call the callback if ptep_clear_flush_young() said the page was recently referenced. People who read your code will want to understand this. > /* Pretend the page is referenced if the task has the > @@ -455,6 +457,7 @@ static int page_mkclean_one(struct page > > flush_cache_page(vma, address, pte_pfn(*pte)); > entry = ptep_clear_flush(vma, address, pte); > + mmu_notifier(invalidate_page, mm, address); I just don't see how this can be done if the callee has another thread in the middle of establishing IO against this region of memory. ->invalidate_page() _has_ to be able to block. Confused. From akpm at linux-foundation.org Fri Feb 15 19:37:46 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Fri, 15 Feb 2008 19:37:46 -0800 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <20080215064933.376635032@sgi.com> References: <20080215064859.384203497@sgi.com> <20080215064933.376635032@sgi.com> Message-ID: <20080215193746.5d823092.akpm@linux-foundation.org> On Thu, 14 Feb 2008 22:49:04 -0800 Christoph Lameter wrote: > These special additional callbacks are required because XPmem (and likely > other mechanisms) do use their own rmap (multiple processes on a series > of remote Linux instances may be accessing the memory of a process). > F.e.
XPmem may have to send out notifications to remote Linux instances > and receive confirmation before a page can be freed. > > So we handle this like an additional Linux reverse map that is walked after > the existing rmaps have been walked. We leave the walking to the driver that > is then able to use something else than a spinlock to walk its reverse > maps. So we can actually call the driver without holding spinlocks while > we hold the Pagelock. > > However, we cannot determine the mm_struct that a page belongs to at > that point. The mm_struct can only be determined from the rmaps by the > device driver. > > We add another pageflag (PageExternalRmap) that is set if a page has > been remotely mapped (f.e. by a process from another Linux instance). > We can then only perform the callbacks for pages that are actually in > remote use. > > Rmap notifiers need an extra page bit and are only available > on 64 bit platforms. This functionality is not available on 32 bit! > > A notifier that uses the reverse maps callbacks does not need to provide > the invalidate_page() method that is called when locks are held. > hrm. > +#define mmu_rmap_notifier(function, args...) \ > + do { \ > + struct mmu_rmap_notifier *__mrn; \ > + struct hlist_node *__n; \ > + \ > + rcu_read_lock(); \ > + hlist_for_each_entry_rcu(__mrn, __n, \ > + &mmu_rmap_notifier_list, hlist) \ > + if (__mrn->ops->function) \ > + __mrn->ops->function(__mrn, args); \ > + rcu_read_unlock(); \ > + } while (0); > + buggy macro: use locals. > +#define mmu_rmap_notifier(function, args...) \ > + do { \ > + if (0) { \ > + struct mmu_rmap_notifier *__mrn; \ > + \ > + __mrn = (struct mmu_rmap_notifier *)(0x00ff); \ > + __mrn->ops->function(__mrn, args); \ > + } \ > + } while (0); > + Same observation as in the other patch. 
> =================================================================== > --- linux-2.6.orig/mm/mmu_notifier.c 2008-02-14 21:17:51.000000000 -0800 > +++ linux-2.6/mm/mmu_notifier.c 2008-02-14 21:21:04.000000000 -0800 > @@ -74,3 +74,37 @@ void mmu_notifier_unregister(struct mmu_ > } > EXPORT_SYMBOL_GPL(mmu_notifier_unregister); > > +#ifdef CONFIG_64BIT > +static DEFINE_SPINLOCK(mmu_notifier_list_lock); > +HLIST_HEAD(mmu_rmap_notifier_list); > + > +void mmu_rmap_notifier_register(struct mmu_rmap_notifier *mrn) > +{ > + spin_lock(&mmu_notifier_list_lock); > + hlist_add_head_rcu(&mrn->hlist, &mmu_rmap_notifier_list); > + spin_unlock(&mmu_notifier_list_lock); > +} > +EXPORT_SYMBOL(mmu_rmap_notifier_register); > + > +void mmu_rmap_notifier_unregister(struct mmu_rmap_notifier *mrn) > +{ > + spin_lock(&mmu_notifier_list_lock); > + hlist_del_rcu(&mrn->hlist); > + spin_unlock(&mmu_notifier_list_lock); > +} > +EXPORT_SYMBOL(mmu_rmap_notifier_unregister); > > +/* > + * Export a page. > + * > + * Pagelock must be held. > + * Must be called before a page is put on an external rmap. > + */ > +void mmu_rmap_export_page(struct page *page) > +{ > + BUG_ON(!PageLocked(page)); > + SetPageExternalRmap(page); > +} > +EXPORT_SYMBOL(mmu_rmap_export_page); The other patch used EXPORT_SYMBOL_GPL. From olivier at four-soft.com Fri Feb 15 18:01:48 2008 From: olivier at four-soft.com (em janny) Date: Sat, 16 Feb 2008 02:01:48 +0000 Subject: [ofa-general] Avoid enhancement pills underframe Message-ID: <000501c8704e$01d3040a$45818787@ppkwf> Penis Enlarge Patch : http://www.celarpo.com ... the most powerful and fast-acting penis enlargement product I have ever used.. Robbie M. Sacramento CA From changquing.tang at hp.com Fri Feb 15 22:19:22 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Sat, 16 Feb 2008 06:19:22 +0000 Subject: [ofa-general] What IB utillity can be run as normal user ? Message-ID: Hi: Can anyone tell if 'ibdiagpath' needs root to run ? 
I run it as root, it prints out some useful info, but if I run as normal user, it fails. The other IB utilities under /usr/bin are: ib-bond ib_clock_test ibdiagnet ibdiagui ibdmchk ibdmsh ibdmtr ibmsquit ibmssh ibnlparse the man page does not say that. Do all these require root ? Thanks. --CQ From avi at qumranet.com Sat Feb 16 00:45:50 2008 From: avi at qumranet.com (Avi Kivity) Date: Sat, 16 Feb 2008 10:45:50 +0200 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <20080215193719.262c03a1.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> <20080215193719.262c03a1.akpm@linux-foundation.org> Message-ID: <47B6A2BE.6080201@qumranet.com> Andrew Morton wrote: > How important is this feature to KVM? > Very. kvm pins pages that are referenced by the guest; a 64-bit guest will easily pin its entire memory with the kernel map. So this is critical for guest swapping to actually work. Other nice features like page migration are also enabled by this patch. -- Any sufficiently difficult bug is indistinguishable from a feature. From akpm at linux-foundation.org Sat Feb 16 00:56:53 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Sat, 16 Feb 2008 00:56:53 -0800 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <47B6A2BE.6080201@qumranet.com> References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> <20080215193719.262c03a1.akpm@linux-foundation.org> <47B6A2BE.6080201@qumranet.com> Message-ID: <20080216005653.353a62dc.akpm@linux-foundation.org> On Sat, 16 Feb 2008 10:45:50 +0200 Avi Kivity wrote: > Andrew Morton wrote: > > How important is this feature to KVM? > > > > Very. kvm pins pages that are referenced by the guest; hm. Why does it do that? > a 64-bit guest > will easily pin its entire memory with the kernel map. > So this is > critical for guest swapping to actually work. Curious. 
If KVM can release guest pages at the request of this notifier so that they can be swapped out, why can't it release them by default, and allow swapping to proceed? > > Other nice features like page migration are also enabled by this patch. > We already have page migration. Do you mean page-migration-when-using-kvm? From avi at qumranet.com Sat Feb 16 01:21:24 2008 From: avi at qumranet.com (Avi Kivity) Date: Sat, 16 Feb 2008 11:21:24 +0200 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <20080216005653.353a62dc.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> <20080215193719.262c03a1.akpm@linux-foundation.org> <47B6A2BE.6080201@qumranet.com> <20080216005653.353a62dc.akpm@linux-foundation.org> Message-ID: <47B6AB14.5090408@qumranet.com> Andrew Morton wrote: >> Very. kvm pins pages that are referenced by the guest; >> > > hm. Why does it do that? > > It was deemed best not to allow the guest to write to a page that has been swapped out and assigned to an unrelated host process. One way to view the kvm shadow page tables is as hardware dma descriptors. kvm pins pages for the same reason that drivers pin pages that are being dma'ed. It's also the reason why mmu notifiers are useful for such a wide range of dma capable hardware. >> a 64-bit guest >> will easily pin its entire memory with the kernel map. >> > > >> So this is >> critical for guest swapping to actually work. >> > > Curious. If KVM can release guest pages at the request of this notifier so > that they can be swapped out, why can't it release them by default, and > allow swapping to proceed? > > If kvm releases a page, it must also zap any shadow ptes pointing at the page and flush the tlb. If you do that for all of memory you can't reference any of it. Releasing a page has costs, both at the time of the release and when the guest eventually refers to the page again. 
>> Other nice features like page migration are also enabled by this patch. >> >> > > We already have page migration. Do you mean page-migration-when-using-kvm? > Yes, I'm obviously writing from a kvm-centric point of view. This is an important feature, as the virtualization future seems to be NUMA hosts (2- or 4- way, 4 cores per socket) running moderately sized guests. The ability to load-balance guests among the NUMA nodes is important for performance. (btw, I'm also looking forward to memory defragmentation. large pages are important for virtualization workloads and mmu notifiers are again critical to getting it to work while running kvm). -- Any sufficiently difficult bug is indistinguishable from a feature. From andrea at qumranet.com Sat Feb 16 02:48:27 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Sat, 16 Feb 2008 11:48:27 +0100 Subject: [ofa-general] [PATCH] KVM swapping with MMU Notifiers V7 In-Reply-To: <20080215064859.384203497@sgi.com> References: <20080215064859.384203497@sgi.com> Message-ID: <20080216104827.GI11732@v2.random> Those below two patches enable KVM to swap the guest physical memory through Christoph's V7. There's one last _purely_theoretical_ race condition I figured out and that I'm wondering how to best fix. The race condition worst case is that a few guest physical pages could remain pinned by sptes. The race can materialize if the linux pte is zapped after get_user_pages returns but before the page is mapped by the spte and tracked by rmap. The invalidate_ calls can also likely be optimized further but it's not a fast path so it's not urgent. 
Signed-off-by: Andrea Arcangeli diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 41962e7..e1287ab 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index fd39cd1..b56e388 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -533,6 +533,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) kvm_flush_remote_tlbs(kvm); } +static void kvm_unmap_spte(struct kvm *kvm, u64 *spte) +{ + struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + get_page(page); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(kvm); + __free_page(page); +} + +static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte, *curr_spte; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!(*spte & PT_PRESENT_MASK)); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); + curr_spte = spte; + spte = rmap_next(kvm, rmapp, spte); + kvm_unmap_spte(kvm, curr_spte); + } +} + +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + int i; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
+ */ + spin_lock(&kvm->mmu_lock); + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]); + } + } + spin_unlock(&kvm->mmu_lock); +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int young = 0; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + int _young; + u64 _spte = *spte; + BUG_ON(!(_spte & PT_PRESENT_MASK)); + _young = _spte & PT_ACCESSED_MASK; + if (_young) { + young = !!_young; + set_shadow_pte(spte, _spte & ~PT_ACCESSED_MASK); + } + spte = rmap_next(kvm, rmapp, spte); + } + return young; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + int i; + int young = 0; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
+ */ + spin_lock(&kvm->mmu_lock); + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + young |= kvm_age_rmapp(kvm, &memslot->rmap[gfn_offset]); + } + } + spin_unlock(&kvm->mmu_lock); + + if (young) + kvm_flush_remote_tlbs(kvm); + + return young; +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c910c7..2b2398f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3185,6 +3185,46 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu->arch.pio_data); } +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) +{ + struct kvm_arch *kvm_arch; + kvm_arch = container_of(mn, struct kvm_arch, mmu_notifier); + return container_of(kvm_arch, struct kvm, arch); +} + +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + BUG_ON(mm != kvm->mm); + kvm_unmap_hva(kvm, address); +} + +int kvm_mmu_notifier_age_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + BUG_ON(mm != kvm->mm); + return kvm_age_hva(kvm, address); +} + +void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end, + int lock) +{ + for (; start < end; start += PAGE_SIZE) + kvm_mmu_notifier_invalidate_page(mn, mm, start); +} + +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_page = kvm_mmu_notifier_invalidate_page, + .age_page = kvm_mmu_notifier_age_page, + .invalidate_range_end = 
kvm_mmu_notifier_invalidate_range_end, +}; + struct kvm *kvm_arch_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); @@ -3194,6 +3234,9 @@ struct kvm *kvm_arch_create_vm(void) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + kvm->arch.mmu_notifier.ops = &kvm_mmu_notifier_ops; + mmu_notifier_register(&kvm->arch.mmu_notifier, current->mm); + return kvm; } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index da61255..11976c8 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -287,6 +288,8 @@ struct kvm_arch{ int round_robin_prev_vcpu; unsigned int tss_addr; struct page *apic_access_page; + + struct mmu_notifier mmu_notifier; }; struct kvm_vm_stat { @@ -404,6 +407,8 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); void kvm_mmu_zap_all(struct kvm *kvm); This allows to browse the memslots with only the mmu_lock hold and it should be applied along the above patch: Signed-off-by: Andrea Arcangeli diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c910c7..80b719d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3245,16 +3245,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages && !old.rmap) { + unsigned long userspace_addr; + down_write(&current->mm->mmap_sem); - memslot->userspace_addr = do_mmap(NULL, 0, - npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - 0); + userspace_addr = do_mmap(NULL, 0, + npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + 0); up_write(&current->mm->mmap_sem); - if (IS_ERR((void
*)memslot->userspace_addr)) - return PTR_ERR((void *)memslot->userspace_addr); + if (IS_ERR((void *)userspace_addr)) + return PTR_ERR((void *)userspace_addr); + + /* set userspace_addr atomically for kvm_hva_to_rmapp */ + spin_lock(&kvm->mmu_lock); + memslot->userspace_addr = userspace_addr; + spin_unlock(&kvm->mmu_lock); } else { if (!old.user_alloc && old.rmap) { int ret; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cf6df51..743c5c5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -299,7 +299,15 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.rmap, 0, npages * sizeof(*new.rmap)); new.user_alloc = user_alloc; - new.userspace_addr = mem->userspace_addr; + /* + * hva_to_rmmap() serialzies with the mmu_lock and to be + * safe it has to ignore memslots with !user_alloc && + * !userspace_addr. + */ + if (user_alloc) + new.userspace_addr = mem->userspace_addr; + else + new.userspace_addr = 0; } /* Allocate page dirty bitmap if needed */ @@ -312,14 +320,18 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.dirty_bitmap, 0, dirty_bytes); } + spin_lock(&kvm->mmu_lock); if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; *memslot = new; + spin_unlock(&kvm->mmu_lock); r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); if (r) { + spin_lock(&kvm->mmu_lock); *memslot = old; + spin_unlock(&kvm->mmu_lock); goto out_free; } From vlad at lists.openfabrics.org Sat Feb 16 02:54:20 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 16 Feb 2008 02:54:20 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080216-0200 daily build status Message-ID: <20080216105421.1114BE281AB@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod 
--with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Failed: Build failed on ia64 with linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: 
/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with 
linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.18 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ia64_check/drivers/infiniband] Error 2 
make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.17 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer 
type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.19 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.22 Log: 
/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:927: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:947: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.22_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.22_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.22' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.21.1 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:928: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:948: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.21.1_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.21.1_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.21.1' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.23 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:927: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': 
/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:947: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.23_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.23_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.23' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.24 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:932: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:952: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with 
linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: 
/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.13_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.12_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on 
ppc64 with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.14_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.15_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.16 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.16_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.17 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: 
warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.17_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.19 Log: 
/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.19_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.24 Log: /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:932: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:952: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080216-0200_linux-2.6.24_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From andrea at qumranet.com Sat Feb 16 03:07:38 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Sat, 16 Feb 2008 12:07:38 +0100 Subject: [ofa-general] Re: [patch 3/6] mmu_notifier: invalidate_page callbacks In-Reply-To: <20080215193736.9d6e7da3.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.918191502@sgi.com> <20080215193736.9d6e7da3.akpm@linux-foundation.org> Message-ID: 
<20080216110738.GJ11732@v2.random> On Fri, Feb 15, 2008 at 07:37:36PM -0800, Andrew Morton wrote: > The "|" is obviously deliberate. But no explanation is provided telling us > why we still call the callback if ptep_clear_flush_young() said the page > was recently referenced. People who read your code will want to understand > this. This is to clear the young bit in every pte and spte pointing to that physical page before backing off because any young bit was on. So if any young bit will be on in the next scan, we're guaranteed the page has been touched recently and not ages before (otherwise it would take a worst case N rounds of the lru before the page can be freed, where N is the number of pte or sptes pointing to the page). > I just don't see how ths can be done if the callee has another thread in > the middle of establishing IO against this region of memory. > ->invalidate_page() _has_ to be able to block. Confused. invalidate_page marking the spte invalid and flushing the asid/tlb doesn't need to block the same way ptep_clear_flush doesn't need to block for the main linux pte. In fact, before invalidate_page and ptep_clear_flush can touch anything at all, they have to take their own spinlocks (mmu_lock for the former, and PT lock for the latter). The only sleeping trouble is for network-driven message passing, where they want to schedule while they wait for the message to arrive or it'd hang the whole cpu to spin for so long. sptes are cpu-clocked entities like ptes so scheduling there is by far not necessary because there's zero delay in invalidating them and flushing their tlbs. GRU is similar. 
Because we boost the reference count of the pages for every spte mapping, only implementing invalidate_range_end is enough, but I need to figure out the get_user_pages->rmap_add window too and because get_user_pages can schedule, and if I want to add a critical section around it to avoid calling get_user_pages twice during the kvm page fault, a mutex would be the only way (it sure can't be a spinlock). But a mutex can't be taken by invalidate_page to stop it. So that leaves me with the idea of adding a get_user_pages variant that returns the page locked. So instead of calling get_user_pages a second time after rmap_add returns, I will only need to call unlock_page which should be faster than a follow_page. And setting the PG_lock before dropping the PT lock in follow_page, should be fast enough too. From akpm at linux-foundation.org Sat Feb 16 03:08:17 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Sat, 16 Feb 2008 03:08:17 -0800 Subject: [ofa-general] Re: [PATCH] KVM swapping with MMU Notifiers V7 In-Reply-To: <20080216104827.GI11732@v2.random> References: <20080215064859.384203497@sgi.com> <20080216104827.GI11732@v2.random> Message-ID: <20080216030817.965ff1f7.akpm@linux-foundation.org> On Sat, 16 Feb 2008 11:48:27 +0100 Andrea Arcangeli wrote: > +void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end, > + int lock) > +{ > + for (; start < end; start += PAGE_SIZE) > + kvm_mmu_notifier_invalidate_page(mn, mm, start); > +} > + > +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { > + .invalidate_page = kvm_mmu_notifier_invalidate_page, > + .age_page = kvm_mmu_notifier_age_page, > + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, > +}; So this doesn't implement ->invalidate_range_start(). By what means does it prevent new mappings from being established in the range after core mm has tried to call ->invalidate_range_start()? mmap_sem, I assume? 
> + /* set userspace_addr atomically for kvm_hva_to_rmapp */ > + spin_lock(&kvm->mmu_lock); > + memslot->userspace_addr = userspace_addr; > + spin_unlock(&kvm->mmu_lock); are you sure? kvm_unmap_hva() and kvm_age_hva() read ->userspace_addr a single time and it doesn't immediately look like there's a need to take the lock here? From holt at sgi.com Sat Feb 16 03:51:38 2008 From: holt at sgi.com (Robin Holt) Date: Sat, 16 Feb 2008 05:51:38 -0600 Subject: [ofa-general] Re: [PATCH] KVM swapping with MMU Notifiers V7 In-Reply-To: <20080216104827.GI11732@v2.random> References: <20080215064859.384203497@sgi.com> <20080216104827.GI11732@v2.random> Message-ID: <20080216115138.GA11391@sgi.com> On Sat, Feb 16, 2008 at 11:48:27AM +0100, Andrea Arcangeli wrote: > Those below two patches enable KVM to swap the guest physical memory > through Christoph's V7. > > There's one last _purely_theoretical_ race condition I figured out and > that I'm wondering how to best fix. The race condition worst case is > that a few guest physical pages could remain pinned by sptes. The race > can materialize if the linux pte is zapped after get_user_pages > returns but before the page is mapped by the spte and tracked by > rmap. The invalidate_ calls can also likely be optimized further but > it's not a fast path so it's not urgent. I am doing this in xpmem with a stack-based structure in the function calling get_user_pages. That structure describes the start and end address of the range we are doing the get_user_pages on. If an invalidate_range_begin comes in while we are off to the kernel doing the get_user_pages, the invalidate_range_begin marks that structure indicating an invalidate came in. When the get_user_pages gets the structures relocked, it checks that flag (really a generation counter) and if it is set, retries the get_user_pages. After 3 retries, it returns -EAGAIN and the fault is started over from the remote side. 
Thanks, Robin From hrosenstock at xsigo.com Sat Feb 16 06:40:57 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 16 Feb 2008 06:40:57 -0800 Subject: [ofa-general] What IB utillity can be run as normal user ? In-Reply-To: References: Message-ID: <1203172857.26729.235.camel@hrosenstock-ws.xsigo.com> On Sat, 2008-02-16 at 06:19 +0000, Tang, Changqing wrote: > Hi: > Can anyone tell if 'ibdiagpath' needs root to run ? I run it as root, it prints > out some useful info, but if I run as normal user, it fails. It all depends on how udev is setup for umad access. This is the default (and recommended) setting. > The other IB utilities under /usr/bin are: > ib-bond > ib_clock_test > ibdiagnet > ibdiagui > ibdmchk > ibdmsh > ibdmtr > ibmsquit > ibmssh > ibnlparse > > the man page does not say that. Do all these require root ? Any of the above tools which are part of ibutils likely work similarly. BTW, the maintainer for ibutils is Mellanox (Oren Kladnitsky ). -- Hal > Thanks. > > --CQ > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Sat Feb 16 06:46:45 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 16 Feb 2008 06:46:45 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203173205.26729.237.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 16:09 -0500, Chuck Hartley wrote: > Well I expect that is the problem then. I see the same problem when I > don't run the OpenSM at all and just use the one running on the blade > server. 
All those are SDR HCAs and that must be all that it will > negotiate for. I'll have to figure out how to turn it off and just run > the OpenSM one. Is the rest of the subnet pure DDR or a mix of SDR/DDR ? Does it work "right" with OpenSM off/using the vendor SM ? From mmsdata at gmail.com Fri Feb 15 12:35:42 2008 From: mmsdata at gmail.com (Data Division) Date: Fri, 15 Feb 2008 13:35:42 -0700 Subject: [ofa-general] ***SPAM*** Database Deployment Message-ID: An HTML attachment was scrubbed... URL: From hrosenstock at xsigo.com Sat Feb 16 07:37:56 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 16 Feb 2008 07:37:56 -0800 Subject: [ofa-general] Re: [PATCH RFC] opensm: drop unused parameter in OSM_LOG_ENTER macro In-Reply-To: <20080215160632.GC7436@sashak.voltaire.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> Message-ID: <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> On Fri, 2008-02-15 at 16:06 +0000, Sasha Khapyorsky wrote: > On 18:18 Tue 12 Feb , Sasha Khapyorsky wrote: > > > > __func__ macro is used in the OSM_LOG_ENTER() to show an actual function > > name, so the second parameter is not really useful here. OTOH it makes > > it harder to grep over OpenSM source code, when searches are by function > > names it generates a lot of unrelated matches. If so what about to > > remove this second parameter (like in this patch)? > > And if we are doing this (I didn't get any negative feedback up to now) This seems functionally equivalent as all functions supplied in second parameter to OSM_LOG_ENTER are indeed the function being entered. > what about a next step - to clean up function names in osm_log format > string? > > Something like to have OSM_LOG() macro: > > #define OSM_LOG(log, level, fmt, arg...) osm_log(log, level, \ > "%s: " fmt , __func__, ##arg) > > , and use this macro instead of osm_log() where function name should be > logged? 
Do all instances of osm_log use the function they are in ? For those that do, this seems fine but I'm not sure all of them do. -- Hal > IMO it would save even more "grepping" time. > > Sasha From 9cd0d2cc at neo.rr.com Sat Feb 16 09:25:27 2008 From: 9cd0d2cc at neo.rr.com (William Gallegos) Date: , 17 Feb 2008 01:25:27 +0800 Subject: [ofa-general] I saw your pic Message-ID: <01c87103$f9cf1d80$808b1874@9cd0d2cc> Hello! I am bored tonight. I am nice girl that would like to chat with you. Email me at Ulrika at TheHealCare.info only, because I am using my friend's email to write this. I would like to share some of my pics. From sashak at voltaire.com Sat Feb 16 09:13:58 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 16 Feb 2008 17:13:58 +0000 Subject: [ofa-general] Re: [PATCH RFC] opensm: drop unused parameter in OSM_LOG_ENTER macro In-Reply-To: <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080216171358.GA18527@sashak.voltaire.com> Hi Hal, On 07:37 Sat 16 Feb , Hal Rosenstock wrote: > > This seems functionally equivalent as all functions supplied in second > parameter to OSM_LOG_ENTER are indeed the function being entered. Correct, it is the same. The only downside I can see here is needs to update some ibutils/ibis files too. Of course I can supply the patch. > > #define OSM_LOG(log, level, fmt, arg...) osm_log(log, level, \ > > "%s: " fmt , __func__, ##arg) > > > > , and use this macro instead of osm_log() where function name should be > > logged? > > Do all instances of osm_log use the function they are in ? For those > that do, this seems fine but I'm not sure all of them do. Good point. And there is a good solution for this - those osm_log() calls where function name is used (or should be used) will be converted to OSM_LOG(), the rest will use osm_log() as usual. 
Sounds good? Sasha From hrosenstock at xsigo.com Sat Feb 16 09:57:54 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 16 Feb 2008 09:57:54 -0800 Subject: [ofa-general] Re: [PATCH RFC] opensm: drop unused parameter in OSM_LOG_ENTER macro In-Reply-To: <20080216171358.GA18527@sashak.voltaire.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> Message-ID: <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> Hi Sasha, On Sat, 2008-02-16 at 17:13 +0000, Sasha Khapyorsky wrote: > Hi Hal, > > On 07:37 Sat 16 Feb , Hal Rosenstock wrote: > > > > This seems functionally equivalent as all functions supplied in second > > parameter to OSM_LOG_ENTER are indeed the function being entered. > > Correct, it is the same. The only downside I can see here is needs to > update some ibutils/ibis files too. Of course I can supply the patch. > > > > #define OSM_LOG(log, level, fmt, arg...) osm_log(log, level, \ > > > "%s: " fmt , __func__, ##arg) > > > > > > , and use this macro instead of osm_log() where function name should be > > > logged? > > > > Do all instances of osm_log use the function they are in ? For those > > that do, this seems fine but I'm not sure all of them do. > > Good point. And there is a good solution for this - those osm_log() > calls where function name is used (or should be used) will be converted > to OSM_LOG(), the rest will use osm_log() as usual. Sounds good? FWIW sounds fine to me. 
-- Hal > Sasha From pooder1 at gmx.de Sat Feb 16 10:12:15 2008 From: pooder1 at gmx.de (Deontae Kim) Date: Sat, 16 Feb 2008 20:12:15 +0200 Subject: [ofa-general] Have a Pleasure of Being Confident Message-ID: <844619669.69178008920370@gmx.de> While cock enlargement surgery is expensive and sometime dangerous, pills could be easily forgotten to take and other products are usually just scams, the VPXL is safe and easy to use. Enjoy your life with the our products.http://geocities.com/laurahewitt818/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From sashak at voltaire.com Sat Feb 16 10:50:43 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 16 Feb 2008 18:50:43 +0000 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080216185043.GB18527@sashak.voltaire.com> On 16:09 Fri 15 Feb , Chuck Hartley wrote: > Well I expect that is the problem then. I see the same problem when I don't > run the OpenSM at all and just use the one running on the blade server. BTW, you can use 'sminfo' utility to see who is a master SM now. Sasha From mashirle at us.ibm.com Sat Feb 16 00:46:08 2008 From: mashirle at us.ibm.com (Shirley Ma) Date: Sat, 16 Feb 2008 00:46:08 -0800 Subject: [ofa-general] Re: [PATCH] IPOIB/CM Increase retry counts for OFED-1.3 In-Reply-To: <1203093002.4539.5.camel@localhost.localdomain> References: <47B226CC.1060706@linux.vnet.ibm.com> <47B456D1.7030600@mellanox.co.il> <47B47460.4080700@linux.vnet.ibm.com> <1203093002.4539.5.camel@localhost.localdomain> Message-ID: <1203151568.4539.10.camel@localhost.localdomain> Hello Roland, Do you have any suggestions on how to debug this problem? 
How can we hack the mthca/ipoib code to narrow down the root cause of the problem? Judging from the behavior, it looks like a local resource is temporarily unavailable, but it could be something else. Thanks Shirley From hrosenstock at xsigo.com Sat Feb 16 10:49:19 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 16 Feb 2008 10:49:19 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <20080216185043.GB18527@sashak.voltaire.com> References: <1203101529.26729.173.camel@hrosenstock-ws.xsigo.com> <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> <20080216185043.GB18527@sashak.voltaire.com> Message-ID: <1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> On Sat, 2008-02-16 at 18:50 +0000, Sasha Khapyorsky wrote: > On 16:09 Fri 15 Feb , Chuck Hartley wrote: > > Well I expect that is the problem then. I see the same problem when I don't > > run the OpenSM at all and just use the one running on the blade server. > > BTW, you can use 'sminfo' utility to see who is a master SM now. And use saquery -s to find all SMs on the subnet although this might not be supported in all vendor SMs. 
-- Hal > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From clameter at sgi.com Sat Feb 16 11:21:07 2008 From: clameter at sgi.com (Christoph Lameter) Date: Sat, 16 Feb 2008 11:21:07 -0800 (PST) Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <20080215193719.262c03a1.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> <20080215193719.262c03a1.akpm@linux-foundation.org> Message-ID: On Fri, 15 Feb 2008, Andrew Morton wrote: > What is the status of getting infiniband to use this facility? Well we are talking about this it seems. > > How important is this feature to KVM? Andrea can answer this. > To xpmem? Without this feature we are stuck with page pinning by increasing refcounts which leads to endless lru scanning and other misbehavior. Also applications that use XPmem will not be able to swap or be able to use things like remap. > Which other potential clients have been identified and how important it it > to those? It is likely important to various DMA engines, framebuffers devices etc etc. Seems to be a generally useful feature. > > +The notifier chains provide two callback mechanisms. The > > +first one is required for any device that establishes external mappings. > > +The second (rmap) mechanism is required if a device needs to be > > +able to sleep when invalidating references. Sleeping may be necessary > > +if we are mapping across a network or to different Linux instances > > +in the same address space. > > I'd have thought that a major reason for sleeping would be to wait for IO > to complete. Worth mentioning here? Right. > Why is that "easy"? I's have thought that it would only be easy if the > driver happened to be using those same locks for its own purposes. 
> Otherwise it is "awkward"? Its relatively easy because it is tied directly to a process and can use external tlb shootdown / external page table clearing directly. The other method requires an rmap in the device driver where it can lookup the processes that are mapping the page. > > +The invalidation mechanism for a range (*invalidate_range_begin/end*) is > > +called most of the time without any locks held. It is only called with > > +locks held for file backed mappings that are truncated. A flag indicates > > +in which mode we are. A driver can use that mechanism to f.e. > > +delay the freeing of the pages during truncate until no locks are held. > > That sucks big time. What do we need to do to make get the callback > functions called in non-atomic context? We would have to drop the inode_mmap_lock. Could be done with some minor work. > > +Pages must be marked dirty if dirty bits are found to be set in > > +the external ptes during unmap. > > That sentence is too vague. Define "marked dirty"? Call set_page_dirty(). > > +The *release* method is called when a Linux process exits. It is run before > > We'd conventionally use a notation such as "->release()" here, rather than > the asterisks. Ok. > > > +the pages and mappings of a process are torn down and gives the device driver > > +a chance to zap all the external mappings in one go. > > I assume what you mean here is that ->release() is called during exit() > when the final reference to an mm is being dropped. Right. > > +An example for a code that can be used to build a notifier mechanism into > > +a device driver can be found in the file > > +Documentation/mmu_notifier/skeleton.c > > Should that be in samples/? Oh. We have that? > > +The mmu_rmap_notifier adds another invalidate_page() callout that is called > > +*before* the Linux rmaps are walked. At that point only the page lock is > > +held. The invalidate_page() function must walk the driver rmaps and evict > > +all the references to the page. 
> > What happens if it cannot do so? The page is not reclaimed if we were called from try_to_unmap(). From page_mkclean() we must always evict the page to switch off the write protect bit. > > +There is no process information available before the rmaps are consulted. > > Not sure what that sentence means. I guess "available to the core VM"? At that point we only have the page. We do not know which processes map the page. In order to find out we need to take a spinlock. > > +The notifier mechanism can therefore not be attached to an mm_struct. Instead > > +it is a global callback list. Having to perform a callback for each and every > > +page that is reclaimed would be inefficient. Therefore we add an additional > > +page flag: PageRmapExternal(). > > How many page flags are left? 30 or so. Its only available on 64bit. > Is this feature important enough to justfy consumption of another one? > > > Only pages that are marked with this bit can > > +be exported and the rmap callbacks will only be performed for pages marked > > +that way. > > "exported": new term, unclear what it means. Something external to the kernel references the page. > > +The required additional Page flag is only availabe in 64 bit mode and > > +therefore the mmu_rmap_notifier portion is not available on 32 bit platforms. > > whoa. Is that good? You just made your feature unavailable on the great > majority of Linux systems. rmaps are usually used by complex drivers that are typically used in large systems. > > + * Notifier functions for hardware and software that establishes external > > + * references to pages of a Linux system. The notifier calls ensure that > > + * external mappings are removed when the Linux VM removes memory ranges > > + * or individual pages from a process. > > So the callee cannot fail. hm. If it can't block, it's likely screwed in > that case. In other cases it might be screwed anyway. I suspect we'll > need to be able to handle callee failure. Probably. 
> > > + * These fall into two classes: > > + * > > + * 1. mmu_notifier > > + * > > + * These are callbacks registered with an mm_struct. If pages are > > + * removed from an address space then callbacks are performed. > > "to be removed", I guess. It's called before the page is actually removed? Its called after the pte was cleared while holding the pte lock. > > + * The invalidate_range_start/end callbacks can be performed in contexts > > + * where sleeping is allowed or in atomic contexts. A flag is passed > > + * to indicate an atomic context. > > We generally would prefer separate callbacks, rather than a unified > callback with a mode flag. We could drop the inode_mmap_lock when doing truncate. That would make this work but its a kind of invasive thing for the VM. > > +struct mmu_notifier_ops { > > + /* > > + * The release notifier is called when no other execution threads > > + * are left. Synchronization is not necessary. > > "and the mm is about to be destroyed"? Right. > > + /* > > + * invalidate_range_begin() and invalidate_range_end() must be paired. > > + * > > + * Multiple invalidate_range_begin/ends may be nested or called > > + * concurrently. > > Under what circumstances would they be nested? Hmmmm.. Right they cannot be nested. Multiple processors can have invalidates() concurrently in progress. > > That is legit. However, no new external references > > references to what? To the ranges that are in the process of being invalidated. > > + * invalidate_range_begin() must clear all references in the range > > + * and stop the establishment of new references. > > and stop the establishment of new references within the range, I assume? Right. > If so, that's putting a heck of a lot of complexity into the driver, isn't > it? It needs to temporarily remember an arbitrarily large number of > regions in this mm against which references may not be taken? That is one implementation (XPmem does that). 
The other is to simply stop all references when any invalidate_range is in progress (KVM and GRU do that). > > + * invalidate_range_end() reenables the establishment of references. > > within the range? Right. > > +extern void mmu_notifier_release(struct mm_struct *mm); > > +extern int mmu_notifier_age_page(struct mm_struct *mm, > > + unsigned long address); > > There's the mysterious age_page again. Andrea put this in to check the reference status of a page. It functions like the accessed bit. > > +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) > > +{ > > + INIT_HLIST_HEAD(&mnh->head); > > +} > > + > > +#define mmu_notifier(function, mm, args...) \ > > + do { \ > > + struct mmu_notifier *__mn; \ > > + struct hlist_node *__n; \ > > + \ > > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > > + rcu_read_lock(); \ > > + hlist_for_each_entry_rcu(__mn, __n, \ > > + &(mm)->mmu_notifier.head, \ > > + hlist) \ > > + if (__mn->ops->function) \ > > + __mn->ops->function(__mn, \ > > + mm, \ > > + args); \ > > + rcu_read_unlock(); \ > > + } \ > > + } while (0) > > The macro references its args more than once. Anyone who does > > mmu_notifier(function, some_function_which_has_side_effects()) > > will get a surprise. Use temporaries. Ok. > > +#define mmu_notifier(function, mm, args...) \ > > + do { \ > > + if (0) { \ > > + struct mmu_notifier *__mn; \ > > + \ > > + __mn = (struct mmu_notifier *)(0x00ff); \ > > + __mn->ops->function(__mn, mm, args); \ > > + }; \ > > + } while (0) > > That's a bit weird. Can't we do the old > > (void)function; > (void)mm; > > trick? Or make it a staic inline function? Static inline wont allow the checking of the parameters. (void) may be a good thing here. > > +config MMU_NOTIFIER > > + def_bool y > > + bool "MMU notifier, for paging KVM/RDMA" > > Why is this not selectable? The help seems a bit brief. > > Does this cause 32-bit systems to drag in a bunch of code they're not > allowed to ever use? 
I have selected it a number of times. We could make that a bit longer right. > > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > > + hlist_for_each_entry_safe(mn, n, t, > > + &mm->mmu_notifier.head, hlist) { > > + hlist_del_init(&mn->hlist); > > + if (mn->ops->release) > > + mn->ops->release(mn, mm); > > We do this a lot, but back in the old days people didn't like optional > callbacks which can be NULL. If we expect that mmu_notifier_ops.release is > usually implemented, the just unconditionally call it and require that all > clients implement it. Perhaps provide an exported-to-modules stuv in core > kernel for clients which didn't want to implement ->release(). Ok. > > +{ > > + struct mmu_notifier *mn; > > + struct hlist_node *n; > > + int young = 0; > > + > > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > > + rcu_read_lock(); > > + hlist_for_each_entry_rcu(mn, n, > > + &mm->mmu_notifier.head, hlist) { > > + if (mn->ops->age_page) > > + young |= mn->ops->age_page(mn, mm, address); > > + } > > + rcu_read_unlock(); > > + } > > + > > + return young; > > +} > > should the rcu_read_lock() cover the hlist_empty() test? > > This function looks like it was tossed in at the last minute. It's > mysterious, undocumented, poorly commented, poorly named. A better name > would be one which has some correlation with the return value. > > Because anyone who looks at some code which does > > if (mmu_notifier_age_page(mm, address)) > ... > > has to go and reverse-engineer the implementation of > mmu_notifier_age_page() to work out under which circumstances the "..." > will be executed. But this should be apparent just from reading the callee > implementation. > > This function *really* does need some documentation. What does it *mean* > when the ->age_page() from some of the notifiers returned "1" and the > ->age_page() from some other notifiers returned zero? Dunno. Andrea: Could you provide some more detail here? 
From clameter at sgi.com Sat Feb 16 11:22:18 2008 From: clameter at sgi.com (Christoph Lameter) Date: Sat, 16 Feb 2008 11:22:18 -0800 (PST) Subject: [ofa-general] Re: [patch 3/6] mmu_notifier: invalidate_page callbacks In-Reply-To: <20080215193736.9d6e7da3.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.918191502@sgi.com> <20080215193736.9d6e7da3.akpm@linux-foundation.org> Message-ID: On Fri, 15 Feb 2008, Andrew Morton wrote: > > @@ -287,7 +288,8 @@ static int page_referenced_one(struct pa > > if (vma->vm_flags & VM_LOCKED) { > > referenced++; > > *mapcount = 1; /* break early from loop */ > > - } else if (ptep_clear_flush_young(vma, address, pte)) > > + } else if (ptep_clear_flush_young(vma, address, pte) | > > + mmu_notifier_age_page(mm, address)) > > referenced++; > > The "|" is obviously deliberate. But no explanation is provided telling us > why we still call the callback if ptep_clear_flush_young() said the page > was recently referenced. People who read your code will want to understand > this. Andrea? > > flush_cache_page(vma, address, pte_pfn(*pte)); > > entry = ptep_clear_flush(vma, address, pte); > > + mmu_notifier(invalidate_page, mm, address); > > I just don't see how ths can be done if the callee has another thread in > the middle of establishing IO against this region of memory. > ->invalidate_page() _has_ to be able to block. Confused. The page lock is held and that holds off I/O? 
From clameter at sgi.com Sat Feb 16 11:26:51 2008 From: clameter at sgi.com (Christoph Lameter) Date: Sat, 16 Feb 2008 11:26:51 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080215193730.709c67ea.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <20080215193730.709c67ea.akpm@linux-foundation.org> Message-ID: On Fri, 15 Feb 2008, Andrew Morton wrote: > On Thu, 14 Feb 2008 22:49:01 -0800 Christoph Lameter wrote: > > > The invalidation of address ranges in a mm_struct needs to be > > performed when pages are removed or permissions etc change. > > hm. Do they? Why? If I'm in the process of zero-copy writing a hunk of > memory out to hardware then do I care if someone write-protects the ptes? > > Spose so, but some fleshing-out of the various scenarios here would clarify > things. You care f.e. if the VM needs to writeprotect a memory range and a write occurs. In that case the VM needs to be proper write processing and write through an external pte would cause memory corruption. > > If invalidate_range_begin() is called with locks held then we > > pass a flag into invalidate_range() to indicate that no sleeping is > > possible. Locks are only held for truncate and huge pages. > > This is so bad. Ok so I can twidlle around with the inode_mmap_lock to drop it while this is called? > > In two cases we use invalidate_range_begin/end to invalidate > > single pages because the pair allows holding off new references > > (idea by Robin Holt). > > Assuming that there is a missing "within the range" in this description, I > assume that all clients will just throw up theior hands in horror and will > disallow all references to all parts of the mm. Right. Missing within the range. We only need to disallow creating new ptes right? Why disallow references? 
> > xip_unmap: We are not taking the PageLock so we cannot > > use the invalidate_page mmu_rmap_notifier. invalidate_range_begin/end > > stands in. > > What does "stands in" mean? Use a range begin / end to invalidate a page. > > + mmu_notifier(invalidate_range_begin, mm, start, start + size, 0); > > err = populate_range(mm, vma, start, size, pgoff); > > + mmu_notifier(invalidate_range_end, mm, start, start + size, 0); > > To avoid off-by-one confusion the changelogs, documentation and comments > should be very careful to tell the reader whether the range includes the > byte at start+size. I don't thik that was done? No it was not. I assumed that the convention is always start - (end - 1) and the byte at end is not affected by the operation. From clameter at sgi.com Sat Feb 16 11:28:08 2008 From: clameter at sgi.com (Christoph Lameter) Date: Sat, 16 Feb 2008 11:28:08 -0800 (PST) Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <20080215193746.5d823092.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064933.376635032@sgi.com> <20080215193746.5d823092.akpm@linux-foundation.org> Message-ID: On Fri, 15 Feb 2008, Andrew Morton wrote: > > +#define mmu_rmap_notifier(function, args...) \ > > + do { \ > > + struct mmu_rmap_notifier *__mrn; \ > > + struct hlist_node *__n; \ > > + \ > > + rcu_read_lock(); \ > > + hlist_for_each_entry_rcu(__mrn, __n, \ > > + &mmu_rmap_notifier_list, hlist) \ > > + if (__mrn->ops->function) \ > > + __mrn->ops->function(__mrn, args); \ > > + rcu_read_unlock(); \ > > + } while (0); > > + > > buggy macro: use locals. Ok. Same as the non rmap version. > > +EXPORT_SYMBOL(mmu_rmap_export_page); > > The other patch used EXPORT_SYMBOL_GPL. Ok will make that consistent. 
From avi at qumranet.com Sat Feb 16 11:54:41 2008 From: avi at qumranet.com (Avi Kivity) Date: Sat, 16 Feb 2008 21:54:41 +0200 Subject: [ofa-general] Re: [patch 3/6] mmu_notifier: invalidate_page callbacks In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.918191502@sgi.com> <20080215193736.9d6e7da3.akpm@linux-foundation.org> Message-ID: <47B73F81.7090907@qumranet.com> Christoph Lameter wrote: > On Fri, 15 Feb 2008, Andrew Morton wrote: > > >>> @@ -287,7 +288,8 @@ static int page_referenced_one(struct pa >>> if (vma->vm_flags & VM_LOCKED) { >>> referenced++; >>> *mapcount = 1; /* break early from loop */ >>> - } else if (ptep_clear_flush_young(vma, address, pte)) >>> + } else if (ptep_clear_flush_young(vma, address, pte) | >>> + mmu_notifier_age_page(mm, address)) >>> referenced++; >>> >> The "|" is obviously deliberate. But no explanation is provided telling us >> why we still call the callback if ptep_clear_flush_young() said the page >> was recently referenced. People who read your code will want to understand >> this. >> > > Andrea? > > I'm not Andrea, but the way I read it, ptep_clear_flush_young() and ->age_page() each have two effects: check whether the page has been referenced and clear the referenced bit. || would retain the semantics of the check but lose the clearing. | does the right thing. -- Any sufficiently difficult bug is indistinguishable from a feature. From dwpohakuukulelem at pohakuukulele.com Sat Feb 16 12:41:40 2008 From: dwpohakuukulelem at pohakuukulele.com (Ursula Corbin) Date: Sat, 16 Feb 2008 21:41:40 +0100 Subject: [ofa-general] Save on quality software! Message-ID: <01c870e4$b6b18200$41211d54@dwpohakuukulelem> Need some software urgently? Purchase, download and install right now! Software in English, German, French, Italian, and Spanish for IBM PC and Macintosh! Cheap prices give you the possibility to save or buy more software than you can afford purchasing software on a CD! 
All your questions concerning installation will be replied quickly. Highly professional customer service! If your software does not run, we'll refund you money. http://geocities.com/nestorolson782/ Incredible selection of programs and applications! From sashak at voltaire.com Sat Feb 16 14:03:04 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 16 Feb 2008 22:03:04 +0000 Subject: [ofa-general] [PATCH] opensm/osm_log: OSM_LOG() macro In-Reply-To: <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080216220304.GD18527@sashak.voltaire.com> This macro is similar to osm_log() function but also includes function name. Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_log.h | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/opensm/include/opensm/osm_log.h b/opensm/include/opensm/osm_log.h index bfd2e96..dd63dc4 100644 --- a/opensm/include/opensm/osm_log.h +++ b/opensm/include/opensm/osm_log.h @@ -396,6 +396,8 @@ extern void osm_log_msg_box(osm_log_t *log, osm_log_level_t level, extern void osm_log_raw(IN osm_log_t * const p_log, IN const osm_log_level_t verbosity, IN const char *p_buf); +#define OSM_LOG(log, level, fmt, arg...) 
osm_log(log, level, "%s: " fmt, __func__, ##arg) + #define DBG_CL_LOCK 0 #define CL_PLOCK_EXCL_ACQUIRE( __exp__ ) \ -- 1.5.4.1.122.gaa8d From sashak at voltaire.com Sat Feb 16 14:03:51 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 16 Feb 2008 22:03:51 +0000 Subject: [ofa-general] [PATCH] opensm: convert to OSM_LOG() macro In-Reply-To: <20080216220304.GD18527@sashak.voltaire.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> <20080216220304.GD18527@sashak.voltaire.com> Message-ID: <20080216220350.GE18527@sashak.voltaire.com> Convert osm_log() calls where caller function name is used to OSM_LOG() macro call which has caller function name as builtin. Signed-off-by: Sasha Khapyorsky --- opensm/opensm/main.c | 13 +- opensm/opensm/osm_console.c | 37 ++--- opensm/opensm/osm_db_files.c | 71 ++++----- opensm/opensm/osm_drop_mgr.c | 69 +++------ opensm/opensm/osm_dump.c | 5 +- opensm/opensm/osm_event_plugin.c | 8 +- opensm/opensm/osm_helper.c | 2 +- opensm/opensm/osm_inform.c | 122 +++++---------- opensm/opensm/osm_lid_mgr.c | 118 +++++--------- opensm/opensm/osm_lin_fwd_rcv.c | 6 +- opensm/opensm/osm_link_mgr.c | 12 +- opensm/opensm/osm_mad_pool.c | 30 ++-- opensm/opensm/osm_mcast_fwd_rcv.c | 9 +- opensm/opensm/osm_mcast_mgr.c | 120 +++++---------- opensm/opensm/osm_multicast.c | 6 +- opensm/opensm/osm_node_desc_rcv.c | 6 +- opensm/opensm/osm_node_info_rcv.c | 92 ++++------- opensm/opensm/osm_opensm.c | 24 ++-- opensm/opensm/osm_perfmgr.c | 147 ++++++++---------- opensm/opensm/osm_pkey.c | 12 +- opensm/opensm/osm_pkey_mgr.c | 56 +++----- opensm/opensm/osm_pkey_rcv.c | 9 +- opensm/opensm/osm_port.c | 37 ++--- opensm/opensm/osm_port_info_rcv.c | 68 +++------ opensm/opensm/osm_prtn.c | 18 +-- opensm/opensm/osm_prtn_config.c | 32 ++-- 
opensm/opensm/osm_qos.c | 6 +- opensm/opensm/osm_qos_parser.y | 30 ++--- opensm/opensm/osm_qos_policy.c | 28 +--- opensm/opensm/osm_req.c | 14 +- opensm/opensm/osm_resp.c | 11 +- opensm/opensm/osm_sa.c | 70 ++++----- opensm/opensm/osm_sa_class_port_info.c | 9 +- opensm/opensm/osm_sa_guidinfo_record.c | 33 ++--- opensm/opensm/osm_sa_informinfo.c | 91 ++++------- opensm/opensm/osm_sa_lft_record.c | 39 ++--- opensm/opensm/osm_sa_link_record.c | 43 ++---- opensm/opensm/osm_sa_mad_ctrl.c | 54 ++----- opensm/opensm/osm_sa_mcmember_record.c | 229 +++++++++------------------ opensm/opensm/osm_sa_mft_record.c | 39 ++--- opensm/opensm/osm_sa_multipath_record.c | 123 +++++---------- opensm/opensm/osm_sa_node_record.c | 36 ++--- opensm/opensm/osm_sa_path_record.c | 151 ++++++------------ opensm/opensm/osm_sa_pkey_record.c | 42 ++---- opensm/opensm/osm_sa_portinfo_record.c | 36 ++--- opensm/opensm/osm_sa_response.c | 9 +- opensm/opensm/osm_sa_service_record.c | 70 +++------ opensm/opensm/osm_sa_slvl_record.c | 36 ++--- opensm/opensm/osm_sa_sminfo_record.c | 42 ++---- opensm/opensm/osm_sa_sw_info_record.c | 42 ++---- opensm/opensm/osm_sa_vlarb_record.c | 40 ++---- opensm/opensm/osm_service.c | 6 +- opensm/opensm/osm_slvl_map_rcv.c | 9 +- opensm/opensm/osm_sm.c | 39 ++--- opensm/opensm/osm_sm_mad_ctrl.c | 93 ++++-------- opensm/opensm/osm_sm_state_mgr.c | 39 ++--- opensm/opensm/osm_sminfo_rcv.c | 81 ++++------- opensm/opensm/osm_state_mgr.c | 107 +++++-------- opensm/opensm/osm_subnet.c | 49 +++---- opensm/opensm/osm_sw_info_rcv.c | 66 +++------ opensm/opensm/osm_sweep_fail_ctrl.c | 3 +- opensm/opensm/osm_trap_rcv.c | 78 ++++------ opensm/opensm/osm_ucast_file.c | 57 +++---- opensm/opensm/osm_ucast_ftree.c | 259 +++++++++++-------------------- opensm/opensm/osm_ucast_lash.c | 42 ++--- opensm/opensm/osm_ucast_mgr.c | 75 +++------ opensm/opensm/osm_ucast_updn.c | 96 ++++-------- opensm/opensm/osm_vl15intf.c | 29 ++-- opensm/opensm/osm_vl_arb_rcv.c | 12 +- 69 files changed, 1271 
insertions(+), 2321 deletions(-) diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 38608be..fcc7a44 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -457,8 +457,7 @@ parse_ignore_guids_file(IN char *guids_file_name, IN osm_opensm_t * p_osm) fh = fopen(guids_file_name, "r"); if (fh == NULL) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "parse_ignore_guids_file: ERR 0601: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 0601: " "Unable to open ignore guids file (%s)\n", guids_file_name); status = IB_ERROR; @@ -475,8 +474,7 @@ parse_ignore_guids_file(IN char *guids_file_name, IN osm_opensm_t * p_osm) p_c++; port_guid = strtoull(p_c, &p_ec, 16); if (p_ec == p_c) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "parse_ignore_guids_file: ERR 0602: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 0602: " "Error in line (%u): %s\n", line_num, line); status = IB_ERROR; goto Exit; @@ -485,8 +483,7 @@ parse_ignore_guids_file(IN char *guids_file_name, IN osm_opensm_t * p_osm) while ((*p_ec == ' ') && (*p_ec != '\0')) p_ec++; if (!sscanf(p_ec, "%d", &port_num)) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "parse_ignore_guids_file: ERR 0603: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 0603: " "Error in line (%u): %s\n", line_num, p_ec); status = IB_ERROR; goto Exit; @@ -495,8 +492,8 @@ parse_ignore_guids_file(IN char *guids_file_name, IN osm_opensm_t * p_osm) /* ok insert it */ osm_port_prof_set_ignored_port(&p_osm->subn, cl_hton64(port_guid), port_num); - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "parse_ignore_guids_file: " "Inserted Port: 0x%" PRIx64 + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, + "Inserted Port: 0x%" PRIx64 " PortNum: 0x%X into ignored guids list\n", port_guid, port_num); diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c index 7ffe9bb..7b1f87c 100644 --- a/opensm/opensm/osm_console.c +++ b/opensm/opensm/osm_console.c @@ -815,8 +815,8 @@ static void osm_console_close(osm_opensm_t * p_osm) { #ifdef ENABLE_OSM_CONSOLE_SOCKET if 
((p_osm->console.socket > 0) && (p_osm->console.in_fd != -1)) { - osm_log(&(p_osm->log), OSM_LOG_INFO, - "cio_close: Console connection closed: %s (%s)\n", + OSM_LOG(&(p_osm->log), OSM_LOG_INFO, + "Console connection closed: %s (%s)\n", p_osm->console.client_hn, p_osm->console.client_ip); cio_close(&p_osm->console); } @@ -942,8 +942,8 @@ void osm_console_init(osm_subn_opt_t * opt, osm_opensm_t * p_osm) int optval = 1; if ((p_oct->socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - osm_log(&(p_osm->log), OSM_LOG_ERROR, - "osm_console_init: ERR 4B01: Failed to open console socket: %s\n", + OSM_LOG(&(p_osm->log), OSM_LOG_ERROR, + "ERR 4B01: Failed to open console socket: %s\n", strerror(errno)); return; } @@ -956,14 +956,14 @@ void osm_console_init(osm_subn_opt_t * opt, osm_opensm_t * p_osm) else sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); if (bind(p_oct->socket, &sin, sizeof(sin)) < 0) { - osm_log(&(p_osm->log), OSM_LOG_ERROR, - "osm_console_init: ERR 4B02: Failed to bind console socket: %s\n", + OSM_LOG(&(p_osm->log), OSM_LOG_ERROR, + "ERR 4B02: Failed to bind console socket: %s\n", strerror(errno)); return; } if (listen(p_oct->socket, 1) < 0) { - osm_log(&(p_osm->log), OSM_LOG_ERROR, - "osm_console_init: ERR 4B03: Failed to listen on socket: %s\n", + OSM_LOG(&(p_osm->log), OSM_LOG_ERROR, + "ERR 4B03: Failed to listen on socket: %s\n", strerror(errno)); return; } @@ -973,9 +973,8 @@ void osm_console_init(osm_subn_opt_t * opt, osm_opensm_t * p_osm) p_oct->out = NULL; p_oct->in_fd = -1; p_oct->out_fd = -1; - osm_log(&(p_osm->log), OSM_LOG_INFO, - "osm_console_init: Console listening on port %d\n", - opt->console_port); + OSM_LOG(&(p_osm->log), OSM_LOG_INFO, + "Console listening on port %d\n", opt->console_port); #endif } } @@ -1007,8 +1006,8 @@ static int cio_open(osm_opensm_t * p_osm, int new_fd) if (n > 0 && (p_line[0] == 'y' || p_line[0] == 'Y')) { osm_console_close(p_osm); } else { - osm_log(&(p_osm->log), OSM_LOG_INFO, - "cio_open: Console connection aborted: %s 
(%s)\n", + OSM_LOG(&(p_osm->log), OSM_LOG_INFO, + "Console connection aborted: %s (%s)\n", p_oct->client_hn, p_oct->client_ip); close(new_fd); return -1; @@ -1019,8 +1018,8 @@ static int cio_open(osm_opensm_t * p_osm, int new_fd) p_oct->in = fdopen(p_oct->in_fd, "w+"); p_oct->out = p_oct->in; osm_console_prompt(p_oct->out); - osm_log(&(p_osm->log), OSM_LOG_INFO, - "cio_open: Console connection accepted: %s (%s)\n", + OSM_LOG(&(p_osm->log), OSM_LOG_INFO, + "Console connection accepted: %s (%s)\n", p_oct->client_hn, p_oct->client_ip); return (p_oct->in == NULL) ? -1 : 0; @@ -1068,8 +1067,8 @@ void osm_console(osm_opensm_t * p_osm) socklen_t len = sizeof(sin); struct hostent *hent; if ((new_fd = accept(p_oct->socket, &sin, &len)) < 0) { - osm_log(&(p_osm->log), OSM_LOG_ERROR, - "osm_console: ERR 4B04: Failed to accept console socket: %s\n", + OSM_LOG(&(p_osm->log), OSM_LOG_ERROR, + "ERR 4B04: Failed to accept console socket: %s\n", strerror(errno)); p_oct->in_fd = -1; return; @@ -1089,8 +1088,8 @@ void osm_console(osm_opensm_t * p_osm) if (is_authorized(&p_osm->console)) { cio_open(p_osm, new_fd); } else { - osm_log(&(p_osm->log), OSM_LOG_ERROR, - "osm_console: ERR 4B05: Console connection denied: %s (%s)\n", + OSM_LOG(&(p_osm->log), OSM_LOG_ERROR, + "ERR 4B05: Console connection denied: %s (%s)\n", p_oct->client_hn, p_oct->client_ip); close(new_fd); } diff --git a/opensm/opensm/osm_db_files.c b/opensm/opensm/osm_db_files.c index e4a1d72..ce61980 100644 --- a/opensm/opensm/osm_db_files.c +++ b/opensm/opensm/osm_db_files.c @@ -182,9 +182,8 @@ int osm_db_init(IN osm_db_t * const p_db, IN osm_log_t * p_log) /* make sure the directory exists */ if (lstat(p_db_imp->db_dir_name, &dstat)) { if (mkdir(p_db_imp->db_dir_name, 0755)) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_init: ERR 6101: " - " Failed to create the db directory:%s\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6101: " + "Failed to create the db directory:%s\n", p_db_imp->db_dir_name); OSM_LOG_EXIT(p_log); return 
1; @@ -237,9 +236,8 @@ osm_db_domain_t *osm_db_domain_init(IN osm_db_t * const p_db, /* make sure the file exists - or exit if not writable */ p_file = fopen(p_domain_imp->file_name, "a+"); if (!p_file) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_domain_init: ERR 6102: " - " Failed to open the db file:%s\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6102: " + "Failed to open the db file:%s\n", p_domain_imp->file_name); free(p_domain_imp); free(p_domain); @@ -290,9 +288,8 @@ int osm_db_restore(IN osm_db_domain_t * p_domain) p_file = fopen(p_domain_imp->file_name, "r"); if (!p_file) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_restore: ERR 6103: " - " Failed to open the db file:%s\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6103: " + "Failed to open the db file:%s\n", p_domain_imp->file_name); status = 1; goto Exit; @@ -326,18 +323,16 @@ int osm_db_restore(IN osm_db_domain_t * p_domain) p_first_word = strtok_r(sLine, " \t\n", &p_last); if (!p_first_word) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_restore: ERR 6104: " - " Failed to get key from line:%u : %s (file:%s)\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6104: " + "Failed to get key from line:%u : %s (file:%s)\n", line_num, sLine, p_domain_imp->file_name); status = 1; goto EndParsing; } if (strlen(p_first_word) > OSM_DB_MAX_GUID_LEN) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_restore: ERR 610A: " - " Illegal key from line:%u : %s (file:%s)\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 610A: " + "Illegal key from line:%u : %s (file:%s)\n", line_num, sLine, p_domain_imp->file_name); status = 1; @@ -362,9 +357,8 @@ int osm_db_restore(IN osm_db_domain_t * p_domain) strcpy(p_accum_val, "\0"); } } else if (sLine[0] != '\n') { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_restore: ERR 6105: " - " How did we get here? line:%u : %s (file:%s)\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6105: " + "How did we get here? 
line:%u : %s (file:%s)\n", line_num, sLine, p_domain_imp->file_name); status = 1; @@ -382,9 +376,8 @@ int osm_db_restore(IN osm_db_domain_t * p_domain) if (st_lookup(p_domain_imp->p_hash, (st_data_t) p_key, (st_data_t *) & p_prev_val)) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_restore: ERR 6106: " - " Key:%s already exists in:%s with value:%s." + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6106: " + "Key:%s already exists in:%s with value:%s." " Removing it\n", p_key, p_domain_imp->file_name, @@ -393,16 +386,14 @@ int osm_db_restore(IN osm_db_domain_t * p_domain) p_prev_val = NULL; } - osm_log(p_log, OSM_LOG_DEBUG, - "osm_db_restore: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Got key:%s value:%s\n", p_key, p_accum_val); /* check that the key is a number */ if (!strtouq(p_key, &endptr, 0) && *endptr != '\0') { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_restore: ERR 610B: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 610B: " "Key:%s is invalid\n", p_key); } else { /* store our key and value */ @@ -466,9 +457,8 @@ int osm_db_store(IN osm_db_domain_t * p_domain) /* open up the output file */ p_file = fopen(p_tmp_file_name, "w"); if (!p_file) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_store: ERR 6107: " - " Failed to open the db file:%s for writing\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6107: " + "Failed to open the db file:%s for writing\n", p_domain_imp->file_name); status = 1; goto Exit; @@ -481,17 +471,15 @@ int osm_db_store(IN osm_db_domain_t * p_domain) /* move the domain file */ status = remove(p_domain_imp->file_name); if (status) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_store: ERR 6109: " - " Failed to remove file:%s (err:%u)\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6109: " + "Failed to remove file:%s (err:%u)\n", p_domain_imp->file_name, status); } status = rename(p_tmp_file_name, p_domain_imp->file_name); if (status) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_store: ERR 6108: " - " Failed to rename the db file to:%s (err:%u)\n", + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6108: " + 
"Failed to rename the db file to:%s (err:%u)\n", p_domain_imp->file_name, status); } Exit: @@ -586,9 +574,8 @@ osm_db_update(IN osm_db_domain_t * p_domain, if (st_lookup(p_domain_imp->p_hash, (st_data_t) p_key, (st_data_t *) & p_prev_val)) { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_db_update: " - " Key:%s previously exists in:%s with value:%s\n", + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Key:%s previously exists in:%s with value:%s\n", p_key, p_domain_imp->file_name, p_prev_val); p_new_key = p_key; } else { @@ -629,9 +616,8 @@ int osm_db_delete(IN osm_db_domain_t * p_domain, IN char *const p_key) (st_data_t *) & p_key, (st_data_t *) & p_prev_val)) { if (st_lookup(p_domain_imp->p_hash, (st_data_t) p_key, (st_data_t *) & p_prev_val)) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_db_delete: " - " key:%s still exists in:%s with value:%s\n", + OSM_LOG(p_log, OSM_LOG_ERROR, + "key:%s still exists in:%s with value:%s\n", p_key, p_domain_imp->file_name, p_prev_val); res = 1; } else { @@ -640,9 +626,8 @@ int osm_db_delete(IN osm_db_domain_t * p_domain, IN char *const p_key) res = 0; } } else { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_db_update: " - " fail to find key:%s. delete failed\n", p_key); + OSM_LOG(p_log, OSM_LOG_DEBUG, + "fail to find key:%s. 
delete failed\n", p_key); res = 1; } cl_spinlock_release(&p_domain_imp->lock); diff --git a/opensm/opensm/osm_drop_mgr.c b/opensm/opensm/osm_drop_mgr.c index 785e9ec..f9b0b00 100644 --- a/opensm/opensm/osm_drop_mgr.c +++ b/opensm/opensm/osm_drop_mgr.c @@ -77,8 +77,7 @@ __osm_drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid) p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl; p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid); if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_remove_router: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Cleaned router for port guid 0x%016" PRIx64 "\n", cl_ntoh64(portguid)); osm_router_delete(&p_rtr); @@ -106,8 +105,7 @@ static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp) if (p_remote_port->discovery_count && osm_physp_get_port_state(p_remote_physp) == IB_LINK_ACTIVE) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "drop_mgr_clean_physp: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Forcing new heavy sweep. Remote " "port 0x%016" PRIx64 " port num: 0x%X " "was recognized in ACTIVE state\n", @@ -121,8 +119,8 @@ static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp) discovery count of the remote port. 
*/ if (!p_remote_physp->p_node->sw) { p_remote_port->discovery_count = 0; - osm_log(sm->p_log, OSM_LOG_DEBUG, - "drop_mgr_clean_physp: Resetting discovery count of node: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Resetting discovery count of node: " "0x%016" PRIx64 " port num:0x%X\n", cl_ntoh64(osm_node_get_node_guid (p_remote_physp->p_node)), @@ -130,8 +128,7 @@ static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp) } } - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "drop_mgr_clean_physp: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Unlinking local node 0x%016" PRIx64 ", port 0x%X" "\n\t\t\t\tand remote node 0x%016" PRIx64 ", port 0x%X\n", @@ -145,9 +142,8 @@ static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp) } - osm_log(sm->p_log, OSM_LOG_DEBUG, - "drop_mgr_clean_physp: Clearing node 0x%016" PRIx64 - " physical port number 0x%X\n", + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Clearing node 0x%016" PRIx64 " physical port number 0x%X\n", cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)), p_physp->port_num); @@ -176,16 +172,14 @@ static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port) OSM_LOG_ENTER(sm->p_log); port_guid = osm_port_get_guid(p_port); - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_remove_port: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); p_port_check = (osm_port_t *) cl_qmap_remove(&sm->p_subn->port_guid_tbl, port_guid); if (p_port_check != p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_drop_mgr_remove_port: ERR 0101: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: " "Port 0x%016" PRIx64 " not in guid table\n", cl_ntoh64(port_guid)); goto Exit; @@ -195,8 +189,7 @@ static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port) p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid); if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) { /* need to remove this item */ - osm_log(sm->p_log, 
OSM_LOG_VERBOSE, - "__osm_drop_mgr_remove_port: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Cleaned SM for port guid\n"); free(p_sm); @@ -206,8 +199,7 @@ static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port) osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_remove_port: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Clearing abandoned LID range [0x%X,0x%X]\n", min_lid_ho, max_lid_ho); @@ -260,16 +252,14 @@ static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port) status = osm_report_notice(sm->p_log, sm->p_subn, ¬ice); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_drop_mgr_remove_port: ERR 0103: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: " "Error sending trap reports (%s)\n", ib_get_err_str(status)); goto Exit; } if (osm_log_is_active(sm->p_log, OSM_LOG_INFO)) { - osm_log(sm->p_log, OSM_LOG_INFO, - "__osm_drop_mgr_remove_port: " + OSM_LOG(sm->p_log, OSM_LOG_INFO, "Removed port with GUID:0x%016" PRIx64 " LID range [0x%X,0x%X] of node:%s\n", cl_ntoh64(port_gid.unicast.interface_id), @@ -296,8 +286,7 @@ static void __osm_drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node) p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid); if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_drop_mgr_remove_switch: ERR 0102: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: " "Node 0x%016" PRIx64 " not in switch table\n", cl_ntoh64(osm_node_get_node_guid(p_node))); } else { @@ -323,8 +312,7 @@ __osm_drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node) OSM_LOG_ENTER(sm->p_log); - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_process_node: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Unreachable node 0x%016" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -356,8 +344,7 @@ __osm_drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node) (osm_node_t *) 
cl_qmap_remove(&sm->p_subn->node_guid_tbl, osm_node_get_node_guid(p_node)); if (p_node_check != p_node) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_drop_mgr_process_node: ERR 0105: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: " "Node 0x%016" PRIx64 " not in guid table\n", cl_ntoh64(osm_node_get_node_guid(p_node))); } @@ -383,8 +370,7 @@ static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node) node_guid = osm_node_get_node_guid(p_node); if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_drop_mgr_check_node: ERR 0107: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0107: " "Node 0x%016" PRIx64 " is not a switch node\n", cl_ntoh64(node_guid)); goto Exit; @@ -393,8 +379,7 @@ static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node) /* Make sure we have a switch object for this node */ if (!p_node->sw) { /* We do not have switch info for this node */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_check_node: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Node 0x%016" PRIx64 " no switch in table\n", cl_ntoh64(node_guid)); @@ -405,8 +390,7 @@ static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node) /* Make sure we have a port object for port zero */ p_physp = osm_node_get_physp_ptr(p_node, 0); if (!p_physp) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_check_node: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Node 0x%016" PRIx64 " no valid physical port 0\n", cl_ntoh64(node_guid)); @@ -419,8 +403,7 @@ static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node) p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_check_node: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Node 0x%016" PRIx64 " has no port object\n", cl_ntoh64(node_guid)); @@ -429,8 +412,7 @@ static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node) } if (p_port->discovery_count == 0) { - 
osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_drop_mgr_check_node: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Node 0x%016" PRIx64 " port has discovery count zero\n", cl_ntoh64(node_guid)); @@ -476,8 +458,7 @@ void osm_drop_mgr_process(osm_sm_t * sm) if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { node_guid = osm_node_get_node_guid(p_node); - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_drop_mgr_process: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Checking node 0x%016" PRIx64 "\n", cl_ntoh64(node_guid)); } @@ -506,8 +487,7 @@ void osm_drop_mgr_process(osm_sm_t * sm) if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { node_guid = osm_node_get_node_guid(p_node); - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_drop_mgr_process: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Checking full discovery of node 0x%016" PRIx64 "\n", cl_ntoh64(node_guid)); } @@ -530,8 +510,7 @@ void osm_drop_mgr_process(osm_sm_t * sm) if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { port_guid = osm_port_get_guid(p_port); - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_drop_mgr_process: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Checking port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); } diff --git a/opensm/opensm/osm_dump.c b/opensm/opensm/osm_dump.c index f47c992..9f638b3 100644 --- a/opensm/opensm/osm_dump.c +++ b/opensm/opensm/osm_dump.c @@ -493,7 +493,7 @@ static void print_node_report(cl_map_item_t * p_map_item, void *cxt) uint8_t node_type; if (osm_log_is_active(log, OSM_LOG_DEBUG)) - osm_log(log, OSM_LOG_DEBUG, "__osm_state_mgr_report: " + OSM_LOG(log, OSM_LOG_DEBUG, "Processing node 0x%016" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -609,8 +609,7 @@ static void dump_qmap_to_file(osm_opensm_t * p_osm, const char *file_name, file = fopen(path, "w"); if (!file) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "dump_qmap_to_file: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "cannot create file \'%s\': %s\n", path, strerror(errno)); return; diff --git a/opensm/opensm/osm_event_plugin.c 
b/opensm/opensm/osm_event_plugin.c index a49cea4..42282c6 100644 --- a/opensm/opensm/osm_event_plugin.c +++ b/opensm/opensm/osm_event_plugin.c @@ -76,7 +76,7 @@ osm_epi_plugin_t *osm_epi_construct(osm_log_t * p_log, char *plugin_name) rc->handle = dlopen(lib_name, RTLD_LAZY); if (!rc->handle) { - osm_log(p_log, OSM_LOG_ERROR, + OSM_LOG(p_log, OSM_LOG_ERROR, "Failed to open event plugin \"%s\" : \"%s\"\n", lib_name, dlerror()); goto DLOPENFAIL; @@ -86,7 +86,7 @@ osm_epi_plugin_t *osm_epi_construct(osm_log_t * p_log, char *plugin_name) (osm_event_plugin_t *) dlsym(rc->handle, OSM_EVENT_PLUGIN_IMPL_NAME); if (!rc->impl) { - osm_log(p_log, OSM_LOG_ERROR, + OSM_LOG(p_log, OSM_LOG_ERROR, "Failed to find \"%s\" symbol in \"%s\" : \"%s\"\n", OSM_EVENT_PLUGIN_IMPL_NAME, lib_name, dlerror()); goto Exit; @@ -94,7 +94,7 @@ osm_epi_plugin_t *osm_epi_construct(osm_log_t * p_log, char *plugin_name) /* Check the version to make sure this module will work with us */ if (rc->impl->interface_version != OSM_EVENT_PLUGIN_INTERFACE_VER) { - osm_log(p_log, OSM_LOG_ERROR, + OSM_LOG(p_log, OSM_LOG_ERROR, "Error opening %s: " "%s symbol is the wrong version %d != %d\n", plugin_name, @@ -105,7 +105,7 @@ osm_epi_plugin_t *osm_epi_construct(osm_log_t * p_log, char *plugin_name) } if (!rc->impl->construct) { - osm_log(p_log, OSM_LOG_ERROR, + OSM_LOG(p_log, OSM_LOG_ERROR, "%s symbol has no construct function\n", OSM_EVENT_PLUGIN_IMPL_NAME); goto Exit; diff --git a/opensm/opensm/osm_helper.c b/opensm/opensm/osm_helper.c index b4718cf..aa0a9ea 100644 --- a/opensm/opensm/osm_helper.c +++ b/opensm/opensm/osm_helper.c @@ -1942,7 +1942,7 @@ osm_dump_sa_mad(IN osm_log_t * const p_log, /* make sure the mad is valid */ if (p_mad == NULL) { - osm_log(p_log, log_level, "NULL MAD POINTER\n"); + OSM_LOG(p_log, log_level, "NULL MAD POINTER\n"); return; } diff --git a/opensm/opensm/osm_inform.c b/opensm/opensm/osm_inform.c index 655cf5c..9409a04 100644 --- a/opensm/opensm/osm_inform.c +++ 
b/opensm/opensm/osm_inform.c @@ -122,8 +122,7 @@ __match_inf_rec(IN const cl_list_item_t * const p_list_item, IN void *context) if (memcmp(&p_infr->report_addr, &p_infr_rec->report_addr, sizeof(p_infr_rec->report_addr))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " "Differ by Address\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by Address\n"); goto Exit; } @@ -136,8 +135,7 @@ __match_inf_rec(IN const cl_list_item_t * const p_list_item, IN void *context) if (memcmp(&p_infr->inform_record.inform_info.gid, &p_infr_rec->inform_record.inform_info.gid, sizeof(p_infr->inform_record.inform_info.gid))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.gid\n"); goto Exit; } @@ -146,8 +144,7 @@ __match_inf_rec(IN const cl_list_item_t * const p_list_item, IN void *context) p_infr_rec->inform_record.inform_info.lid_range_begin) || (p_infr->inform_record.inform_info.lid_range_end != p_infr_rec->inform_record.inform_info.lid_range_end)) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.LIDRange\n"); goto Exit; } @@ -155,15 +152,15 @@ __match_inf_rec(IN const cl_list_item_t * const p_list_item, IN void *context) if (p_infr->inform_record.inform_info.trap_type != p_infr_rec->inform_record.inform_info.trap_type) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " "Differ by InformInfo.TrapType\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Differ by InformInfo.TrapType\n"); goto Exit; } if (p_infr->inform_record.inform_info.is_generic != p_infr_rec->inform_record.inform_info.is_generic) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " "Differ by InformInfo.IsGeneric\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Differ by InformInfo.IsGeneric\n"); goto Exit; } @@ -171,58 +168,50 @@ __match_inf_rec(IN const cl_list_item_t * const p_list_item, IN void *context) if (p_infr->inform_record.inform_info.g_or_v.generic.trap_num != 
p_infr_rec->inform_record.inform_info.g_or_v.generic. trap_num) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Generic.TrapNumber\n"); else if (p_infr->inform_record.inform_info.g_or_v.generic. qpn_resp_time_val != p_infr_rec->inform_record.inform_info.g_or_v.generic. qpn_resp_time_val) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Generic.QPNRespTimeVal\n"); else if (p_infr->inform_record.inform_info.g_or_v.generic. node_type_msb != p_infr_rec->inform_record.inform_info.g_or_v.generic. node_type_msb) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Generic.NodeTypeMSB\n"); else if (p_infr->inform_record.inform_info.g_or_v.generic. node_type_lsb != p_infr_rec->inform_record.inform_info.g_or_v.generic. node_type_lsb) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Generic.NodeTypeLSB\n"); else status = CL_SUCCESS; } else { if (p_infr->inform_record.inform_info.g_or_v.vend.dev_id != p_infr_rec->inform_record.inform_info.g_or_v.vend.dev_id) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Vendor.DeviceID\n"); else if (p_infr->inform_record.inform_info.g_or_v.vend. qpn_resp_time_val != p_infr_rec->inform_record.inform_info.g_or_v.vend. qpn_resp_time_val) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Vendor.QPNRespTimeVal\n"); else if (p_infr->inform_record.inform_info.g_or_v.vend. vendor_id_msb != p_infr_rec->inform_record.inform_info.g_or_v.vend. vendor_id_msb) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Vendor.VendorIdMSB\n"); else if (p_infr->inform_record.inform_info.g_or_v.vend. vendor_id_lsb != p_infr_rec->inform_record.inform_info.g_or_v.vend. 
vendor_id_lsb) - osm_log(p_log, OSM_LOG_DEBUG, - "__match_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Differ by InformInfo.Vendor.VendorIdLSB\n"); else status = CL_SUCCESS; @@ -245,13 +234,10 @@ osm_infr_t *osm_infr_get_by_rec(IN osm_subn_t const *p_subn, dump_all_informs(p_subn, p_log); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_infr_get_by_rec: " "Looking for Inform Record\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Looking for Inform Record\n"); osm_dump_inform_info(p_log, &(p_infr_rec->inform_record.inform_info), OSM_LOG_DEBUG); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_infr_get_by_rec: " - "InformInfo list size %d\n", + OSM_LOG(p_log, OSM_LOG_DEBUG, "InformInfo list size %d\n", cl_qlist_count(&p_subn->sa_infr_list)); p_list_item = cl_qlist_find_from_head(&p_subn->sa_infr_list, @@ -272,11 +258,9 @@ osm_infr_insert_to_db(IN osm_subn_t * p_subn, { OSM_LOG_ENTER(p_log); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_infr_insert_to_db: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Inserting new InformInfo Record into Database\n"); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_infr_insert_to_db: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Dump before insertion (size %d)\n", cl_qlist_count(&p_subn->sa_infr_list)); dump_all_informs(p_subn, p_log); @@ -289,8 +273,7 @@ osm_infr_insert_to_db(IN osm_subn_t * p_subn, cl_qlist_insert_head(&p_subn->sa_infr_list, &p_infr->list_item); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_infr_insert_to_db: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Dump after insertion (size %d)\n", cl_qlist_count(&p_subn->sa_infr_list)); dump_all_informs(p_subn, p_log); @@ -305,8 +288,7 @@ osm_infr_remove_from_db(IN osm_subn_t * p_subn, { OSM_LOG_ENTER(p_log); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_infr_remove_from_db: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Removing InformInfo Subscribing GID:0x%016" PRIx64 " : 0x%016" PRIx64 " Enum:0x%X from Database\n", cl_ntoh64(p_infr->inform_record.subscriber_gid.unicast.prefix), @@ -345,8 +327,7 @@ static ib_api_status_t __osm_send_report(IN osm_infr_t * p_infr_rec, /* the 
info /* HACK: who switches or uses the src and dest GIDs in the grh_info ?? */ /* it is better to use LIDs since the GIDs might not be there for SMI traps */ - osm_log(p_log, OSM_LOG_DEBUG, - "__osm_send_report: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Forwarding Notice Event from LID:0x%X" " to InformInfo LID: 0x%X TID:0x%X\n", cl_ntoh16(p_ntc->issuer_lid), @@ -361,8 +342,7 @@ static ib_api_status_t __osm_send_report(IN osm_infr_t * p_infr_rec, /* the info p_report_madw->resp_expected = TRUE; if (!p_report_madw) { - osm_log(p_log, OSM_LOG_ERROR, - "__osm_send_report: ERR 0203: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0203: " "osm_mad_pool_get failed\n"); status = IB_ERROR; goto Exit; @@ -388,8 +368,7 @@ static ib_api_status_t __osm_send_report(IN osm_infr_t * p_infr_rec, /* the info status = osm_sa_vendor_send(p_report_madw->h_bind, p_report_madw, TRUE, p_infr_rec->sa->p_subn); if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "__osm_send_report: ERR 0204: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0204: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; @@ -442,9 +421,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, /* match by GID */ if (memcmp (&(p_ii->gid), &(p_ntc->issuer_gid), sizeof(ib_gid_t))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " - "Mismatch by GID\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by GID\n"); goto Exit; } } else { @@ -456,8 +433,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, cl_hton16(p_ntc->issuer_lid)) || (cl_hton16(p_ntc->issuer_lid) > cl_hton16(p_ii->lid_range_end))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by LID Range.
Needed: 0x%X <= 0x%X <= 0x%X\n", cl_hton16(p_ii->lid_range_begin), cl_hton16(p_ntc->issuer_lid), @@ -471,17 +447,14 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, /* IsGeneric IsGeneric is compulsory and must match the trap */ if ((p_ii->is_generic && !ib_notice_is_generic(p_ntc)) || (!p_ii->is_generic && ib_notice_is_generic(p_ntc))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " - "Mismatch by Generic/Vendor\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Generic/Vendor\n"); goto Exit; } /* Type Type if not 0xFFFF must match */ if ((p_ii->trap_type != 0xFFFF) && (cl_ntoh16(p_ii->trap_type) != ib_notice_get_type(p_ntc))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " "Mismatch by Type\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Type\n"); goto Exit; } @@ -491,9 +464,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, if ((p_ii->g_or_v.generic.trap_num != 0xFFFF) && (p_ii->g_or_v.generic.trap_num != p_ntc->g_or_v.generic.trap_num)) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " - "Mismatch by Trap Num\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Trap Num\n"); goto Exit; } @@ -501,8 +472,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, if ((cl_ntoh32(ib_inform_info_get_prod_type(p_ii)) != 0xFFFFFF) && (ib_inform_info_get_prod_type(p_ii) != ib_notice_get_prod_type(p_ntc))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Node Type: II=0x%06X (%s) Trap=0x%06X (%s)\n", cl_ntoh32(ib_inform_info_get_prod_type(p_ii)), ib_get_producer_type_str @@ -517,9 +487,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, /* DeviceId DeviceID if not 0xFFFF must match */ if ((p_ii->g_or_v.vend.dev_id != 0xFFFF) && (p_ii->g_or_v.vend.dev_id != p_ntc->g_or_v.vend.dev_id)) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " - "Mismatch by Dev Id\n"); + OSM_LOG(p_log, 
OSM_LOG_DEBUG, "Mismatch by Dev Id\n"); goto Exit; } @@ -527,9 +495,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, if ((ib_inform_info_get_vend_id(p_ii) != CL_HTON32(0xFFFFFF)) && (ib_inform_info_get_vend_id(p_ii) != ib_notice_get_vend_id(p_ntc))) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " - "Mismatch by Vendor ID\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Vendor ID\n"); goto Exit; } } @@ -554,8 +520,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, p_src_port = osm_get_port_by_guid(p_subn, source_gid.unicast.interface_id); if (!p_src_port) { - osm_log(p_log, OSM_LOG_INFO, - "__match_notice_to_inf_rec: " + OSM_LOG(p_log, OSM_LOG_INFO, "Cannot find source port with GUID:0x%016" PRIx64 "\n", cl_ntoh64(source_gid.unicast.interface_id)); goto Exit; @@ -565,22 +530,19 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, cl_ptr_vector_get(&p_subn->port_lid_tbl, cl_ntoh16(p_infr_rec->report_addr.dest_lid)); if (!p_dest_port) { - osm_log(p_log, OSM_LOG_INFO, - "__match_notice_to_inf_rec: " + OSM_LOG(p_log, OSM_LOG_INFO, "Cannot find destination port with LID:0x%04x\n", cl_ntoh16(p_infr_rec->report_addr.dest_lid)); goto Exit; } if (osm_port_share_pkey(p_log, p_src_port, p_dest_port) == FALSE) { - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " "Mismatch by Pkey\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "Mismatch by Pkey\n"); /* According to o13-17.1.2 - If this informInfo does not have lid_range_begin of 0xFFFF, then this informInfo request should be removed from database */ if (p_ii->lid_range_begin != 0xFFFF) { - osm_log(p_log, OSM_LOG_VERBOSE, - "__match_notice_to_inf_rec: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Pkey mismatch on lid_range_begin != 0xFFFF. 
" "Need to remove this informInfo from db\n"); /* add the informInfo record to the remove_infr list */ @@ -590,8 +552,7 @@ __match_notice_to_inf_rec(IN cl_list_item_t * const p_list_item, } /* send the report to the address provided in the inform record */ - osm_log(p_log, OSM_LOG_DEBUG, - "__match_notice_to_inf_rec: " "MATCH! Sending Report...\n"); + OSM_LOG(p_log, OSM_LOG_DEBUG, "MATCH! Sending Report...\n"); __osm_send_report(p_infr_rec, p_ntc); status = CL_SUCCESS; @@ -624,16 +585,14 @@ osm_report_notice(IN osm_log_t * const p_log, * the osm_infr_init call is performed. */ if (p_subn->sa_infr_list.state != CL_INITIALIZED) { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_report_notice: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Ignoring Notice Reports since Inform List is not initialized yet!\n"); return (IB_ERROR); } /* an official Event information log */ if (ib_notice_is_generic(p_ntc)) { - osm_log(p_log, OSM_LOG_INFO, - "osm_report_notice: " + OSM_LOG(p_log, OSM_LOG_INFO, "Reporting Generic Notice type:%u num:%u" " from LID:0x%04X GID:0x%016" PRIx64 ",0x%016" PRIx64 "\n", @@ -644,8 +603,7 @@ osm_report_notice(IN osm_log_t * const p_log, cl_ntoh64(p_ntc->issuer_gid.unicast.interface_id) ); } else { - osm_log(p_log, OSM_LOG_INFO, - "osm_report_notice: " + OSM_LOG(p_log, OSM_LOG_INFO, "Reporting Vendor Notice type:%u vend:%u dev:%u" " from LID:0x%04X GID:0x%016" PRIx64 ",0x%016" PRIx64 "\n", diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c index 8da89a7..ab23929 100644 --- a/opensm/opensm/osm_lid_mgr.c +++ b/opensm/opensm/osm_lid_mgr.c @@ -159,8 +159,7 @@ static void __osm_lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) cl_qlist_init(&guids); if (osm_db_guid2lid_guids(p_mgr->p_g2l, &guids)) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_lid_mgr_validate_db: ERR 0310: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0310: " "could not get guid list\n"); goto Exit; } @@ -169,8 +168,7 @@ static void __osm_lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) while 
((cl_list_item_t *) p_item != cl_qlist_end(&guids)) { if (osm_db_guid2lid_get (p_mgr->p_g2l, p_item->guid, &min_lid, &max_lid)) - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_lid_mgr_validate_db: ERR 0311: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0311: " "could not get lid for guid:0x%016" PRIx64 "\n", p_item->guid); else { @@ -179,8 +177,7 @@ static void __osm_lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) if ((min_lid > max_lid) || (min_lid == 0) || (p_item->guid == 0) || (max_lid > p_mgr->p_subn->max_unicast_lid_ho)) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_lid_mgr_validate_db: ERR 0312: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0312: " "Illegal LID range [0x%x:0x%x] for guid:0x%016" PRIx64 "\n", min_lid, max_lid, p_item->guid); @@ -189,8 +186,7 @@ static void __osm_lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) && ((min_lid & lmc_mask) != min_lid)) { /* check that if the lids define a range that is valid for the current LMC mask */ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_lid_mgr_validate_db: ERR 0313: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0313: " "LID range [0x%x:0x%x] for guid:0x%016" PRIx64 " is not aligned according to mask:0x%04x\n", @@ -205,9 +201,8 @@ static void __osm_lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) && (cl_ptr_vector_get (&p_mgr->used_lids, lid))) { - osm_log(p_mgr->p_log, - OSM_LOG_ERROR, - "__osm_lid_mgr_validate_db: ERR 0314: " + OSM_LOG(p_mgr->p_log, + OSM_LOG_ERROR, "ERR 0314: " "0x%04x for guid:0x%016" PRIx64 " was previously used\n", @@ -220,8 +215,8 @@ static void __osm_lid_mgr_validate_db(IN osm_lid_mgr_t * p_mgr) if (!lids_ok) { if (osm_db_guid2lid_delete (p_mgr->p_g2l, p_item->guid)) - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_lid_mgr_validate_db: ERR 0315: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, + "ERR 0315: " "failed to delete entry for guid:0x%016" PRIx64 "\n", p_item->guid); } else { @@ -258,8 +253,7 @@ osm_lid_mgr_init(IN osm_lid_mgr_t * const p_mgr, IN osm_sm_t *sm) /* we initialize and 
restore the db domain of guid to lid map */ p_mgr->p_g2l = osm_db_domain_init(p_mgr->p_db, "/guid2lid"); if (!p_mgr->p_g2l) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_lid_mgr_init: ERR 0316: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0316: " "Error initializing Guid-to-Lid persistent database\n"); status = IB_ERROR; goto Exit; @@ -277,8 +271,7 @@ osm_lid_mgr_init(IN osm_lid_mgr_t * const p_mgr, IN osm_sm_t *sm) status = IB_ERROR; goto Exit; } else - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_lid_mgr_init: ERR 0317: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0317: " "Error restoring Guid-to-Lid persistent database\n"); } @@ -340,8 +333,7 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) need to honor this file. */ if (p_mgr->p_subn->coming_out_of_standby == TRUE) { if (p_mgr->p_subn->opt.honor_guid2lid_file == FALSE) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Ignore guid2lid file when coming out of standby\n"); osm_db_clear(p_mgr->p_g2l); for (lid = 0; @@ -349,13 +341,11 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) lid++) cl_ptr_vector_set(p_persistent_vec, lid, NULL); } else { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Honor current guid2lid file when coming out of standby\n"); osm_db_clear(p_mgr->p_g2l); if (osm_db_restore(p_mgr->p_g2l)) - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_lid_mgr_init_sweep: ERR 0306: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0306: " "Error restoring Guid-to-Lid persistent database. 
Ignoring it\n"); } } @@ -376,8 +366,7 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) huge empty range */ if ((p_mgr->p_subn->first_time_master_sweep == TRUE) && (p_mgr->p_subn->opt.reassign_lids == TRUE)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Skipping all lids as we are reassigning them\n"); p_range = (osm_lid_mgr_range_t *) malloc(sizeof(osm_lid_mgr_range_t)); @@ -411,8 +400,7 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) (((db_min_lid & lmc_mask) != db_min_lid) || (db_max_lid - db_min_lid + 1 < num_lids))) { /* Not aligned, or not wide enough, then remove the entry */ - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Cleaning persistent entry for guid:0x%016" PRIx64 " illegal range:[0x%x:0x%x]\n", cl_ntoh64(osm_port_get_guid(p_port)), @@ -467,8 +455,7 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) /* first check to see if the lid is used by a persistent assignment */ if ((lid <= max_persistent_lid) && cl_ptr_vector_get(p_persistent_vec, lid)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%04x is not free as its mapped by the persistent db\n", lid); is_free = FALSE; @@ -494,8 +481,7 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) (p_port)), &db_min_lid, &db_max_lid)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%04x is free as it was discovered " "but mapped by the persistent db to [0x%04x:0x%04x]\n", lid, db_min_lid, db_max_lid); @@ -523,9 +509,8 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) && ((disc_min_lid & lmc_mask) != disc_min_lid)) { /* The lid cannot be used */ - osm_log(p_mgr->p_log, + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " "0x%04x is free 
as it was discovered " "but not aligned\n", lid); @@ -541,10 +526,9 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) cl_ptr_vector_get (p_persistent_vec, req_lid)) { - osm_log(p_mgr-> + OSM_LOG(p_mgr-> p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " "0x%04x is free as it was discovered " "but mapped\n", lid); @@ -581,8 +565,7 @@ static int __osm_lid_mgr_init_sweep(IN osm_lid_mgr_t * const p_mgr) if (p_range) { cl_qlist_insert_tail(&p_mgr->free_ranges, &p_range->item); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "new free lid range [0x%x:0x%x]\n", p_range->min_lid, p_range->max_lid); p_range = NULL; @@ -607,8 +590,7 @@ AfterScanningLids: } p_range->max_lid = p_mgr->p_subn->max_unicast_lid_ho - 1; cl_qlist_insert_tail(&p_mgr->free_ranges, &p_range->item); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_init_sweep: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "final free lid range [0x%x:0x%x]\n", p_range->min_lid, p_range->max_lid); @@ -665,8 +647,7 @@ __osm_lid_mgr_find_free_lid_range(IN osm_lid_mgr_t * const p_mgr, uint8_t lmc_num_lids; uint16_t lmc_mask; - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_find_free_lid_range: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "LMC = %u, number LIDs = %u\n", p_mgr->p_subn->opt.lmc, num_lids); @@ -715,8 +696,7 @@ __osm_lid_mgr_find_free_lid_range(IN osm_lid_mgr_t * const p_mgr, */ *p_min_lid = *p_max_lid = 0; /* if we run out of lids, give an error and abort! 
*/ - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_lid_mgr_find_free_lid_range: ERR 0307: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0307: " "OPENSM RAN OUT OF LIDS!!!\n"); CL_ASSERT(0); } @@ -783,15 +763,13 @@ __osm_lid_mgr_get_port_lid(IN osm_lid_mgr_t * const p_mgr, *p_min_lid = min_lid; *p_max_lid = min_lid + num_lids - 1; if (min_lid == cl_ntoh16(osm_port_get_base_lid(p_port))) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_get_port_lid: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%016" PRIx64 " matches its known lid:0x%04x\n", guid, min_lid); goto Exit; } else { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_get_port_lid: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%016" PRIx64 " with lid:0x%04x does not match its known lid:0x%04x\n", guid, cl_ntoh16(osm_port_get_base_lid(p_port)), @@ -803,8 +781,7 @@ __osm_lid_mgr_get_port_lid(IN osm_lid_mgr_t * const p_mgr, goto Exit; } } else - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_get_port_lid: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%016" PRIx64 " has no persistent lid assigned\n", guid); @@ -824,21 +801,18 @@ __osm_lid_mgr_get_port_lid(IN osm_lid_mgr_t * const p_mgr, (p_mgr, min_lid, num_lids)) { *p_min_lid = min_lid; *p_max_lid = min_lid + num_lids - 1; - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_get_port_lid: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%016" PRIx64 " lid range:[0x%x-0x%x] is free\n", guid, *p_min_lid, *p_max_lid); goto NewLidSet; } else - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_get_port_lid: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%016" PRIx64 " existing lid range:[0x%x:0x%x] is not free\n", guid, min_lid, min_lid + num_lids - 1); } else - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_get_port_lid: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "0x%016" PRIx64 " existing lid range:[0x%x:0x%x] is not lmc aligned\n", guid, min_lid, min_lid + num_lids - 1); @@ -850,10 +824,9 @@ __osm_lid_mgr_get_port_lid(IN osm_lid_mgr_t * const 
p_mgr, /* find an empty space */ __osm_lid_mgr_find_free_lid_range(p_mgr, num_lids, p_min_lid, p_max_lid); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_get_port_lid: " "0x%016" PRIx64 - " assigned a new lid range:[0x%x-0x%x]\n", guid, *p_min_lid, - *p_max_lid); + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "0x%016" PRIx64 " assigned a new lid range:[0x%x-0x%x]\n", + guid, *p_min_lid, *p_max_lid); lid_changed = 1; NewLidSet: @@ -926,8 +899,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, (during NO_CHANGE state in link mgr). */ if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_set_physp_pi: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Skipping switch port %u, GUID 0x%016" PRIx64 "\n", port_num, cl_ntoh64(osm_physp_get_port_guid(p_physp))); @@ -1059,8 +1031,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, if ((mtu != ib_port_info_get_neighbor_mtu(p_old_pi)) || (op_vls != ib_port_info_get_op_vls(p_old_pi))) { if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_set_physp_pi: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Sending Link Down to GUID 0x%016" PRIx64 "port %d due to op_vls or mtu change. 
MTU:%u,%u VL_CAP:%u,%u\n", @@ -1095,8 +1066,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, ib_port_info_get_neighbor_mtu(p_old_pi)) send_set = TRUE; - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_set_physp_pi: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Updating neighbor_mtu on switch GUID 0x%016" PRIx64 " port 0 to:%u\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), @@ -1180,8 +1150,7 @@ __osm_lid_mgr_process_our_sm_node(IN osm_lid_mgr_t * const p_mgr) p_port = osm_get_port_by_guid(p_mgr->p_subn, p_mgr->p_subn->sm_port_guid); if (!p_port) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_lid_mgr_process_our_sm_node: ERR 0308: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 0308: " "Can't acquire SM's port object, GUID 0x%016" PRIx64 "\n", cl_ntoh64(p_mgr->p_subn->sm_port_guid)); res = FALSE; @@ -1197,8 +1166,7 @@ __osm_lid_mgr_process_our_sm_node(IN osm_lid_mgr_t * const p_mgr) LMC masking by hardware. */ __osm_lid_mgr_get_port_lid(p_mgr, p_port, &min_lid_ho, &max_lid_ho); - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_lid_mgr_process_our_sm_node: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Current base LID is 0x%X\n", min_lid_ho); /* Update subnet object. 
@@ -1207,8 +1175,7 @@ __osm_lid_mgr_process_our_sm_node(IN osm_lid_mgr_t * const p_mgr) p_mgr->p_subn->sm_base_lid = cl_hton16(min_lid_ho); if (osm_log_is_active(p_mgr->p_log, OSM_LOG_VERBOSE)) - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, - "__osm_lid_mgr_process_our_sm_node: " + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Assigning SM's port 0x%016" PRIx64 "\n\t\t\t\tto LID range [0x%X,0x%X]\n", cl_ntoh64(osm_port_get_guid(p_port)), @@ -1242,8 +1209,7 @@ osm_signal_t osm_lid_mgr_process_sm(IN osm_lid_mgr_t * const p_mgr) __osm_lid_mgr_init_sweep(p_mgr); if (p_mgr->p_subn->opt.pfn_ui_pre_lid_assign) { - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, - "osm_lid_mgr_process_sm: " + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Invoking UI function pfn_ui_pre_lid_assign\n"); p_mgr->p_subn->opt.pfn_ui_pre_lid_assign(p_mgr->p_subn->opt. ui_pre_lid_assign_ctx); @@ -1310,8 +1276,7 @@ osm_signal_t osm_lid_mgr_process_subnet(IN osm_lid_mgr_t * const p_mgr) we will not add it to any of these lists. */ if (port_guid == p_mgr->p_subn->sm_port_guid) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "osm_lid_mgr_process_subnet: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Skipping our own port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); continue; @@ -1327,8 +1292,7 @@ osm_signal_t osm_lid_mgr_process_subnet(IN osm_lid_mgr_t * const p_mgr) /* we can call the function to update the port info as it known to look for any field change and will only send an updated if required */ - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, - "osm_lid_mgr_process_subnet: " + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Assigned port 0x%016" PRIx64 ", LID [0x%X,0x%X]\n", cl_ntoh64(port_guid), min_lid_ho, max_lid_ho); diff --git a/opensm/opensm/osm_lin_fwd_rcv.c b/opensm/opensm/osm_lin_fwd_rcv.c index 09edd1a..18be6a3 100644 --- a/opensm/opensm/osm_lin_fwd_rcv.c +++ b/opensm/opensm/osm_lin_fwd_rcv.c @@ -88,15 +88,13 @@ void osm_lft_rcv_process(IN void *context, IN void *data) p_sw = osm_get_switch_by_guid(sm->p_subn, node_guid); if (!p_sw) { - 
osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_lft_rcv_process: ERR 0401: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0401: " "LFT received for nonexistent node " "0x%" PRIx64 "\n", cl_ntoh64(node_guid)); } else { status = osm_switch_set_ft_block(p_sw, p_block, block_num); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_lft_rcv_process: ERR 0402: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0402: " "Setting forwarding table block failed (%s)" "\n\t\t\t\tSwitch 0x%" PRIx64 "\n", ib_get_err_str(status), cl_ntoh64(node_guid)); diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index 8ca5786..e4ba93e 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -90,8 +90,7 @@ __osm_link_mgr_set_physp_pi(osm_sm_t * sm, For base port 0 the following parameters are not valid (p822, table 145). */ if (!p_node->sw) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_link_mgr_set_physp_pi: ERR 4201: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: " "Cannot find switch by guid: 0x%" PRIx64 "\n", cl_ntoh64(p_node->node_info.node_guid)); goto Exit; @@ -102,8 +101,7 @@ __osm_link_mgr_set_physp_pi(osm_sm_t * sm, /* This means the switch doesn't support enhanced port 0. Can skip it. 
*/ if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_link_mgr_set_physp_pi: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Skipping port 0, GUID 0x%016" PRIx64 "\n", cl_ntoh64(osm_physp_get_port_guid @@ -355,8 +353,7 @@ __osm_link_mgr_process_node(osm_sm_t * sm, OSM_LOG_ENTER(sm->p_log); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_link_mgr_process_node: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Node 0x%" PRIx64 " going to %s\n", cl_ntoh64(osm_node_get_node_guid(p_node)), ib_get_port_state_str(link_state)); @@ -388,8 +385,7 @@ __osm_link_mgr_process_node(osm_sm_t * sm, */ if (link_state != IB_LINK_NO_CHANGE && link_state <= current_state) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_link_mgr_process_node: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Physical port 0x%X already %s. Skipping\n", p_physp->port_num, ib_get_port_state_str(current_state)); diff --git a/opensm/opensm/osm_mad_pool.c b/opensm/opensm/osm_mad_pool.c index 62b998a..9b3812f 100644 --- a/opensm/opensm/osm_mad_pool.c +++ b/opensm/opensm/osm_mad_pool.c @@ -105,8 +105,7 @@ osm_madw_t *osm_mad_pool_get(IN osm_mad_pool_t * const p_pool, */ p_madw = malloc(sizeof(*p_madw)); if (p_madw == NULL) { - osm_log(p_pool->p_log, OSM_LOG_ERROR, - "osm_mad_pool_get: ERR 0703: " + OSM_LOG(p_pool->p_log, OSM_LOG_ERROR, "ERR 0703: " "Unable to acquire MAD wrapper object\n"); goto Exit; } @@ -118,8 +117,7 @@ osm_madw_t *osm_mad_pool_get(IN osm_mad_pool_t * const p_pool, */ p_mad = osm_vendor_get(h_bind, total_size, &p_madw->vend_wrap); if (p_mad == NULL) { - osm_log(p_pool->p_log, OSM_LOG_ERROR, - "osm_mad_pool_get: ERR 0704: " + OSM_LOG(p_pool->p_log, OSM_LOG_ERROR, "ERR 0704: " "Unable to acquire wire MAD\n"); /* Don't leak wrappers! 
*/ @@ -134,9 +132,9 @@ osm_madw_t *osm_mad_pool_get(IN osm_mad_pool_t * const p_pool, */ osm_madw_set_mad(p_madw, p_mad); - osm_log(p_pool->p_log, OSM_LOG_DEBUG, - "osm_mad_pool_get: Acquired p_madw = %p, p_mad = %p, " - "size = %u\n", p_madw, p_madw->p_mad, total_size); + OSM_LOG(p_pool->p_log, OSM_LOG_DEBUG, + "Acquired p_madw = %p, p_mad = %p, size = %u\n", + p_madw, p_madw->p_mad, total_size); Exit: OSM_LOG_EXIT(p_pool->p_log); @@ -164,8 +162,7 @@ osm_madw_t *osm_mad_pool_get_wrapper(IN osm_mad_pool_t * const p_pool, */ p_madw = malloc(sizeof(*p_madw)); if (p_madw == NULL) { - osm_log(p_pool->p_log, OSM_LOG_ERROR, - "osm_mad_pool_get_wrapper: ERR 0705: " + OSM_LOG(p_pool->p_log, OSM_LOG_ERROR, "ERR 0705: " "Unable to acquire MAD wrapper object\n"); goto Exit; } @@ -177,9 +174,9 @@ osm_madw_t *osm_mad_pool_get_wrapper(IN osm_mad_pool_t * const p_pool, osm_madw_init(p_madw, h_bind, total_size, p_mad_addr); osm_madw_set_mad(p_madw, p_mad); - osm_log(p_pool->p_log, OSM_LOG_DEBUG, - "osm_mad_pool_get_wrapper: Acquired p_madw = %p, p_mad = %p " - "size = %u\n", p_madw, p_madw->p_mad, total_size); + OSM_LOG(p_pool->p_log, OSM_LOG_DEBUG, + "Acquired p_madw = %p, p_mad = %p size = %u\n", + p_madw, p_madw->p_mad, total_size); Exit: OSM_LOG_EXIT(p_pool->p_log); @@ -198,9 +195,7 @@ osm_madw_t *osm_mad_pool_get_wrapper_raw(IN osm_mad_pool_t * const p_pool) if (!p_madw) return NULL; - osm_log(p_pool->p_log, OSM_LOG_DEBUG, - "osm_mad_pool_get_wrapper_raw: " - "Getting p_madw = %p\n", p_madw); + OSM_LOG(p_pool->p_log, OSM_LOG_DEBUG, "Getting p_madw = %p\n", p_madw); osm_madw_init(p_madw, 0, 0, 0); osm_madw_set_mad(p_madw, 0); @@ -219,9 +214,8 @@ osm_mad_pool_put(IN osm_mad_pool_t * const p_pool, IN osm_madw_t * const p_madw) CL_ASSERT(p_madw); - osm_log(p_pool->p_log, OSM_LOG_DEBUG, - "osm_mad_pool_put: Releasing p_madw = %p, p_mad = %p\n", - p_madw, p_madw->p_mad); + OSM_LOG(p_pool->p_log, OSM_LOG_DEBUG, + "Releasing p_madw = %p, p_mad = %p\n", p_madw, p_madw->p_mad); /* 
First, return the wire mad to the pool diff --git a/opensm/opensm/osm_mcast_fwd_rcv.c b/opensm/opensm/osm_mcast_fwd_rcv.c index 2c97945..8c75254 100644 --- a/opensm/opensm/osm_mcast_fwd_rcv.c +++ b/opensm/opensm/osm_mcast_fwd_rcv.c @@ -95,8 +95,7 @@ void osm_mft_rcv_process(IN void *context, IN void *data) node_guid = p_mft_context->node_guid; if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mft_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Setting MFT block %u, position %u, " "Switch 0x%016" PRIx64 ", TID 0x%" PRIx64 "\n", block_num, position, cl_ntoh64(node_guid), @@ -107,8 +106,7 @@ void osm_mft_rcv_process(IN void *context, IN void *data) p_sw = osm_get_switch_by_guid(sm->p_subn, node_guid); if (!p_sw) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mft_rcv_process: ERR 0801: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0801: " "MFT received for nonexistent node " "0x%016" PRIx64 "\n", cl_ntoh64(node_guid)); } else { @@ -116,8 +114,7 @@ void osm_mft_rcv_process(IN void *context, IN void *data) (uint16_t) block_num, position); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mft_rcv_process: ERR 0802: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0802: " "Setting MFT block failed (%s)" "\n\t\t\t\tSwitch 0x%016" PRIx64 ", block %u, position %u\n", diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c index 1a62ce5..fe4cfbf 100644 --- a/opensm/opensm/osm_mcast_mgr.c +++ b/opensm/opensm/osm_mcast_mgr.c @@ -167,8 +167,7 @@ osm_mcast_mgr_compute_avg_hops(osm_sm_t * sm, port_gid)); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_compute_avg_hops: ERR 0A18: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A18: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(ib_gid_get_guid (&p_mcm_port->port_gid))); @@ -227,8 +226,7 @@ osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, port_gid)); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_compute_max_hops: 
ERR 0A1A: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A1A: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(ib_gid_get_guid (&p_mcm_port->port_gid))); @@ -293,8 +291,7 @@ static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_sm_t * sm, if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { sw_guid_ho = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_mcast_mgr_find_optimal_switch: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Switch 0x%016" PRIx64 ", hops = %f\n", sw_guid_ho, hops); } @@ -310,13 +307,11 @@ static osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_sm_t * sm, sw_guid_ho = cl_ntoh64(osm_node_get_node_guid (p_best_sw->p_node)); - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_mcast_mgr_find_optimal_switch: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Best switch is 0x%" PRIx64 ", hops = %f\n", sw_guid_ho, best_hops); } else { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_mcast_mgr_find_optimal_switch: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "No multicast capable switches detected\n"); } } @@ -394,8 +389,7 @@ __osm_mcast_mgr_set_tbl(osm_sm_t * sm, IN osm_switch_t * const p_sw) while (osm_mcast_tbl_get_block(p_tbl, block_num, (uint8_t) position, block)) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_mcast_mgr_set_tbl: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Writing MFT block 0x%X\n", block_id_ho); } @@ -407,8 +401,7 @@ __osm_mcast_mgr_set_tbl(osm_sm_t * sm, IN osm_switch_t * const p_sw) CL_DISP_MSGID_NONE, &mad_context); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_set_tbl: ERR 0A02: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A02: " "Sending multicast fwd. tbl. 
block failed (%s)\n", ib_get_err_str(status)); } @@ -473,8 +466,7 @@ __osm_mcast_mgr_subdivide(osm_sm_t * sm, */ uint64_t node_guid_ho = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_subdivide: ERR 0A03: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A03: " "Error routing MLID 0x%X through switch 0x%" PRIx64 "\n" "\t\t\t\tNo multicast paths from this switch for port " @@ -489,8 +481,7 @@ __osm_mcast_mgr_subdivide(osm_sm_t * sm, if (port_num > array_size) { uint64_t node_guid_ho = cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_subdivide: ERR 0A04: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A04: " "Error routing MLID 0x%X through switch 0x%" PRIx64 "\n" "\t\t\t\tNo multicast paths from this switch to port " @@ -521,8 +512,7 @@ static void __osm_mcast_mgr_purge_list(osm_sm_t * sm, cl_qlist_t * const p_list) while ((p_wobj = (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) != (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_purge_list: ERR 0A06: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A06: " "Unable to route for port 0x%" PRIx64 "\n", osm_port_get_guid(p_wobj->p_port)); __osm_mcast_work_obj_delete(p_wobj); @@ -569,8 +559,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_mcast_mgr_branch: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Routing MLID 0x%X through switch 0x%" PRIx64 ", %u nodes at depth %u\n", mlid_ho, node_guid_ho, cl_qlist_count(p_list), depth); @@ -589,8 +578,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, /* This switch doesn't do multicast. Clean-up. 
*/ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_branch: ERR 0A14: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A14: " "Switch 0x%" PRIx64 " does not support multicast\n", node_guid_ho); @@ -607,8 +595,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, We are unable to continue routing down this leg of the tree. Clean-up. */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_branch: ERR 0A15: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A15: " "Insufficient memory to build multicast tree\n"); /* @@ -629,8 +616,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, */ list_array = malloc(sizeof(cl_qlist_t) * max_children); if (list_array == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_branch: ERR 0A16: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A16: " "Unable to allocate list array\n"); __osm_mcast_mgr_purge_list(sm, p_list); goto Exit; @@ -652,8 +638,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, */ if (depth > 1) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_mcast_mgr_branch: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Adding upstream port 0x%X\n", upstream_port); } @@ -689,8 +674,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, continue; /* No routes down this port. 
*/ if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_mcast_mgr_branch: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Routing %zu destinations via switch port 0x%X\n", count, i); } @@ -745,8 +729,7 @@ static osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, CL_ASSERT(cl_is_qlist_empty(p_port_list)); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_mcast_mgr_branch: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Found leaf for port 0x%016" PRIx64 " on switch port 0x%X\n", cl_ntoh64(osm_port_get_guid @@ -795,8 +778,7 @@ __osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp) num_ports = cl_qmap_count(p_mcm_tbl); if (num_ports == 0) { if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_mcast_mgr_build_spanning_tree: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "MLID 0x%X has no members - nothing to do\n", cl_ntoh16(osm_mgrp_get_mlid(p_mgrp))); } @@ -821,8 +803,7 @@ __osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp) */ p_sw = __osm_mcast_mgr_find_root_switch(sm, p_mgrp); if (p_sw == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_build_spanning_tree: ERR 0A08: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A08: " "Unable to locate a suitable switch for group 0x%X\n", cl_ntoh16(osm_mgrp_get_mlid(p_mgrp))); status = IB_ERROR; @@ -844,8 +825,7 @@ __osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp) ib_gid_get_guid(&p_mcm_port-> port_gid)); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_build_spanning_tree: ERR 0A09: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A09: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(ib_gid_get_guid (&p_mcm_port->port_gid))); @@ -854,8 +834,7 @@ __osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp) p_wobj = __osm_mcast_work_obj_new(p_port); if (p_wobj == NULL) { - 
osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_mcast_mgr_build_spanning_tree: ERR 0A10: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A10: " "Insufficient memory to route port 0x%016" PRIx64 "\n", cl_ntoh64(osm_port_get_guid(p_port))); @@ -869,8 +848,7 @@ __osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp) p_mgrp->p_root = __osm_mcast_mgr_branch(sm, p_mgrp, p_sw, &port_list, 0, 0, &max_depth); - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_mcast_mgr_build_spanning_tree: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Configured MLID 0x%X for %u ports, max tree depth = %u\n", cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)), count, max_depth); @@ -903,8 +881,7 @@ osm_mcast_mgr_set_table(osm_sm_t * sm, CL_ASSERT(p_sw); if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_mcast_mgr_set_table: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Configuring MLID 0x%X on switch 0x%" PRIx64 "\n", mlid_ho, osm_node_get_node_guid(p_sw->p_node)); } @@ -986,8 +963,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, mlid_ho = cl_ntoh16(mlid); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_single: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Attempting to add port 0x%" PRIx64 " to MLID 0x%X, " "\n\t\t\t\tjoin state = 0x%X\n", cl_ntoh64(port_guid), mlid_ho, join_state); @@ -998,8 +974,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, */ p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_process_single: ERR 0A01: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A01: " "Unable to acquire port object for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); status = IB_ERROR; @@ -1008,8 +983,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, p_physp = p_port->p_physp; if (p_physp == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_process_single: ERR 0A05: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A05: " "Unable to acquire 
phsyical port object for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); status = IB_ERROR; @@ -1018,8 +992,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, p_remote_physp = osm_physp_get_remote(p_physp); if (p_remote_physp == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_process_single: ERR 0A11: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A11: " "Unable to acquire remote phsyical port object " "for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); status = IB_ERROR; @@ -1033,8 +1006,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, sw_guid = osm_node_get_node_guid(p_remote_node); if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_process_single: ERR 0A22: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A22: " "Remote node not a switch node 0x%" PRIx64 "\n", cl_ntoh64(sw_guid)); status = IB_ERROR; @@ -1042,8 +1014,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, } if (!p_remote_node->sw) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_process_single: ERR 0A12: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A12: " "No switch object 0x%" PRIx64 "\n", cl_ntoh64(sw_guid)); status = IB_ERROR; goto Exit; @@ -1070,14 +1041,12 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, } else { if (join_state & IB_JOIN_STATE_SEND_ONLY) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_single: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Success. 
Nothing to do for send" "only member\n"); } } else { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_process_single: ERR 0A13: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A13: " "Unknown join state 0x%X\n", join_state); status = IB_ERROR; @@ -1086,8 +1055,7 @@ osm_mcast_mgr_process_single(osm_sm_t * sm, } } else { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_single: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Unable to add port\n"); } } @@ -1116,8 +1084,7 @@ osm_mcast_mgr_process_tree(osm_sm_t * sm, mlid = osm_mgrp_get_mlid(p_mgrp); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_tree: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Processing multicast group 0x%X\n", cl_ntoh16(mlid)); } @@ -1126,8 +1093,7 @@ osm_mcast_mgr_process_tree(osm_sm_t * sm, */ if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_tree: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No switches in subnet. 
Nothing to do\n"); } goto Exit; @@ -1156,16 +1122,14 @@ osm_mcast_mgr_process_tree(osm_sm_t * sm, if (ui_mcast_fdb_assign_func_defined == FALSE) { status = __osm_mcast_mgr_build_spanning_tree(sm, p_mgrp); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_mcast_mgr_process_tree: ERR 0A17: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: " "Unable to create spanning tree (%s)\n", ib_get_err_str(status)); goto Exit; } } else { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_tree: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Invoking UI function pfn_ui_mcast_fdb_assign\n"); } @@ -1196,8 +1160,7 @@ mcast_mgr_process_mgrp(osm_sm_t * sm, status = osm_mcast_mgr_process_tree(sm, p_mgrp, req_type, port_guid); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "mcast_mgr_process_mgrp: ERR 0A19: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A19: " "Unable to create spanning tree (%s)\n", ib_get_err_str(status)); goto Exit; @@ -1208,8 +1171,7 @@ mcast_mgr_process_mgrp(osm_sm_t * sm, * Not a well known group */ if (cl_qmap_count(&p_mgrp->mcm_port_tbl) == 0 && !p_mgrp->well_known) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "mcast_mgr_process_mgrp: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Destroying mgrp with lid:0x%X\n", cl_ntoh16(p_mgrp->mlid)); /* Send a Report to any InformInfo registered for @@ -1343,15 +1305,13 @@ osm_signal_t osm_mcast_mgr_process_mgroups(osm_sm_t * sm) * we processed the group we can skip doing anything */ if (p_mgrp->last_change_id == p_mgrp->last_tree_id) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_mgroups: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Skip processing mgrp with lid:0x%X change id:%u\n", cl_ntoh16(mlid), p_mgrp->last_change_id); continue; } - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_mcast_mgr_process_mgroups: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Processing mgrp with lid:0x%X change id:%u\n", cl_ntoh16(mlid), p_mgrp->last_change_id); 
mcast_mgr_process_mgrp(sm, p_mgrp, req_type, port_guid); diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c index 628d966..a07e5da 100644 --- a/opensm/opensm/osm_multicast.c +++ b/opensm/opensm/osm_multicast.c @@ -310,8 +310,7 @@ osm_mgrp_send_delete_notice(IN osm_subn_t * const p_subn, status = osm_report_notice(p_log, p_subn, &notice); if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_mgrp_send_delete_notice: ERR 7601: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7601: " "Error sending trap reports (%s)\n", ib_get_err_str(status)); goto Exit; @@ -352,8 +351,7 @@ osm_mgrp_send_create_notice(IN osm_subn_t * const p_subn, status = osm_report_notice(p_log, p_subn, &notice); if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_mgrp_send_create_notice: ERR 7602: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7602: " "Error sending trap reports (%s)\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_node_desc_rcv.c b/opensm/opensm/osm_node_desc_rcv.c index 4268526..4a22aab 100644 --- a/opensm/opensm/osm_node_desc_rcv.c +++ b/opensm/opensm/osm_node_desc_rcv.c @@ -87,8 +87,7 @@ __osm_nd_rcv_process_nd(IN osm_sm_t * sm, p_node->print_desc = tmp_desc; if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_nd_rcv_process_nd: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Node 0x%" PRIx64 "\n\t\t\t\tDescription = %s\n", cl_ntoh64(osm_node_get_node_guid(p_node)), p_node->print_desc); @@ -125,8 +124,7 @@ void osm_nd_rcv_process(IN void *context, IN void *data) CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); p_node = osm_get_node_by_guid(sm->p_subn, node_guid); if (!p_node) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_nd_rcv_process: ERR 0B01: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0B01: " "NodeDescription received for nonexistent node " "0x%" PRIx64 "\n", cl_ntoh64(node_guid)); } else { diff --git a/opensm/opensm/osm_node_info_rcv.c b/opensm/opensm/osm_node_info_rcv.c index 68d55b9..9b2c74c 
100644 --- a/opensm/opensm/osm_node_info_rcv.c +++ b/opensm/opensm/osm_node_info_rcv.c @@ -76,8 +76,7 @@ report_duplicated_guid(IN osm_sm_t * sm, p_old = p_physp->p_remote_physp; p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num); - osm_log(sm->p_log, OSM_LOG_ERROR, - "report_duplicated_guid: ERR 0D01: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D01: " "Found duplicated node.\n" "Node 0x%" PRIx64 " port %u is reachable from remote node " "0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n" @@ -119,8 +118,7 @@ static void requery_dup_node_info(IN osm_sm_t * sm, 0, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "requery_dup_node_info: ERR 0D02: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: " "Failure initiating NodeInfo request (%s)\n", ib_get_err_str(status)); } @@ -145,8 +143,7 @@ __osm_ni_rcv_set_links(IN osm_sm_t * sm, the ni_context will be zero. */ if (p_ni_context->node_guid == 0) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_ni_rcv_set_links: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Nothing to link for our own node 0x%" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); goto _exit; @@ -155,8 +152,7 @@ __osm_ni_rcv_set_links(IN osm_sm_t * sm, p_neighbor_node = osm_get_node_by_guid(sm->p_subn, p_ni_context->node_guid); if (!p_neighbor_node) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_set_links: ERR 0D10: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: " "Unexpected removal of neighbor node " "0x%" PRIx64 "\n", cl_ntoh64(p_ni_context->node_guid)); goto _exit; @@ -177,8 +173,7 @@ __osm_ni_rcv_set_links(IN osm_sm_t * sm, if (osm_node_link_exists(p_node, port_num, p_neighbor_node, p_ni_context->port_num)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_ni_rcv_set_links: " "Link already exists\n"); + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n"); goto _exit; } @@ -225,8 +220,7 @@ __osm_ni_rcv_set_links(IN osm_sm_t * sm, if ((osm_node_get_node_guid(p_node) == 
p_ni_context->node_guid) && (port_num == p_ni_context->port_num) && port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_ni_rcv_set_links: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Duplicate GUID found by link from a port to itself:" "node 0x%" PRIx64 ", port number 0x%X\n", cl_ntoh64(osm_node_get_node_guid(p_node)), port_num); @@ -245,15 +239,11 @@ __osm_ni_rcv_set_links(IN osm_sm_t * sm, } if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_ni_rcv_set_links: " - "Creating new link between: " - "\n\t\t\t\tnode 0x%" PRIx64 ", " - "port number 0x%X and" - "\n\t\t\t\tnode 0x%" PRIx64 ", " - "port number 0x%X\n", - cl_ntoh64(osm_node_get_node_guid(p_node)), - port_num, + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Creating new link between:\n\t\t\t\tnode 0x%" PRIx64 + ", port number 0x%X and\n\t\t\t\tnode 0x%" PRIx64 + ", port number 0x%X\n", + cl_ntoh64(osm_node_get_node_guid(p_node)), port_num, cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num); @@ -306,8 +296,7 @@ __osm_ni_rcv_process_new_node(IN osm_sm_t * sm, IB_MAD_ATTR_PORT_INFO, cl_hton32(port_num), CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_new_node: ERR 0D02: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: " "Failure initiating PortInfo request (%s)\n", ib_get_err_str(status)); @@ -351,8 +340,7 @@ __osm_ni_rcv_get_node_desc(IN osm_sm_t * sm, IB_MAD_ATTR_NODE_DESC, 0, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_get_node_desc: ERR 0D03: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: " "Failure initiating NodeDescription request (%s)\n", ib_get_err_str(status)); @@ -415,8 +403,7 @@ __osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm, */ p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - 
"__osm_ni_rcv_process_existing_ca_or_router: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Creating new port object with GUID 0x%" PRIx64 "\n", cl_ntoh64(p_ni->port_guid)); @@ -424,8 +411,7 @@ __osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm, p_port = osm_port_new(p_ni, p_node); if (p_port == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_existing_ca_or_router: ERR 0D04: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: " "Unable to create new port object\n"); goto Exit; } @@ -442,8 +428,7 @@ __osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm, We should never be here! Somehow, this port GUID already exists in the table. */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_existing_ca_or_router: ERR 0D12: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: " "Port 0x%" PRIx64 " already in the database!\n", cl_ntoh64(p_ni->port_guid)); @@ -485,8 +470,7 @@ __osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm, cl_hton32(port_num), CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_existing_ca_or_router: ERR 0D13: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: " "Failure initiating PortInfo request (%s)\n", ib_get_err_str(status)); @@ -523,8 +507,7 @@ __osm_ni_rcv_process_switch(IN osm_sm_t * sm, 0, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) /* continue despite error */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_switch: ERR 0D06: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: " "Failure initiating SwitchInfo request (%s)\n", ib_get_err_str(status)); @@ -553,8 +536,7 @@ __osm_ni_rcv_process_existing_switch(IN osm_sm_t * sm, __osm_ni_rcv_process_switch(sm, p_node, p_madw); else if (!p_node->sw || p_node->sw->discovery_count == 0) { /* we don't have the SwitchInfo - retry to get it */ - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_ni_rcv_process_existing_switch: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Retry to get SwitchInfo on 
node GUID:0x%" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); __osm_ni_rcv_process_switch(sm, p_node, p_madw); @@ -614,8 +596,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_VERBOSE); - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_ni_rcv_process_new: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Discovered new %s node," "\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", ib_get_node_type_str(p_ni->node_type), @@ -623,8 +604,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, p_node = osm_node_new(p_madw); if (p_node == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_new: ERR 0D07: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: " "Unable to create new node object\n"); goto Exit; } @@ -635,8 +615,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, */ p_port = osm_port_new(p_ni, p_node); if (p_port == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_new: ERR 0D14: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: " "Unable to create new port object\n"); osm_node_delete(&p_node); goto Exit; @@ -653,8 +632,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, We should never be here! Somehow, this port GUID already exists in the table. */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_new: ERR 0D15: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: " "Duplicate Port GUID 0x%" PRIx64 "! 
Found by the two directed routes:\n", cl_ntoh64(p_ni->port_guid)); @@ -684,8 +662,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, this would be elsewhere */ if (p_ni->node_type == IB_NODE_TYPE_ROUTER) { if ((p_rtr = osm_router_new(p_port)) == NULL) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_new: ERR 0D1A: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: " "Unable to create new router object\n"); else { p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl; @@ -694,8 +671,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, p_ni->port_guid, &p_rtr->map_item); if (p_rtr_check != p_rtr) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_new: ERR 0D1B: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: " "Unable to add port GUID:0x%016" PRIx64 " to router table\n", cl_ntoh64(p_ni->port_guid)); @@ -712,8 +688,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, We can simply clean-up, since the other thread will see this processing through to completion. */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_ni_rcv_process_new: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Discovery race detected at node 0x%" PRIx64 "\n", cl_ntoh64(p_ni->node_guid)); osm_node_delete(&p_node); @@ -735,8 +710,7 @@ __osm_ni_rcv_process_new(IN osm_sm_t * sm, __osm_ni_rcv_process_new_switch(sm, p_node, p_madw); break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_new: ERR 0D16: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: " "Unknown node type %u with GUID 0x%" PRIx64 "\n", p_ni->node_type, cl_ntoh64(p_ni->node_guid)); break; @@ -767,8 +741,7 @@ __osm_ni_rcv_process_existing(IN osm_sm_t * sm, port_num = ib_node_info_get_local_port_num(p_ni); if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_ni_rcv_process_existing: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64 ", discovered %u times already\n", ib_get_node_type_str(p_ni->node_type), @@ -793,8 +766,7 @@ 
__osm_ni_rcv_process_existing(IN osm_sm_t * sm, break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_ni_rcv_process_existing: ERR 0D09: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: " "Unknown node type %u with GUID 0x%" PRIx64 "\n", p_ni->node_type, cl_ntoh64(p_ni->node_guid)); break; @@ -827,16 +799,14 @@ void osm_ni_rcv_process(IN void *context, IN void *data) CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO); if (p_ni->node_guid == 0) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_ni_rcv_process: ERR 0D16: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: " "Got Zero Node GUID! Found on the directed route:\n"); osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR); goto Exit; } if (p_ni->port_guid == 0) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_ni_rcv_process: ERR 0D17: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: " "Got Zero Port GUID! Found on the directed route:\n"); osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR); goto Exit; diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c index 1760f22..aa7ded3 100644 --- a/opensm/opensm/osm_opensm.c +++ b/opensm/opensm/osm_opensm.c @@ -147,13 +147,12 @@ static int setup_routing_engine(osm_opensm_t * p_osm, const char *name) if (!strcmp(r->name, name)) { p_osm->routing_engine.name = r->name; if (r->setup(p_osm)) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "setup_routing_engine: setup of routing" + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "setup of routing" " engine \'%s\' failed\n", name); return -2; } - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "setup_routing_engine: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "\'%s\' routing engine set up\n", p_osm->routing_engine.name); return 0; @@ -164,9 +163,8 @@ static int setup_routing_engine(osm_opensm_t * p_osm, const char *name) static int osm_ucast_null_setup(osm_opensm_t * p_osm) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "osm_ucast_null_setup: nothing yet - " - "using default (minhop) routing engine\n"); + OSM_LOG(&p_osm->log, 
OSM_LOG_VERBOSE, + "nothing yet - using default (minhop) routing engine\n"); return 0; } @@ -276,7 +274,7 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, /* Write the OSM_VERSION to the SYS_LOG */ osm_log(&p_osm->log, OSM_LOG_SYS, "%s\n", OSM_VERSION); /* Format Waived */ - osm_log(&p_osm->log, OSM_LOG_FUNCS, "osm_opensm_init: [\n"); /* Format Waived */ + OSM_LOG(&p_osm->log, OSM_LOG_FUNCS, "[\n"); /* Format Waived */ status = cl_plock_init(&p_osm->lock); if (status != IB_SUCCESS) @@ -292,8 +290,8 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, #endif if (p_opt->single_thread) { - osm_log(&p_osm->log, OSM_LOG_INFO, - "osm_opensm_init: Forcing single threaded dispatcher\n"); + OSM_LOG(&p_osm->log, OSM_LOG_INFO, + "Forcing single threaded dispatcher\n"); status = cl_disp_init(&p_osm->disp, 1, "opensm"); } else { /* @@ -373,8 +371,8 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, if (p_opt->routing_engine_name && setup_routing_engine(p_osm, p_opt->routing_engine_name)) - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "osm_opensm_init: cannot find or setup routing engine" + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "cannot find or setup routing engine" " \'%s\'. Default will be used instead\n", p_opt->routing_engine_name); @@ -383,7 +381,7 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, p_osm->node_name_map = open_node_name_map(p_opt->node_name_map_name); Exit: - osm_log(&p_osm->log, OSM_LOG_FUNCS, "osm_opensm_init: ]\n"); /* Format Waived */ + OSM_LOG(&p_osm->log, OSM_LOG_FUNCS, "]\n"); /* Format Waived */ return (status); } diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index 74fc2e8..c1c620c 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -183,8 +183,7 @@ osm_perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context, /* post this message for later processing. 
*/ if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS, (void *)p_madw, NULL, NULL) != CL_SUCCESS) { - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_mad_recv_callback: ERR 4C01: " + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C01: " "PerfMgr Dispatcher post failed\n"); osm_mad_pool_put(pm->mad_pool, p_madw); } @@ -211,16 +210,14 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw) */ if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == cl_qmap_end(&(pm->monitored_map))) { - osm_log(pm->log, OSM_LOG_ERROR, - "osm_pc_rcv_process: ERR 4C15: GUID 0x%016" + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C15: GUID 0x%016" PRIx64 " not found in monitored map\n", node_guid); goto Exit; } p_mon_node = (__monitored_node_t *) p_node; - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_mad_send_err_callback: ERR 4C02: %s (0x%" PRIx64 + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C02: %s (0x%" PRIx64 ") port %d\n", p_mon_node->name, p_mon_node->guid, port); if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { @@ -229,8 +226,8 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw) /* Now, validate port number */ if (port > p_mon_node->redir_tbl_size) { cl_plock_release(pm->lock); - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_mad_send_err_callback: ERR 4C16: Invalid port num %d for %s (GUID 0x%016" + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: " + "Invalid port num %d for %s (GUID 0x%016" PRIx64 ") num ports %d\n", port, p_mon_node->name, p_mon_node->guid, p_mon_node->redir_tbl_size); goto Exit; @@ -261,8 +258,8 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) OSM_LOG_ENTER(pm->log); if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) { - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_bind: ERR 4C03: Multiple binds not allowed\n"); + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 4C03: Multiple binds not allowed\n"); status = IB_ERROR; goto Exit; } @@ -276,8 +273,7 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, 
const ib_net64_t port_guid) bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE; bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE; - osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_perfmgr_bind: " + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); pm->bind_handle = osm_vendor_bind(pm->vendor, @@ -289,8 +285,8 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { status = IB_ERROR; - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_bind: ERR 4C04: Vendor specific bind failed (%s)\n", + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 4C04: Vendor specific bind failed (%s)\n", ib_get_err_str(status)); goto Exit; } @@ -307,8 +303,7 @@ static void osm_perfmgr_mad_unbind(osm_perfmgr_t * const pm) { OSM_LOG_ENTER(pm->log); if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_mad_unbind: ERR 4C05: No previous bind\n"); + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C05: No previous bind\n"); goto Exit; } osm_vendor_unbind(pm->bind_handle); @@ -446,8 +441,8 @@ static void __collect_guids(cl_map_item_t * const p_map_item, void *context) size = node->node_info.num_ports; mon_node = malloc(sizeof(*mon_node) + sizeof(redir_t) * size); if (!mon_node) { - osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: __collect_guids ERR 4C06: malloc failed: not handling node %s" + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C06: " + "malloc failed: not handling node %s" "(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid); goto Exit; } @@ -484,9 +479,8 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) cl_plock_acquire(pm->lock); node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); if (!node) { - osm_log(pm->log, OSM_LOG_ERROR, - "__osm_perfmgr_query_counters: ERR 4C07: Node \"%s\" (guid 0x%" - PRIx64 + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 4C07: Node \"%s\" (guid 0x%" PRIx64 ") no longer exists so removing 
from PerfMgr monitoring\n", mon_node->name, mon_node->guid); __mark_for_removal(pm, mon_node); @@ -500,8 +494,8 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) if (perfmgr_db_create_entry(pm->db, node_guid, num_ports, node->print_desc) != PERFMGR_EVENT_DB_SUCCESS) { - osm_log(pm->log, OSM_LOG_ERROR, - "__osm_perfmgr_query_counters: ERR 4C08: DB create entry failed for 0x%" + OSM_LOG(pm->log, OSM_LOG_ERROR, + "ERR 4C08: DB create entry failed for 0x%" PRIx64 " (%s) : %s\n", node_guid, node->print_desc, strerror(errno)); goto Exit; @@ -522,8 +516,7 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) lid = get_lid(node, port, mon_node); if (lid == 0) { - osm_log(pm->log, OSM_LOG_DEBUG, - "__osm_perfmgr_query_counters: WARN: node 0x%" PRIx64 + OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64 " port %d (%s): port out of range, skipping\n", cl_ntoh64(node->node_info.node_guid), port, node->print_desc); @@ -538,16 +531,15 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) #if ENABLE_OSM_PERF_MGR_PROFILE gettimeofday(&(mad_context.perfmgr_context.query_start), NULL); #endif - osm_log(pm->log, OSM_LOG_VERBOSE, - "__osm_perfmgr_query_counters: Getting stats for node 0x%" + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" PRIx64 " port %d (lid %X) (%s)\n", node_guid, port, cl_ntoh16(lid), node->print_desc); status = osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port, IB_MAD_METHOD_GET, &mad_context); if (status != IB_SUCCESS) - osm_log(pm->log, OSM_LOG_ERROR, - "__osm_perfmgr_query_counters: ERR 4C09: Failed to issue port counter query for node 0x%" + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C09: " + "Failed to issue port counter query for node 0x%" PRIx64 " port %d (%s)\n", node->node_info.node_guid, port, node->print_desc); @@ -583,16 +575,16 @@ static int sweep_hop_1(osm_sm_t * sm) p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - 
osm_log(sm->p_log, OSM_LOG_ERROR, - "sweep_hop_1: ERR 4C81: No SM port object\n"); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 4C81: No SM port object\n"); return -1; } p_node = p_port->p_node; port_num = ib_node_info_get_local_port_num(&p_node->node_info); - osm_log(sm->p_log, OSM_LOG_DEBUG, - "sweep_hop_1: Probing hop 1 on local port %u\n", port_num); + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Probing hop 1 on local port %u\n", port_num); p_physp = osm_node_get_physp_ptr(p_node, port_num); @@ -621,8 +613,7 @@ static int sweep_hop_1(osm_sm_t * sm) CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "sweep_hop_1: ERR 4C82: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C82: " "Request for NodeInfo failed\n"); break; @@ -655,15 +646,14 @@ static int sweep_hop_1(osm_sm_t * sm) CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "sweep_hop_1: ERR 4C82: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C82: " "Request for NodeInfo failed\n"); } break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "sweep_hop_1: ERR 4C83: Unknown node type %d\n", + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 4C83: Unknown node type %d\n", osm_node_get_type(p_node)); } @@ -683,8 +673,7 @@ static unsigned is_sm_port_down(osm_sm_t * const sm) p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { CL_PLOCK_RELEASE(sm->p_lock); - osm_log(sm->p_log, OSM_LOG_ERROR, - "is_sm_port_down: ERR 4C85: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4C85: " "SM port with GUID:%016" PRIx64 " is unknown\n", cl_ntoh64(port_guid)); return 1; @@ -705,8 +694,7 @@ static int sweep_hop_0(osm_sm_t * const sm) h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); if (h_bind == OSM_BIND_INVALID_HANDLE) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "sweep_hop_0: No bound ports.\n"); + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports.\n"); return -1; } @@ -715,8 +703,8 @@ static int sweep_hop_0(osm_sm_t * const sm) CL_DISP_MSGID_NONE, 
NULL); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "sweep_hop_0: ERR 4C86: Request for NodeInfo failed\n"); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 4C86: Request for NodeInfo failed\n"); return (status); } @@ -760,7 +748,7 @@ static int perfmgr_discovery(osm_opensm_t * osm) goto _exit; if (is_sm_port_down(&osm->sm)) { - osm_log(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n"); + OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n"); goto _drop; } @@ -804,7 +792,7 @@ void osm_perfmgr_process(osm_perfmgr_t * pm) /* FIXME we should be able to track SA notices * and not have to sweep the node_guid_tbl each pass */ - osm_log(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n"); + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n"); cl_plock_acquire(pm->lock); cl_qmap_apply_func(&(pm->subn->node_guid_tbl), __collect_guids, (void *)pm); @@ -893,8 +881,7 @@ osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, __monitored_node_t *mon_node, if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err) != PERFMGR_EVENT_DB_SUCCESS) { - osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_perfmgr_check_oob_clear: Failed to find previous " + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " "error reading for %s (guid 0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); return; @@ -912,8 +899,8 @@ osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, __monitored_node_t *mon_node, cr->link_integrity < prev_err.link_integrity || cr->buffer_overrun < prev_err.buffer_overrun || cr->vl15_dropped < prev_err.vl15_dropped) { - osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 4C0A: Detected an out of band error clear " + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C0A: " + "Detected an out of band error clear " "on %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); @@ -922,8 +909,8 @@ osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, __monitored_node_t *mon_node, /* 
FIXME handle extended counters */ if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc) != PERFMGR_EVENT_DB_SUCCESS) { - osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_perfmgr_check_oob_clear: Failed to find previous data count " + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Failed to find previous data count " "reading for %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); return; @@ -933,7 +920,7 @@ osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, __monitored_node_t *mon_node, dc->rcv_data < prev_dc.rcv_data || dc->xmit_pkts < prev_dc.xmit_pkts || dc->rcv_pkts < prev_dc.rcv_pkts) { - osm_log(pm->log, OSM_LOG_ERROR, + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C0B: Detected an out of band data counter " "clear on node %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); @@ -1007,8 +994,8 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node, lid = get_lid(p_node, port, mon_node); cl_plock_release(pm->lock); if (lid == 0) { - osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 4C0C: Failed to clear counters for %s (0x%" + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C0C: " + "Failed to clear counters for %s (0x%" PRIx64 ") port %d; failed to get lid\n", mon_node->name, mon_node->guid, port); goto Exit; @@ -1024,8 +1011,8 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node, osm_perfmgr_send_pc_mad(pm, lid, remote_qp, port, IB_MAD_METHOD_SET, &mad_context); if (status != IB_SUCCESS) - osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 4C11: Failed to send clear counters MAD for %s (0x%" + OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 4C11: " + "Failed to send clear counters MAD for %s (0x%" PRIx64 ") port %d\n", mon_node->name, mon_node->guid, port); @@ -1049,8 +1036,7 @@ osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read); if (err != PERFMGR_EVENT_DB_SUCCESS) { - osm_log(pm->log, 
OSM_LOG_VERBOSE, - "osm_perfmgr_log_events: Failed to find previous " + OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " "reading for %s (0x%" PRIx64 ") port %u\n", mon_node->name, mon_node->guid, port); return; @@ -1060,24 +1046,21 @@ osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t /* FIXME these events should be defineable by the user in a config * file somewhere. */ if (reading->symbol_err_cnt > prev_read.symbol_err_cnt) - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_log_events: ERR 4C0D: " + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0D: " "Found %" PRIu64 " Symbol errors in %lu sec on %s (0x%" PRIx64 ") port %u\n", (reading->symbol_err_cnt - prev_read.symbol_err_cnt), time_diff, mon_node->name, mon_node->guid, port); if (reading->rcv_err > prev_read.rcv_err) - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_log_events: ERR 4C0E: " + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0E: " "Found %" PRIu64 " Receive errors in %lu sec on %s (0x%" PRIx64 ") port %u\n", (reading->rcv_err - prev_read.rcv_err), time_diff, mon_node->name, mon_node->guid, port); if (reading->xmit_discards > prev_read.xmit_discards) - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_log_events: ERR 4C0F: " + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C0F: " "Found %" PRIu64 " Xmit Discards in %lu sec on %s (0x%" PRIx64 ") port %u\n", (reading->xmit_discards - prev_read.xmit_discards), @@ -1111,16 +1094,15 @@ static void osm_pc_rcv_process(void *context, void *data) */ if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == cl_qmap_end(&(pm->monitored_map))) { - osm_log(pm->log, OSM_LOG_ERROR, - "osm_pc_rcv_process: ERR 4C12: GUID 0x%016" + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C12: GUID 0x%016" PRIx64 " not found in monitored map\n", node_guid); goto Exit; } p_mon_node = (__monitored_node_t *) p_node; - osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_pc_rcv_process: Processing received MAD status 0x%x context 0x%" + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Processing 
received MAD status 0x%x context 0x%" PRIx64 " port %u\n", p_mad->status, node_guid, port); /* Response could also be redirection (IBM eHCA PMA does this) */ @@ -1131,8 +1113,8 @@ static void osm_pc_rcv_process(void *context, void *data) (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); ib_api_status_t status; - osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_pc_rcv_process: Redirection to LID 0x%x " + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "Redirection to LID 0x%x " "GID 0x%016" PRIx64 " : 0x%016" PRIx64 " QP 0x%x received\n", cl_ntoh16(cpi->redir_lid), @@ -1143,14 +1125,14 @@ static void osm_pc_rcv_process(void *context, void *data) /* LID or GID redirection ? */ /* For GID redirection, need to get PathRecord from SA */ if (cpi->redir_lid == 0) { - osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_pc_rcv_process: GID redirection not currently implemented!\n"); + OSM_LOG(pm->log, OSM_LOG_VERBOSE, + "GID redirection not currently implemented!\n"); goto Exit; } if (!pm->subn->opt.perfmgr_redir) { - osm_log(pm->log, OSM_LOG_ERROR, - "osm_pc_rcv_process: ERR 4C16: redirection requested but disabled\n"); + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C16: " + "redirection requested but disabled\n"); goto Exit; } @@ -1159,8 +1141,8 @@ static void osm_pc_rcv_process(void *context, void *data) /* Now, validate port number */ if (port > p_mon_node->redir_tbl_size) { cl_plock_release(pm->lock); - osm_log(pm->log, OSM_LOG_ERROR, - "osm_pc_rcv_process: ERR 4C13: Invalid port num %d for GUID 0x%016" + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C13: " + "Invalid port num %d for GUID 0x%016" PRIx64 " num ports %d\n", port, node_guid, p_mon_node->redir_tbl_size); goto Exit; @@ -1176,8 +1158,8 @@ static void osm_pc_rcv_process(void *context, void *data) mad_context->perfmgr_context. 
mad_method, mad_context); if (status != IB_SUCCESS) - osm_log(pm->log, OSM_LOG_ERROR, - "osm_pc_rcv_process: ERR 4C14: Failed to send redirected MAD with method 0x%x for node 0x%" + OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 4C14: " + "Failed to send redirected MAD with method 0x%x for node 0x%" PRIx64 " port %d\n", mad_context->perfmgr_context.mad_method, node_guid, port); @@ -1247,7 +1229,7 @@ osm_perfmgr_init(osm_perfmgr_t * const pm, OSM_LOG_ENTER(log); - osm_log(log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); + OSM_LOG(log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); memset(pm, 0, sizeof(*pm)); @@ -1309,8 +1291,7 @@ void osm_perfmgr_clear_counters(osm_perfmgr_t * pm) void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type) { if (perfmgr_db_dump(pm->db, pm->event_db_dump_file, dump_type) != 0) - osm_log(pm->log, OSM_LOG_ERROR, - "PB dump port counters: ERR 4C10: Failed to dump file %s : %s", + OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s", pm->event_db_dump_file, strerror(errno)); } diff --git a/opensm/opensm/osm_pkey.c b/opensm/opensm/osm_pkey.c index d7a695b..9b43669 100644 --- a/opensm/opensm/osm_pkey.c +++ b/opensm/opensm/osm_pkey.c @@ -478,14 +478,12 @@ osm_physp_has_pkey(IN osm_log_t * p_log, OSM_LOG_ENTER(p_log); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_has_pkey: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Search for PKey: 0x%4x\n", cl_ntoh16(pkey)); /* if the pkey given is an invalid pkey - return TRUE. 
*/ if (ib_pkey_is_invalid(pkey)) { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_has_pkey: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Given invalid PKey - we treat it loosely and allow it\n"); res = TRUE; goto Exit; @@ -498,12 +496,10 @@ osm_physp_has_pkey(IN osm_log_t * p_log, p_pkey = cl_map_get(&pkey_tbl->keys, pkey_base); if (p_pkey) { res = TRUE; - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_has_pkey: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "PKey 0x%04x was found\n", cl_ntoh16(pkey)); } else { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_has_pkey: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "PKey 0x%04x was not found\n", cl_ntoh16(pkey)); } diff --git a/opensm/opensm/osm_pkey_mgr.c b/opensm/opensm/osm_pkey_mgr.c index 39b8ac5..0e94df4 100644 --- a/opensm/opensm/osm_pkey_mgr.c +++ b/opensm/opensm/osm_pkey_mgr.c @@ -100,8 +100,7 @@ pkey_mgr_process_physical_port(IN osm_log_t * p_log, p_pkey_tbl = &p_physp->pkeys; p_pending = (osm_pending_pkey_t *) malloc(sizeof(osm_pending_pkey_t)); if (!p_pending) { - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_process_physical_port: ERR 0502: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0502: " "Failed to allocate new pending pkey entry for node " "0x%016" PRIx64 " port %u\n", cl_ntoh64(osm_node_get_node_guid(p_node)), @@ -123,8 +122,7 @@ pkey_mgr_process_physical_port(IN osm_log_t * p_log, &p_pending->block, &p_pending->index) != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_process_physical_port: ERR 0503: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0503: " "Failed to obtain P_Key 0x%04x block and index for node " "0x%016" PRIx64 " port %u\n", ib_pkey_get_base(pkey), @@ -137,10 +135,8 @@ pkey_mgr_process_physical_port(IN osm_log_t * p_log, stat = "updated"; } - osm_log(p_log, OSM_LOG_DEBUG, - "pkey_mgr_process_physical_port: " - "pkey 0x%04x was %s for node 0x%016" PRIx64 - " port %u\n", + OSM_LOG(p_log, OSM_LOG_DEBUG, + "pkey 0x%04x was %s for node 0x%016" PRIx64 " port %u\n", cl_ntoh16(pkey), stat, cl_ntoh64(osm_node_get_node_guid(p_node)), 
osm_physp_get_port_num(p_physp)); @@ -211,8 +207,7 @@ pkey_mgr_enforce_partition(IN osm_log_t * p_log, osm_sm_t * sm, p_pi = &p_physp->port_info; if ((p_pi->vl_enforce & 0xc) == (0xc) * (enforce == TRUE)) { - osm_log(p_log, OSM_LOG_DEBUG, - "pkey_mgr_enforce_partition: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "No need to update PortInfo for " "node 0x%016" PRIx64 " port %u\n", cl_ntoh64(osm_node_get_node_guid @@ -246,8 +241,7 @@ pkey_mgr_enforce_partition(IN osm_log_t * p_log, osm_sm_t * sm, cl_hton32(osm_physp_get_port_num(p_physp)), CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_enforce_partition: ERR 0511: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0511: " "Failed to set PortInfo for " "node 0x%016" PRIx64 " port %u\n", cl_ntoh64(osm_node_get_node_guid @@ -255,10 +249,8 @@ pkey_mgr_enforce_partition(IN osm_log_t * p_log, osm_sm_t * sm, osm_physp_get_port_num(p_physp)); return FALSE; } else { - osm_log(p_log, OSM_LOG_DEBUG, - "pkey_mgr_enforce_partition: " - "Set PortInfo for " - "node 0x%016" PRIx64 " port %u\n", + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Set PortInfo for node 0x%016" PRIx64 " port %u\n", cl_ntoh64(osm_node_get_node_guid (osm_physp_get_node_ptr(p_physp))), osm_physp_get_port_num(p_physp)); @@ -299,8 +291,7 @@ static boolean_t pkey_mgr_update_port(osm_log_t * p_log, osm_sm_t * sm, max_num_of_blocks = pkey_mgr_get_physp_max_blocks(sm->p_subn, p_physp); if (p_pkey_tbl->max_blocks > max_num_of_blocks) { - osm_log(p_log, OSM_LOG_INFO, - "pkey_mgr_update_port: " + OSM_LOG(p_log, OSM_LOG_INFO, "Max number of blocks reduced from %u to %u " "for node 0x%016" PRIx64 " port %u\n", p_pkey_tbl->max_blocks, max_num_of_blocks, @@ -329,8 +320,7 @@ static boolean_t pkey_mgr_update_port(osm_log_t * p_log, osm_sm_t * sm, &last_free_block_index, &last_free_pkey_index); if (!found) { - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_update_port: ERR 0504: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0504: " "Failed to find empty space for 
new pkey 0x%04x " "for node 0x%016" PRIx64 " port %u\n", cl_ntoh16(p_pending->pkey), @@ -348,8 +338,7 @@ static boolean_t pkey_mgr_update_port(osm_log_t * p_log, osm_sm_t * sm, osm_pkey_tbl_set_new_entry(p_pkey_tbl, block_index, pkey_index, p_pending->pkey)) { - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_update_port: ERR 0505: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0505: " "Failed to set PKey 0x%04x in block %u idx %u " "for node 0x%016" PRIx64 " port %u\n", cl_ntoh16(p_pending->pkey), block_index, @@ -379,17 +368,14 @@ static boolean_t pkey_mgr_update_port(osm_log_t * p_log, osm_sm_t * sm, pkey_mgr_update_pkey_entry(sm, p_physp, new_block, block_index); if (status == IB_SUCCESS) { - osm_log(p_log, OSM_LOG_DEBUG, - "pkey_mgr_update_port: " - "Updated " - "pkey table block %d for node 0x%016" PRIx64 - " port %u\n", block_index, + OSM_LOG(p_log, OSM_LOG_DEBUG, + "Updated pkey table block %d for node 0x%016" + PRIx64 " port %u\n", block_index, cl_ntoh64(osm_node_get_node_guid(p_node)), osm_physp_get_port_num(p_physp)); ret_val = TRUE; } else { - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_update_port: ERR 0506: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0506: " "pkey_mgr_update_pkey_entry() failed to update " "pkey table block %d for node 0x%016" PRIx64 " port %u\n", block_index, @@ -438,8 +424,7 @@ pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, num_of_blocks = osm_pkey_tbl_get_num_blocks(p_pkey_tbl); peer_max_blocks = pkey_mgr_get_physp_max_blocks(p_subn, peer); if (peer_max_blocks < p_pkey_tbl->used_blocks) { - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_update_peer_port: ERR 0508: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0508: " "Not enough pkey entries (%u < %u) on switch 0x%016" PRIx64 " port %u. 
Clearing Enforcement bit\n", peer_max_blocks, num_of_blocks, @@ -471,8 +456,7 @@ pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, if (status == IB_SUCCESS) ret_val = TRUE; else - osm_log(p_log, OSM_LOG_ERROR, - "pkey_mgr_update_peer_port: ERR 0509: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0509: " "pkey_mgr_update_pkey_entry() failed to update " "pkey table block %d for node 0x%016" PRIx64 " port %u\n", block_index, @@ -483,8 +467,7 @@ pkey_mgr_update_peer_port(osm_log_t * p_log, osm_sm_t * sm, } if ((ret_val == TRUE) && osm_log_is_active(p_log, OSM_LOG_DEBUG)) { - osm_log(p_log, OSM_LOG_DEBUG, - "pkey_mgr_update_peer_port: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Pkey table was updated for node 0x%016" PRIx64 " port %u\n", cl_ntoh64(osm_node_get_node_guid(p_node)), @@ -513,8 +496,7 @@ osm_signal_t osm_pkey_mgr_process(IN osm_opensm_t * p_osm) CL_PLOCK_EXCL_ACQUIRE(&p_osm->lock); if (osm_prtn_make_partitions(&p_osm->log, &p_osm->subn) != IB_SUCCESS) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "osm_pkey_mgr_process: ERR 0510: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 0510: " "osm_prtn_make_partitions() failed\n"); goto _err; } diff --git a/opensm/opensm/osm_pkey_rcv.c b/opensm/opensm/osm_pkey_rcv.c index a69106f..db9ab99 100644 --- a/opensm/opensm/osm_pkey_rcv.c +++ b/opensm/opensm/osm_pkey_rcv.c @@ -87,8 +87,7 @@ void osm_pkey_rcv_process(IN void *context, IN void *data) cl_plock_excl_acquire(sm->p_lock); p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_pkey_rcv_process: ERR 4806: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4806: " "No port object for port with GUID 0x%" PRIx64 "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", @@ -117,8 +116,7 @@ void osm_pkey_rcv_process(IN void *context, IN void *data) update the subnet. 
*/ if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_pkey_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Got GetResp(PKey) block:%u port_num %u with GUID 0x%" PRIx64 " for parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", block_num, port_num, cl_ntoh64(port_guid), @@ -130,8 +128,7 @@ void osm_pkey_rcv_process(IN void *context, IN void *data) If so, ignore it. */ if (!p_physp) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_pkey_rcv_process: ERR 4807: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4807: " "Got invalid port number 0x%X\n", port_num); goto Exit; } diff --git a/opensm/opensm/osm_port.c b/opensm/opensm/osm_port.c index 579f797..3317cb5 100644 --- a/opensm/opensm/osm_port.c +++ b/opensm/opensm/osm_port.c @@ -326,8 +326,7 @@ osm_physp_calc_link_mtu(IN osm_log_t * p_log, IN const osm_physp_t * p_physp) ib_port_info_get_mtu_cap(&p_remote_physp->port_info); if (osm_log_is_active(p_log, OSM_LOG_DEBUG)) - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_calc_link_mtu: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Remote port 0x%016" PRIx64 " port = 0x%X : " "MTU = %u. This Port MTU: %u\n", cl_ntoh64(osm_physp_get_port_guid @@ -340,8 +339,7 @@ osm_physp_calc_link_mtu(IN osm_log_t * p_log, IN const osm_physp_t * p_physp) mtu = remote_mtu; if (osm_log_is_active(p_log, OSM_LOG_VERBOSE)) - osm_log(p_log, OSM_LOG_VERBOSE, - "osm_physp_calc_link_mtu: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "MTU mismatch between ports." "\n\t\t\t\tPort 0x%016" PRIx64 ", port 0x%X" " and port 0x%016" PRIx64 @@ -359,8 +357,7 @@ osm_physp_calc_link_mtu(IN osm_log_t * p_log, IN const osm_physp_t * p_physp) mtu = ib_port_info_get_neighbor_mtu(&p_physp->port_info); if (mtu == 0) { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_calc_link_mtu: ERR 4101: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "ERR 4101: " "Invalid MTU = 0. 
Forcing correction to 256\n"); mtu = 1; } @@ -391,8 +388,7 @@ osm_physp_calc_link_op_vls(IN osm_log_t * p_log, ib_port_info_get_vl_cap(&p_remote_physp->port_info); if (osm_log_is_active(p_log, OSM_LOG_DEBUG)) - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_calc_link_op_vls: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Remote port 0x%016" PRIx64 " port = 0x%X : " "VL_CAP = %u. This port VL_CAP = %u\n", cl_ntoh64(osm_physp_get_port_guid @@ -405,8 +401,7 @@ osm_physp_calc_link_op_vls(IN osm_log_t * p_log, op_vls = remote_op_vls; if (osm_log_is_active(p_log, OSM_LOG_VERBOSE)) - osm_log(p_log, OSM_LOG_VERBOSE, - "osm_physp_calc_link_op_vls: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "OP_VLS mismatch between ports." "\n\t\t\t\tPort 0x%016" PRIx64 ", port 0x%X" " and port 0x%016" PRIx64 @@ -428,8 +423,7 @@ osm_physp_calc_link_op_vls(IN osm_log_t * p_log, op_vls = p_subn->opt.max_op_vls; if (op_vls == 0) { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_physp_calc_link_op_vls: ERR 4102: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "ERR 4102: " "Invalid OP_VLS = 0. 
Forcing correction to 1 (VL0)\n"); op_vls = 1; } @@ -476,8 +470,7 @@ __osm_physp_get_dr_physp_set(IN osm_log_t * p_log, /* find the OSM node */ p_port = osm_get_port_by_guid(p_subn, p_subn->sm_port_guid); if (!p_port) { - osm_log(p_log, OSM_LOG_ERROR, - "__osm_physp_get_dr_nodes_set: ERR 4103: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4103: " "Failed to find the SM own port by guid\n"); status = CL_ERROR; goto Exit; @@ -500,8 +493,7 @@ __osm_physp_get_dr_physp_set(IN osm_log_t * p_log, cl_map_insert(p_physp_map, __osm_ptr_to_key(p_physp), NULL); - osm_log(p_log, OSM_LOG_DEBUG, - "__osm_physp_get_dr_nodes_set: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Traversed through node: 0x%016" PRIx64 " port:%u\n", cl_ntoh64(p_node->node_info.node_guid), @@ -509,8 +501,7 @@ __osm_physp_get_dr_physp_set(IN osm_log_t * p_log, /* make sure we got a valid port and it has a remote port */ if (!p_physp) { - osm_log(p_log, OSM_LOG_ERROR, - "__osm_physp_get_dr_nodes_set: ERR 4104: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4104: " "DR Traversal stopped on invalid port at hop:%u\n", hop); status = CL_ERROR; @@ -518,8 +509,7 @@ __osm_physp_get_dr_physp_set(IN osm_log_t * p_log, } if (!(p_physp = osm_physp_get_remote(p_physp))) { - osm_log(p_log, OSM_LOG_ERROR, - "__osm_physp_get_dr_nodes_set: ERR 4106: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4106: " "DR Traversal stopped on missing remote physp at hop:%u\n", hop); status = CL_ERROR; @@ -629,9 +619,7 @@ osm_physp_replace_dr_path_with_alternate_dr_path(IN osm_log_t * p_log, p_port = osm_get_port_by_guid(p_subn, port_guid); if (!p_port) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_physp_replace_dr_path_with_alternate_dr_path: ERR 4105: " - "No SM port object\n"); + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4105: No SM port object\n"); goto Exit; } @@ -789,8 +777,7 @@ osm_physp_set_pkey_tbl(IN osm_log_t * p_log, 1) / IB_NUM_PKEY_ELEMENTS_IN_BLOCK; if (block_num >= max_blocks) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_physp_set_pkey_tbl: ERR 4108: " + OSM_LOG(p_log, 
OSM_LOG_ERROR, "ERR 4108: " "Got illegal set for block number:%u " "For GUID: %" PRIx64 " port number:0x%X\n", block_num, diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c index 758a609..ecac2a8 100644 --- a/opensm/opensm/osm_port_info_rcv.c +++ b/opensm/opensm/osm_port_info_rcv.c @@ -78,8 +78,7 @@ __osm_pi_rcv_set_sm(IN osm_sm_t * sm, OSM_LOG_ENTER(sm->p_log); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_pi_rcv_set_sm: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Setting IS_SM bit in port attributes\n"); p_dr_path = osm_physp_get_dr_path_ptr(p_physp); @@ -99,8 +98,7 @@ static void pi_rcv_check_and_fix_lid(osm_log_t *log, ib_port_info_t * const pi, osm_physp_t * p) { if (cl_ntoh16(pi->base_lid) > IB_LID_UCAST_END_HO) { - osm_log(log, OSM_LOG_ERROR, - "pi_rcv_check_and_fix_lid: ERR 0F04: " + OSM_LOG(log, OSM_LOG_ERROR, "ERR 0F04: " "Got invalid base LID 0x%x from the network. " "Corrected to 0x%x.\n", cl_ntoh16(pi->base_lid), cl_ntoh16(p->port_info.base_lid)); @@ -131,8 +129,7 @@ __osm_pi_rcv_process_endport(IN osm_sm_t * sm, /* track the minimal endport MTU and rate */ mtu = ib_port_info_get_mtu_cap(p_pi); if (mtu < sm->p_subn->min_ca_mtu) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_pi_rcv_process_endport: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Setting endport minimal MTU to:%u defined by port:0x%" PRIx64 "\n", mtu, cl_ntoh64(port_guid)); sm->p_subn->min_ca_mtu = mtu; @@ -140,8 +137,7 @@ __osm_pi_rcv_process_endport(IN osm_sm_t * sm, rate = ib_port_info_compute_rate(p_pi); if (rate < sm->p_subn->min_ca_rate) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_pi_rcv_process_endport: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Setting endport minimal rate to:%u defined by port:0x%" PRIx64 "\n", rate, cl_ntoh64(port_guid)); sm->p_subn->min_ca_rate = rate; @@ -170,15 +166,13 @@ __osm_pi_rcv_process_endport(IN osm_sm_t * sm, if (p_pi->capability_mask & IB_PORT_CAP_IS_SM) { if 
(sm->p_subn->opt.ignore_other_sm) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_pi_rcv_process_endport: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Ignoring SM on port 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); else { if (osm_log_is_active (sm->p_log, OSM_LOG_VERBOSE)) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_pi_rcv_process_endport: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Detected another SM. Requesting SMInfo" "\n\t\t\t\tPort 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); @@ -198,8 +192,8 @@ __osm_pi_rcv_process_endport(IN osm_sm_t * sm, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_pi_rcv_process_endport: ERR 0F05: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 0F05: " "Failure requesting SMInfo (%s)\n", ib_get_err_str(status)); } @@ -247,8 +241,7 @@ __osm_pi_rcv_process_switch_port(IN osm_sm_t * sm, remote_port_num = osm_physp_get_port_num(p_remote_physp); - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_pi_rcv_process_switch_port: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Unlinking local node 0x%" PRIx64 ", port 0x%X" "\n\t\t\t\tand remote node 0x%" PRIx64 @@ -301,21 +294,19 @@ __osm_pi_rcv_process_switch_port(IN osm_sm_t * sm, &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_pi_rcv_process_switch_port: ERR 0F02: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 0F02: " "Failure initiating NodeInfo request (%s)\n", ib_get_err_str(status)); } else if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_pi_rcv_process_switch_port: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Skipping SMP responder port 0x%X\n", p_pi->local_port_num); break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_pi_rcv_process_switch_port: ERR 0F03: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F03: " "Unknown link state = %u, port = 0x%X\n", ib_port_info_get_port_state(p_pi), p_pi->local_port_num); @@ -428,8 +419,7 @@ static void get_pkey_table(IN osm_log_t * p_log, CL_DISP_MSGID_NONE, &context); 
if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_physp_has_pkey: ERR 0F12: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 0F12: " "Failure initiating PKeyTable request (%s)\n", ib_get_err_str(status)); goto Exit; @@ -487,15 +477,13 @@ osm_pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * const p_node, if (p_context->active_transition && (cl_ntoh16(p_smp->status) & 0x7fff) == 0x1c) { level = OSM_LOG_INFO; - osm_log(sm->p_log, OSM_LOG_INFO, - "osm_pi_rcv_process_set: " + OSM_LOG(sm->p_log, OSM_LOG_INFO, "Received error status 0x%x for SetResp() during ACTIVE transition\n", cl_ntoh16(p_smp->status) & 0x7fff); /* Should there be a subsequent Get to validate that port is ACTIVE ? */ } else { level = OSM_LOG_ERROR; - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_pi_rcv_process_set: ERR 0F10: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F10: " "Received error status for SetResp()\n"); } osm_dump_port_info(sm->p_log, @@ -504,8 +492,7 @@ osm_pi_rcv_process_set(IN osm_sm_t * sm, IN osm_node_t * const p_node, } if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_pi_rcv_process_set: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received logical SetResp() for GUID 0x%" PRIx64 ", port num 0x%X" "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 @@ -559,8 +546,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) /* On receipt of client reregister, clear the reregister bit so reregistering won't be sent again and again */ if (ib_port_info_get_client_rereg(p_pi)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_pi_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Client reregister received on response\n"); ib_port_info_set_client_rereg(p_pi, 0); } @@ -572,8 +558,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) do anything with the response - just flag that we need a heavy sweep */ if (p_context->light_sweep == TRUE) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_pi_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Got light 
sweep response from remote port of parent node " "GUID 0x%" PRIx64 " port 0x%016" PRIx64 ", Commencing heavy sweep\n", @@ -586,8 +571,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { CL_PLOCK_RELEASE(sm->p_lock); - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_pi_rcv_process: ERR 0F06: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F06: " "No port object for port with GUID 0x%" PRIx64 "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", @@ -620,8 +604,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) most likely due to a subnet sweep in progress. */ if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_pi_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Discovered port num 0x%X with GUID 0x%" PRIx64 " for parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", @@ -639,8 +622,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) */ if (!p_physp) { if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_pi_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Initializing port number 0x%X\n", port_num); p_physp = &p_node->physp_table[port_num]; @@ -667,8 +649,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) in the subnet. */ if (p_context->update_master_sm_base_lid == TRUE) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_pi_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "update_master_sm is TRUE. 
" "Updating master_sm_base_lid to:%u\n", p_pi->master_sm_base_lid); @@ -695,8 +676,7 @@ void osm_pi_rcv_process(IN void *context, IN void *data) p_node, p_physp, p_pi); break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_pi_rcv_process: ERR 0F07: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0F07: " "Unknown node type %u with GUID 0x%" PRIx64 "\n", osm_node_get_type(p_node), cl_ntoh64(node_guid)); diff --git a/opensm/opensm/osm_prtn.c b/opensm/opensm/osm_prtn.c index d7b7152..76227c0 100644 --- a/opensm/opensm/osm_prtn.c +++ b/opensm/opensm/osm_prtn.c @@ -110,14 +110,14 @@ ib_api_status_t osm_prtn_add_port(osm_log_t * p_log, osm_subn_t * p_subn, p_port = osm_get_port_by_guid(p_subn, guid); if (!p_port) { - osm_log(p_log, OSM_LOG_VERBOSE, "osm_prtn_add_port: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "port 0x%" PRIx64 " not found\n", cl_ntoh64(guid)); return status; } p_physp = p_port->p_physp; if (!p_physp) { - osm_log(p_log, OSM_LOG_VERBOSE, "osm_prtn_add_port: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "no physical for port 0x%" PRIx64 "\n", cl_ntoh64(guid)); return status; @@ -125,7 +125,7 @@ ib_api_status_t osm_prtn_add_port(osm_log_t * p_log, osm_subn_t * p_subn, if (cl_map_remove(&p->part_guid_tbl, guid) || cl_map_remove(&p->full_guid_tbl, guid)) { - osm_log(p_log, OSM_LOG_VERBOSE, "osm_prtn_add_port: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "port 0x%" PRIx64 " already in " "partition \'%s\' (0x%04x). 
Will overwrite\n", cl_ntoh64(guid), p->name, cl_ntoh16(p->pkey)); @@ -227,8 +227,7 @@ ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, status = osm_mcmr_rcv_find_or_create_new_mgrp(p_sa, comp_mask, &mc_rec, &p_mgrp); if (!p_mgrp || status != IB_SUCCESS) - osm_log(p_log, OSM_LOG_ERROR, - "osm_prtn_add_mcgroup: " + OSM_LOG(p_log, OSM_LOG_ERROR, "Failed to create MC group with pkey 0x%04x\n", cl_ntoh16(pkey)); if (p_mgrp) @@ -296,8 +295,7 @@ osm_prtn_t *osm_prtn_make_new(osm_log_t * p_log, osm_subn_t * p_subn, p = osm_prtn_new(name, pkey); if (!p) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_prtn_make_new: Unable to create" + OSM_LOG(p_log, OSM_LOG_ERROR, "Unable to create" " partition \'%s\' (0x%04x)\n", name, cl_ntoh16(pkey)); return NULL; } @@ -305,8 +303,7 @@ osm_prtn_t *osm_prtn_make_new(osm_log_t * p_log, osm_subn_t * p_subn, p_check = (osm_prtn_t *) cl_qmap_insert(&p_subn->prtn_pkey_tbl, p->pkey, &p->map_item); if (p != p_check) { - osm_log(p_log, OSM_LOG_VERBOSE, - "osm_prtn_make_new: Duplicated partition" + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Duplicated partition" " definition: \'%s\' (0x%04x) prev name \'%s\'" ". 
Will use it\n", name, cl_ntoh16(pkey), p_check->name); @@ -373,8 +370,7 @@ ib_api_status_t osm_prtn_make_partitions(osm_log_t * const p_log, goto _err; if (is_config && osm_prtn_config_parse_file(p_log, p_subn, file_name)) { - osm_log(p_log, OSM_LOG_VERBOSE, - "osm_prtn_make_partitions: Partition configuration " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Partition configuration " "was not fully processed\n"); } diff --git a/opensm/opensm/osm_prtn_config.c b/opensm/opensm/osm_prtn_config.c index 7c4eb5c..57467d4 100644 --- a/opensm/opensm/osm_prtn_config.c +++ b/opensm/opensm/osm_prtn_config.c @@ -103,8 +103,7 @@ static int partition_create(unsigned lineno, struct part_conf *conf, return -1; if (!conf->p_subn->opt.qos && conf->sl != OSM_DEFAULT_SL) { - osm_log(conf->p_log, OSM_LOG_DEBUG, - "partition_create: Overriding SL %d" + OSM_LOG(conf->p_log, OSM_LOG_DEBUG, "Overriding SL %d" " to default SL %d on partition %s" " as QoS is not enabled.\n", conf->sl, OSM_DEFAULT_SL, name); @@ -143,20 +142,20 @@ static int partition_add_flag(unsigned lineno, struct part_conf *conf, conf->is_ipoib = 1; } else if (!strncmp(flag, "mtu", len)) { if (!val || (conf->mtu = strtoul(val, NULL, 0)) == 0) - osm_log(conf->p_log, OSM_LOG_VERBOSE, + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "flag \'mtu\' requires valid value" " - skipped\n", lineno); } else if (!strncmp(flag, "rate", len)) { if (!val || (conf->rate = strtoul(val, NULL, 0)) == 0) - osm_log(conf->p_log, OSM_LOG_VERBOSE, + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "flag \'rate\' requires valid value" " - skipped\n", lineno); } else if (!strncmp(flag, "scope", len)) { unsigned int scope; if (!val || (scope = strtoul(val, NULL, 0)) == 0 || scope > 0xF) - osm_log(conf->p_log, OSM_LOG_VERBOSE, + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "flag \'scope\' requires valid value" " - skipped\n", lineno); @@ -168,7 +167,7 @@ static int partition_add_flag(unsigned lineno, struct part_conf 
*conf, if (!val || !*val || (sl = strtoul(val, &end, 0)) > 15 || (*end && !isspace(*end))) - osm_log(conf->p_log, OSM_LOG_VERBOSE, + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "flag \'sl\' requires valid value" " - skipped\n", lineno); @@ -177,14 +176,14 @@ static int partition_add_flag(unsigned lineno, struct part_conf *conf, } else if (!strncmp(flag, "defmember", len)) { if (!val || (strncmp(val, "limited", strlen(val)) && strncmp(val, "full", strlen(val)))) - osm_log(conf->p_log, OSM_LOG_VERBOSE, + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "flag \'defmember\' requires valid value (limited or full)" " - skipped\n", lineno); else conf->full = strncmp(val, "full", strlen(val)) == 0; } else { - osm_log(conf->p_log, OSM_LOG_VERBOSE, + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "unrecognized partition flag \'%s\'" " - ignored\n", lineno, flag); @@ -208,7 +207,7 @@ static int partition_add_port(unsigned lineno, struct part_conf *conf, if (!strncmp(flag, "full", strlen(flag))) full = TRUE; else if (strncmp(flag, "limited", strlen(flag))) { - osm_log(conf->p_log, OSM_LOG_VERBOSE, + OSM_LOG(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "unrecognized port flag \'%s\'." 
" Assume \'limited\'\n", lineno, flag); @@ -323,8 +322,7 @@ static int parse_part_conf(struct part_conf *conf, char *str, int lineno) q = strchr(p, ':'); if (!q) { - osm_log(conf->p_log, OSM_LOG_ERROR, - "PARSE ERROR: line %d: " + OSM_LOG(conf->p_log, OSM_LOG_ERROR, "PARSE ERROR: line %d: " "no partition definition found\n", lineno); fprintf(stderr, "\nPARSE ERROR: line %d: " "no partition definition found\n", lineno); @@ -351,7 +349,7 @@ static int parse_part_conf(struct part_conf *conf, char *str, int lineno) *q++ = '\0'; ret = parse_name_token(p, &flag, &flval); if (!flag) { - osm_log(conf->p_log, OSM_LOG_ERROR, + OSM_LOG(conf->p_log, OSM_LOG_ERROR, "PARSE ERROR: line %d: " "bad partition flags\n", lineno); fprintf(stderr, "\nPARSE ERROR: line %d: " @@ -365,8 +363,7 @@ static int parse_part_conf(struct part_conf *conf, char *str, int lineno) if (p != str || (partition_create(lineno, conf, name, id, flag, flval) < 0)) { - osm_log(conf->p_log, OSM_LOG_ERROR, - "PARSE ERROR: line %d: " + OSM_LOG(conf->p_log, OSM_LOG_ERROR, "PARSE ERROR: line %d: " "bad partition definition\n", lineno); fprintf(stderr, "\nPARSE ERROR: line %d: " "bad partition definition\n", lineno); @@ -381,7 +378,7 @@ skip_header: *q++ = '\0'; ret = parse_name_token(p, &name, &flag); if (partition_add_port(lineno, conf, name, flag) < 0) { - osm_log(conf->p_log, OSM_LOG_ERROR, + OSM_LOG(conf->p_log, OSM_LOG_ERROR, "PARSE ERROR: line %d: " "bad PortGUID\n", lineno); fprintf(stderr, "PARSE ERROR: line %d: " @@ -405,8 +402,7 @@ int osm_prtn_config_parse_file(osm_log_t * p_log, osm_subn_t * p_subn, file = fopen(file_name, "r"); if (!file) { - osm_log(p_log, OSM_LOG_VERBOSE, - "osm_prtn_config_parse_file: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Cannot open config file \'%s\': %s\n", file_name, strerror(errno)); return -1; @@ -433,7 +429,7 @@ int osm_prtn_config_parse_file(osm_log_t * p_log, osm_subn_t * p_subn, break; if (!conf && !(conf = new_part_conf(p_log, p_subn))) { - osm_log(conf->p_log, 
OSM_LOG_ERROR, + OSM_LOG(conf->p_log, OSM_LOG_ERROR, "PARSE ERROR: line %d: " "internal: cannot create config\n", lineno); diff --git a/opensm/opensm/osm_qos.c b/opensm/opensm/osm_qos.c index 675189a..ca9a9d3 100644 --- a/opensm/opensm/osm_qos.c +++ b/opensm/opensm/osm_qos.c @@ -245,8 +245,7 @@ static ib_api_status_t qos_physp_setup(osm_log_t * p_log, osm_sm_t * sm, /* setup VLArbitration */ status = vlarb_update(sm, p, port_num, force_update, qcfg); if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "qos_physp_setup: ERR 6202 : " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6202 : " "failed to update VLArbitration tables " "for port %" PRIx64 " #%d\n", cl_ntoh64(p->port_guid), port_num); @@ -256,8 +255,7 @@ static ib_api_status_t qos_physp_setup(osm_log_t * p_log, osm_sm_t * sm, /* setup SL2VL tables */ status = sl2vl_update(sm, p_port, p, port_num, force_update, qcfg); if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "qos_physp_setup: ERR 6203 : " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 6203 : " "failed to update SL2VLMapping tables " "for port %" PRIx64 " #%d\n", cl_ntoh64(p->port_guid), port_num); diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index 11da30b..437822f 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -2281,15 +2281,13 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) if (!__qos_parser_in) { if (strcmp(p_subn->opt.qos_policy_file,OSM_DEFAULT_QOS_POLICY_FILE)) { - osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, - "osm_qos_parse_policy_file: ERR AC01: " + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC01: " "Failed opening QoS policy file %s - %s\n", p_subn->opt.qos_policy_file, strerror(errno)); res = 1; } else - osm_log(p_qos_parser_osm_log, OSM_LOG_VERBOSE, - "osm_qos_parse_policy_file: " + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_VERBOSE, "QoS policy file not found (%s)\n", p_subn->opt.qos_policy_file); @@ -2301,8 +2299,8 @@ int osm_qos_parse_policy_file(IN 
osm_subn_t * const p_subn) first_time = FALSE; __setup_simple_qos_levels(); __setup_ulp_match_rules(); - osm_log(p_qos_parser_osm_log, OSM_LOG_INFO, - "osm_qos_parse_policy_file: Loading QoS policy file (%s)\n", + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_INFO, + "Loading QoS policy file (%s)\n", p_subn->opt.qos_policy_file); } else @@ -2327,8 +2325,7 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) if (res != 0) { - osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, - "osm_qos_parse_policy_file: ERR AC03: " + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC03: " "Failed parsing QoS policy file (%s)\n", p_subn->opt.qos_policy_file); osm_qos_policy_destroy(p_subn->p_qos_policy); @@ -2342,12 +2339,10 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) if (osm_qos_policy_validate(p_subn->p_qos_policy,p_qos_parser_osm_log)) { - osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, - "osm_qos_parse_policy_file: ERR AC04: " + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC04: " "Error(s) in QoS policy file (%s)\n", p_subn->opt.qos_policy_file); - fprintf(stderr, - "Error(s) in QoS policy file (%s)\n", + fprintf(stderr, "Error(s) in QoS policy file (%s)\n", p_subn->opt.qos_policy_file); osm_qos_policy_destroy(p_subn->p_qos_policy); p_subn->p_qos_policy = NULL; @@ -2384,12 +2379,10 @@ static void __qos_parser_error(const char *format, ...) vsnprintf(s, 256, format, pvar); va_end(pvar); - osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, - "__qos_parser_error: ERR AC05: " + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC05: " "Syntax error (line %d:%d): %s", line_num, column_num, s); - fprintf(stderr, - "Error in QoS Policy File (line %d:%d): %s.\n", + fprintf(stderr, "Error in QoS Policy File (line %d:%d): %s.\n", line_num, column_num, s); OSM_LOG_EXIT(p_qos_parser_osm_log); } @@ -3055,9 +3048,8 @@ static int __validate_pkeys( uint64_t ** range_arr, * And even if it doesn't, don't exit - just print * error message and continue. 
*/ - osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, - "__validate_pkeys: ERR AC02: pkey 0x%04X - " - "partition doesn't exist", + OSM_LOG(p_qos_parser_osm_log, OSM_LOG_ERROR, "ERR AC02: " + "pkey 0x%04X - partition doesn't exist", cl_ntoh16(pkey)); } } diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c index 788e4d8..aef1856 100644 --- a/opensm/opensm/osm_qos_policy.c +++ b/opensm/opensm/osm_qos_policy.c @@ -791,8 +791,7 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, p_qos_policy->p_default_qos_level = &__default_simple_qos_level; } else { - osm_log(p_log, OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC10: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC10: " "Default qos-level (%s) not defined.\n", OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); res = 1; @@ -821,8 +820,7 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, p_qos_match_rule->qos_level_name); if (!p_qos_match_rule->p_qos_level) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC11: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC11: " "qos-match-rule num %u: qos-level '%s' not found\n", i, p_qos_match_rule->qos_level_name); res = 1; @@ -842,9 +840,7 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, p_port_group = __qos_policy_get_port_group_by_name(p_qos_policy, str); if (!p_port_group) { - osm_log(p_log, - OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC12: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC12: " "qos-match-rule num %u: source port-group '%s' not found\n", i, str); res = 1; @@ -873,9 +869,7 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, p_port_group = __qos_policy_get_port_group_by_name(p_qos_policy,str); if (!p_port_group) { - osm_log(p_log, - OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC13: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC13: " "qos-match-rule num %u: destination port-group '%s' not found\n", i, str); res = 1; @@ -909,9 +903,7 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, if (p_prtn 
== (osm_prtn_t *)cl_qmap_end( &p_qos_policy->p_subn->prtn_pkey_tbl)) { /* partition for this pkey not found */ - osm_log(p_log, - OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC14: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC14: " "pkey 0x%04X in match rule - " "partition doesn't exist\n", cl_ntoh16(pkey)); @@ -921,9 +913,7 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, if (p_qos_match_rule->p_qos_level->sl_set && p_prtn->sl != p_qos_match_rule->p_qos_level->sl) { /* overriding partition's SL */ - osm_log(p_log, - OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC15: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AC15: " "pkey 0x%04X in match rule - " "overriding partition SL (%u) " "with QoS Level SL (%u)\n", @@ -976,15 +966,13 @@ static osm_qos_level_t * __qos_policy_get_qos_level_by_params( else p_qos_level = p_qos_policy->p_default_qos_level; - osm_log(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_DEBUG, - "__qos_policy_get_qos_level_by_params: " + OSM_LOG(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_DEBUG, "PathRecord request:" "Src port 0x%016" PRIx64 ", " "Dst port 0x%016" PRIx64 "\n", cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), cl_ntoh64(osm_physp_get_port_guid(p_dest_physp))); - osm_log(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_DEBUG, - "__qos_policy_get_qos_level_by_params: " + OSM_LOG(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_DEBUG, "Applying QoS Level %s (%s)\n", p_qos_level->name, (p_qos_level->use) ? 
p_qos_level->use : "no description"); diff --git a/opensm/opensm/osm_req.c b/opensm/opensm/osm_req.c index 44eeff0..42a638a 100644 --- a/opensm/opensm/osm_req.c +++ b/opensm/opensm/osm_req.c @@ -93,8 +93,8 @@ osm_req_get(IN osm_sm_t * sm, p_path->h_bind, MAD_BLOCK_SIZE, NULL); if (p_madw == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_req_get: ERR 1101: " "Unable to acquire MAD\n"); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 1101: Unable to acquire MAD\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } @@ -102,8 +102,7 @@ osm_req_get(IN osm_sm_t * sm, tid = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id)); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_req_get: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Getting %s (0x%X), modifier 0x%X, TID 0x%" PRIx64 "\n", ib_get_sm_attr_str(attr_id), cl_ntoh16(attr_id), @@ -175,8 +174,8 @@ osm_req_set(IN osm_sm_t * sm, p_path->h_bind, MAD_BLOCK_SIZE, NULL); if (p_madw == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_req_set: ERR 1102: " "Unable to acquire MAD\n"); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 1102: Unable to acquire MAD\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } @@ -184,8 +183,7 @@ osm_req_set(IN osm_sm_t * sm, tid = cl_hton64((uint64_t) cl_atomic_inc(&sm->sm_trans_id)); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_req_set: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Setting %s (0x%X), modifier 0x%X, TID 0x%" PRIx64 "\n", ib_get_sm_attr_str(attr_id), cl_ntoh16(attr_id), diff --git a/opensm/opensm/osm_resp.c b/opensm/opensm/osm_resp.c index 86a62ff..09239de 100644 --- a/opensm/opensm/osm_resp.c +++ b/opensm/opensm/osm_resp.c @@ -85,8 +85,7 @@ osm_resp_make_resp_smp(IN osm_sm_t * sm, p_dest_smp->method = IB_MAD_METHOD_TRAP_REPRESS; p_dest_smp->status = 0; } else { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_resp_make_resp_smp: ERR 1302: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 1302: " "src smp method 
unsupported 0x%X\n", p_src_smp->method); goto Exit; } @@ -129,8 +128,8 @@ osm_resp_send(IN osm_sm_t * sm, MAD_BLOCK_SIZE, NULL); if (p_madw == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_resp_send: ERR 1301: " "Unable to acquire MAD\n"); + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 1301: Unable to acquire MAD\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } @@ -151,9 +150,7 @@ osm_resp_send(IN osm_sm_t * sm, p_madw->fail_msg = CL_DISP_MSGID_NONE; if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_resp_send: " - "Responding to %s (0x%X)" + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to %s (0x%X)" "\n\t\t\t\tattribute modifier 0x%X, TID 0x%" PRIx64 "\n", ib_get_sm_attr_str(p_smp->attr_id), cl_ntoh16(p_smp->attr_id), cl_ntoh32(p_smp->attr_mod), diff --git a/opensm/opensm/osm_sa.c b/opensm/opensm/osm_sa.c index 86be992..9dbab9d 100644 --- a/opensm/opensm/osm_sa.c +++ b/opensm/opensm/osm_sa.c @@ -306,8 +306,7 @@ osm_sa_bind(IN osm_sa_t * const p_sa, IN const ib_net64_t port_guid) status = osm_sa_mad_ctrl_bind(&p_sa->mad_ctrl, port_guid); if (status != IB_SUCCESS) { - osm_log(p_sa->p_log, OSM_LOG_ERROR, - "osm_sa_bind: ERR 4C03: " + OSM_LOG(p_sa->p_log, OSM_LOG_ERROR, "ERR 4C03: " "SA MAD Controller bind failed (%s)\n", ib_get_err_str(status)); goto Exit; @@ -357,8 +356,7 @@ opensm_dump_to_file(osm_opensm_t * p_osm, const char *file_name, file = fopen(path, "w"); if (!file) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "opensm_dump_to_file: ERR 4C01: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR 4C01: " "cannot open file \'%s\': %s\n", file_name, strerror(errno)); return -1; @@ -530,15 +528,14 @@ static void sa_dump_all_sa(osm_opensm_t * p_osm, FILE * file) dump_context.p_osm = p_osm; dump_context.file = file; - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "sa_dump_all_sa: Dump multicast:\n"); + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump multicast:\n"); cl_plock_acquire(&p_osm->lock); 
cl_qmap_apply_func(&p_osm->subn.mgrp_mlid_tbl, sa_dump_one_mgrp, &dump_context); - osm_log(&p_osm->log, OSM_LOG_DEBUG, "sa_dump_all_sa: Dump inform:\n"); + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump inform:\n"); cl_qlist_apply_func(&p_osm->subn.sa_infr_list, sa_dump_one_inform, &dump_context); - osm_log(&p_osm->log, OSM_LOG_DEBUG, "sa_dump_all_sa: Dump services:\n"); + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump services:\n"); cl_qlist_apply_func(&p_osm->subn.sa_sr_list, sa_dump_one_service, &dump_context); cl_plock_release(&p_osm->lock); @@ -567,15 +564,13 @@ static osm_mgrp_t *load_mcgroup(osm_opensm_t * p_osm, ib_net16_t mlid, p_mgrp = (osm_mgrp_t *) p_next; if (!memcmp(&p_mgrp->mcmember_rec.mgid, &p_mcm_rec->mgid, sizeof(ib_gid_t))) { - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "load_mcgroup: mgrp %04x is already here.", - cl_ntoh16(mlid)); + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, + "mgrp %04x is already here.", cl_ntoh16(mlid)); goto _out; } - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "load_mcgroup: mlid %04x is already used by another " - "MC group. Will request clients reregistration.\n", - cl_ntoh16(mlid)); + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "mlid %04x is already used by another MC group. 
Will " + "request clients reregistration.\n", cl_ntoh16(mlid)); p_mgrp = NULL; goto _out; } @@ -586,10 +581,9 @@ static osm_mgrp_t *load_mcgroup(osm_opensm_t * p_osm, ib_net16_t mlid, comp_mask, p_mcm_rec, &p_mgrp) != IB_SUCCESS || !p_mgrp || p_mgrp->mlid != mlid) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "load_mcgroup: cannot create MC group with mlid " - "0x%04x and mgid 0x%016" PRIx64 ":0x%016" PRIx64 "\n", - cl_ntoh16(mlid), + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "cannot create MC group with mlid 0x%04x and mgid " + "0x%016" PRIx64 ":0x%016" PRIx64 "\n", cl_ntoh16(mlid), cl_ntoh64(p_mcm_rec->mgid.unicast.prefix), cl_ntoh64(p_mcm_rec->mgid.unicast.interface_id)); p_mgrp = NULL; @@ -611,14 +605,14 @@ static int load_svcr(osm_opensm_t * p_osm, ib_service_record_t * sr, cl_plock_excl_acquire(&p_osm->lock); if (osm_svcr_get_by_rid(&p_osm->subn, &p_osm->log, sr)) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "load_svcr: ServiceRecord already exists\n"); + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "ServiceRecord already exists\n"); goto _out; } if (!(p_svcr = osm_svcr_new(sr))) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "load_svcr: cannot allocate new service struct\n"); + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "cannot allocate new service struct\n"); ret = -1; goto _out; } @@ -626,8 +620,7 @@ static int load_svcr(osm_opensm_t * p_osm, ib_service_record_t * sr, p_svcr->modified_time = modified_time; p_svcr->lease_period = lease_period; - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "load_svcr: adding ServiceRecord...\n"); + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "adding ServiceRecord...\n"); osm_svcr_insert_to_db(&p_osm->subn, &p_osm->log, p_svcr); @@ -655,20 +648,19 @@ static int load_infr(osm_opensm_t * p_osm, ib_inform_info_record_t * iir, cl_plock_excl_acquire(&p_osm->lock); if (osm_infr_get_by_rec(&p_osm->subn, &p_osm->log, &infr)) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "load_infr: InformInfo Record already exists\n"); + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "InformInfo 
Record already exists\n"); goto _out; } if (!(p_infr = osm_infr_new(&infr))) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "load_infr: cannot allocate new infr struct\n"); + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, + "cannot allocate new infr struct\n"); ret = -1; goto _out; } - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "load_infr: adding InformInfo Record...\n"); + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "adding InformInfo Record...\n"); osm_infr_insert_to_db(&p_osm->subn, &p_osm->log, p_infr); @@ -723,7 +715,7 @@ static int unpack_string64(char *p, uint8_t * buf) #define PARSE_AHEAD(p, x, name, val_ptr) { int _ret; \ p = strstr(p, name); \ if (!p) { \ - osm_log(&p_osm->log, OSM_LOG_ERROR, \ + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, \ "PARSE ERROR: %s:%u: cannot find \"%s\" string\n", \ file_name, lineno, (name)); \ ret = -2; \ @@ -732,7 +724,7 @@ static int unpack_string64(char *p, uint8_t * buf) p += strlen(name); \ _ret = unpack_##x(p, (val_ptr)); \ if (_ret < 0) { \ - osm_log(&p_osm->log, OSM_LOG_ERROR, \ + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, \ "PARSE ERROR: %s:%u: cannot parse "#x" value " \ "after \"%s\"\n", file_name, lineno, (name)); \ ret = _ret; \ @@ -753,18 +745,16 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm) file_name = p_osm->subn.opt.sa_db_file; if (!file_name) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "osm_sa_db_file_load: sa db file name is not " - "specifed. Skip restore\n"); + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "sa db file name is not specifed. Skip restore\n"); return 0; } file = fopen(file_name, "r"); if (!file) { - osm_log(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, - "osm_sa_db_file_load: ERR 4C02: " - "cannot open sa db file \'%s\'. " - "Skip restoring\n", file_name); + OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 4C02: " + "cannot open sa db file \'%s\'. 
Skip restoring\n", + file_name); return -1; } diff --git a/opensm/opensm/osm_sa_class_port_info.c b/opensm/opensm/osm_sa_class_port_info.c index 814420a..744c97d 100644 --- a/opensm/opensm/osm_sa_class_port_info.c +++ b/opensm/opensm/osm_sa_class_port_info.c @@ -95,8 +95,7 @@ __osm_cpi_rcv_respond(IN osm_sa_t * sa, p_madw->h_bind, MAD_BLOCK_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_cpi_rcv_respond: ERR 1408: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1408: " "Unable to allocate MAD\n"); goto Exit; } @@ -180,8 +179,7 @@ __osm_cpi_rcv_respond(IN osm_sa_t * sa, status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_cpi_rcv_respond: ERR 1409: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1409: " "Unable to send MAD (%s)\n", ib_get_err_str(status)); /* osm_mad_pool_put( sa->p_mad_pool, p_resp_madw ); */ goto Exit; @@ -208,8 +206,7 @@ void osm_cpi_rcv_process(IN void *context, IN void *data) /* we only support GET */ if (p_sa_mad->method != IB_MAD_METHOD_GET) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_cpi_rcv_process: ERR 1403: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1403: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); osm_sa_send_error(sa, p_madw, diff --git a/opensm/opensm/osm_sa_guidinfo_record.c b/opensm/opensm/osm_sa_guidinfo_record.c index 8c10336..c6baadd 100644 --- a/opensm/opensm/osm_sa_guidinfo_record.c +++ b/opensm/opensm/osm_sa_guidinfo_record.c @@ -92,16 +92,14 @@ __osm_gir_rcv_new_gir(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_gir_rcv_new_gir: ERR 5102: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5102: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_gir_rcv_new_gir: " + 
OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New GUIDInfoRecord: lid 0x%X, block num %d\n", cl_ntoh16(match_lid), block_num); } @@ -145,8 +143,7 @@ __osm_sa_gir_create_gir(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_gir_create_gir: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Looking for GUIDRecord with LID: 0x%X GUID:0x%016" PRIx64 "\n", cl_ntoh16(match_lid), cl_ntoh64(match_port_guid) @@ -209,8 +206,7 @@ __osm_sa_gir_create_gir(IN osm_sa_t * sa, We validate that the lid belongs to this node. */ if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_gir_create_gir: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Comparing LID: 0x%X <= 0x%X <= 0x%X\n", base_lid_ho, match_lid_ho, max_lid_ho); } @@ -346,8 +342,7 @@ void osm_gir_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_rcvd_mad->method != IB_MAD_METHOD_GET) && (p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_gir_rcv_process: ERR 5105: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5105: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_rcvd_mad->method)); osm_sa_send_error(sa, p_madw, @@ -361,8 +356,7 @@ void osm_gir_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_gir_rcv_process: ERR 5104: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5104: " "Cannot find requester physical port\n"); goto Exit; } @@ -399,8 +393,7 @@ void osm_gir_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_gir_rcv_process: ERR 5103: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5103: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -426,16 +419,14 @@ void osm_gir_rcv_process(IN void *ctx, IN void *data) 
(MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_guidinfo_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_gir_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_gir_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -452,8 +443,7 @@ void osm_gir_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_gir_rcv_process: ERR 5106: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5106: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -515,8 +505,7 @@ void osm_gir_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_gir_rcv_process: ERR 5107: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5107: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_informinfo.c b/opensm/opensm/osm_sa_informinfo.c index f81b6c1..286a348 100644 --- a/opensm/opensm/osm_sa_informinfo.c +++ b/opensm/opensm/osm_sa_informinfo.c @@ -119,8 +119,7 @@ __validate_ports_access_rights(IN osm_sa_t * sa, p_port = osm_get_port_by_guid(sa->p_subn, portguid); if (p_port == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__validate_ports_access_rights: ERR 4301: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4301: " "Invalid port guid: 0x%016" PRIx64 "\n", cl_ntoh64(portguid)); valid = FALSE; @@ -134,8 +133,7 @@ __validate_ports_access_rights(IN osm_sa_t * sa, according to the current partitioning. 
*/ if (!osm_physp_share_pkey (sa->p_log, p_physp, p_requester_physp)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_ports_access_rights: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "port and requester don't share pkey\n"); valid = FALSE; goto Exit; @@ -166,8 +164,7 @@ __validate_ports_access_rights(IN osm_sa_t * sa, p_port = cl_ptr_vector_get(p_tbl, lid); } else { /* lid requested is out of range */ - osm_log(sa->p_log, OSM_LOG_ERROR, - "__validate_ports_access_rights: ERR 4302: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4302: " "Given LID (0x%X) is out of range:0x%X\n", lid, cl_ptr_vector_get_size(p_tbl)); valid = FALSE; @@ -181,8 +178,7 @@ __validate_ports_access_rights(IN osm_sa_t * sa, each other according to the current partitioning. */ if (!osm_physp_share_pkey (sa->p_log, p_physp, p_requester_physp)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_ports_access_rights: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "port and requester don't share pkey\n"); valid = FALSE; goto Exit; @@ -206,8 +202,8 @@ __validate_infr(IN osm_sa_t * sa, IN osm_infr_t * p_infr_rec) valid = __validate_ports_access_rights(sa, p_infr_rec); if (!valid) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_infr: " "Invalid Access for InformInfo\n"); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Invalid Access for InformInfo\n"); valid = FALSE; } @@ -233,8 +229,7 @@ __osm_infr_rcv_respond(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_infr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Generating successful InformInfo response\n"); } @@ -245,8 +240,7 @@ __osm_infr_rcv_respond(IN osm_sa_t * sa, p_madw->h_bind, MAD_BLOCK_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_infr_rcv_respond: ERR 4303: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4303: " "Unable to allocate MAD\n"); goto Exit; } @@ -267,8 +261,7 @@ __osm_infr_rcv_respond(IN osm_sa_t * sa, sa->p_subn); if 
(status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_infr_rcv_respond: ERR 4304: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4304: " "Unable to send MAD (%s)\n", ib_get_err_str(status)); /* osm_mad_pool_put( sa->p_mad_pool, p_resp_madw ); */ goto Exit; @@ -318,8 +311,7 @@ __osm_sa_inform_info_rec_by_comp_mask(IN osm_sa_t * sa, portguid = p_infr->inform_record.subscriber_gid.unicast.interface_id; p_subscriber_port = osm_get_port_by_guid(sa->p_subn, portguid); if (p_subscriber_port == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sa_inform_info_rec_by_comp_mask: ERR 430D: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430D: " "Invalid subscriber port guid: 0x%016" PRIx64 "\n", cl_ntoh64(portguid)); goto Exit; @@ -331,16 +323,14 @@ __osm_sa_inform_info_rec_by_comp_mask(IN osm_sa_t * sa, according to the current partitioning. */ if (!osm_physp_share_pkey (sa->p_log, p_req_physp, p_subscriber_physp)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_inform_info_rec_by_comp_mask: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "requester and subscriber ports don't share pkey\n"); goto Exit; } p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sa_inform_info_rec_by_comp_mask: ERR 430E: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430E: " "rec_item alloc failed\n"); goto Exit; } @@ -401,8 +391,7 @@ osm_infr_rcv_process_get_method(IN osm_sa_t * sa, osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_get_method: ERR 4309: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4309: " "Cannot find requester physical port\n"); goto Exit; } @@ -421,8 +410,7 @@ osm_infr_rcv_process_get_method(IN osm_sa_t * sa, context.sa = sa; context.p_req_physp = p_req_physp; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_infr_rcv_process_get_method: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Query Subscriber GID:0x%016" PRIx64 " : 0x%016" PRIx64 "(%02X) Enum:0x%X(%02X)\n", 
cl_ntoh64(p_rcvd_rec->subscriber_gid.unicast.prefix), @@ -451,8 +439,7 @@ osm_infr_rcv_process_get_method(IN osm_sa_t * sa, goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_get_method: ERR 430A: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430A: " "More than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -479,17 +466,14 @@ osm_infr_rcv_process_get_method(IN osm_sa_t * sa, (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_inform_info_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_infr_rcv_process_get_method: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_infr_rcv_process_get_method: " - "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); /* * Get a MAD to reply. Address of Mad is in the received mad_wrapper @@ -501,8 +485,7 @@ osm_infr_rcv_process_get_method(IN osm_sa_t * sa, IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_get_method: ERR 430B: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430B: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -571,8 +554,7 @@ osm_infr_rcv_process_get_method(IN osm_sa_t * sa, status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_get_method: ERR 430C: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 430C: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; @@ -633,8 +615,7 @@ osm_infr_rcv_process_set_method(IN osm_sa_t * sa, if (res != IB_SUCCESS) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_set_method: ERR 4308 " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4308 " 
"Subscribe Request from unknown LID: 0x%04X\n", cl_ntoh16(p_madw->mad_addr.dest_lid) ); @@ -650,8 +631,7 @@ osm_infr_rcv_process_set_method(IN osm_sa_t * sa, if (p_recvd_inform_info->subscribe > 1) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_set_method: ERR 4308 " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4308 " "Invalid subscribe: %d\n", p_recvd_inform_info->subscribe); osm_sa_send_error(sa, p_madw, @@ -672,21 +652,18 @@ osm_infr_rcv_process_set_method(IN osm_sa_t * sa, inform_info_rec.report_addr.addr_type. gsi.remote_qp); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_infr_rcv_process_set_method: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Subscribe Request with QPN: 0x%06X\n", cl_ntoh32(inform_info_rec.report_addr.addr_type.gsi. - remote_qp) - ); + remote_qp)); } else { ib_inform_info_get_qpn_resp_time(p_recvd_inform_info->g_or_v. generic.qpn_resp_time_val, &qpn, &resp_time_val); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_infr_rcv_process_set_method: " - "UnSubscribe Request with QPN: 0x%06X\n", cl_ntoh32(qpn) - ); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "UnSubscribe Request with QPN: 0x%06X\n", + cl_ntoh32(qpn)); } /* If record exists with matching InformInfo */ @@ -699,8 +676,7 @@ osm_infr_rcv_process_set_method(IN osm_sa_t * sa, if (__validate_infr(sa, &inform_info_rec) != TRUE) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_set_method: ERR 4305: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4305: " "Failed to validate a new inform object\n"); /* o13-13.1.1: we need to set the subscribe bit to 0 */ @@ -717,8 +693,7 @@ osm_infr_rcv_process_set_method(IN osm_sa_t * sa, if (p_infr == NULL) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_set_method: ERR 4306: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4306: " "Failed to create a new inform object\n"); /* o13-13.1.1: we need to set the subscribe bit to 0 */ @@ -741,8 +716,7 @@ 
osm_infr_rcv_process_set_method(IN osm_sa_t * sa, cl_plock_release(sa->p_lock); /* No Such Item - So Error */ - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_infr_rcv_process_set_method: ERR 4307: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4307: " "Failed to UnSubscribe to non existing inform object\n"); /* o13-13.1.1: we need to set the subscribe bit to 0 */ @@ -783,8 +757,7 @@ void osm_infr_rcv_process(IN void *context, IN void *data) CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_INFORM_INFO); if (p_sa_mad->method != IB_MAD_METHOD_SET) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_infr_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); osm_sa_send_error(sa, p_madw, @@ -816,9 +789,7 @@ void osm_infir_rcv_process(IN void *context, IN void *data) if ((p_sa_mad->method != IB_MAD_METHOD_GET) && (p_sa_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_infir_rcv_process: " - "Unsupported Method (%s)\n", + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); diff --git a/opensm/opensm/osm_sa_lft_record.c b/opensm/opensm/osm_sa_lft_record.c index 042fc68..10c0e71 100644 --- a/opensm/opensm/osm_sa_lft_record.c +++ b/opensm/opensm/osm_sa_lft_record.c @@ -87,16 +87,14 @@ __osm_lftr_rcv_new_lftr(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_lftr_rcv_new_lftr: ERR 4402: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4402: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lftr_rcv_new_lftr: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New LinearForwardingTable: sw 0x%016" PRIx64 "\n\t\t\t\tblock 0x%02X lid 0x%02X\n", cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), @@ -130,8 
+128,7 @@ static osm_port_t *__osm_lftr_get_port_by_guid(IN osm_sa_t * sa, p_port = osm_get_port_by_guid(sa->p_subn, port_guid); if (!p_port) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lftr_get_port_by_guid ERR 4404: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "ERR 4404: " "Invalid port GUID 0x%016" PRIx64 "\n", port_guid); p_port = NULL; } @@ -163,8 +160,7 @@ __osm_lftr_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, __osm_lftr_get_port_by_guid(sa, p_sw->p_node->node_info.port_guid); if (!p_port) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_lftr_rcv_by_comp_mask: ERR 4405: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4405: " "Failed to find Port by Node Guid:0x%016" PRIx64 "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid) ); @@ -175,8 +171,7 @@ __osm_lftr_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, the same partition. */ p_physp = p_port->p_physp; if (!p_physp) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_lftr_rcv_by_comp_mask: ERR 4406: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4406: " "Failed to find default physical Port by Node Guid:0x%016" PRIx64 "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid) @@ -191,8 +186,7 @@ __osm_lftr_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, /* compare the lids - if required */ if (comp_mask & IB_LFTR_COMPMASK_LID) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lftr_rcv_by_comp_mask: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Comparing lid:0x%02X to port lid range: 0x%02X .. 
0x%02X\n", cl_ntoh16(p_rcvd_rec->lid), min_lid_ho, max_lid_ho); /* ok we are ready for range check */ @@ -253,8 +247,7 @@ void osm_lftr_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_rcvd_mad->method != IB_MAD_METHOD_GET) && (p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_lftr_rcv_process: ERR 4408: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4408: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_rcvd_mad->method)); osm_sa_send_error(sa, p_madw, @@ -268,8 +261,7 @@ void osm_lftr_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_lftr_rcv_process: ERR 4407: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4407: " "Cannot find requester physical port\n"); goto Exit; } @@ -303,8 +295,7 @@ void osm_lftr_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_lftr_rcv_process: ERR 4409: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4409: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -330,16 +321,14 @@ void osm_lftr_rcv_process(IN void *ctx, IN void *data) trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_lft_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_lftr_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_lftr_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -356,8 +345,7 @@ void osm_lftr_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if 
(!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_lftr_rcv_process: ERR 4410: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4410: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -421,8 +409,7 @@ void osm_lftr_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_lftr_rcv_process: ERR 4411: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4411: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_link_record.c b/opensm/opensm/osm_sa_link_record.c index 2a9a3db..1f88d0e 100644 --- a/opensm/opensm/osm_sa_link_record.c +++ b/opensm/opensm/osm_sa_link_record.c @@ -78,8 +78,7 @@ __osm_lr_rcv_build_physp_link(IN osm_sa_t * sa, p_lr_item = malloc(sizeof(*p_lr_item)); if (p_lr_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_lr_rcv_build_physp_link: ERR 1801: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1801: " "Unable to acquire link record\n" "\t\t\t\tFrom port 0x%u\n" "\t\t\t\tTo port 0x%u\n" @@ -159,20 +158,17 @@ __osm_lr_rcv_get_physp_link(IN osm_sa_t * sa, /* Check that the p_src_physp, p_dest_physp and p_req_physp all share a pkey (doesn't have to be the same p_key). 
*/ if (!osm_physp_share_pkey(sa->p_log, p_src_physp, p_dest_physp)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lr_rcv_get_physp_link: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Source and Dest PhysPorts do not share PKey\n"); goto Exit; } if (!osm_physp_share_pkey(sa->p_log, p_src_physp, p_req_physp)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lr_rcv_get_physp_link: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Source and Requester PhysPorts do not share PKey\n"); goto Exit; } if (!osm_physp_share_pkey(sa->p_log, p_req_physp, p_dest_physp)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lr_rcv_get_physp_link: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Requester and Dest PhysPorts do not share PKey\n"); goto Exit; } @@ -203,9 +199,7 @@ __osm_lr_rcv_get_physp_link(IN osm_sa_t * sa, goto Exit; if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lr_rcv_get_physp_link: " - "Acquiring link record\n" + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Acquiring link record\n" "\t\t\t\tsrc port 0x%" PRIx64 " (port 0x%X)" ", dest port 0x%" PRIx64 " (port 0x%X)\n", cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), @@ -414,8 +408,7 @@ __osm_lr_rcv_get_end_points(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. */ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_lr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No source port with LID = 0x%X\n", cl_ntoh16(p_lr->from_lid)); @@ -434,8 +427,7 @@ __osm_lr_rcv_get_end_points(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. */ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_lr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No dest port with LID = 0x%X\n", cl_ntoh16(p_lr->to_lid)); @@ -476,8 +468,7 @@ __osm_lr_rcv_respond(IN osm_sa_t * sa, * If we do a SubnAdmGet and got more than one record it is an error ! 
*/ if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec > 1)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_lr_rcv_respond: ERR 1806: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1806: " "Got more than one record for SubnAdmGet (%zu)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -497,8 +488,7 @@ __osm_lr_rcv_respond(IN osm_sa_t * sa, trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_link_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_lr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; @@ -506,8 +496,7 @@ __osm_lr_rcv_respond(IN osm_sa_t * sa, #endif if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_lr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Generating response with %zu records", num_rec); } @@ -519,8 +508,7 @@ __osm_lr_rcv_respond(IN osm_sa_t * sa, num_rec * sizeof(ib_link_record_t) + IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_lr_rcv_respond: ERR 1802: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1802: " "Unable to allocate MAD\n"); /* Release the quick pool items */ p_lr_item = (osm_lr_item_t *) cl_qlist_remove_head(p_list); @@ -588,8 +576,7 @@ __osm_lr_rcv_respond(IN osm_sa_t * sa, status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_lr_rcv_respond: ERR 1803: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1803: " "Unable to send MAD (%s)\n", ib_get_err_str(status)); /* osm_mad_pool_put( sa->p_mad_pool, p_resp_madw ); */ goto Exit; @@ -625,8 +612,7 @@ void osm_lr_rcv_process(IN void *context, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_sa_mad->method != IB_MAD_METHOD_GET) && (p_sa_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, 
OSM_LOG_ERROR, - "osm_lr_rcv_process: ERR 1804: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1804: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); osm_sa_send_error(sa, p_madw, @@ -640,8 +626,7 @@ void osm_lr_rcv_process(IN void *context, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_lr_rcv_process: ERR 1805: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1805: " "Cannot find requester physical port\n"); goto Exit; } diff --git a/opensm/opensm/osm_sa_mad_ctrl.c b/opensm/opensm/osm_sa_mad_ctrl.c index 6182470..fc475f7 100644 --- a/opensm/opensm/osm_sa_mad_ctrl.c +++ b/opensm/opensm/osm_sa_mad_ctrl.c @@ -123,8 +123,7 @@ __osm_sa_mad_ctrl_process(IN osm_sa_mad_ctrl_t * const p_ctrl, (p_ctrl->p_subn->opt.max_msg_fifo_timeout) && (last_dispatched_msg_queue_time_msec > p_ctrl->p_subn->opt.max_msg_fifo_timeout)) { - osm_log(p_ctrl->p_log, OSM_LOG_INFO, - "__osm_sa_mad_ctrl_process: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_INFO, /* "Responding BUSY status since the dispatcher is already" */ "Dropping MAD since the dispatcher is already" " overloaded with %u messages and queue time of:" @@ -222,8 +221,7 @@ __osm_sa_mad_ctrl_process(IN osm_sa_mad_ctrl_t * const p_ctrl, #endif default: - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sa_mad_ctrl_process: ERR 1A01: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A01: " "Unsupported attribute = 0x%X\n", cl_ntoh16(p_sa_mad->attr_id)); osm_dump_sa_mad(p_ctrl->p_log, p_sa_mad, OSM_LOG_ERROR); @@ -235,8 +233,7 @@ __osm_sa_mad_ctrl_process(IN osm_sa_mad_ctrl_t * const p_ctrl, processing by the appropriate controller. 
*/ - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sa_mad_ctrl_process: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(msg_id)); @@ -247,8 +244,7 @@ __osm_sa_mad_ctrl_process(IN osm_sa_mad_ctrl_t * const p_ctrl, p_ctrl); if (status != CL_SUCCESS) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sa_mad_ctrl_process: ERR 1A02: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A02: " "Dispatcher post message failed (%s) for attribute = 0x%X\n", CL_STATUS_MSG(status), cl_ntoh16(p_sa_mad->attr_id)); @@ -306,8 +302,7 @@ __osm_sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd); if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) { - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sa_mad_ctrl_rcv_callback: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "%u SA MADs received\n", p_ctrl->p_stats->sa_mads_rcvd); } @@ -321,16 +316,14 @@ __osm_sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, */ if (p_ctrl->p_subn->sm_state != IB_SMINFO_STATE_MASTER) { cl_atomic_inc(&p_ctrl->p_stats->sa_mads_ignored); - osm_log(p_ctrl->p_log, OSM_LOG_VERBOSE, - "__osm_sa_mad_ctrl_rcv_callback: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Received SA MAD while SM not MASTER. MAD ignored\n"); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; } if (p_ctrl->p_subn->first_time_master_sweep == TRUE) { cl_atomic_inc(&p_ctrl->p_stats->sa_mads_ignored); - osm_log(p_ctrl->p_log, OSM_LOG_VERBOSE, - "__osm_sa_mad_ctrl_rcv_callback: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Received SA MAD while SM in first sweep. 
MAD ignored\n"); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; @@ -348,8 +341,7 @@ __osm_sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, */ if ((p_sa_mad->sm_key != 0) && (p_sa_mad->sm_key != p_ctrl->p_subn->opt.sm_key)) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sa_mad_ctrl_rcv_callback: ERR 1A04: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A04: " "Non-Zero SA MAD SM_Key: 0x%" PRIx64 " != SM_Key: 0x%" PRIx64 "; MAD ignored\n", cl_ntoh64(p_sa_mad->sm_key), cl_ntoh64(p_ctrl->p_subn->opt.sm_key) @@ -362,8 +354,7 @@ __osm_sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, case IB_MAD_METHOD_REPORT_RESP: /* we do not really do anything with report represses - just retire the transaction */ - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sa_mad_ctrl_rcv_callback: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Received Report Repress. Retiring the transaction\n"); if (p_req_madw) @@ -384,8 +375,7 @@ __osm_sa_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, default: cl_atomic_inc(&p_ctrl->p_stats->sa_mads_rcvd_unknown); - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sa_mad_ctrl_rcv_callback: ERR 1A05: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A05: " "Unsupported method = 0x%X\n", p_sa_mad->method); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); goto Exit; @@ -424,8 +414,7 @@ __osm_sa_mad_ctrl_send_err_callback(IN void *bind_context, OSM_LOG_ENTER(p_ctrl->p_log); - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sa_mad_ctrl_send_err_callback: ERR 1A06: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A06: " "MAD transaction completed in error\n"); /* @@ -447,8 +436,7 @@ __osm_sa_mad_ctrl_send_err_callback(IN void *bind_context, if (osm_madw_get_err_msg(p_madw) != CL_DISP_MSGID_NONE) { if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) { - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sa_mad_ctrl_send_err_callback: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(osm_madw_get_err_msg (p_madw))); @@ 
-460,8 +448,7 @@ __osm_sa_mad_ctrl_send_err_callback(IN void *bind_context, __osm_sa_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sa_mad_ctrl_send_err_callback: ERR 1A07: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A07: " "Dispatcher post message failed (%s)\n", CL_STATUS_MSG(status)); } @@ -532,8 +519,7 @@ osm_sa_mad_ctrl_init(IN osm_sa_mad_ctrl_t * const p_ctrl, CL_DISP_MSGID_NONE, NULL, p_ctrl); if (p_ctrl->h_disp == CL_DISP_INVALID_HANDLE) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_sa_mad_ctrl_init: ERR 1A08: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 1A08: " "Dispatcher registration failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; @@ -556,8 +542,7 @@ osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * const p_ctrl, OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "osm_sa_mad_ctrl_bind: ERR 1A09: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A09: " "Multiple binds not allowed\n"); status = IB_ERROR; goto Exit; @@ -572,8 +557,7 @@ osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * const p_ctrl, bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE; bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE; - osm_log(p_ctrl->p_log, OSM_LOG_VERBOSE, - "osm_sa_mad_ctrl_bind: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); p_ctrl->h_bind = osm_vendor_bind(p_ctrl->p_vendor, @@ -585,8 +569,7 @@ osm_sa_mad_ctrl_bind(IN osm_sa_mad_ctrl_t * const p_ctrl, if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) { status = IB_ERROR; - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "osm_sa_mad_ctrl_bind: ERR 1A10: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A10: " "Vendor specific bind failed (%s)\n", ib_get_err_str(status)); goto Exit; @@ -606,8 +589,7 @@ ib_api_status_t osm_sa_mad_ctrl_unbind(IN osm_sa_mad_ctrl_t * const p_ctrl) OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind == 
OSM_BIND_INVALID_HANDLE) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "osm_sa_mad_ctrl_unbind: ERR 1A11: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 1A11: " "No previous bind\n"); status = IB_ERROR; goto Exit; diff --git a/opensm/opensm/osm_sa_mcmember_record.c b/opensm/opensm/osm_sa_mcmember_record.c index f3cbf6c..63202e8 100644 --- a/opensm/opensm/osm_sa_mcmember_record.c +++ b/opensm/opensm/osm_sa_mcmember_record.c @@ -171,8 +171,7 @@ __get_new_mlid(IN osm_sa_t * sa, IN ib_net16_t requested_mlid) p_mgrp = (osm_mgrp_t *) cl_qmap_head(&p_subn->mgrp_mlid_tbl); if (p_mgrp == (osm_mgrp_t *) cl_qmap_end(&p_subn->mgrp_mlid_tbl)) { mlid = IB_LID_MCAST_START_HO; - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__get_new_mlid: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No multicast groups found using minimal mlid:0x%04X\n", mlid); goto Exit; @@ -192,8 +191,7 @@ __get_new_mlid(IN osm_sa_t * sa, IN ib_net16_t requested_mlid) while (p_mgrp != (osm_mgrp_t *) cl_qmap_end(&p_subn->mgrp_mlid_tbl)) { /* ignore mgrps marked for deletion */ if (p_mgrp->to_be_deleted == FALSE) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__get_new_mlid: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Found mgrp with lid:0x%X MGID: 0x%016" PRIx64 " : " "0x%016" PRIx64 "\n", cl_ntoh16(p_mgrp->mlid), @@ -205,8 +203,7 @@ __get_new_mlid(IN osm_sa_t * sa, IN ib_net16_t requested_mlid) /* Map in table */ if (cl_ntoh16(p_mgrp->mlid) > sa->p_subn->max_multicast_lid_ho) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__get_new_mlid: ERR 1B27: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B27: " "Found mgrp with mlid:0x%04X > max allowed mlid:0x%04X\n", cl_ntoh16(p_mgrp->mlid), max_num_mlids + IB_LID_MCAST_START_HO); @@ -225,12 +222,10 @@ __get_new_mlid(IN osm_sa_t * sa, IN ib_net16_t requested_mlid) /* did it go above the maximal mlid allowed */ if (idx < max_num_mlids) { mlid = idx + IB_LID_MCAST_START_HO; - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__get_new_mlid: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Found available mlid:0x%04X at 
idx:%u\n", mlid, idx); } else { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__get_new_mlid: ERR 1B23: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B23: " "All available:%u mlids are taken\n", max_num_mlids); mlid = 0; } @@ -289,8 +284,7 @@ __add_new_mgrp_port(IN osm_sa_t * sa, sa->p_subn, p_mad_addr, &requester_gid); if (res != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__add_new_mgrp_port: ERR 1B29: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B29: " "Could not find GID for requester\n"); return IB_INVALID_PARAMETER; @@ -299,16 +293,14 @@ __add_new_mgrp_port(IN osm_sa_t * sa, if (!memcmp(&p_recvd_mcmember_rec->port_gid, &requester_gid, sizeof(ib_gid_t))) { proxy_join = FALSE; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__add_new_mgrp_port: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Create new port with proxy_join FALSE\n"); } else { /* The port is not the one specified in PortGID. The check that the requester is in the same partition as the PortGID is done before - just need to update the proxy_join. 
*/ proxy_join = TRUE; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__add_new_mgrp_port: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Create new port with proxy_join TRUE\n"); } @@ -317,8 +309,7 @@ __add_new_mgrp_port(IN osm_sa_t * sa, p_recvd_mcmember_rec->scope_state, proxy_join); if (*pp_mcmr_port == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__add_new_mgrp_port: ERR 1B06: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B06: " "osm_mgrp_add_port failed\n"); return IB_INSUFFICIENT_MEMORY; @@ -408,8 +399,7 @@ __osm_mcmr_rcv_respond(IN osm_sa_t * sa, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_respond: ERR 1B07: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B07: " "Unable to send MAD (%s) for TID <0x%" PRIx64 ">\n", ib_get_err_str(status), p_resp_sa_mad->trans_id); } @@ -445,8 +435,7 @@ __validate_more_comp_fields(osm_log_t * p_log, switch (mtu_sel) { case 0: /* Greater than MTU specified */ if (mtu_mgrp <= mtu_required) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_more_comp_fields: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested mcast group has MTU %x, which is not greater than %x\n", mtu_mgrp, mtu_required); return FALSE; @@ -454,8 +443,7 @@ __validate_more_comp_fields(osm_log_t * p_log, break; case 1: /* Less than MTU specified */ if (mtu_mgrp >= mtu_required) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_more_comp_fields: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested mcast group has MTU %x, which is not less than %x\n", mtu_mgrp, mtu_required); return FALSE; @@ -463,8 +451,7 @@ __validate_more_comp_fields(osm_log_t * p_log, break; case 2: /* Exactly MTU specified */ if (mtu_mgrp != mtu_required) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_more_comp_fields: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested mcast group has MTU %x, which is not equal to %x\n", mtu_mgrp, mtu_required); return FALSE; @@ -484,8 +471,7 @@ __validate_more_comp_fields(osm_log_t * p_log, switch (rate_sel) { case 0: /* Greater than RATE specified */ if 
(rate_mgrp <= rate_required) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_more_comp_fields: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested mcast group has RATE %x, which is not greater than %x\n", rate_mgrp, rate_required); return FALSE; @@ -493,8 +479,7 @@ __validate_more_comp_fields(osm_log_t * p_log, break; case 1: /* Less than RATE specified */ if (rate_mgrp >= rate_required) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_more_comp_fields: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested mcast group has RATE %x, which is not less than %x\n", rate_mgrp, rate_required); return FALSE; @@ -502,8 +487,7 @@ __validate_more_comp_fields(osm_log_t * p_log, break; case 2: /* Exactly RATE specified */ if (rate_mgrp != rate_required) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_more_comp_fields: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested mcast group has RATE %x, which is not equal to %x\n", rate_mgrp, rate_required); return FALSE; @@ -533,8 +517,7 @@ __validate_port_caps(osm_log_t * const p_log, mtu_required = ib_port_info_get_mtu_cap(&p_physp->port_info); mtu_mgrp = (uint8_t) (p_mgrp->mcmember_rec.mtu & 0x3F); if (mtu_required < mtu_mgrp) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_port_caps: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Port's MTU %x is less than %x\n", mtu_required, mtu_mgrp); return FALSE; @@ -543,8 +526,7 @@ __validate_port_caps(osm_log_t * const p_log, rate_required = ib_port_info_compute_rate(&p_physp->port_info); rate_mgrp = (uint8_t) (p_mgrp->mcmember_rec.rate & 0x3F); if (rate_required < rate_mgrp) { - osm_log(p_log, OSM_LOG_DEBUG, - "__validate_port_caps: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Port's RATE %x is less than %x\n", rate_required, rate_mgrp); return FALSE; @@ -582,8 +564,7 @@ __validate_modify(IN osm_sa_t * sa, /* o15-0.2.1: If this is a new port being added - nothing to check */ if (!osm_mgrp_is_port_present(p_mgrp, portguid, pp_mcm_port)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_modify: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 
"This is a new port in the MC group\n"); return TRUE; } @@ -598,8 +579,7 @@ __validate_modify(IN osm_sa_t * sa, p_mad_addr, &request_gid); if (res != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_modify: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Could not find port for requested address\n"); return FALSE; } @@ -607,8 +587,7 @@ __validate_modify(IN osm_sa_t * sa, if (memcmp (&((*pp_mcm_port)->port_gid), &request_gid, sizeof(ib_gid_t))) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_modify: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "No ProxyJoin but different ports: stored:0x%016" PRIx64 " request:0x%016" PRIx64 "\n", cl_ntoh64((*pp_mcm_port)->port_gid.unicast. @@ -630,8 +609,7 @@ __validate_modify(IN osm_sa_t * sa, if (!osm_physp_has_pkey(sa->p_log, p_mgrp->mcmember_rec.pkey, p_request_physp)) { /* the request port is not part of the partition for this mgrp */ - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_modify: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "ProxyJoin but port not in partition. stored:0x%016" PRIx64 " request:0x%016" PRIx64 "\n", cl_ntoh64((*pp_mcm_port)->port_gid.unicast. 
@@ -682,8 +660,7 @@ __validate_delete(IN osm_sa_t * sa, /* 1 */ if (!osm_mgrp_is_port_present(p_mgrp, portguid, pp_mcm_port)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_delete: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Failed to find the port in the MC group\n"); return FALSE; } @@ -691,8 +668,7 @@ __validate_delete(IN osm_sa_t * sa, /* 2 */ if (!(p_recvd_mcmember_rec->scope_state & 0x0F & (*pp_mcm_port)->scope_state)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_delete: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Could not find any matching bits in the stored and requested JoinStates\n"); return FALSE; } @@ -701,8 +677,7 @@ __validate_delete(IN osm_sa_t * sa, if (((p_recvd_mcmember_rec->scope_state & 0x0F) | (0x0F & (*pp_mcm_port)->scope_state)) != (0x0F & (*pp_mcm_port)->scope_state)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_delete: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Some bits in the request JoinState (0x%X) are not set in the stored port (0x%X)\n", (p_recvd_mcmember_rec->scope_state & 0x0F), (0x0F & (*pp_mcm_port)->scope_state) @@ -714,8 +689,7 @@ __validate_delete(IN osm_sa_t * sa, /* Validate according the the proxy_join (o15-0.1.2) */ if (__validate_modify(sa, p_mgrp, p_mad_addr, p_recvd_mcmember_rec, pp_mcm_port) == FALSE) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_delete: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "proxy_join validation failure\n"); return FALSE; } @@ -775,8 +749,7 @@ __validate_requested_mgid(IN osm_sa_t * sa, /* 14-a: mcast GID must start with 0xFF */ if (p_mcm_rec->mgid.multicast.header[0] != 0xFF) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__validate_requested_mgid: ERR 1B01: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B01: " "Wrong MGID Prefix 0x%02X must be 0xFF\n", cl_ntoh16(p_mcm_rec->mgid.multicast.header[0]) ); @@ -788,8 +761,7 @@ __validate_requested_mgid(IN osm_sa_t * sa, memcpy(&signature, &(p_mcm_rec->mgid.multicast.raw_group_id), sizeof(signature)); signature = cl_ntoh16(signature); - 
osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_requested_mgid: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MGID Signed as 0x%04X\n", signature); /* @@ -812,8 +784,7 @@ __validate_requested_mgid(IN osm_sa_t * sa, * */ if (signature == 0x401B || signature == 0x601B) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_requested_mgid: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Skipping MGID Validation for IPoIB Signed (0x%04X) MGIDs\n", signature); goto Exit; @@ -821,8 +792,7 @@ __validate_requested_mgid(IN osm_sa_t * sa, /* 14-b: the 3 upper bits in the "flags" should be zero: */ if (p_mcm_rec->mgid.multicast.header[1] & 0xE0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__validate_requested_mgid: ERR 1B28: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B28: " "MGID uses Reserved Flags: flags=0x%X\n", (p_mcm_rec->mgid.multicast.header[1] & 0xE0) >> 4); valid = FALSE; @@ -834,8 +804,7 @@ __validate_requested_mgid(IN osm_sa_t * sa, if ((signature == 0xA01B) && ((p_mcm_rec->mgid.multicast.header[1] & 0x0F) == IB_MC_SCOPE_LINK_LOCAL)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__validate_requested_mgid: ERR 1B24: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B24: " "MGID uses 0xA01B signature but with link-local scope\n"); valid = FALSE; goto Exit; @@ -893,8 +862,7 @@ __mgrp_request_is_realizable(IN osm_sa_t * sa, switch (mtu_sel) { case 0: /* Greater than MTU specified */ if (port_mtu && mtu_required >= port_mtu) { - osm_log(p_log, OSM_LOG_DEBUG, - "__mgrp_request_is_realizable: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested MTU %x >= the port\'s mtu:%x\n", mtu_required, port_mtu); return FALSE; @@ -922,8 +890,7 @@ __mgrp_request_is_realizable(IN osm_sa_t * sa, } /* make sure it still be in the range */ if (mtu < IB_MIN_MTU || mtu > IB_MAX_MTU) { - osm_log(p_log, OSM_LOG_DEBUG, - "__mgrp_request_is_realizable: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Calculated MTU %x is out of range\n", mtu); return FALSE; } @@ -942,8 +909,7 @@ __mgrp_request_is_realizable(IN osm_sa_t * sa, switch (rate_sel) { 
case 0: /* Greater than RATE specified */ if (port_rate && rate_required >= port_rate) { - osm_log(p_log, OSM_LOG_DEBUG, - "__mgrp_request_is_realizable: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Requested RATE %x >= the port\'s rate:%x\n", rate_required, port_rate); return FALSE; @@ -971,8 +937,7 @@ __mgrp_request_is_realizable(IN osm_sa_t * sa, } /* make sure it still is in the range */ if (rate < IB_MIN_RATE || rate > IB_MAX_RATE) { - osm_log(p_log, OSM_LOG_DEBUG, - "__mgrp_request_is_realizable: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Calculated RATE %x is out of range\n", rate); return FALSE; } @@ -1019,15 +984,13 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa, */ mlid = __get_new_mlid(sa, mcm_rec.mlid); if (mlid == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mcmr_rcv_create_new_mgrp: ERR 1B19: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B19: " "__get_new_mlid failed\n"); status = IB_SA_MAD_STATUS_NO_RESOURCES; goto Exit; } - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_mcmr_rcv_create_new_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Obtained new mlid 0x%X\n", cl_ntoh16(mlid)); /* we need to create the new MGID if it was not defined */ @@ -1056,8 +1019,7 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa, /* HACK: how do we get a unique number - use the mlid twice */ memcpy(&p_mgid->raw[10], &mlid, sizeof(uint16_t)); memcpy(&p_mgid->raw[12], &mlid, sizeof(uint16_t)); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_mcmr_rcv_create_new_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Allocated new MGID:0x%016" PRIx64 " : " "0x%016" PRIx64 "\n", cl_ntoh64(p_mgid->unicast.prefix), @@ -1066,8 +1028,7 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa, /* a specific MGID was requested so validate the resulting MGID */ valid = __validate_requested_mgid(sa, &mcm_rec); if (!valid) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mcmr_rcv_create_new_mgrp: ERR 1B22: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B22: " "Invalid requested MGID\n"); __free_mlid(sa, mlid); status = 
IB_SA_MAD_STATUS_REQ_INVALID; @@ -1078,8 +1039,7 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa, /* check the requested parameters are realizable */ if (__mgrp_request_is_realizable(sa, comp_mask, &mcm_rec, p_physp) == FALSE) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mcmr_rcv_create_new_mgrp: ERR 1B26: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B26: " "Requested MGRP parameters are not realizable\n"); __free_mlid(sa, mlid); status = IB_SA_MAD_STATUS_REQ_INVALID; @@ -1089,8 +1049,7 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa, /* create a new MC Group */ *pp_mgrp = osm_mgrp_new(mlid); if (*pp_mgrp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mcmr_rcv_create_new_mgrp: ERR 1B08: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B08: " "osm_mgrp_new failed\n"); __free_mlid(sa, mlid); status = IB_SA_MAD_STATUS_NO_RESOURCES; @@ -1118,8 +1077,7 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa, (osm_mgrp_t *) cl_qmap_get(&sa->p_subn->mgrp_mlid_tbl, mlid); if (p_prev_mgrp != (osm_mgrp_t *) cl_qmap_end(&sa->p_subn->mgrp_mlid_tbl)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_mcmr_rcv_create_new_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Found previous group for mlid:0x%04x - Need to destroy it\n", cl_ntoh16(mlid)); cl_qmap_remove_item(&sa->p_subn->mgrp_mlid_tbl, @@ -1186,7 +1144,7 @@ __search_mgrp_by_mgid(IN cl_map_item_t * const p_map_item, IN void *context) g_prefix == rcv_prefix && (g_interface_id & INT_ID_MASK) == (rcv_interface_id & INT_ID_MASK)) { - osm_log(sa->p_log, OSM_LOG_INFO, + OSM_LOG(sa->p_log, OSM_LOG_INFO, "Special Case Mcast Join for MGID " " MGID 0x%016"PRIx64" : 0x%016"PRIx64"\n", rcv_prefix, rcv_interface_id); @@ -1197,8 +1155,7 @@ __search_mgrp_by_mgid(IN cl_map_item_t * const p_map_item, IN void *context) } if (p_ctxt->p_mgrp) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__search_mgrp_by_mgid: ERR 1B30: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B30: " "Multiple MC groups for same MGID\n"); return; } @@ -1278,8 +1235,7 @@ 
__osm_mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, mcmember_rec = *p_recvd_mcmember_rec; if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mcmr_rcv_leave_mgrp: Dump of record\n"); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Dump of record\n"); osm_dump_mc_record(sa->p_log, &mcmember_rec, OSM_LOG_DEBUG); } @@ -1316,8 +1272,7 @@ __osm_mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mcmr_rcv_leave_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "After update JoinState != 0. Updating from 0x%X to 0x%X\n", port_join_state, new_join_state); } else { @@ -1332,15 +1287,13 @@ __osm_mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, osm_sm_mcgrp_leave(sa->sm, mlid, portguid); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_leave_mgrp: ERR 1B09: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B09: " "osm_sm_mcgrp_leave failed\n"); } } } else { CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_leave_mgrp: ERR 1B25: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B25: " "Received an invalid delete request for " "MGID: 0x%016" PRIx64 " : " "0x%016" PRIx64 " for " @@ -1360,8 +1313,7 @@ __osm_mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, } } else { CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mcmr_rcv_leave_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Failed since multicast group not present\n"); sa_status = IB_SA_MAD_STATUS_REQ_INVALID; osm_sa_send_error(sa, p_madw, sa_status); @@ -1412,9 +1364,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, mcmember_rec = *p_recvd_mcmember_rec; if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mcmr_rcv_join_mgrp: " - "Dump of incoming record\n"); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Dump of incoming record\n"); osm_dump_mc_record(sa->p_log, &mcmember_rec, OSM_LOG_DEBUG); } @@ -1425,8 +1375,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, if 
(!p_port) { CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mcmr_rcv_join_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Unknown port GUID 0x%016" PRIx64 "\n", portguid); sa_status = IB_SA_MAD_STATUS_REQ_INVALID; osm_sa_send_error(sa, p_madw, sa_status); @@ -1448,8 +1397,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, if (!osm_physp_share_pkey(sa->p_log, p_physp, p_request_physp)) { CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mcmr_rcv_join_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Port and requester don't share pkey\n"); sa_status = IB_SA_MAD_STATUS_REQ_INVALID; osm_sa_send_error(sa, p_madw, sa_status); @@ -1465,8 +1413,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, /* check for JoinState.FullMember = 1 o15.0.1.9 */ if ((join_state & 0x01) != 0x01) { CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_join_mgrp: ERR 1B10: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B10: " "Provided Join State != FullMember - required for create, " "MGID: 0x%016" PRIx64 " : " "0x%016" PRIx64 " from port 0x%016" PRIx64 @@ -1502,15 +1449,12 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, } else { CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_join_mgrp: ERR 1B11: " - "method = %s, " - "scope_state = 0x%x, " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B11: " + "method = %s, scope_state = 0x%x, " "component mask = 0x%016" PRIx64 ", " "expected comp mask = 0x%016" PRIx64 ", " - "MGID: 0x%016" PRIx64 " : " - "0x%016" PRIx64 " from port 0x%016" PRIx64 - " (%s)\n", + "MGID: 0x%016" PRIx64 " : 0x%016" PRIx64 + " from port 0x%016" PRIx64 " (%s)\n", ib_get_sa_method_str(p_sa_mad->method), p_recvd_mcmember_rec->scope_state, cl_ntoh64(p_sa_mad->comp_mask), @@ -1568,8 +1512,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_join_mgrp: ERR 1B12: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B12: " 
"__validate_more_comp_fields, __validate_port_caps, " "or JoinState = 0 failed from port 0x%016" PRIx64 " (%s), " "sending IB_SA_MAD_STATUS_REQ_INVALID\n", @@ -1595,8 +1538,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, if (!valid) { CL_PLOCK_RELEASE(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_join_mgrp: ERR 1B13: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B13: " "__validate_modify failed from port 0x%016" PRIx64 " (%s), " "sending IB_SA_MAD_STATUS_REQ_INVALID\n", @@ -1647,8 +1589,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, interface_id, req_type); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_join_mgrp: ERR 1B14: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B14: " "osm_sm_mcgrp_join failed from port 0x%016" PRIx64 " (%s), " "sending IB_SA_MAD_STATUS_NO_RESOURCES\n", cl_ntoh64(portguid), p_port->p_node->print_desc); @@ -1697,8 +1638,7 @@ __osm_mcmr_rcv_new_mcmr(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_rcv_new_mcmr: ERR 1B15: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B15: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; @@ -1744,14 +1684,12 @@ __osm_sa_mcm_by_comp_mask_cb(IN cl_map_item_t * const p_map_item, OSM_LOG_ENTER(sa->p_log); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_mcm_by_comp_mask_cb: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Checking mlid:0x%X\n", cl_ntoh16(p_mgrp->mlid)); /* the group might be marked for deletion */ if (p_mgrp->to_be_deleted) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_mcm_by_comp_mask_cb: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Group mlid:0x%X is marked to be deleted\n", cl_ntoh16(p_mgrp->mlid)); goto Exit; @@ -1848,8 +1786,7 @@ __osm_sa_mcm_by_comp_mask_cb(IN cl_map_item_t * const p_map_item, /* Many MC records returned */ if ((p_ctxt->trusted_req == TRUE) && !(IB_MCR_COMPMASK_PORT_GID & comp_mask)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - 
"__osm_sa_mcm_by_comp_mask_cb: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Trusted req is TRUE and no specific port defined\n"); /* return all the ports that match in this MC group */ @@ -1865,8 +1802,7 @@ __osm_sa_mcm_by_comp_mask_cb(IN cl_map_item_t * const p_map_item, memcpy(&(match_rec.port_gid), &(p_mcm_port->port_gid), sizeof(ib_gid_t)); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_mcm_by_comp_mask_cb: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Record of port_gid: 0x%016" PRIx64 "0x%016" PRIx64 " in multicast_lid: 0x%X is returned\n", @@ -1948,8 +1884,7 @@ __osm_mcmr_query_mgrp(IN osm_sa_t * sa, osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_query_mgrp: ERR 1B04: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B04: " "Cannot find requester physical port\n"); goto Exit; } @@ -1978,8 +1913,7 @@ __osm_mcmr_query_mgrp(IN osm_sa_t * sa, * If we do a SubnAdmGet and got more than one record it is an error ! */ if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec > 1)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_query_mgrp: ERR 1B05: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B05: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -2003,16 +1937,14 @@ __osm_mcmr_query_mgrp(IN osm_sa_t * sa, trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_member_rec_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_mcmr_query_mgrp: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mcmr_query_mgrp: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -2030,8 +1962,7 @@ __osm_mcmr_query_mgrp(IN osm_sa_t * sa, 
osm_madw_get_mad_addr_ptr(p_madw)); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_query_mgrp: ERR 1B16: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B16: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -2110,8 +2041,7 @@ __osm_mcmr_query_mgrp(IN osm_sa_t * sa, status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mcmr_query_mgrp: ERR 1B17: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B17: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; @@ -2148,15 +2078,12 @@ void osm_mcmr_rcv_process(IN void *context, IN void *data) case IB_MAD_METHOD_SET: valid = __check_join_comp_mask(p_sa_mad->comp_mask); if (!valid) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mcmr_rcv_process: ERR 1B18: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B18: " "component mask = 0x%016" PRIx64 ", " "expected comp mask = 0x%016" PRIx64 " ," - "MGID: 0x%016" PRIx64 " : " - "0x%016" PRIx64 " for " - "PortGID: 0x%016" PRIx64 " : " - "0x%016" PRIx64 "\n", - cl_ntoh64(p_sa_mad->comp_mask), + "MGID: 0x%016" PRIx64 " : 0x%016" PRIx64 + " for PortGID: 0x%016" PRIx64 " : 0x%016" + PRIx64 "\n", cl_ntoh64(p_sa_mad->comp_mask), CL_NTOH64(JOIN_MC_COMP_MASK), cl_ntoh64(p_recvd_mcmember_rec->mgid.unicast. 
prefix), @@ -2179,8 +2106,7 @@ void osm_mcmr_rcv_process(IN void *context, IN void *data) case IB_MAD_METHOD_DELETE: valid = __check_join_comp_mask(p_sa_mad->comp_mask); if (!valid) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mcmr_rcv_process: ERR 1B20: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B20: " "component mask = 0x%016" PRIx64 ", " "expected comp mask = 0x%016" PRIx64 "\n", cl_ntoh64(p_sa_mad->comp_mask), @@ -2203,8 +2129,7 @@ void osm_mcmr_rcv_process(IN void *context, IN void *data) __osm_mcmr_query_mgrp(sa, p_madw); break; default: - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mcmr_rcv_process: ERR 1B21: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B21: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); osm_sa_send_error(sa, p_madw, diff --git a/opensm/opensm/osm_sa_mft_record.c b/opensm/opensm/osm_sa_mft_record.c index 73111ca..a5d1292 100644 --- a/opensm/opensm/osm_sa_mft_record.c +++ b/opensm/opensm/osm_sa_mft_record.c @@ -88,16 +88,14 @@ __osm_mftr_rcv_new_mftr(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mftr_rcv_new_mftr: ERR 4A02: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A02: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mftr_rcv_new_mftr: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New MulticastForwardingTable: sw 0x%016" PRIx64 "\n\t\t\t\tblock %u position %u lid 0x%02X\n", cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), @@ -134,8 +132,7 @@ static osm_port_t *__osm_mftr_get_port_by_guid(IN osm_sa_t * sa, p_port = osm_get_port_by_guid(sa->p_subn, port_guid); if (!p_port) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mftr_get_port_by_guid ERR 4A04: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "ERR 4A04: " "Invalid port GUID 0x%016" PRIx64 "\n", port_guid); } @@ -168,8 +165,7 @@ __osm_mftr_rcv_by_comp_mask(IN cl_map_item_t * 
const p_map_item, __osm_mftr_get_port_by_guid(sa, p_sw->p_node->node_info.port_guid); if (!p_port) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mftr_rcv_by_comp_mask: ERR 4A05: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A05: " "Failed to find Port by Node Guid:0x%016" PRIx64 "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid) ); @@ -180,8 +176,7 @@ __osm_mftr_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, the same partition. */ p_physp = p_port->p_physp; if (!p_physp) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mftr_rcv_by_comp_mask: ERR 4A06: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A06: " "Failed to find default physical Port by Node Guid:0x%016" PRIx64 "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid) @@ -196,8 +191,7 @@ __osm_mftr_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, /* compare the lids - if required */ if (comp_mask & IB_MFTR_COMPMASK_LID) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mftr_rcv_by_comp_mask: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Comparing lid:0x%02X to port lid range: 0x%02X .. 
0x%02X\n", cl_ntoh16(p_rcvd_rec->lid), min_lid_ho, max_lid_ho); /* ok we are ready for range check */ @@ -284,8 +278,7 @@ void osm_mftr_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_rcvd_mad->method != IB_MAD_METHOD_GET) && (p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mftr_rcv_process: ERR 4A08: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A08: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_rcvd_mad->method)); osm_sa_send_error(sa, p_madw, @@ -299,8 +292,7 @@ void osm_mftr_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mftr_rcv_process: ERR 4A07: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A07: " "Cannot find requester physical port\n"); goto Exit; } @@ -334,8 +326,7 @@ void osm_mftr_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mftr_rcv_process: ERR 4A09: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A09: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -361,16 +352,14 @@ void osm_mftr_rcv_process(IN void *ctx, IN void *data) trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_mft_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_mftr_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_mftr_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -387,8 +376,7 @@ void osm_mftr_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if 
(!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mftr_rcv_process: ERR 4A10: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A10: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -452,8 +440,7 @@ void osm_mftr_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mftr_rcv_process: ERR 4A11: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4A11: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_multipath_record.c b/opensm/opensm/osm_sa_multipath_record.c index a81bfd5..98a2da1 100644 --- a/opensm/opensm/osm_sa_multipath_record.c +++ b/opensm/opensm/osm_sa_multipath_record.c @@ -216,8 +216,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, comp_mask)) if (mtu > IB_MTU_LEN_1024) { mtu = IB_MTU_LEN_1024; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Optimized Path MTU to 1K for Mellanox Tavor device\n"); } @@ -239,8 +238,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, */ p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); if (p_physp == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4514: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: " "Can't find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -265,8 +263,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, } if (!valid_sl_mask) { if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "All the SLs lead to VL15 on this path\n"); status = IB_NOT_FOUND; goto Exit; @@ -285,8 +282,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); if (p_dest_physp == 0) { - 
osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4515: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: " "Can't find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -306,8 +302,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, p_physp = osm_physp_get_remote(p_physp); if (p_physp == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4505: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: " "Can't find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -332,8 +327,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, If this isn't a switch, we should have reached the destination by now! */ - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4503: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: " "Internal error, bad path\n"); status = IB_ERROR; goto Exit; @@ -355,8 +349,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, */ p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); if (p_physp == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4516: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: " "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -384,8 +377,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, } if (!valid_sl_mask) { if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "All the SLs lead to VL15 " "on this path\n"); status = IB_NOT_FOUND; @@ -406,8 +398,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, rate = ib_port_info_compute_rate(p_pi); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Path min 
MTU = %u, min rate = %u\n", mtu, rate); } @@ -423,8 +414,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, p_dest_physp, comp_mask))) { if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPathRecord request matches QoS Level '%s' (%s)\n", p_qos_level->name, (p_qos_level->use) ? p_qos_level-> @@ -608,8 +598,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, required_pkey = p_mpr->pkey; if (!osm_physp_share_this_pkey (p_src_physp, p_dest_physp, required_pkey)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4518: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: " "Ports do not share specified PKey 0x%04x\n" "\t\tsrc %" PRIx64 " dst %" PRIx64 "\n", cl_ntoh16(required_pkey), @@ -621,8 +610,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, } if (p_qos_level && p_qos_level->pkey_range_len && !osm_qos_level_has_pkey(p_qos_level, required_pkey)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 451C: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: " "Ports do not share PKeys defined by QoS level\n"); status = IB_NOT_FOUND; goto Exit; @@ -637,8 +625,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, p_src_physp, p_dest_physp); if (!required_pkey) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 451D: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: " "Ports do not share PKeys defined by QoS level\n"); status = IB_NOT_FOUND; goto Exit; @@ -652,8 +639,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, required_pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp); if (!required_pkey) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 4519: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: " "Ports do not have any shared PKeys\n" "\t\tsrc %" PRIx64 " dst %" PRIx64 "\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), @@ -686,8 +672,7 @@ 
__osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, if (p_qos_level && p_qos_level->sl_set && p_qos_level->sl != required_sl) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 451E: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: " "QoS constaraints: required MultiPathRecord SL (%u) " "doesn't match QoS policy SL (%u)\n", required_sl, p_qos_level->sl); @@ -703,8 +688,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, required_sl = p_qos_level->sl; if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "QoS level SL (%u) overrides partition SL (%u)\n", p_qos_level->sl, p_prtn->sl); @@ -720,8 +704,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, required_sl = OSM_DEFAULT_SL; /* this may be possible when pkey tables are created somehow in previous runs or things are going wrong here */ - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 451A: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: " "No partition found for PKey 0x%04x - using default SL %d\n", cl_ntoh16(required_pkey), required_sl); } else @@ -740,8 +723,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, required_sl = OSM_DEFAULT_SL; if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_path_parms: ERR 451F: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: " "Selected SL (%u) leads to VL15\n", required_sl); status = IB_NOT_FOUND; goto Exit; @@ -760,8 +742,7 @@ __osm_mpr_rcv_get_path_parms(IN osm_sa_t * sa, p_parms->hops = hops; if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_path_parms: MultiPath params:" + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:" " mtu = %u, rate = %u, packet lifetime = %u," " pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate, pkt_life, cl_ntoh16(required_pkey), required_sl, hops); @@ -844,15 
+825,13 @@ __osm_mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_lid_pair_path: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID 0x%X, Dest LID 0x%X\n", src_lid_ho, dest_lid_ho); p_pr_item = malloc(sizeof(*p_pr_item)); if (p_pr_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_lid_pair_path: ERR 4501: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4501: " "Unable to allocate path record\n"); goto Exit; } @@ -883,8 +862,7 @@ __osm_mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa, */ if (comp_mask & IB_MPR_COMPMASK_REVERSIBLE) { if ((!path_parms.reversible && (p_mpr->num_path & 0x80))) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_lid_pair_path: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Requested reversible path but failed to get one\n"); free(p_pr_item); @@ -933,8 +911,7 @@ __osm_mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_port_pair_paths: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", " "Dst port 0x%016" PRIx64 "\n", cl_ntoh64(osm_port_get_guid(p_src_port)), @@ -1000,10 +977,8 @@ __osm_mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa, &dest_lid_max_ho); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_port_pair_paths: " - "Src LID [0x%X-0x%X], " - "Dest LID [0x%X-0x%X]\n", + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Src LID [0x%X-0x%X], Dest LID [0x%X-0x%X]\n", src_lid_min_ho, src_lid_max_ho, dest_lid_min_ho, dest_lid_max_ho); @@ -1131,8 +1106,7 @@ __osm_mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_apm_port_pair_paths: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", " 
"Dst port 0x%016" PRIx64 ", base offs %d\n", cl_ntoh64(osm_port_get_guid(p_src_port)), @@ -1151,8 +1125,7 @@ __osm_mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa, src_lid_ho += base_offs % src_lids; dest_lid_ho += base_offs % dest_lids; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_apm_port_pair_paths: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LIDs [0x%X-0x%X] hashed %d, " "Dest LIDs [0x%X-0x%X] hashed %d\n", src_lid_min_ho, src_lid_max_ho, src_lid_ho, @@ -1172,8 +1145,7 @@ __osm_mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa, comp_mask, 0); if (p_pr_item) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_apm_port_pair_paths: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Found matching path from Src LID 0x%X to Dest LID 0x%X with %d hops\n", src_lid_ho, dest_lid_ho, p_pr_item->hops); break; @@ -1213,8 +1185,7 @@ __osm_mpr_rcv_get_gids(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. */ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_mpr_rcv_get_gids: ERR 451B: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "ERR 451B: " "%sGID 0x%016" PRIx64 " is multicast or non local subnet prefix\n", is_sgid ? "S" : "D", @@ -1234,8 +1205,7 @@ __osm_mpr_rcv_get_gids(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. 
*/ - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_get_gids: ERR 4506: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4506: " "No port with GUID 0x%016" PRIx64 "\n", cl_ntoh64(gids->unicast.interface_id)); @@ -1381,8 +1351,7 @@ __osm_mpr_rcv_get_apm_paths(IN osm_sa_t * sa, pp_ports[3], base_offs + 1, comp_mask, p_list); - osm_log(sa->p_log, OSM_LOG_DEBUG, "__osm_mpr_rcv_get_apm_paths: " - "APM matrix:\n" + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n" "\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n" "\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n", matrix[0][0]->path_rec.slid, matrix[0][0]->path_rec.dlid, @@ -1403,8 +1372,7 @@ __osm_mpr_rcv_get_apm_paths(IN osm_sa_t * sa, /* and the winner is... */ if (minA <= minB || (minA == minB && sumA < sumB)) { /* Diag A */ - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_apm_paths: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Diag {0,0} & {1,1} is the best:\n" "\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n", matrix[0][0]->path_rec.slid, @@ -1417,8 +1385,7 @@ __osm_mpr_rcv_get_apm_paths(IN osm_sa_t * sa, free(matrix[1][0]); } else { /* Diag B */ - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_get_apm_paths: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Diag {0,1} & {1,0} is the best:\n" "\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n", matrix[0][1]->path_rec.slid, @@ -1471,8 +1438,7 @@ __osm_mpr_rcv_process_pairs(IN osm_sa_t * sa, comp_mask, p_list); total_paths += num_paths; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_process_pairs: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "%d paths %d total paths %d max paths\n", num_paths, total_paths, max_paths); /* Just take first NumbPaths found */ @@ -1510,8 +1476,7 @@ __osm_mpr_rcv_respond(IN osm_sa_t * sa, num_rec = cl_qlist_count(p_list); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_mpr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Generating response with %zu records\n", num_rec); mad_size = IB_SA_MAD_HDR_SIZE + num_rec * sizeof(ib_path_rec_t); 
@@ -1523,8 +1488,7 @@ __osm_mpr_rcv_respond(IN osm_sa_t * sa, mad_size, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4502: Unable to allocate MAD\n"); for (i = 0; i < num_rec; i++) { @@ -1577,8 +1541,7 @@ __osm_mpr_rcv_respond(IN osm_sa_t * sa, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_mpr_rcv_respond: ERR 4507: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4507: " "Unable to send MAD (%s)\n", ib_get_err_str(status)); /* osm_mad_pool_put( sa->p_mad_pool, p_resp_madw ); */ } @@ -1611,8 +1574,7 @@ void osm_mpr_rcv_process(IN void *context, IN void *data) CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD); if ((p_sa_mad->rmpp_flags & IB_RMPP_FLAG_ACTIVE) != IB_RMPP_FLAG_ACTIVE) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mpr_rcv_process: ERR 4510: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4510: " "Invalid request since RMPP_FLAG_ACTIVE is not set\n"); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); @@ -1621,8 +1583,7 @@ void osm_mpr_rcv_process(IN void *context, IN void *data) /* we only support SubnAdmGetMulti method */ if (p_sa_mad->method != IB_MAD_METHOD_GETMULTI) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mpr_rcv_process: ERR 4513: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4513: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); osm_sa_send_error(sa, p_madw, @@ -1635,8 +1596,7 @@ void osm_mpr_rcv_process(IN void *context, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (requester_port == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mpr_rcv_process: ERR 4517: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4517: " "Cannot find requester physical port\n"); goto Exit; } @@ -1657,8 +1617,7 @@ void osm_mpr_rcv_process(IN void *context, IN void *data) if (sa_status != IB_SA_MAD_STATUS_SUCCESS || !nsrc || !ndest) { if (sa_status == IB_SA_MAD_STATUS_SUCCESS && (!nsrc || 
!ndest)) - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_mpr_rcv_process_cb: ERR 4512: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4512: " "__osm_mpr_rcv_get_end_points failed, not enough GIDs " "(nsrc %d ndest %d)\n", nsrc, ndest); cl_plock_release(sa->p_lock); diff --git a/opensm/opensm/osm_sa_node_record.c b/opensm/opensm/osm_sa_node_record.c index 5148509..ab5e3a6 100644 --- a/opensm/opensm/osm_sa_node_record.c +++ b/opensm/opensm/osm_sa_node_record.c @@ -87,16 +87,14 @@ __osm_nr_rcv_new_nr(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_nr_rcv_new_nr: ERR 1D02: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D02: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_nr_rcv_new_nr: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New NodeRecord: node 0x%016" PRIx64 "\n\t\t\t\tport 0x%016" PRIx64 ", lid 0x%X\n", cl_ntoh64(osm_node_get_node_guid(p_node)), @@ -141,8 +139,7 @@ __osm_nr_rcv_create_nr(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_nr_rcv_create_nr: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Looking for NodeRecord with LID: 0x%X GUID:0x%016" PRIx64 "\n", cl_ntoh16(match_lid), cl_ntoh64(match_port_guid) @@ -184,8 +181,7 @@ __osm_nr_rcv_create_nr(IN osm_sa_t * sa, We validate that the lid belongs to this node. 
*/ if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_nr_rcv_create_nr: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Comparing LID: 0x%X <= 0x%X <= 0x%X\n", base_lid_ho, match_lid_ho, max_lid_ho); } @@ -230,8 +226,7 @@ __osm_nr_rcv_by_comp_mask(IN cl_map_item_t * const p_map_item, IN void *context) DEBUG TOP */ if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_nr_rcv_by_comp_mask: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Looking for node 0x%016" PRIx64 ", found 0x%016" PRIx64 "\n", cl_ntoh64(p_rcvd_rec->node_info.node_guid), @@ -348,8 +343,7 @@ void osm_nr_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_rcvd_mad->method != IB_MAD_METHOD_GET) && (p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_nr_rcv_process: ERR 1D05: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D05: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_rcvd_mad->method)); osm_sa_send_error(sa, p_madw, @@ -363,8 +357,7 @@ void osm_nr_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_nr_rcv_process: ERR 1D04: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D04: " "Cannot find requester physical port\n"); goto Exit; } @@ -394,8 +387,7 @@ void osm_nr_rcv_process(IN void *ctx, IN void *data) * If we do a SubnAdmGet and got more than one record it is an error ! 
*/ if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec > 1)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_nr_rcv_process: ERR 1D03: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D03: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -418,16 +410,14 @@ void osm_nr_rcv_process(IN void *ctx, IN void *data) trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_node_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_nr_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_nr_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -444,8 +434,7 @@ void osm_nr_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_nr_rcv_process: ERR 1D06: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D06: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -507,8 +496,7 @@ void osm_nr_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_nr_rcv_process: ERR 1D07: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1D07: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c index 0cf59e6..f94145b 100644 --- a/opensm/opensm/osm_sa_path_record.c +++ b/opensm/opensm/osm_sa_path_record.c @@ -223,8 +223,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, p_dest_port, comp_mask)) if (mtu > IB_MTU_LEN_1024) { mtu = IB_MTU_LEN_1024; - osm_log(sa->p_log, 
OSM_LOG_DEBUG, - "__osm_pr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Optimized Path MTU to 1K for Mellanox Tavor device\n"); } @@ -246,8 +245,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, */ p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); if (p_physp == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F02: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F02: " "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -272,8 +270,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, } if (!valid_sl_mask) { if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "All the SLs lead to VL15 on this path\n"); status = IB_NOT_FOUND; goto Exit; @@ -292,8 +289,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, p_dest_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); if (p_dest_physp == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F03: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F03: " "Cannot find routing to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -313,8 +309,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, p_physp = osm_physp_get_remote(p_physp); if (p_physp == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F05: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F05: " "Cannot find remote phys port when routing to LID 0x%X from node GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -338,8 +333,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, If this isn't a switch, we should have reached the destination by now! 
*/ - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F06: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F06: " "Internal error, bad path\n"); status = IB_ERROR; goto Exit; @@ -361,8 +355,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, */ p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); if (p_physp == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F07: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F07: " "Dead end on path to LID 0x%X from switch for GUID 0x%016" PRIx64 "\n", dest_lid_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -390,8 +383,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, } if (!valid_sl_mask) { if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "All the SLs lead to VL15 " "on this path\n"); status = IB_NOT_FOUND; @@ -412,8 +404,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, rate = ib_port_info_compute_rate(p_pi); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Path min MTU = %u, min rate = %u\n", mtu, rate); /* @@ -427,8 +418,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, p_pr, p_src_physp, p_dest_physp, comp_mask))) { if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "PathRecord request matches QoS Level '%s' (%s)\n", p_qos_level->name, (p_qos_level->use) ? 
p_qos_level-> @@ -614,8 +604,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, */ pkey = p_pr->pkey; if (!osm_physp_share_this_pkey(p_src_physp, p_dest_physp, pkey)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F1A: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1A: " "Ports do not share specified PKey 0x%04x\n", cl_ntoh16(pkey)); status = IB_NOT_FOUND; @@ -623,8 +612,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, } if (p_qos_level && p_qos_level->pkey_range_len && !osm_qos_level_has_pkey(p_qos_level, pkey)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F1D: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1D: " "Ports do not share PKeys defined by QoS level\n"); status = IB_NOT_FOUND; goto Exit; @@ -638,8 +626,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, pkey = osm_qos_level_get_shared_pkey(p_qos_level, p_src_physp, p_dest_physp); if (!pkey) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F1E: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1E: " "Ports do not share PKeys defined by QoS level\n"); status = IB_NOT_FOUND; goto Exit; @@ -651,8 +638,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, */ pkey = osm_physp_find_common_pkey(p_src_physp, p_dest_physp); if (!pkey) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F1B: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1B: " "Ports do not have any shared PKeys\n"); status = IB_NOT_FOUND; goto Exit; @@ -683,8 +669,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, if (p_qos_level && p_qos_level->sl_set && (p_qos_level->sl != sl)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F1F: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1F: " "QoS constaraints: required PathRecord SL (%u) " "doesn't match QoS policy SL (%u)\n", sl, p_qos_level->sl); @@ -694,8 +679,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, if (is_lash && osm_get_lash_sl(p_osm, p_src_port, p_dest_port) != sl) { - 
osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F23: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F23: " "Required PathRecord SL (%u) doesn't " "match LASH SL\n", sl); status = IB_NOT_FOUND; @@ -717,8 +701,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, sl = p_qos_level->sl; if (pkey && p_prtn && p_prtn->sl != p_qos_level->sl) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_path_parms: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "QoS level SL (%u) overrides partition SL (%u)\n", p_qos_level->sl, p_prtn->sl); @@ -730,8 +713,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, sl = OSM_DEFAULT_SL; /* this may be possible when pkey tables are created somehow in previous runs or things are going wrong here */ - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F1C: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F1C: " "No partition found for PKey 0x%04x - using default SL %d\n", cl_ntoh16(pkey), sl); } else @@ -749,8 +731,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, sl = OSM_DEFAULT_SL; if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << sl))) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_path_parms: ERR 1F24: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F24: " "Selected SL (%u) leads to VL15\n", sl); status = IB_NOT_FOUND; goto Exit; @@ -768,8 +749,7 @@ __osm_pr_rcv_get_path_parms(IN osm_sa_t * sa, p_parms->sl = sl; if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_path_parms: Path params:" + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Path params:" " mtu = %u, rate = %u, packet lifetime = %u," " pkey = 0x%04X, sl = %u\n", mtu, rate, pkt_life, cl_ntoh16(pkey), sl); @@ -870,15 +850,13 @@ __osm_pr_rcv_get_lid_pair_path(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_lid_pair_path: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID 0x%X, Dest LID 0x%X\n", src_lid_ho, 
dest_lid_ho); p_pr_item = malloc(sizeof(*p_pr_item)); if (p_pr_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_lid_pair_path: ERR 1F01: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F01: " "Unable to allocate path record\n"); goto Exit; } @@ -909,8 +887,7 @@ __osm_pr_rcv_get_lid_pair_path(IN osm_sa_t * sa, */ if (comp_mask & IB_PR_COMPMASK_REVERSIBLE) { if ((!path_parms.reversible && (p_pr->num_path & 0x80))) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_lid_pair_path: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Requested reversible path but failed to get one\n"); free(p_pr_item); @@ -958,8 +935,7 @@ __osm_pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_port_pair_paths: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", " "Dst port 0x%016" PRIx64 "\n", cl_ntoh64(osm_port_get_guid(p_src_port)), @@ -1041,24 +1017,20 @@ __osm_pr_rcv_get_port_pair_paths(IN osm_sa_t * sa, &src_lid_max_ho); if (src_lid_min_ho == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_port_pair_paths: ERR 1F20:" + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F20:" "Obtained source LID of 0. No such LID possible\n"); goto Exit; } if (dest_lid_min_ho == 0) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_port_pair_paths: ERR 1F21:" + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F21:" "Obtained destination LID of 0. No such LID possible\n"); goto Exit; } if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_get_port_pair_paths: " - "Src LIDs [0x%X-0x%X], " - "Dest LIDs [0x%X-0x%X]\n", + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "Src LIDs [0x%X-0x%X], Dest LIDs [0x%X-0x%X]\n", src_lid_min_ho, src_lid_max_ho, dest_lid_min_ho, dest_lid_max_ho); @@ -1213,8 +1185,7 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, don't enter it as an error in our own log. 
Return an error response to the client. */ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Non local SGID subnet prefix 0x%016" PRIx64 "\n", cl_ntoh64(p_pr->sgid.unicast.prefix)); @@ -1233,8 +1204,7 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. */ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No source port with GUID 0x%016" PRIx64 "\n", cl_ntoh64(p_pr->sgid.unicast.interface_id)); @@ -1254,8 +1224,7 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. */ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No source port with LID = 0x%X\n", cl_ntoh16(p_pr->slid)); @@ -1274,8 +1243,7 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, if (!ib_gid_is_multicast(&p_pr->dgid) && ib_gid_get_subnet_prefix(&p_pr->dgid) != sa->p_subn->opt.subnet_prefix) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Non local DGID subnet prefix 0x%016" PRIx64 "\n", cl_ntoh64(p_pr->dgid.unicast.prefix)); @@ -1325,8 +1293,8 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, p_subn-> rtr_guid_tbl)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_get_end_points: ERR 1F22: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, + "ERR 1F22: " "Off subnet DGID but router not found\n"); sa_status = IB_SA_MAD_STATUS_INVALID_GID; @@ -1347,8 +1315,7 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. 
*/ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No dest port with GUID 0x%016" PRIx64 "\n", cl_ntoh64(dest_guid)); @@ -1368,8 +1335,7 @@ __osm_pr_rcv_get_end_points(IN osm_sa_t * sa, don't enter it as an error in our own log. Return an error response to the client. */ - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_get_end_points: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "No dest port with LID = 0x%X\n", cl_ntoh16(p_pr->dlid)); @@ -1542,8 +1508,7 @@ __osm_pr_get_mgrp(IN osm_sa_t * sa, if (comp_mask & IB_PR_COMPMASK_DGID) { status = osm_get_mgrp_by_mgid(sa, &p_pr->dgid, pp_mgrp); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_get_mgrp: ERR 1F09: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F09: " "No MC group found for PathRecord destination GID\n"); goto Exit; } @@ -1555,8 +1520,7 @@ __osm_pr_get_mgrp(IN osm_sa_t * sa, /* the same as the DLID in the PathRecord */ if ((*pp_mgrp)->mlid != p_pr->dlid) { /* Note: perhaps this might be better indicated as an invalid request */ - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_get_mgrp: ERR 1F10: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F10: " "MC group MLID does not match PathRecord destination LID\n"); *pp_mgrp = NULL; goto Exit; @@ -1564,8 +1528,7 @@ __osm_pr_get_mgrp(IN osm_sa_t * sa, } else { *pp_mgrp = __get_mgrp_by_mlid(sa, p_pr->dlid); if (*pp_mgrp == NULL) - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_get_mgrp: ERR 1F11: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F11: " "No MC group found for PathRecord destination LID\n"); } } @@ -1669,8 +1632,7 @@ __osm_pr_rcv_check_mcast_dest(IN osm_sa_t * sa, cl_ntoh16(p_pr->dlid) <= IB_LID_MCAST_END_HO) is_multicast = 1; else if (is_multicast) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_check_mcast_dest: ERR 1F12: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F12: " "PathRecord request indicates MGID but not MLID\n"); is_multicast = -1; } @@ -1716,8 +1678,7 @@ 
__osm_pr_rcv_respond(IN osm_sa_t * sa, goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_respond: ERR 1F13: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F13: " "Got more than one record for SubnAdmGet (%zu)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -1740,16 +1701,14 @@ __osm_pr_rcv_respond(IN osm_sa_t * sa, trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_path_rec_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_pr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Generating response with %zu records\n", num_rec); if ((sad_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { @@ -1765,8 +1724,7 @@ __osm_pr_rcv_respond(IN osm_sa_t * sa, num_rec * sizeof(ib_path_rec_t) + IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_respond: ERR 1F14: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F14: " "Unable to allocate MAD\n"); for (i = 0; i < num_rec; i++) { @@ -1823,8 +1781,7 @@ __osm_pr_rcv_respond(IN osm_sa_t * sa, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pr_rcv_respond: ERR 1F15: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F15: " "Unable to send MAD (%s)\n", ib_get_err_str(status)); /* osm_mad_pool_put( sa->p_mad_pool, p_resp_madw ); */ } @@ -1861,8 +1818,7 @@ void osm_pr_rcv_process(IN void *context, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_sa_mad->method != IB_MAD_METHOD_GET) && (p_sa_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pr_rcv_process: ERR 1F17: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F17: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); 
osm_sa_send_error(sa, p_madw, @@ -1875,8 +1831,7 @@ void osm_pr_rcv_process(IN void *context, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (requester_port == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pr_rcv_process: ERR 1F16: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F16: " "Cannot find requester physical port\n"); goto Exit; } @@ -1904,8 +1859,7 @@ void osm_pr_rcv_process(IN void *context, IN void *data) if (ret > 0) goto McastDest; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_pr_rcv_process: " "Unicast destination requested\n"); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Unicast destination requested\n"); sa_status = __osm_pr_rcv_get_end_points(sa, p_madw, &p_src_port, &p_dest_port, @@ -1952,8 +1906,7 @@ void osm_pr_rcv_process(IN void *context, IN void *data) goto Unlock; McastDest: - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_pr_rcv_process: " "Multicast destination requested\n"); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Multicast destination requested\n"); { osm_mgrp_t *p_mgrp = NULL; ib_api_status_t status; @@ -1971,16 +1924,14 @@ McastDest: /* Make sure the rest of the PathRecord matches the MC group attributes */ status = __osm_pr_match_mgrp_attributes(sa, p_madw, p_mgrp); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pr_rcv_process: ERR 1F19: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F19: " "MC group attributes don't match PathRecord request\n"); goto Unlock; } p_pr_item = malloc(sizeof(*p_pr_item)); if (p_pr_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pr_rcv_process: ERR 1F18: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1F18: " "Unable to allocate path record for MC group\n"); goto Unlock; } diff --git a/opensm/opensm/osm_sa_pkey_record.c b/opensm/opensm/osm_sa_pkey_record.c index 31d7837..aeed9fc 100644 --- a/opensm/opensm/osm_sa_pkey_record.c +++ b/opensm/opensm/osm_sa_pkey_record.c @@ -80,8 +80,7 @@ __osm_sa_pkey_create(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { 
- osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sa_pkey_create: ERR 4602: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4602: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; @@ -93,8 +92,7 @@ __osm_sa_pkey_create(IN osm_sa_t * sa, lid = osm_node_get_base_lid(p_physp->p_node, 0); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_pkey_create: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New P_Key table for: port 0x%016" PRIx64 ", lid 0x%X, port 0x%X Block:%u\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), @@ -168,8 +166,7 @@ __osm_sa_pkey_by_comp_mask(IN osm_sa_t * sa, if (p_port->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) { /* we put it in the comp mask and port num */ port_num = p_port->p_physp->port_num; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_pkey_by_comp_mask: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Using Physical Default Port Number: 0x%X (for End Node)\n", port_num); comp_mask |= IB_PKEY_COMPMASK_PORT; @@ -187,8 +184,7 @@ __osm_sa_pkey_by_comp_mask(IN osm_sa_t * sa, __osm_sa_pkey_check_physp(sa, p_physp, p_ctxt); } else { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sa_pkey_by_comp_mask: ERR 4603: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4603: " "Given Physical Port Number: 0x%X is out of range should be < 0x%X\n", port_num, osm_node_get_num_physp(p_port->p_node)); @@ -270,8 +266,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_rcvd_mad->method != IB_MAD_METHOD_GET) && (p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 4605: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4605: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_rcvd_mad->method)); osm_sa_send_error(sa, p_madw, @@ -286,8 +281,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) */ if (p_rcvd_mad->sm_key != sa->p_subn->opt.sm_key) { /* This is not a 
trusted requester! */ - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process ERR 4608: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4608: " "Request from non-trusted requester: " "Given SM_Key:0x%016" PRIx64 "\n", cl_ntoh64(p_rcvd_mad->sm_key)); @@ -302,8 +296,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 4604: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4604: " "Cannot find requester physical port\n"); goto Exit; } @@ -319,8 +312,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) context.block_num = p_rcvd_rec->block_num; context.p_req_physp = p_req_physp; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_pkey_rec_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Got Query Lid:0x%04X(%02X), Block:0x%02X(%02X), Port:0x%02X(%02X)\n", cl_ntoh16(p_rcvd_rec->lid), (comp_mask & IB_PKEY_COMPMASK_LID) != 0, p_rcvd_rec->port_num, @@ -344,8 +336,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) &p_port); if ((status != IB_SUCCESS) || (p_port == NULL)) { status = IB_NOT_FOUND; - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 460B: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 460B: " "No port found with LID 0x%x\n", cl_ntoh16(p_rcvd_rec->lid)); } @@ -378,8 +369,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 460A: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 460A: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -405,16 +395,14 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_pkey_table_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_pkey_rec_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed 
to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_pkey_rec_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -432,8 +420,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 4606: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4606: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -497,8 +484,7 @@ void osm_pkey_rec_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pkey_rec_rcv_process: ERR 4607: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4607: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_portinfo_record.c b/opensm/opensm/osm_sa_portinfo_record.c index e85fa3e..3e866f3 100644 --- a/opensm/opensm/osm_sa_portinfo_record.c +++ b/opensm/opensm/osm_sa_portinfo_record.c @@ -91,16 +91,14 @@ __osm_pir_rcv_new_pir(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_pir_rcv_new_pir: ERR 2102: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2102: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_pir_rcv_new_pir: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New PortInfoRecord: port 0x%016" PRIx64 ", lid 0x%X, port 0x%X\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), @@ -155,8 +153,7 @@ __osm_sa_pir_create(IN osm_sa_t * sa, We validate that the lid belongs to this node. 
*/ if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_pir_create: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Comparing LID: 0x%X <= 0x%X <= 0x%X\n", base_lid_ho, match_lid_ho, max_lid_ho); @@ -513,8 +510,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_rcvd_mad->method != IB_MAD_METHOD_GET) && (p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2105: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2105: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_rcvd_mad->method)); osm_sa_send_error(sa, p_madw, @@ -528,8 +524,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2104: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2104: " "Cannot find requester physical port\n"); goto Exit; } @@ -565,8 +560,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) &p_port); if ((status != IB_SUCCESS) || (p_port == NULL)) { status = IB_NOT_FOUND; - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2109: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2109: " "No port found with LID 0x%x\n", cl_ntoh16(p_rcvd_rec->lid)); } @@ -577,8 +571,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) cl_ntoh16(p_pi->base_lid)); else { status = IB_NOT_FOUND; - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2103: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2103: " "Given LID (0x%X) is out of range:0x%X\n", cl_ntoh16(p_pi->base_lid), cl_ptr_vector_get_size(p_tbl)); @@ -610,8 +603,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2108: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2108: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); 
osm_sa_send_error(sa, p_madw, @@ -637,16 +629,14 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_portinfo_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_pir_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_pir_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -663,8 +653,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2106: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2106: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -740,8 +729,7 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_pir_rcv_process: ERR 2107: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2107: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_response.c b/opensm/opensm/osm_sa_response.c index c4e138d..7db3927 100644 --- a/opensm/opensm/osm_sa_response.c +++ b/opensm/opensm/osm_sa_response.c @@ -73,8 +73,7 @@ osm_sa_send_error(IN osm_sa_t * sa, /* avoid races - if we are exiting - exit */ if (osm_exit_flag) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sa_send_error: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Ignoring requested send after exit\n"); goto Exit; } @@ -84,8 +83,7 @@ osm_sa_send_error(IN osm_sa_t * sa, &p_madw->mad_addr); if (p_resp_madw == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sa_send_error: 
ERR 2301: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2301: " "Unable to acquire response MAD\n"); goto Exit; } @@ -121,8 +119,7 @@ osm_sa_send_error(IN osm_sa_t * sa, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sa_send_error: ERR 2302: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2302: " "Error sending MAD (%s)\n", ib_get_err_str(status)); /* osm_mad_pool_put( sa->p_mad_pool, p_resp_madw ); */ goto Exit; diff --git a/opensm/opensm/osm_sa_service_record.c b/opensm/opensm/osm_sa_service_record.c index 762eb25..1680ea1 100644 --- a/opensm/opensm/osm_sa_service_record.c +++ b/opensm/opensm/osm_sa_service_record.c @@ -100,8 +100,7 @@ __match_service_pkey_with_ports_pkey(IN osm_sa_t * sa, osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__match_service_pkey_with_ports_pkey: ERR 2404: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2404: " "Cannot find requester physical port\n"); valid = FALSE; goto Exit; @@ -124,8 +123,7 @@ __match_service_pkey_with_ports_pkey(IN osm_sa_t * sa, service_port = osm_get_port_by_guid(sa->p_subn, service_guid); if (!service_port) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__match_service_pkey_with_ports_pkey: ERR 2405: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2405: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(service_guid)); valid = FALSE; @@ -187,8 +185,8 @@ __validate_sr(IN osm_sa_t * sa, IN const osm_madw_t * const p_madw) p_sa_mad->comp_mask); if (!valid) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_sr: " "No Match for Service Pkey\n"); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "No Match for Service Pkey\n"); valid = FALSE; goto Exit; } @@ -198,8 +196,7 @@ __validate_sr(IN osm_sa_t * sa, IN const osm_madw_t * const p_madw) p_sa_mad->comp_mask); if (!valid) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__validate_sr: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Service Record Name to key matching failed\n"); valid = FALSE; goto 
Exit; @@ -239,8 +236,7 @@ __osm_sr_rcv_respond(IN osm_sa_t * sa, * If we do a SubnAdmGet and got more than one record it is an error ! */ if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec > 1)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sr_rcv_respond: ERR 2406: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2406: " "Got more than one record for SubnAdmGet (%u).\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -260,8 +256,7 @@ __osm_sr_rcv_respond(IN osm_sa_t * sa, trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_service_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "__osm_sr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; @@ -269,8 +264,7 @@ __osm_sr_rcv_respond(IN osm_sa_t * sa, #endif if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sr_rcv_respond: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Generating response with %u records\n", num_rec); } @@ -282,8 +276,7 @@ __osm_sr_rcv_respond(IN osm_sa_t * sa, num_rec * sizeof(ib_service_record_t) + IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sr_rcv_respond: ERR 2402: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2402: " "Unable to allocate MAD\n"); /* Release the quick pool items */ p_sr_item = (osm_sr_item_t *) cl_qlist_remove_head(p_list); @@ -373,8 +366,7 @@ __osm_sr_rcv_respond(IN osm_sa_t * sa, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sr_rcv_respond: ERR 2407: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2407: " "Unable to send MAD (%s)\n", ib_get_err_str(status)); /* osm_mad_pool_put( sa->p_mad_pool, p_resp_madw ); */ goto Exit; @@ -591,8 +583,7 @@ __get_matching_sr(IN cl_list_item_t * const p_list_item, IN void *context) if (!osm_physp_has_pkey(p_sr_item->sa->p_log, p_svcr->service_record.service_pkey, 
p_req_physp)) { - osm_log(p_sr_item->sa->p_log, OSM_LOG_VERBOSE, - "__get_matching_sr: " + OSM_LOG(p_sr_item->sa->p_log, OSM_LOG_VERBOSE, "requester port doesn't have the service_pkey: 0x%X\n", cl_ntoh16(p_svcr->service_record.service_pkey)); return; @@ -601,8 +592,7 @@ __get_matching_sr(IN cl_list_item_t * const p_list_item, IN void *context) p_sr_pool_item = malloc(sizeof(*p_sr_pool_item)); if (p_sr_pool_item == NULL) { - osm_log(p_sr_item->sa->p_log, OSM_LOG_ERROR, - "__get_matching_sr: ERR 2408: " + OSM_LOG(p_sr_item->sa->p_log, OSM_LOG_ERROR, "ERR 2408: " "Unable to acquire Service Record from pool\n"); goto Exit; } @@ -637,8 +627,7 @@ osm_sr_rcv_process_get_method(IN osm_sa_t * sa, osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sr_rcv_process_get_method: ERR 2409: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2409: " "Cannot find requester physical port\n"); goto Exit; } @@ -670,8 +659,7 @@ osm_sr_rcv_process_get_method(IN osm_sa_t * sa, if ((p_sa_mad->method == IB_MAD_METHOD_GET) && (cl_qlist_count(&sr_match_item.sr_list) == 0)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sr_rcv_process_get_method: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "No records matched the Service Record query\n"); osm_sa_send_error(sa, p_madw, @@ -717,8 +705,7 @@ osm_sr_rcv_process_set_method(IN osm_sa_t * sa, if ((comp_mask & (IB_SR_COMPMASK_SID | IB_SR_COMPMASK_SGID)) != (IB_SR_COMPMASK_SID | IB_SR_COMPMASK_SGID)) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_sr_rcv_process_set_method: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Component Mask RID check failed for METHOD_SET\n"); osm_sa_send_error(sa, p_madw, sa_status); goto Exit; @@ -727,8 +714,7 @@ osm_sr_rcv_process_set_method(IN osm_sa_t * sa, /* if we were not provided with a service lease make it infinite */ if ((comp_mask & IB_SR_COMPMASK_SLEASE) != IB_SR_COMPMASK_SLEASE) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sr_rcv_process_set_method: " + OSM_LOG(sa->p_log, 
OSM_LOG_DEBUG, "ServiceLease Component Mask not set - using infinite lease\n"); p_recvd_service_rec->service_lease = 0xFFFFFFFF; } @@ -746,8 +732,7 @@ osm_sr_rcv_process_set_method(IN osm_sa_t * sa, if (p_svcr == NULL) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sr_rcv_process_set_method: ERR 2411: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2411: " "osm_svcr_get_by_rid failed\n"); osm_sa_send_error(sa, p_madw, @@ -778,8 +763,7 @@ osm_sr_rcv_process_set_method(IN osm_sa_t * sa, p_sr_item = malloc(sizeof(*p_sr_item)); if (p_sr_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sr_rcv_process_set_method: ERR 2412: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2412: " "Unable to acquire Service record\n"); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES); @@ -840,8 +824,7 @@ osm_sr_rcv_process_delete_method(IN osm_sa_t * sa, if (p_svcr == NULL) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sr_rcv_process_delete_method: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "No records matched the RID\n"); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RECORDS); @@ -854,8 +837,7 @@ osm_sr_rcv_process_delete_method(IN osm_sa_t * sa, p_sr_item = malloc(sizeof(*p_sr_item)); if (p_sr_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sr_rcv_process_delete_method: ERR 2413: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2413: " "Unable to acquire Service record\n"); osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES); @@ -900,8 +882,7 @@ void osm_sr_rcv_process(IN void *context, IN void *data) case IB_MAD_METHOD_SET: valid = __validate_sr(sa, p_madw); if (!valid) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_sr_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Component Mask check failed for set request\n"); osm_sa_send_error(sa, p_madw, sa_status); goto Exit; @@ -911,8 +892,7 @@ void osm_sr_rcv_process(IN void *context, IN void *data) case IB_MAD_METHOD_DELETE: valid = __validate_sr(sa, 
p_madw); if (!valid) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sr_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Component Mask check failed for delete request\n"); osm_sa_send_error(sa, p_madw, sa_status); goto Exit; @@ -924,8 +904,7 @@ void osm_sr_rcv_process(IN void *context, IN void *data) osm_sr_rcv_process_get_method(sa, p_madw); break; default: - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sr_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Unsupported Method (%s)\n", ib_get_sa_method_str(p_sa_mad->method)); osm_sa_send_error(sa, p_madw, @@ -980,8 +959,7 @@ void osm_sr_rcv_lease_cb(IN void *context) */ p_svcr->lease_period -= elapsed_time; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sr_rcv_lease_cb: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Remaining time for Service Name:%s is:0x%X\n", p_svcr->service_record.service_name, p_svcr->lease_period); diff --git a/opensm/opensm/osm_sa_slvl_record.c b/opensm/opensm/osm_sa_slvl_record.c index 72d259f..9d717ba 100644 --- a/opensm/opensm/osm_sa_slvl_record.c +++ b/opensm/opensm/osm_sa_slvl_record.c @@ -92,8 +92,7 @@ __osm_sa_slvl_create(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sa_slvl_create: ERR 2602: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2602: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; @@ -105,8 +104,7 @@ __osm_sa_slvl_create(IN osm_sa_t * sa, lid = osm_node_get_base_lid(p_physp->p_node, 0); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_slvl_create: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New SLtoVL Map for: OUT port 0x%016" PRIx64 ", lid 0x%X, port 0x%X to In Port:%u\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), @@ -155,8 +153,7 @@ __osm_sa_slvl_by_comp_mask(IN osm_sa_t * sa, p_req_physp = p_ctxt->p_req_physp; if (p_port->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - 
"__osm_sa_slvl_by_comp_mask: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Using Physical Default Port Number: 0x%X (for End Node)\n", p_port->p_physp->port_num); p_out_physp = p_port->p_physp; @@ -260,8 +257,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((p_rcvd_mad->method != IB_MAD_METHOD_GET) && (p_rcvd_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2604: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2604: " "Unsupported Method (%s)\n", ib_get_sa_method_str(p_rcvd_mad->method)); osm_sa_send_error(sa, p_madw, @@ -275,8 +271,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2603: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2603: " "Cannot find requester physical port\n"); goto Exit; } @@ -292,8 +287,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) cl_plock_acquire(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_slvl_rec_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Got Query Lid:0x%04X(%02X), In-Port:0x%02X(%02X), Out-Port:0x%02X(%02X)\n", cl_ntoh16(p_rcvd_rec->lid), (comp_mask & IB_SLVL_COMPMASK_LID) != 0, @@ -317,8 +311,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) &p_port); if ((status != IB_SUCCESS) || (p_port == NULL)) { status = IB_NOT_FOUND; - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2608: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2608: " "No port found with LID 0x%x\n", cl_ntoh16(p_rcvd_rec->lid)); } @@ -350,8 +343,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2607: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2607: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); 
osm_sa_send_error(sa, p_madw, @@ -377,16 +369,14 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_slvl_table_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_slvl_rec_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_slvl_rec_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((p_rcvd_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -404,8 +394,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2605: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2605: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -469,8 +458,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_slvl_rec_rcv_process: ERR 2606: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2606: " "osm_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_sminfo_record.c b/opensm/opensm/osm_sa_sminfo_record.c index 595d890..5527892 100644 --- a/opensm/opensm/osm_sa_sminfo_record.c +++ b/opensm/opensm/osm_sa_sminfo_record.c @@ -97,18 +97,15 @@ __osm_smir_rcv_new_smir(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_smir_rcv_new_smir: ERR 2801: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2801: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (osm_log_is_active(sa->p_log, 
OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_smir_rcv_new_smir: " - "New SMInfo: GUID 0x%016" PRIx64 "\n", cl_ntoh64(guid) - ); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, + "New SMInfo: GUID 0x%016" PRIx64 "\n", cl_ntoh64(guid)); memset(p_rec_item, 0, sizeof(*p_rec_item)); @@ -225,8 +222,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((sad_mad->method != IB_MAD_METHOD_GET) && (sad_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 2804: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2804: " "Unsupported Method (%s)\n", ib_get_sa_method_str(sad_mad->method)); osm_sa_send_error(sa, p_madw, @@ -240,8 +236,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 2803: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2803: " "Cannot find requester physical port\n"); goto Exit; } @@ -273,8 +268,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) &p_port); if ((status != IB_SUCCESS) || (p_port == NULL)) { status = IB_NOT_FOUND; - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 2806: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2806: " "No port found with LID 0x%x\n", cl_ntoh16(p_rcvd_rec->lid)); } @@ -287,8 +281,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) sa->p_subn->sm_port_guid); if (!local_port) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 2809: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2809: " "No port found with GUID 0x%016" PRIx64 "\n", cl_ntoh64(sa->p_subn->sm_port_guid)); goto Exit; @@ -299,8 +292,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) osm_physp_share_pkey(sa->p_log, p_req_physp, local_port->p_physp)) { cl_plock_release(sa->p_lock); - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 
2805: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2805: " "Cannot get SMInfo record due to pkey violation\n"); goto Exit; } @@ -345,8 +337,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) __osm_sa_smir_by_comp_mask(sa, p_rem_sm, &context); else { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 280A: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 280A: " "No remote SM for GUID 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); } @@ -373,8 +364,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 2808: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2808: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -399,16 +389,14 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) trim_num_rec = (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_sminfo_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_smir_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_smir_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((sad_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -425,8 +413,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_smir_rcv_process: ERR 2807: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2807: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -492,8 +479,7 @@ void osm_smir_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - 
"osm_smir_rcv_process: ERR 2802: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2802: " "Error sending MAD (%s)\n", ib_get_err_str(status)); goto Exit; } diff --git a/opensm/opensm/osm_sa_sw_info_record.c b/opensm/opensm/osm_sa_sw_info_record.c index 33a5df8..79f7a19 100644 --- a/opensm/opensm/osm_sa_sw_info_record.c +++ b/opensm/opensm/osm_sa_sw_info_record.c @@ -85,16 +85,14 @@ __osm_sir_rcv_new_sir(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sir_rcv_new_sir: ERR 5308: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5308: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; } if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sir_rcv_new_sir: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New SwitchInfoRecord: lid 0x%X\n", cl_ntoh16(lid) ); @@ -121,8 +119,7 @@ static osm_port_t *__osm_sir_get_port_by_guid(IN osm_sa_t * sa, p_port = osm_get_port_by_guid(sa->p_subn, port_guid); if (!p_port) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sir_get_port_by_guid ERR 5309: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "ERR 5309: " "Invalid port GUID 0x%016" PRIx64 "\n", port_guid); p_port = NULL; } @@ -149,8 +146,7 @@ __osm_sir_rcv_create_sir(IN osm_sa_t * sa, OSM_LOG_ENTER(sa->p_log); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sir_rcv_create_sir: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Looking for SwitchInfoRecord with LID: 0x%X\n", cl_ntoh16(match_lid) ); @@ -161,8 +157,7 @@ __osm_sir_rcv_create_sir(IN osm_sa_t * sa, __osm_sir_get_port_by_guid(sa, p_sw->p_node->node_info.port_guid); if (!p_port) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sir_rcv_create_sir: ERR 530A: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 530A: " "Failed to find Port by Node Guid:0x%016" PRIx64 "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid) ); @@ -173,8 +168,7 @@ __osm_sir_rcv_create_sir(IN osm_sa_t * sa, the 
same partition. */ p_physp = p_port->p_physp; if (!p_physp) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sir_rcv_create_sir: ERR 530B: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 530B: " "Failed to find default physical Port by Node Guid:0x%016" PRIx64 "\n", cl_ntoh64(p_sw->p_node->node_info.node_guid) @@ -193,8 +187,7 @@ __osm_sir_rcv_create_sir(IN osm_sa_t * sa, We validate that the lid belongs to this switch. */ if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) { - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sir_rcv_create_sir: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Comparing LID: 0x%X <= 0x%X <= 0x%X\n", min_lid_ho, match_lid_ho, max_lid_ho); } @@ -281,8 +274,7 @@ void osm_sir_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((sad_mad->method != IB_MAD_METHOD_GET) && (sad_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sir_rcv_process: ERR 5305: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5305: " "Unsupported Method (%s)\n", ib_get_sa_method_str(sad_mad->method)); osm_sa_send_error(sa, p_madw, @@ -296,8 +288,7 @@ void osm_sir_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sir_rcv_process: ERR 5304: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5304: " "Cannot find requester physical port\n"); goto Exit; } @@ -329,8 +320,7 @@ void osm_sir_rcv_process(IN void *ctx, IN void *data) * If we do a SubnAdmGet and got more than one record it is an error ! 
*/ if ((sad_mad->method == IB_MAD_METHOD_GET) && (num_rec > 1)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sir_rcv_process: ERR 5303: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5303: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -354,16 +344,14 @@ void osm_sir_rcv_process(IN void *ctx, IN void *data) (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_switch_info_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_sir_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_sir_rcv_process: " "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((sad_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -381,8 +369,7 @@ void osm_sir_rcv_process(IN void *ctx, IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sir_rcv_process: ERR 5306: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5306: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -444,8 +431,7 @@ void osm_sir_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_sir_rcv_process: ERR 5307: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 5307: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_sa_vlarb_record.c b/opensm/opensm/osm_sa_vlarb_record.c index 2823412..7b022aa 100644 --- a/opensm/opensm/osm_sa_vlarb_record.c +++ b/opensm/opensm/osm_sa_vlarb_record.c @@ -92,8 +92,7 @@ __osm_sa_vl_arb_create(IN osm_sa_t * sa, p_rec_item = malloc(sizeof(*p_rec_item)); if (p_rec_item == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, - 
"__osm_sa_vl_arb_create: ERR 2A02: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A02: " "rec_item alloc failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; @@ -105,8 +104,7 @@ __osm_sa_vl_arb_create(IN osm_sa_t * sa, lid = osm_node_get_base_lid(p_physp->p_node, 0); if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG)) - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_vl_arb_create: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "New VLArbitration for: port 0x%016" PRIx64 ", lid 0x%X, port 0x%X Block:%u\n", cl_ntoh64(osm_physp_get_port_guid(p_physp)), @@ -174,8 +172,7 @@ __osm_sa_vl_arb_by_comp_mask(IN osm_sa_t * sa, if (p_port->p_node->node_info.node_type != IB_NODE_TYPE_SWITCH) { /* we put it in the comp mask and port num */ port_num = p_port->p_physp->port_num; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "__osm_sa_vl_arb_by_comp_mask: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Using Physical Default Port Number: 0x%X (for End Node)\n", port_num); comp_mask |= IB_VLA_COMPMASK_OUT_PORT; @@ -193,8 +190,7 @@ __osm_sa_vl_arb_by_comp_mask(IN osm_sa_t * sa, __osm_sa_vl_arb_check_physp(sa, p_physp, p_ctxt); } else { - osm_log(sa->p_log, OSM_LOG_ERROR, - "__osm_sa_vl_arb_by_comp_mask: ERR 2A03: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A03: " "Given Physical Port Number: 0x%X is out of range should be < 0x%X\n", port_num, osm_node_get_num_physp(p_port->p_node)); @@ -276,8 +272,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) /* we only support SubnAdmGet and SubnAdmGetTable methods */ if ((sad_mad->method != IB_MAD_METHOD_GET) && (sad_mad->method != IB_MAD_METHOD_GETTABLE)) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_vlarb_rec_rcv_process: ERR 2A05: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A05: " "Unsupported Method (%s)\n", ib_get_sa_method_str(sad_mad->method)); osm_sa_send_error(sa, p_madw, @@ -291,8 +286,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) osm_madw_get_mad_addr_ptr (p_madw)); if (p_req_physp == NULL) { - osm_log(sa->p_log, OSM_LOG_ERROR, 
- "osm_vlarb_rec_rcv_process: ERR 2A04: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A04: " "Cannot find requester physical port\n"); goto Exit; } @@ -308,8 +302,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) context.block_num = p_rcvd_rec->block_num; context.p_req_physp = p_req_physp; - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_vlarb_rec_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Got Query Lid:0x%04X(%02X), Port:0x%02X(%02X), Block:0x%02X(%02X)\n", cl_ntoh16(p_rcvd_rec->lid), (comp_mask & IB_VLA_COMPMASK_LID) != 0, p_rcvd_rec->port_num, @@ -334,8 +327,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) &p_port); if ((status != IB_SUCCESS) || (p_port == NULL)) { status = IB_NOT_FOUND; - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_vlarb_rec_rcv_process: ERR 2A09: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A09: " "No port found with LID 0x%x\n", cl_ntoh16(p_rcvd_rec->lid)); } @@ -368,8 +360,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) goto Exit; } if (num_rec > 1) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_vlarb_rec_rcv_process: ERR 2A08: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A08: " "Got more than one record for SubnAdmGet (%u)\n", num_rec); osm_sa_send_error(sa, p_madw, @@ -395,17 +386,14 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) (MAD_BLOCK_SIZE - IB_SA_MAD_HDR_SIZE) / sizeof(ib_vl_arb_table_record_t); if (trim_num_rec < num_rec) { - osm_log(sa->p_log, OSM_LOG_VERBOSE, - "osm_vlarb_rec_rcv_process: " + OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "Number of records:%u trimmed to:%u to fit in one MAD\n", num_rec, trim_num_rec); num_rec = trim_num_rec; } #endif - osm_log(sa->p_log, OSM_LOG_DEBUG, - "osm_vlarb_rec_rcv_process: " - "Returning %u records\n", num_rec); + OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Returning %u records\n", num_rec); if ((sad_mad->method == IB_MAD_METHOD_GET) && (num_rec == 0)) { osm_sa_send_error(sa, p_madw, @@ -423,8 +411,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, 
IN void *data) IB_SA_MAD_HDR_SIZE, &p_madw->mad_addr); if (!p_resp_madw) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_vlarb_rec_rcv_process: ERR 2A06: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A06: " "osm_mad_pool_get failed\n"); for (i = 0; i < num_rec; i++) { @@ -488,8 +475,7 @@ void osm_vlarb_rec_rcv_process(IN void *ctx, IN void *data) status = osm_sa_vendor_send(p_resp_madw->h_bind, p_resp_madw, FALSE, sa->p_subn); if (status != IB_SUCCESS) { - osm_log(sa->p_log, OSM_LOG_ERROR, - "osm_vlarb_rec_rcv_process: ERR 2A07: " + OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2A07: " "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; diff --git a/opensm/opensm/osm_service.c b/opensm/opensm/osm_service.c index 1c5c127..41aada2 100644 --- a/opensm/opensm/osm_service.c +++ b/opensm/opensm/osm_service.c @@ -146,8 +146,7 @@ osm_svcr_insert_to_db(IN osm_subn_t * p_subn, { OSM_LOG_ENTER(p_log); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_svcr_insert_to_db: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Inserting new Service Record into Database\n"); cl_qlist_insert_head(&p_subn->sa_sr_list, &p_svcr->list_item); @@ -161,8 +160,7 @@ osm_svcr_remove_from_db(IN osm_subn_t * p_subn, { OSM_LOG_ENTER(p_log); - osm_log(p_log, OSM_LOG_DEBUG, - "osm_svcr_remove_from_db: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Removing Service Record Name:%s ID:0x%016" PRIx64 " from Database\n", p_svcr->service_record.service_name, p_svcr->service_record.service_id); diff --git a/opensm/opensm/osm_slvl_map_rcv.c b/opensm/opensm/osm_slvl_map_rcv.c index aca364e..c78951d 100644 --- a/opensm/opensm/osm_slvl_map_rcv.c +++ b/opensm/opensm/osm_slvl_map_rcv.c @@ -99,8 +99,7 @@ void osm_slvl_rcv_process(IN void *context, IN void *p_data) if (!p_port) { cl_plock_release(sm->p_lock); - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_slvl_rcv_process: ERR 2C06: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2C06: " "No port object for port with GUID 0x%" PRIx64 "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", @@ 
-130,8 +129,7 @@ void osm_slvl_rcv_process(IN void *context, IN void *p_data) the subnet. */ if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_slvl_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Got SLtoVL get response in_port_num %u out_port_num %u with GUID 0x%" PRIx64 " for parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", in_port_num, out_port_num, @@ -144,8 +142,7 @@ void osm_slvl_rcv_process(IN void *context, IN void *p_data) If so, Ignore it. */ if (!p_physp) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_slvl_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "Got invalid port number 0x%X\n", out_port_num); goto Exit; } diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c index f412e58..32525ba 100644 --- a/opensm/opensm/osm_sm.c +++ b/opensm/opensm/osm_sm.c @@ -112,12 +112,10 @@ static void __osm_sm_sweeper(IN void *p_ptr) EVENT_NO_TIMEOUT, TRUE); if (status == CL_SUCCESS) - osm_log(p_sm->p_log, OSM_LOG_DEBUG, - "__osm_sm_sweeper: " + OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG, "Off schedule sweep signalled\n"); else if (status != CL_TIMEOUT) { - osm_log(p_sm->p_log, OSM_LOG_ERROR, - "__osm_sm_sweeper: ERR 2E01: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E01: " "Event wait failed (%s)\n", CL_STATUS_MSG(status)); continue; @@ -431,8 +429,7 @@ osm_sm_bind(IN osm_sm_t * const p_sm, IN const ib_net64_t port_guid) status = osm_sm_mad_ctrl_bind(&p_sm->mad_ctrl, port_guid); if (status != IB_SUCCESS) { - osm_log(p_sm->p_log, OSM_LOG_ERROR, - "osm_sm_bind: ERR 2E10: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E10: " "SM MAD Controller bind failed (%s)\n", ib_get_err_str(status)); goto Exit; @@ -512,8 +509,7 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm, OSM_LOG_ENTER(p_sm->p_log); - osm_log(p_sm->p_log, OSM_LOG_VERBOSE, - "osm_sm_mcgrp_join: " + OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE, "Port 0x%016" PRIx64 " joining MLID 0x%X\n", cl_ntoh64(port_guid), cl_ntoh16(mlid)); @@ -524,8 +520,7 @@ 
osm_sm_mcgrp_join(IN osm_sm_t * const p_sm, p_port = osm_get_port_by_guid(p_sm->p_subn, port_guid); if (!p_port) { CL_PLOCK_RELEASE(p_sm->p_lock); - osm_log(p_sm->p_log, OSM_LOG_ERROR, - "osm_sm_mcgrp_join: ERR 2E05: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E05: " "No port object for port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); status = IB_INVALID_PARAMETER; @@ -538,15 +533,13 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm, p_tbl = &p_sm->p_subn->mgrp_mlid_tbl; p_mgrp = (osm_mgrp_t *) cl_qmap_get(p_tbl, mlid); if (p_mgrp == (osm_mgrp_t *) cl_qmap_end(p_tbl)) { - osm_log(p_sm->p_log, OSM_LOG_VERBOSE, - "osm_sm_mcgrp_join: " + OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE, "Creating group, MLID 0x%X\n", cl_ntoh16(mlid)); p_mgrp = osm_mgrp_new(mlid); if (p_mgrp == NULL) { CL_PLOCK_RELEASE(p_sm->p_lock); - osm_log(p_sm->p_log, OSM_LOG_ERROR, - "osm_sm_mcgrp_join: ERR 2E06: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E06: " "Unable to allocate multicast group object\n"); status = IB_INSUFFICIENT_MEMORY; goto Exit; @@ -562,8 +555,7 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm, */ if (!osm_mgrp_is_guid(p_mgrp, port_guid)) { CL_PLOCK_RELEASE(p_sm->p_lock); - osm_log(p_sm->p_log, OSM_LOG_ERROR, - "osm_sm_mcgrp_join: ERR 2E12: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E12: " "Port 0x%016" PRIx64 " not in mcast group 0x%X\n", cl_ntoh64(port_guid), cl_ntoh16(mlid)); @@ -581,8 +573,7 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm, while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) { if (p_mcm->mlid == mlid) { CL_PLOCK_RELEASE(p_sm->p_lock); - osm_log(p_sm->p_log, OSM_LOG_DEBUG, - "osm_sm_mcgrp_join: " + OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG, "Found mlid object for Port:" "0x%016" PRIx64 " lid:0x%X\n", cl_ntoh64(port_guid), cl_ntoh16(mlid)); @@ -594,8 +585,7 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm, status = osm_port_add_mgrp(p_port, mlid); if (status != IB_SUCCESS) { CL_PLOCK_RELEASE(p_sm->p_lock); - osm_log(p_sm->p_log, OSM_LOG_ERROR, - 
"osm_sm_mcgrp_join: ERR 2E03: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E03: " "Unable to associate port 0x%" PRIx64 " to mlid 0x%X\n", cl_ntoh64(osm_port_get_guid(p_port)), cl_ntoh16(osm_mgrp_get_mlid(p_mgrp))); @@ -623,8 +613,7 @@ osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm, OSM_LOG_ENTER(p_sm->p_log); - osm_log(p_sm->p_log, OSM_LOG_VERBOSE, - "osm_sm_mcgrp_leave: " + OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE, "Port 0x%" PRIx64 " leaving MLID 0x%X\n", cl_ntoh64(port_guid), cl_ntoh16(mlid)); @@ -636,8 +625,7 @@ osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm, p_port = osm_get_port_by_guid(p_sm->p_subn, port_guid); if (!p_port) { CL_PLOCK_RELEASE(p_sm->p_lock); - osm_log(p_sm->p_log, OSM_LOG_ERROR, - "osm_sm_mcgrp_leave: ERR 2E04: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E04: " "No port object for port 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); status = IB_INVALID_PARAMETER; @@ -651,8 +639,7 @@ osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm, p_mgrp = (osm_mgrp_t *) cl_qmap_get(p_tbl, mlid); if (p_mgrp == (osm_mgrp_t *) cl_qmap_end(p_tbl)) { CL_PLOCK_RELEASE(p_sm->p_lock); - osm_log(p_sm->p_log, OSM_LOG_ERROR, - "osm_sm_mcgrp_leave: ERR 2E08: " + OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E08: " "No multicast group for MLID 0x%X\n", cl_ntoh16(mlid)); status = IB_INVALID_PARAMETER; goto Exit; diff --git a/opensm/opensm/osm_sm_mad_ctrl.c b/opensm/opensm/osm_sm_mad_ctrl.c index 15b8626..4b0c27f 100644 --- a/opensm/opensm/osm_sm_mad_ctrl.c +++ b/opensm/opensm/osm_sm_mad_ctrl.c @@ -82,8 +82,7 @@ __osm_sm_mad_ctrl_retire_trans_mad(IN osm_sm_mad_ctrl_t * const p_ctrl, Return the MAD & wrapper to the pool. 
*/ if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_retire_trans_mad: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Retiring MAD with TID 0x%" PRIx64 "\n", cl_ntoh64(osm_madw_get_smp_ptr(p_madw)->trans_id)); @@ -92,8 +91,7 @@ __osm_sm_mad_ctrl_retire_trans_mad(IN osm_sm_mad_ctrl_t * const p_ctrl, outstanding = cl_atomic_dec(&p_ctrl->p_stats->qp0_mads_outstanding); if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_retire_trans_mad: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "%u QP0 MADs outstanding\n", p_ctrl->p_stats->qp0_mads_outstanding); @@ -102,8 +100,7 @@ __osm_sm_mad_ctrl_retire_trans_mad(IN osm_sm_mad_ctrl_t * const p_ctrl, The wire is clean. Signal the subnet manager. */ - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_retire_trans_mad: wire is clean.\n"); + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "wire is clean.\n"); #ifdef HAVE_LIBPTHREAD pthread_cond_signal(&p_ctrl->p_stats->cond); #else @@ -179,8 +176,7 @@ __osm_sm_mad_ctrl_update_wire_stats(IN osm_sm_mad_ctrl_t * const p_ctrl) cl_atomic_dec(&p_ctrl->p_stats->qp0_mads_outstanding_on_wire); if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_update_wire_stats: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "%u SMPs on the wire, %u outstanding\n", mads_on_wire, p_ctrl->p_stats->qp0_mads_outstanding); @@ -220,8 +216,7 @@ __osm_sm_mad_ctrl_process_get_resp(IN osm_sm_mad_ctrl_t * const p_ctrl, p_smp = osm_madw_get_smp_ptr(p_madw); if (p_smp->mgmt_class == IB_MCLASS_SUBN_DIR && !ib_smp_is_d(p_smp)) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_get_resp: ERR 3102: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3102: " "'D' bit not set in returned SMP\n"); osm_dump_dr_smp(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); } @@ -282,8 +277,7 @@ __osm_sm_mad_ctrl_process_get_resp(IN osm_sm_mad_ctrl_t * const 
p_ctrl, case IB_MAD_ATTR_INFORM_INFO: default: cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown); - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_get_resp: ERR 3103: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3103: " "Unsupported attribute = 0x%X\n", cl_ntoh16(p_smp->attr_id)); osm_dump_dr_smp(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); @@ -299,8 +293,7 @@ __osm_sm_mad_ctrl_process_get_resp(IN osm_sm_mad_ctrl_t * const p_ctrl, */ if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_process_get_resp: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(msg_id)); @@ -308,8 +301,7 @@ __osm_sm_mad_ctrl_process_get_resp(IN osm_sm_mad_ctrl_t * const p_ctrl, __osm_sm_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_get_resp: ERR 3104: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3104: " "Dispatcher post message failed (%s) for attribute = 0x%X\n", CL_STATUS_MSG(status), cl_ntoh16(p_smp->attr_id)); goto Exit; @@ -351,8 +343,7 @@ __osm_sm_mad_ctrl_process_get(IN osm_sm_mad_ctrl_t * const p_ctrl, default: cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown); - osm_log(p_ctrl->p_log, OSM_LOG_VERBOSE, - "__osm_sm_mad_ctrl_process_get: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Ignoring SubnGet MAD - unsupported attribute = 0x%X\n", cl_ntoh16(p_smp->attr_id)); break; @@ -373,8 +364,7 @@ __osm_sm_mad_ctrl_process_get(IN osm_sm_mad_ctrl_t * const p_ctrl, */ if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_process_get: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(msg_id)); @@ -382,8 +372,7 @@ __osm_sm_mad_ctrl_process_get(IN osm_sm_mad_ctrl_t * const p_ctrl, __osm_sm_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) { - 
osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_get: ERR 3106: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3106: " "Dispatcher post message failed (%s)\n", CL_STATUS_MSG(status)); goto Exit; @@ -435,8 +424,7 @@ __osm_sm_mad_ctrl_process_set(IN osm_sm_mad_ctrl_t * const p_ctrl, default: cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown); - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_set: ERR 3107: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3107: " "Unsupported attribute = 0x%X\n", cl_ntoh16(p_smp->attr_id)); osm_dump_dr_smp(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); @@ -458,8 +446,7 @@ __osm_sm_mad_ctrl_process_set(IN osm_sm_mad_ctrl_t * const p_ctrl, */ if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_process_set: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(msg_id)); @@ -467,8 +454,7 @@ __osm_sm_mad_ctrl_process_set(IN osm_sm_mad_ctrl_t * const p_ctrl, __osm_sm_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_set: ERR 3108: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3108: " "Dispatcher post message failed (%s)\n", CL_STATUS_MSG(status)); goto Exit; @@ -511,8 +497,7 @@ __osm_sm_mad_ctrl_process_trap(IN osm_sm_mad_ctrl_t * const p_ctrl, /* Make sure OpenSM is master. If not - then we should not process the trap */ if (p_ctrl->p_subn->sm_state != IB_SMINFO_STATE_MASTER) { - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_process_trap: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Received trap but OpenSM is not in MASTER state. 
" "Dropping mad\n"); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); @@ -530,8 +515,7 @@ __osm_sm_mad_ctrl_process_trap(IN osm_sm_mad_ctrl_t * const p_ctrl, default: cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown); - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_trap: ERR 3109: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3109: " "Unsupported attribute = 0x%X\n", cl_ntoh16(p_smp->attr_id)); osm_dump_dr_smp(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); @@ -553,8 +537,7 @@ __osm_sm_mad_ctrl_process_trap(IN osm_sm_mad_ctrl_t * const p_ctrl, */ if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_process_trap: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(msg_id)); @@ -562,8 +545,7 @@ __osm_sm_mad_ctrl_process_trap(IN osm_sm_mad_ctrl_t * const p_ctrl, __osm_sm_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_process_trap: ERR 3110: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3110: " "Dispatcher post message failed (%s)\n", CL_STATUS_MSG(status)); goto Exit; @@ -611,8 +593,7 @@ __osm_sm_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd); if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_rcv_callback: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "%u QP0 MADs received\n", p_ctrl->p_stats->qp0_mads_rcvd); @@ -620,8 +601,7 @@ __osm_sm_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, /* if we are closing down simply do nothing */ if (osm_exit_flag) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_rcv_callback: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "Ignoring received mad - since we are exiting\n"); osm_dump_dr_smp(p_ctrl->p_log, p_smp, OSM_LOG_DEBUG); @@ -648,8 +628,7 @@ __osm_sm_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, status = p_smp->status; 
if (status != 0) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_rcv_callback: ERR 3111: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3111: " "Error status = 0x%X\n", status); osm_dump_dr_smp(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); } @@ -681,8 +660,7 @@ __osm_sm_mad_ctrl_rcv_callback(IN osm_madw_t * p_madw, case IB_MAD_METHOD_TRAP_REPRESS: default: cl_atomic_inc(&p_ctrl->p_stats->qp0_mads_rcvd_unknown); - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_rcv_callback: ERR 3112: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3112: " "Unsupported method = 0x%X\n", p_smp->method); osm_dump_dr_smp(p_ctrl->p_log, p_smp, OSM_LOG_ERROR); osm_mad_pool_put(p_ctrl->p_mad_pool, p_madw); @@ -724,8 +702,7 @@ __osm_sm_mad_ctrl_send_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) CL_ASSERT(p_madw); - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_send_err_cb: ERR 3113: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3113: " "MAD completed in error (%s)\n", ib_get_err_str(p_madw->status)); @@ -741,8 +718,7 @@ __osm_sm_mad_ctrl_send_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) p_smp->attr_id == IB_MAD_ATTR_MCAST_FWD_TBL || p_smp->attr_id == IB_MAD_ATTR_SWITCH_INFO || p_smp->attr_id == IB_MAD_ATTR_LIN_FWD_TBL)) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_send_err_cb: ERR 3119: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3119: " "Set method failed\n"); p_ctrl->p_subn->subnet_initialization_error = TRUE; } @@ -763,8 +739,7 @@ __osm_sm_mad_ctrl_send_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) p_ctrl->p_subn, &(p_madw->mad_addr)); if (!p_physp) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_send_err_cb: ERR 3114: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3114: " "Failed to find the corresponding phys port\n"); } else { osm_physp_replace_dr_path_with_alternate_dr_path @@ -786,8 +761,7 @@ __osm_sm_mad_ctrl_send_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) if 
(osm_madw_get_err_msg(p_madw) != CL_DISP_MSGID_NONE) { if (osm_log_is_active(p_ctrl->p_log, OSM_LOG_DEBUG)) - osm_log(p_ctrl->p_log, OSM_LOG_DEBUG, - "__osm_sm_mad_ctrl_send_err_cb: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_DEBUG, "Posting Dispatcher message %s\n", osm_get_disp_msg_str(osm_madw_get_err_msg (p_madw))); @@ -798,8 +772,7 @@ __osm_sm_mad_ctrl_send_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) __osm_sm_mad_ctrl_disp_done_callback, p_ctrl); if (status != CL_SUCCESS) - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "__osm_sm_mad_ctrl_send_err_cb: ERR 3115: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3115: " "Dispatcher post message failed (%s)\n", CL_STATUS_MSG(status)); } else @@ -873,8 +846,7 @@ osm_sm_mad_ctrl_init(IN osm_sm_mad_ctrl_t * const p_ctrl, CL_DISP_MSGID_NONE, NULL, NULL); if (p_ctrl->h_disp == CL_DISP_INVALID_HANDLE) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_sm_mad_ctrl_init: ERR 3116: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3116: " "Dispatcher registration failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; @@ -897,8 +869,7 @@ osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * const p_ctrl, OSM_LOG_ENTER(p_ctrl->p_log); if (p_ctrl->h_bind != OSM_BIND_INVALID_HANDLE) { - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - "osm_sm_mad_ctrl_bind: ERR 3117: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3117: " "Multiple binds not allowed\n"); status = IB_ERROR; goto Exit; @@ -913,8 +884,7 @@ osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * const p_ctrl, bind_info.recv_q_size = OSM_SM_DEFAULT_QP0_RCV_SIZE; bind_info.send_q_size = OSM_SM_DEFAULT_QP0_SEND_SIZE; - osm_log(p_ctrl->p_log, OSM_LOG_VERBOSE, - "osm_sm_mad_ctrl_bind: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_VERBOSE, "Binding to port 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); p_ctrl->h_bind = osm_vendor_bind(p_ctrl->p_vendor, @@ -925,8 +895,7 @@ osm_sm_mad_ctrl_bind(IN osm_sm_mad_ctrl_t * const p_ctrl, if (p_ctrl->h_bind == OSM_BIND_INVALID_HANDLE) { status = IB_ERROR; - osm_log(p_ctrl->p_log, OSM_LOG_ERROR, - 
"osm_sm_mad_ctrl_bind: ERR 3118: " + OSM_LOG(p_ctrl->p_log, OSM_LOG_ERROR, "ERR 3118: " "Vendor specific bind failed\n"); goto Exit; } diff --git a/opensm/opensm/osm_sm_state_mgr.c b/opensm/opensm/osm_sm_state_mgr.c index f71bc43..1fb67c5 100644 --- a/opensm/opensm/osm_sm_state_mgr.c +++ b/opensm/opensm/osm_sm_state_mgr.c @@ -105,8 +105,7 @@ static void __osm_sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm) p_port = sm->p_polling_sm->p_port; } if (p_port == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sm_state_mgr_send_master_sm_info_req: ERR 3203: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3203: " "No port object for GUID 0x%016" PRIx64 "\n", cl_ntoh64(sm->master_sm_guid)); goto Exit; @@ -120,8 +119,7 @@ static void __osm_sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm) &context); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sm_state_mgr_send_master_sm_info_req: ERR 3204: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3204: " "Failure requesting SMInfo (%s)\n", ib_get_err_str(status)); @@ -155,8 +153,7 @@ static void __osm_sm_state_mgr_start_polling(osm_sm_t * sm) */ cl_status = cl_timer_start(&sm->polling_timer, timeout); if (cl_status != CL_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sm_state_mgr_start_polling: ERR 3210: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3210: " "Failed to start timer\n"); OSM_LOG_EXIT(sm->p_log); @@ -191,8 +188,7 @@ void osm_sm_state_mgr_polling_callback(IN void *context) * signal is on - since we are currently in exit flow */ if (sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY && osm_exit_flag) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_sm_state_mgr_polling_callback: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Signalling subnet_up_event\n"); cl_event_signal(&sm->subnet_up_event); goto Exit; @@ -204,13 +200,11 @@ void osm_sm_state_mgr_polling_callback(IN void *context) * osm_sm_state_mgr_process with signal OSM_SM_SIGNAL_POLLING_TIMEOUT */ sm->retry_number++; - osm_log(sm->p_log, 
OSM_LOG_VERBOSE, - "__osm_sm_state_mgr_polling_callback: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Retry number:%d\n", sm->retry_number); if (sm->retry_number >= sm->p_subn->opt.polling_retry_number) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_sm_state_mgr_polling_callback: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Reached polling_retry_number value in retry_number. " "Go to DISCOVERY state\n"); osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_POLLING_TIMEOUT); @@ -223,8 +217,7 @@ void osm_sm_state_mgr_polling_callback(IN void *context) /* restart the timer */ cl_status = cl_timer_start(&sm->polling_timer, timeout); if (cl_status != CL_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sm_state_mgr_polling_callback: ERR 3211: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3211: " "Failed to restart timer\n"); Exit: @@ -237,8 +230,7 @@ Exit: static void __osm_sm_state_mgr_signal_error(osm_sm_t * sm, IN const osm_sm_signal_t signal) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sm_state_mgr_signal_error: ERR 3207: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3207: " "Invalid signal %s in state %s\n", osm_get_sm_mgr_signal_str(signal), osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); @@ -271,8 +263,7 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t * sm, cl_spinlock_acquire(&sm->state_lock); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_sm_state_mgr_process: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received signal %s in state %s\n", osm_get_sm_mgr_signal_str(signal), osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); @@ -423,8 +414,7 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t * sm, * We also want to clear the p_polling_sm object - since we are * done polling on that remote sm - we got a handover from it. */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_sm_state_mgr_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Forcing heavy sweep. 
" "Received OSM_SM_SIGNAL_HANDOVER or OSM_SM_SIGNAL_POLLING_TIMEOUT\n"); sm->p_polling_sm = NULL; @@ -462,8 +452,7 @@ ib_api_status_t osm_sm_state_mgr_process(osm_sm_t * sm, break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_sm_state_mgr_process: ERR 3208: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3208: " "Invalid state %s\n", osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); @@ -493,8 +482,7 @@ ib_api_status_t osm_sm_state_mgr_check_legality(osm_sm_t * sm, cl_spinlock_acquire(&sm->state_lock); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_sm_state_mgr_check_legality: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received signal %s in state %s\n", osm_get_sm_mgr_signal_str(signal), osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); @@ -556,8 +544,7 @@ ib_api_status_t osm_sm_state_mgr_check_legality(osm_sm_t * sm, break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_sm_state_mgr_check_legality: ERR 3209: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3209: " "Invalid state %s\n", osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); status = IB_INVALID_PARAMETER; diff --git a/opensm/opensm/osm_sminfo_rcv.c b/opensm/opensm/osm_sminfo_rcv.c index 8f96eaf..96221d6 100644 --- a/opensm/opensm/osm_sminfo_rcv.c +++ b/opensm/opensm/osm_sminfo_rcv.c @@ -116,22 +116,19 @@ __osm_sminfo_rcv_process_get_request(IN osm_sm_t * sm, */ p_remote_smi = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(p_madw)); if (ib_sminfo_get_state(p_remote_smi) == IB_SMINFO_STATE_MASTER) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_sminfo_rcv_process_get_request: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to master SM with real sm_key\n"); p_smi->sm_key = sm->p_subn->opt.sm_key; } else { /* The requester is not authenticated as master - set sm_key to zero. 
*/ - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_sminfo_rcv_process_get_request: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to SM not master with zero sm_key\n"); p_smi->sm_key = 0; } status = osm_resp_send(sm, p_madw, 0, payload); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_get_request: ERR 2F02: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F02: " "Error sending response (%s)\n", ib_get_err_str(status)); goto Exit; @@ -202,8 +199,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, sm_smi = ib_smp_get_payload_ptr(p_smp); if (p_smp->method != IB_MAD_METHOD_SET) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F03: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F03: " "Unsupported method 0x%X\n", p_smp->method); CL_PLOCK_RELEASE(sm->p_lock); goto Exit; @@ -219,14 +215,12 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, */ p_remote_smi = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(p_madw)); if (ib_sminfo_get_state(p_remote_smi) == IB_SMINFO_STATE_MASTER) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_sminfo_rcv_process_set_request: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to master SM with real sm_key\n"); p_smi->sm_key = sm->p_subn->opt.sm_key; } else { /* The requester is not authenticated as master - set sm_key to zero. */ - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_sminfo_rcv_process_set_request: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to SM not master with zero sm_key\n"); p_smi->sm_key = 0; } @@ -234,16 +228,14 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, /* Check the legality of the packet */ status = __osm_sminfo_rcv_check_set_req_legality(p_smp); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F04: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F04: " "Check legality failed. 
AttributeModifier:0x%X RemoteState:%s\n", p_smp->attr_mod, osm_get_sm_mgr_state_str(ib_sminfo_get_state(sm_smi))); /* send a response with error code */ status = osm_resp_send(sm, p_madw, 7, payload); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F05: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F05: " "Error sending response (%s)\n", ib_get_err_str(status)); CL_PLOCK_RELEASE(sm->p_lock); @@ -272,8 +264,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, This code shouldn't be reached - checked in the check legality */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F06: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F06: " "THIS CODE SHOULD NOT BE REACHED!!\n"); CL_PLOCK_RELEASE(sm->p_lock); goto Exit; @@ -282,16 +273,14 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, /* check legality of the needed transition in the SM state machine */ status = osm_sm_state_mgr_check_legality(sm, sm_signal); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F07: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F07: " "Failed check of legality of needed SM transition. AttributeModifier:0x%X RemoteState:%s\n", p_smp->attr_mod, osm_get_sm_mgr_state_str(ib_sminfo_get_state(sm_smi))); /* send a response with error code */ status = osm_resp_send(sm, p_madw, 7, payload); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F08: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F08: " "Error sending response (%s)\n", ib_get_err_str(status)); CL_PLOCK_RELEASE(sm->p_lock); @@ -301,8 +290,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, /* the SubnSet(SMInfo) command is ok. Send a response. 
*/ status = osm_resp_send(sm, p_madw, 0, payload); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F09: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F09: " "Error sending response (%s)\n", ib_get_err_str(status)); @@ -311,8 +299,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, /* if the AttributeModifier is STANDBY - need to save on the sm in */ /* the master_sm_guid variable - the guid of the current master. */ if (p_smp->attr_mod == IB_SMINFO_ATTR_MOD_STANDBY) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_sminfo_rcv_process_set_request: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Received a STANDBY signal. Updating " "sm_state_mgr master_guid: 0x%016" PRIx64 "\n", cl_ntoh64(sm_smi->guid)); @@ -324,8 +311,7 @@ __osm_sminfo_rcv_process_set_request(IN osm_sm_t * sm, status = osm_sm_state_mgr_process(sm, sm_signal); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_request: ERR 2F10: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F10: " "Error in SM state transition (%s)\n", ib_get_err_str(status)); @@ -346,8 +332,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, p_smi = &p_sm->smi; if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_sminfo_rcv_process_get_sm: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Detected SM 0x%016" PRIx64 " in state %u\n", cl_ntoh64(p_smi->guid), ib_sminfo_get_state(p_smi)); @@ -365,8 +350,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, case IB_SMINFO_STATE_MASTER: sm->master_sm_found = 1; /* save on the sm the guid of the current master. */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_sminfo_rcv_process_get_sm: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Found master SM. 
Updating sm_state_mgr master_guid: 0x%016" PRIx64 "\n", cl_ntoh64(p_sm->p_port->guid)); sm->master_sm_guid = p_sm->p_port->guid; @@ -379,8 +363,7 @@ __osm_sminfo_rcv_process_get_sm(IN osm_sm_t * sm, sm->master_sm_found = 1; /* save on the sm the guid of the higher SM we found - */ /* we will poll it - as long as it lives - we should be in Standby. */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_sminfo_rcv_process_get_sm: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Found higher SM. Updating sm_state_mgr master_guid:" " 0x%016" PRIx64 "\n", cl_ntoh64(p_sm->p_port->guid)); @@ -468,8 +451,7 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm, p_smp = osm_madw_get_smp_ptr(p_madw); if (p_smp->method != IB_MAD_METHOD_GET_RESP) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_get_response: ERR 2F11: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F11: " "Unsupported method 0x%X\n", p_smp->method); goto Exit; } @@ -484,8 +466,7 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm, Check that the sm_key of the found SM is the same as ours, or is zero. If not - OpenSM cannot continue with configuration!. */ if (p_smi->sm_key != 0 && p_smi->sm_key != sm->p_subn->opt.sm_key) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_get_response: ERR 2F18: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F18: " "Got SM with sm_key that doesn't match our " "local key. 
Exiting\n"); osm_log(sm->p_log, OSM_LOG_SYS, @@ -501,17 +482,14 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm, p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_get_response: ERR 2F12: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F12: " "No port object for this SM\n"); goto _unlock_and_exit; } if (osm_port_get_guid(p_port) != p_smi->guid) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_get_response: ERR 2F13: " - "Bogus SM port GUID" - "\n\t\t\t\tExpected 0x%016" PRIx64 + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F13: " + "Bogus SM port GUID\n\t\t\t\tExpected 0x%016" PRIx64 ", Received 0x%016" PRIx64 "\n", cl_ntoh64(osm_port_get_guid(p_port)), cl_ntoh64(p_smi->guid)); @@ -519,8 +497,7 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm, } if (port_guid == sm->p_subn->sm_port_guid) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_sminfo_rcv_process_get_response: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Self query response received - SM port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); goto _unlock_and_exit; @@ -530,8 +507,7 @@ __osm_sminfo_rcv_process_get_response(IN osm_sm_t * sm, if (p_sm == (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl)) { p_sm = malloc(sizeof(*p_sm)); if (p_sm == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_get_response: ERR 2F14: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F14: " "Unable to allocate SM object\n"); goto _unlock_and_exit; } @@ -571,8 +547,7 @@ __osm_sminfo_rcv_process_set_response(IN osm_sm_t * sm, p_smp = osm_madw_get_smp_ptr(p_madw); if (p_smp->method != IB_MAD_METHOD_GET_RESP) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_response: ERR 2F16: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F16: " "Unsupported method 0x%X\n", p_smp->method); goto Exit; } @@ -582,8 +557,7 @@ __osm_sminfo_rcv_process_set_response(IN osm_sm_t * sm, /* Check the AttributeModifier */ if 
(p_smp->attr_mod != IB_SMINFO_ATTR_MOD_HANDOVER) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_sminfo_rcv_process_set_response: ERR 2F17: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F17: " "Unsupported attribute modifier 0x%X\n", p_smp->attr_mod); goto Exit; @@ -629,8 +603,7 @@ void osm_sminfo_rcv_process(IN void *context, IN void *data) moving issue. */ if (p_smi_context->port_guid != p_smi->guid) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_sminfo_rcv_process: ERR 2F19: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 2F19: " "Unexpected SM port GUID in response" "\n\t\t\t\tExpected 0x%016" PRIx64 ", Received 0x%016" PRIx64 "\n", diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index fa9a273..8f76c00 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -101,8 +101,7 @@ static void __osm_state_mgr_reset_node_count(IN cl_map_item_t * osm_sm_t *sm = context; if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_reset_node_count: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Resetting discovery count for node 0x%" PRIx64 "(%s)\n", cl_ntoh64(osm_node_get_node_guid(p_node)), p_node->print_desc); @@ -120,8 +119,7 @@ static void __osm_state_mgr_reset_port_count(IN cl_map_item_t * osm_sm_t *sm = context; if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_reset_port_count: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Resetting discovery count for port 0x%" PRIx64 "(node %s)\n", cl_ntoh64(osm_port_get_guid(p_port)), p_port->p_node ? 
p_port->p_node-> @@ -141,8 +139,7 @@ __osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item, osm_sm_t *sm = context; if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_reset_switch_count: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Resetting discovery count for switch 0x%" PRIx64 " (%s)\n", cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), @@ -180,8 +177,7 @@ static void __osm_state_mgr_get_sw_info(IN cl_map_item_t * const p_object, OSM_MSG_LIGHT_SWEEP_FAIL, &mad_context); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_get_sw_info: ERR 3304: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3304: " "Request for SwitchInfo failed\n"); } @@ -225,8 +221,7 @@ __osm_state_mgr_get_remote_port_info(IN osm_sm_t * sm, &mad_context); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_get_remote_port_info: ERR 332E: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332E: " "Request for PortInfo failed\n"); } @@ -286,13 +281,11 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_sm_t * sm) CL_DISP_MSGID_NONE, NULL); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_sweep_hop_0: ERR 3305: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3305: " "Request for NodeInfo failed\n"); } } else { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_sweep_hop_0: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports. 
Deferring sweep...\n"); status = IB_INVALID_STATE; } @@ -340,8 +333,7 @@ static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_sm_t * sm) */ h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); if (h_bind == OSM_BIND_INVALID_HANDLE) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_notify_lid_change: ERR 3306: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3306: " "No bound ports\n"); status = IB_ERROR; goto Exit; @@ -352,8 +344,7 @@ static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_sm_t * sm) */ status = osm_vendor_local_lid_change(h_bind); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_notify_lid_change: ERR 3307: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3307: " "Vendor LID update failed (%s)\n", ib_get_err_str(status)); } @@ -382,8 +373,7 @@ static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_sm_t * sm) * If we don't know our own port guid yet, assume the port is down. */ if (port_guid == 0) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_is_sm_port_down: ERR 3308: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: " "SM port GUID unknown\n"); state = IB_LINK_DOWN; goto Exit; @@ -394,8 +384,7 @@ static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_sm_t * sm) CL_PLOCK_ACQUIRE(sm->p_lock); p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_is_sm_port_down: ERR 3309: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: " "SM port with GUID:%016" PRIx64 " (%s) is unknown\n", cl_ntoh64(port_guid), p_port->p_node ? 
p_port->p_node-> @@ -457,8 +446,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_sweep_hop_1: ERR 3310: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3310: " "No SM port object\n"); status = IB_ERROR; goto Exit; @@ -469,8 +457,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) port_num = ib_node_info_get_local_port_num(&p_node->node_info); - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_sweep_hop_1: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Probing hop 1 on local port %u\n", port_num); p_physp = osm_node_get_physp_ptr(p_node, port_num); @@ -500,8 +487,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_sweep_hop_1: ERR 3311: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3311: " "Request for NodeInfo failed\n"); } break; @@ -535,8 +521,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_sweep_hop_1: ERR 3312: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3312: " "Request for NodeInfo failed\n"); } } @@ -544,8 +529,8 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) break; default: - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_sweep_hop_1: ERR 3313: Unknown node type %d (%s)\n", + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 3313: Unknown node type %d (%s)\n", osm_node_get_type(p_node), p_node->print_desc); } @@ -599,8 +584,8 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t * sm) && (osm_physp_get_port_state(p_physp) != IB_LINK_DOWN) && !osm_physp_get_remote(p_physp)) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_light_sweep_start: ERR 0108: " + OSM_LOG(sm->p_log, 
OSM_LOG_ERROR, + "ERR 0108: " "Unknown remote side for node 0x%016" PRIx64 "(%s) port %u. Adding to light sweep sampling list\n", @@ -620,8 +605,7 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t * sm) } CL_PLOCK_RELEASE(sm->p_lock); } else { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_light_sweep_start: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports. Deferring sweep...\n"); status = IB_INVALID_STATE; } @@ -652,8 +636,7 @@ static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN osm_sm_t * sm) p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) { /* If the sm is in MASTER state - return a pointer to it */ if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_state_mgr_exists_other_master_sm: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Found remote master SM with guid:0x%016" PRIx64 " (node %s)\n", cl_ntoh64(p_sm->smi.guid), p_sm->p_port->p_node ? p_sm->p_port->p_node-> @@ -714,8 +697,7 @@ static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_sm_t * sm) } if (p_highest_sm != NULL) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_get_highest_sm: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Found higher SM with guid: %016" PRIx64 " (node %s)\n", cl_ntoh64(p_highest_sm->smi.guid), p_highest_sm->p_port->p_node ? @@ -749,16 +731,14 @@ __osm_state_mgr_send_handover(IN osm_sm_t * const sm, memset(&context, 0, sizeof(context)); p_port = p_sm->p_port; if (p_port == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_send_handover: ERR 3316: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3316: " "No port object on given remote_sm object\n"); goto Exit; } /* update the master_guid in the sm_state_mgr object according to */ /* the guid of the port where the new Master SM should reside. */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_state_mgr_send_handover: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Handing over mastership. 
Updating sm_state_mgr master_guid: %016" PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid), p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN"); @@ -776,14 +756,12 @@ __osm_state_mgr_send_handover(IN osm_sm_t * const sm, * as the master SM. */ if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_send_handover: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to master SM with real sm_key\n"); p_smi->sm_key = sm->p_subn->opt.sm_key; } else { /* The requester is not authenticated as master - set sm_key to zero */ - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_state_mgr_send_handover: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Responding to SM not master with zero sm_key\n"); p_smi->sm_key = 0; } @@ -795,8 +773,7 @@ __osm_state_mgr_send_handover(IN osm_sm_t * const sm, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_send_handover: ERR 3317: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3317: " "Failure requesting SMInfo (%s)\n", ib_get_err_str(status)); } @@ -856,15 +833,13 @@ static void __osm_state_mgr_report_new_ports(IN osm_sm_t * sm) status = osm_report_notice(sm->p_log, sm->p_subn, ¬ice); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_report_new_ports: ERR 3318: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3318: " "Error sending trap reports on GUID:0x%016" PRIx64 " (%s)\n", port_gid.unicast.interface_id, ib_get_err_str(status)); } osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); - osm_log(sm->p_log, OSM_LOG_INFO, - "__osm_state_mgr_report_new_ports: " + OSM_LOG(sm->p_log, OSM_LOG_INFO, "Discovered new port with GUID:0x%016" PRIx64 " LID range [0x%X,0x%X] of node:%s\n", cl_ntoh64(port_gid.unicast.interface_id), @@ -950,8 +925,7 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t * sm) * didn't get the PortInfo Set request. 
Due to this, the port * is updated with its original lid in our database, but with the * new lid we wanted to give it in our port_lid_tbl. */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_check_tbl_consistency: ERR 3322: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3322: " "lid 0x%zX is wrongly assigned to port 0x%016" PRIx64 " in port_lid_tbl\n", lid, cl_ntoh64(osm_port_get_guid(p_port_stored))); @@ -960,11 +934,10 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t * sm) /* There is an object in the new database, but no object in our subnet * database. This is the matching case of the prior check - the port * still has its original lid. */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_check_tbl_consistency: ERR 3323: " - "port 0x%016" PRIx64 - " exists in new port_lid_tbl under " - "lid 0x%zX, but missing in subnet port_lid_tbl db\n", + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3323: " + "port 0x%016" PRIx64 " exists in new " + "port_lid_tbl under lid 0x%zX, but " + "missing in subnet port_lid_tbl db\n", cl_ntoh64(osm_port_get_guid (p_port_ref)), lid); } else { @@ -972,8 +945,7 @@ static void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t * sm) /* if we reached here then p_port_stored != p_port_ref. * We were trying to set a lid to p_port_stored, but it didn't reach it, * and p_port_ref also didn't get the lid update. 
*/ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_check_tbl_consistency: ERR 3324: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3324: " "lid 0x%zX has port 0x%016" PRIx64 " in new port_lid_tbl db, " "and port 0x%016" PRIx64 @@ -1069,8 +1041,7 @@ _repeat_discovery: /* rescan configuration updates */ status = osm_subn_rescan_conf_files(sm->p_subn); if (status != IB_SUCCESS) - osm_log(sm->p_log, OSM_LOG_ERROR, - "do_sweep: ERR 331A: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: " "osm_subn_rescan_conf_file failed\n"); if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER) @@ -1298,8 +1269,7 @@ void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal) OSM_LOG_ENTER(sm->p_log); if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_state_mgr_process: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received signal %s in state %s\n", osm_get_sm_signal_str(signal), osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); @@ -1315,8 +1285,7 @@ void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal) default: CL_ASSERT(FALSE); - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_state_mgr_process: ERR 3320: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3320: " "Invalid SM signal %u\n", signal); break; } diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index a0fbf00..03fd53a 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -234,8 +234,7 @@ osm_get_gid_by_mad_addr(IN osm_log_t * p_log, const osm_port_t *p_port = NULL; if (p_gid == NULL) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_get_gid_by_mad_addr: ERR 7505: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7505: " "Provided output GID is NULL\n"); return (IB_INVALID_PARAMETER); } @@ -250,22 +249,18 @@ osm_get_gid_by_mad_addr(IN osm_log_t * p_log, p_port = cl_ptr_vector_get(p_tbl, cl_ntoh16(p_mad_addr->dest_lid)); if (p_port == NULL) { - osm_log(p_log, OSM_LOG_DEBUG, - "osm_get_gid_by_mad_addr: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Did not find any port with 
LID: 0x%X\n", - cl_ntoh16(p_mad_addr->dest_lid) - ); + cl_ntoh16(p_mad_addr->dest_lid)); return (IB_INVALID_PARAMETER); } p_gid->unicast.interface_id = p_port->p_physp->port_guid; p_gid->unicast.prefix = p_subn->opt.subnet_prefix; } else { /* The dest_lid is not in the subnet table - this is an error */ - osm_log(p_log, OSM_LOG_ERROR, - "osm_get_gid_by_mad_addr: ERR 7501: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7501: " "LID is out of range: 0x%X\n", - cl_ntoh16(p_mad_addr->dest_lid) - ); + cl_ntoh16(p_mad_addr->dest_lid)); return (IB_INVALID_PARAMETER); } @@ -294,22 +289,18 @@ osm_physp_t *osm_get_physp_by_mad_addr(IN osm_log_t * p_log, cl_ntoh16(p_mad_addr->dest_lid)); if (p_port == NULL) { /* The port is not in the port_lid table - this is an error */ - osm_log(p_log, OSM_LOG_ERROR, - "osm_get_physp_by_mad_addr: ERR 7502: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7502: " "Cannot locate port object by lid: 0x%X\n", - cl_ntoh16(p_mad_addr->dest_lid) - ); + cl_ntoh16(p_mad_addr->dest_lid)); goto Exit; } p_physp = p_port->p_physp; } else { /* The dest_lid is not in the subnet table - this is an error */ - osm_log(p_log, OSM_LOG_ERROR, - "osm_get_physp_by_mad_addr: ERR 7503: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7503: " "Lid is out of range: 0x%X\n", - cl_ntoh16(p_mad_addr->dest_lid) - ); + cl_ntoh16(p_mad_addr->dest_lid)); } Exit: @@ -337,11 +328,9 @@ osm_port_t *osm_get_port_by_mad_addr(IN osm_log_t * p_log, cl_ntoh16(p_mad_addr->dest_lid)); } else { /* The dest_lid is not in the subnet table - this is an error */ - osm_log(p_log, OSM_LOG_ERROR, - "osm_get_port_by_mad_addr: ERR 7504: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 7504: " "Lid is out of range: 0x%X\n", - cl_ntoh16(p_mad_addr->dest_lid) - ); + cl_ntoh16(p_mad_addr->dest_lid)); } return p_port; @@ -696,7 +685,7 @@ append_prefix_route(IN osm_subn_t * const p_subn, uint64_t prefix, uint64_t guid route = malloc(sizeof *route); if (! 
route) { - osm_log(&p_subn->p_osm->log, OSM_LOG_ERROR, "%s: out of memory", __FUNCTION__); + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "out of memory"); return IB_ERROR; } @@ -725,8 +714,8 @@ osm_parse_prefix_routes_file(IN osm_subn_t * const p_subn) if (errno == ENOENT) return IB_SUCCESS; - osm_log(log, OSM_LOG_ERROR, "%s: fopen(%s) failed: %s", - __FUNCTION__, p_subn->opt.prefix_routes_file, strerror(errno)); + OSM_LOG(log, OSM_LOG_ERROR, "fopen(%s) failed: %s", + p_subn->opt.prefix_routes_file, strerror(errno)); return IB_ERROR; } @@ -747,7 +736,7 @@ osm_parse_prefix_routes_file(IN osm_subn_t * const p_subn) p_guid = strtok_r(NULL, " \t\n", &p_last); if (! p_guid) { - osm_log(log, OSM_LOG_ERROR, "%s:%d: missing GUID\n", + OSM_LOG(log, OSM_LOG_ERROR, "%s:%d: missing GUID\n", p_subn->opt.prefix_routes_file, line); errors++; continue; @@ -755,7 +744,7 @@ osm_parse_prefix_routes_file(IN osm_subn_t * const p_subn) p_extra = strtok_r(NULL, " \t\n", &p_last); if (p_extra && *p_extra != '#') { - osm_log(log, OSM_LOG_INFO, "%s:%d: extra tokens ignored\n", + OSM_LOG(log, OSM_LOG_INFO, "%s:%d: extra tokens ignored\n", p_subn->opt.prefix_routes_file, line); } @@ -764,7 +753,7 @@ osm_parse_prefix_routes_file(IN osm_subn_t * const p_subn) else { prefix = strtoull(p_prefix, &p_end, 16); if (*p_end != '\0') { - osm_log(log, OSM_LOG_ERROR, "%s:%d: illegal prefix: %s\n", + OSM_LOG(log, OSM_LOG_ERROR, "%s:%d: illegal prefix: %s\n", p_subn->opt.prefix_routes_file, line, p_prefix); errors++; continue; @@ -776,7 +765,7 @@ osm_parse_prefix_routes_file(IN osm_subn_t * const p_subn) else { guid = strtoull(p_guid, &p_end, 16); if (*p_end != '\0' && *p_end != '#') { - osm_log(log, OSM_LOG_ERROR, "%s:%d: illegal GUID: %s\n", + OSM_LOG(log, OSM_LOG_ERROR, "%s:%d: illegal GUID: %s\n", p_subn->opt.prefix_routes_file, line, p_guid); errors++; continue; @@ -814,7 +803,7 @@ ib_api_status_t osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn) if (!opts_file) { if (errno == ENOENT) return 
IB_SUCCESS; - osm_log(&p_subn->p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "cannot open file \'%s\': %s\n", file_name, strerror(errno)); return IB_ERROR; diff --git a/opensm/opensm/osm_sw_info_rcv.c b/opensm/opensm/osm_sw_info_rcv.c index 06aaa91..f03f5be 100644 --- a/opensm/opensm/osm_sw_info_rcv.c +++ b/opensm/opensm/osm_sw_info_rcv.c @@ -114,8 +114,7 @@ __osm_si_rcv_get_port_info(IN osm_sm_t * sm, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { /* continue the loop despite the error */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_si_rcv_get_port_info: ERR 3602: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3602: " "Failure initiating PortInfo request (%s)\n", ib_get_err_str(status)); } @@ -159,8 +158,7 @@ __osm_si_rcv_get_fwd_tbl(IN osm_sm_t * sm, for (block_id_ho = 0; block_id_ho <= max_block_id_ho; block_id_ho++) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_get_fwd_tbl: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Retrieving FT block %u\n", block_id_ho); } @@ -171,8 +169,7 @@ __osm_si_rcv_get_fwd_tbl(IN osm_sm_t * sm, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { /* continue the loop despite the error */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_si_rcv_get_fwd_tbl: ERR 3603: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3603: " "Failure initiating PortInfo request (%s)\n", ib_get_err_str(status)); } @@ -209,8 +206,7 @@ __osm_si_rcv_get_mcast_fwd_tbl(IN osm_sm_t * sm, CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH); if (osm_switch_get_mcast_fwd_tbl_size(p_sw) == 0) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_get_mcast_fwd_tbl: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Multicast not supported by switch 0x%016" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); goto Exit; @@ -225,8 +221,7 @@ __osm_si_rcv_get_mcast_fwd_tbl(IN osm_sm_t * sm, max_block_id_ho = osm_mcast_tbl_get_max_block(p_tbl); if (max_block_id_ho > IB_MCAST_MAX_BLOCK_ID) 
{ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_si_rcv_get_mcast_fwd_tbl: ERR 3609: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3609: " "Out-of-range mcast block size = %u on switch 0x%016" PRIx64 "\n", max_block_id_ho, cl_ntoh64(osm_node_get_node_guid(p_node))); @@ -237,8 +232,7 @@ __osm_si_rcv_get_mcast_fwd_tbl(IN osm_sm_t * sm, CL_ASSERT(max_position <= IB_MCAST_POSITION_MAX); - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_get_mcast_fwd_tbl: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Max MFT block = %u, Max position = %u\n", max_block_id_ho, max_position); @@ -246,15 +240,13 @@ __osm_si_rcv_get_mcast_fwd_tbl(IN osm_sm_t * sm, for (block_id_ho = 0; block_id_ho <= max_block_id_ho; block_id_ho++) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_get_mcast_fwd_tbl: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Retrieving MFT block %u\n", block_id_ho); } for (position = 0; position <= max_position; position++) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_get_mcast_fwd_tbl: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Retrieving MFT position %u\n", position); } @@ -268,8 +260,7 @@ __osm_si_rcv_get_mcast_fwd_tbl(IN osm_sm_t * sm, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { /* continue the loop despite the error */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_si_rcv_get_mcast_fwd_tbl: ERR 3607: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3607: " "Failure initiating PortInfo request (%s)\n", ib_get_err_str(status)); } @@ -314,8 +305,7 @@ __osm_si_rcv_process_new(IN osm_sm_t * sm, */ p_sw = osm_switch_new(p_node, p_madw); if (p_sw == NULL) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_si_rcv_process_new: ERR 3608: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3608: " "Unable to allocate new switch object\n"); goto Exit; } @@ -324,8 +314,7 @@ __osm_si_rcv_process_new(IN osm_sm_t * sm, if (p_sw->mcast_tbl.max_mlid_ho < sm->p_subn->max_multicast_lid_ho) { 
sm->p_subn->max_multicast_lid_ho = p_sw->mcast_tbl.max_mlid_ho; - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_si_rcv_process_new: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Subnet max multicast lid is 0x%X\n", sm->p_subn->max_multicast_lid_ho); } @@ -334,8 +323,7 @@ __osm_si_rcv_process_new(IN osm_sm_t * sm, if (p_sw->fwd_tbl.p_lin_tbl->size < sm->p_subn->max_unicast_lid_ho) { sm->p_subn->max_unicast_lid_ho = p_sw->fwd_tbl.p_lin_tbl->size; - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_si_rcv_process_new: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Subnet max unicast lid is 0x%X\n", sm->p_subn->max_unicast_lid_ho); } @@ -348,8 +336,7 @@ __osm_si_rcv_process_new(IN osm_sm_t * sm, /* This shouldn't happen since we hold the lock! */ - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_si_rcv_process_new: ERR 3605: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3605: " "Unable to add new switch object to database\n"); osm_switch_delete(&p_sw); goto Exit; @@ -416,16 +403,14 @@ __osm_si_rcv_process_existing(IN osm_sm_t * sm, if (p_si_context->set_method) { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_process_existing: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received logical SetResp()\n"); } osm_switch_set_switch_info(p_sw, p_si); } else { if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_process_existing: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received logical GetResp()\n"); } @@ -440,8 +425,7 @@ __osm_si_rcv_process_existing(IN osm_sm_t * sm, /* If the mad was returned with an error - signal a change to the state manager. */ if (ib_smp_get_status(p_smp) != 0) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_si_rcv_process_existing: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "GetResp() received with error in light sweep. " "Commencing heavy sweep\n"); is_change_detected = TRUE; @@ -463,8 +447,7 @@ __osm_si_rcv_process_existing(IN osm_sm_t * sm, of the state change bit. 
*/ p_sw->discovery_count++; - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_si_rcv_process_existing: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "discovery_count is:%u\n", p_sw->discovery_count); @@ -472,8 +455,7 @@ __osm_si_rcv_process_existing(IN osm_sm_t * sm, if (p_sw->discovery_count == 1) __osm_si_rcv_get_port_info(sm, p_sw, p_madw); else { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_si_rcv_process_existing: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Not discovering again through switch:0x%" PRIx64 "\n", osm_node_get_node_guid(p_sw->p_node)); @@ -515,10 +497,8 @@ void osm_si_rcv_process(IN void *context, IN void *data) node_guid = p_context->node_guid; if (osm_log_is_active(sm->p_log, OSM_LOG_DEBUG)) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "osm_si_rcv_process: " - "Switch GUID 0x%016" PRIx64 - ", TID 0x%" PRIx64 "\n", + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, + "Switch GUID 0x%016" PRIx64 ", TID 0x%" PRIx64 "\n", cl_ntoh64(node_guid), cl_ntoh64(p_smp->trans_id)); } @@ -526,8 +506,7 @@ void osm_si_rcv_process(IN void *context, IN void *data) p_node = osm_get_node_by_guid(sm->p_subn, node_guid); if (!p_node) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_si_rcv_process: ERR 3606: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3606: " "SwitchInfo received for nonexistent node " "with GUID 0x%" PRIx64 "\n", cl_ntoh64(node_guid)); } else { @@ -536,8 +515,7 @@ void osm_si_rcv_process(IN void *context, IN void *data) Hack for bad value in Mellanox switch */ if (cl_ntoh16(p_si->lin_top) > IB_LID_UCAST_END_HO) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_si_rcv_process: ERR 3610: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3610: " "\n\t\t\t\tBad LinearFDBTop value = 0x%X " "on switch 0x%" PRIx64 "\n\t\t\t\tForcing correction to 0x%X\n", diff --git a/opensm/opensm/osm_sweep_fail_ctrl.c b/opensm/opensm/osm_sweep_fail_ctrl.c index 111ddf3..efe7e65 100644 --- a/opensm/opensm/osm_sweep_fail_ctrl.c +++ b/opensm/opensm/osm_sweep_fail_ctrl.c @@ -105,8 +105,7 @@ osm_sweep_fail_ctrl_init(IN 
osm_sweep_fail_ctrl_t * const p_ctrl, p_ctrl); if (p_ctrl->h_disp == CL_DISP_INVALID_HANDLE) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_sweep_fail_ctrl_init: ERR 3501: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3501: " "Dispatcher registration failed\n"); status = IB_INSUFFICIENT_RESOURCES; goto Exit; diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c index 491b104..5cf5a21 100644 --- a/opensm/opensm/osm_trap_rcv.c +++ b/opensm/opensm/osm_trap_rcv.c @@ -131,15 +131,13 @@ osm_trap_rcv_aging_tracker_callback(IN uint64_t key, p_physp = get_physp_by_lid_and_num(sm, lid, port_num); if (!p_physp) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_trap_rcv_aging_tracker_callback: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Cannot find port num:0x%X with lid:%u\n", port_num, lid); /* make sure the physp is still valid */ /* If the health port was false - set it to true */ else if (!osm_physp_is_healthy(p_physp)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_trap_rcv_aging_tracker_callback: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Clearing health bit of port num:%u with lid:%u\n", port_num, lid); @@ -286,8 +284,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, p_smp = osm_madw_get_smp_ptr(p_madw); if (p_smp->method != IB_MAD_METHOD_TRAP) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3801: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3801: " "Unsupported method 0x%X\n", p_smp->method); goto Exit; } @@ -313,13 +310,11 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, the local lid wasn't configured yet. Don't send a response to the trap. */ if (sm->p_subn->sm_base_lid == 0) { - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received SLID=0 Trap with local LID=0. Ignoring MAD\n"); goto Exit; } - osm_log(sm->p_log, OSM_LOG_DEBUG, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Received SLID=0 Trap. 
Using local LID:0x%04X instead\n", cl_ntoh16(sm->p_subn->sm_base_lid) ); @@ -336,8 +331,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, CL_HTON16(130)) || (p_ntci->g_or_v.generic.trap_num == CL_HTON16(131))) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "Received Generic Notice type:0x%02X num:%u Producer:%u (%s) " "from LID:0x%04X Port %d TID:0x%016" PRIx64 "\n", ib_notice_get_type(p_ntci), @@ -352,8 +346,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, port_num, cl_ntoh64(p_smp->trans_id) ); else - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "Received Generic Notice type:0x%02X num:%u Producer:%u (%s) " "from LID:0x%04X TID:0x%016" PRIx64 "\n", ib_notice_get_type(p_ntci), @@ -367,8 +360,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, cl_ntoh64(p_smp->trans_id) ); } else - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "Received Vendor Notice type:0x%02X vend:0x%06X dev:%u " "from LID:0x%04X TID:0x%016" PRIx64 "\n", ib_notice_get_type(p_ntci), @@ -386,14 +378,12 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, if (p_physp) p_smp->m_key = p_physp->port_info.m_key; else - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3809: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3809: " "Failed to find source physical port for trap\n"); status = osm_resp_send(sm, &tmp_madw, 0, payload); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3802: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3802: " "Error sending response (%s)\n", ib_get_err_str(status)); goto Exit; @@ -437,8 +427,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, /* Now we know how many times it provided this trap */ if (num_received > 10) { if (__print_num_received(num_received)) - osm_log(sm->p_log, OSM_LOG_ERROR, - 
"__osm_trap_rcv_process_request: ERR 3804: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: " "Received trap %u times consecutively\n", num_received); /* @@ -456,8 +445,8 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, port_num); if (!p_physp) - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3805: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 3805: " "Failed to find physical port by lid:0x%02X num:%u\n", cl_ntoh16(p_ntci->data_details. ntc_129_131.lid), @@ -479,10 +468,9 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, /* If trap 131, might want to disable peer port if available */ /* but peer port has been observed not to respond to SM requests */ - osm_log(sm->p_log, - OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3810: " - " Disabling physical port lid:0x%02X num:%u\n", + OSM_LOG(sm->p_log, OSM_LOG_ERROR, + "ERR 3810: " + "Disabling physical port lid:0x%02X num:%u\n", cl_ntoh16(p_ntci-> data_details. ntc_129_131. @@ -535,14 +523,12 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, if (status == IB_SUCCESS) goto Exit; - osm_log(sm->p_log, - OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3811: " + OSM_LOG(sm->p_log, + OSM_LOG_ERROR, "ERR 3811: " "Request to set PortInfo failed\n"); } - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Marking unhealthy physical port by lid:0x%02X num:%u\n", cl_ntoh16(p_ntci->data_details. ntc_129_131.lid), @@ -584,8 +570,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, /* If was already registered do nothing more */ if (num_received > 10 && run_heavy_sweep == FALSE) { if (__print_num_received(num_received)) - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Continuously received this trap %u times. 
Ignoring\n", num_received); goto Exit; @@ -605,10 +590,8 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 144) || (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == 145) || run_heavy_sweep)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_trap_rcv_process_request: " - "Forcing heavy sweep. " - "Received trap:%u\n", + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Forcing heavy sweep. Received trap:%u\n", cl_ntoh16(p_ntci->g_or_v.generic.trap_num)); sm->p_subn->force_heavy_sweep = TRUE; @@ -627,8 +610,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, accordingly. See IBA 1.2 p.739 or IBA 1.1 p.653 for details. */ if (is_gsi) { if (!tmp_madw.mad_addr.addr_type.gsi.global_route) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3806: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3806: " "Received gsi trap with global_route FALSE. " "Cannot update issuer_gid!\n"); goto Exit; @@ -645,8 +627,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, if ((uint16_t) cl_ptr_vector_get_size(p_tbl) <= cl_ntoh16(source_lid)) { /* the source lid is out of range */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "source lid is out of range:0x%X\n", cl_ntoh16(source_lid)); @@ -655,8 +636,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, p_port = cl_ptr_vector_get(p_tbl, cl_ntoh16(source_lid)); if (p_port == 0) { /* We have the lid - but no corresponding port */ - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "__osm_trap_rcv_process_request: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Cannot find port corresponding to lid:0x%X\n", cl_ntoh16(source_lid)); @@ -673,8 +653,7 @@ __osm_trap_rcv_process_request(IN osm_sm_t * sm, status = osm_report_notice(sm->p_log, sm->p_subn, p_ntci); CL_PLOCK_RELEASE(sm->p_lock); if (status != IB_SUCCESS) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_request: ERR 3803: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 
3803: " "Error sending trap reports (%s)\n", ib_get_err_str(status)); goto Exit; @@ -696,8 +675,7 @@ __osm_trap_rcv_process_sm(IN osm_sm_t * sm, OSM_LOG_ENTER(sm->p_log); - osm_log(sm->p_log, OSM_LOG_ERROR, - "__osm_trap_rcv_process_sm: ERR 3807: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3807: " "This function is not supported yet\n"); OSM_LOG_EXIT(sm->p_log); @@ -714,7 +692,7 @@ __osm_trap_rcv_process_response(IN osm_sm_t * sm, OSM_LOG_ENTER(sm->p_log); - osm_log(sm->p_log, OSM_LOG_ERROR, + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "__osm_trap_rcv_process_response: ERR 3808: " "This function is not supported yet\n"); diff --git a/opensm/opensm/osm_ucast_file.c b/opensm/opensm/osm_ucast_file.c index ed3ca10..3b0349d 100644 --- a/opensm/opensm/osm_ucast_file.c +++ b/opensm/opensm/osm_ucast_file.c @@ -64,8 +64,8 @@ static uint16_t remap_lid(osm_opensm_t * p_osm, uint16_t lid, ib_net64_t guid) p_port = osm_get_port_by_guid(&p_osm->subn, guid); if (!p_port) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "remap_lid: cannot find port guid 0x%016" PRIx64 + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "cannot find port guid 0x%016" PRIx64 " , will use the same lid\n", cl_ntoh64(guid)); return lid; } @@ -88,8 +88,8 @@ static void add_path(osm_opensm_t * p_osm, new_lid = port_guid ? 
remap_lid(p_osm, lid, port_guid) : lid; old_port = osm_fwd_tbl_get(osm_switch_get_fwd_tbl_ptr(p_sw), new_lid); if (old_port != OSM_NO_PATH && old_port != port_num) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "add_path: LID collision is detected on switch " + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, + "LID collision is detected on switch " "0x016%" PRIx64 ", will overwrite LID 0x%x entry\n", cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), new_lid); @@ -100,8 +100,8 @@ static void add_path(osm_opensm_t * p_osm, osm_get_switch_by_guid(&p_osm->subn, port_guid))) osm_switch_count_path(p_sw, port_num); - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "add_path: route 0x%04x(was 0x%04x) %u 0x%016" PRIx64 + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, + "route 0x%04x(was 0x%04x) %u 0x%016" PRIx64 " is added to switch 0x%016" PRIx64 "\n", new_lid, lid, port_num, cl_ntoh64(port_guid), cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); @@ -136,7 +136,7 @@ static int do_ucast_file_load(void *context) file_name = p_osm->subn.opt.ucast_dump_file; if (!file_name) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, "do_ucast_file_load: " + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "ucast dump file name is not given; " "using default routing algorithm\n"); return -1; @@ -144,8 +144,7 @@ static int do_ucast_file_load(void *context) file = fopen(file_name, "r"); if (!file) { - osm_log(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, - "do_ucast_file_load: ERR 6302: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6302: " "cannot open ucast dump file \'%s\'; " "using default routing algorithm\n", file_name); return -1; @@ -166,8 +165,8 @@ static int do_ucast_file_load(void *context) continue; if (!strncmp(p, "Multicast mlids", 15)) { - osm_log(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, - "do_ucast_file_load: ERR 6303: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, + "ERR 6303: " "Multicast dump file detected; " "skipping parsing. 
Using default " "routing algorithm\n"); @@ -177,7 +176,7 @@ static int do_ucast_file_load(void *context) ucast_mgr, p_sw); q = strstr(p, " guid 0x"); if (!q) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "PARSE ERROR: %s:%u: " "cannot parse switch definition\n", file_name, lineno); @@ -186,7 +185,7 @@ static int do_ucast_file_load(void *context) p = q + 8; sw_guid = strtoull(p, &q, 16); if (q == p || !isspace(*q)) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "PARSE ERROR: %s:%u: " "cannot parse switch guid: \'%s\'\n", file_name, lineno, p); @@ -196,8 +195,7 @@ static int do_ucast_file_load(void *context) p_sw = osm_get_switch_by_guid(&p_osm->subn, sw_guid); if (!p_sw) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "do_ucast_file_load: " + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "cannot find switch %016" PRIx64 "\n", cl_ntoh64(sw_guid)); continue; @@ -208,7 +206,7 @@ static int do_ucast_file_load(void *context) p += 2; lid = (uint16_t) strtoul(p, &q, 16); if (q == p || !isspace(*q)) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "PARSE ERROR: %s:%u: " "cannot parse lid: \'%s\'\n", file_name, lineno, p); @@ -219,7 +217,7 @@ static int do_ucast_file_load(void *context) p++; port_num = (uint8_t) strtoul(p, &q, 10); if (q == p || !isspace(*q)) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "PARSE ERROR: %s:%u: " "cannot parse port: \'%s\'\n", file_name, lineno, p); @@ -229,7 +227,7 @@ static int do_ucast_file_load(void *context) /* additionally try to exract guid */ q = strstr(p, " portguid 0x"); if (!q) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "PARSE WARNING: %s:%u: " "cannot find port guid " "(maybe broken dump): \'%s\'\n", @@ -239,7 +237,7 @@ static int do_ucast_file_load(void *context) p = q + 12; port_guid = strtoull(p, &q, 16); if (q == p || (!isspace(*q) && *q != ':')) { - osm_log(&p_osm->log, 
OSM_LOG_VERBOSE, + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "PARSE WARNING: %s:%u: " "cannot parse port guid " "(maybe broken dump): \'%s\'\n", @@ -273,8 +271,7 @@ static int do_lid_matrix_file_load(void *context) file_name = p_osm->subn.opt.lid_matrix_dump_file; if (!file_name) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "do_lid_matrix_file_load: " + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "lid matrix file name is not given; " "using default lid matrix generation algorithm\n"); return -1; @@ -282,8 +279,7 @@ static int do_lid_matrix_file_load(void *context) file = fopen(file_name, "r"); if (!file) { - osm_log(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, - "do_lid_matrix_file_load: ERR 6305: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6305: " "cannot open lid matrix file \'%s\'; " "using default lid matrix generation algorithm\n", file_name); @@ -307,7 +303,7 @@ static int do_lid_matrix_file_load(void *context) if (!strncmp(p, "Switch", 6)) { q = strstr(p, " guid 0x"); if (!q) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "PARSE ERROR: %s:%u: " "cannot parse switch definition\n", file_name, lineno); @@ -316,7 +312,7 @@ static int do_lid_matrix_file_load(void *context) p = q + 8; guid = strtoull(p, &q, 16); if (q == p || !isspace(*q)) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "PARSE ERROR: %s:%u: " "cannot parse switch guid: \'%s\'\n", file_name, lineno, p); @@ -326,8 +322,7 @@ static int do_lid_matrix_file_load(void *context) p_sw = osm_get_switch_by_guid(&p_osm->subn, guid); if (!p_sw) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "do_lid_matrix_file_load: " + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "cannot find switch %016" PRIx64 "\n", cl_ntoh64(guid)); continue; @@ -342,7 +337,7 @@ static int do_lid_matrix_file_load(void *context) num = strtoul(p, &q, 16); if (num > 0xffff || q == p || (*q != ':' && !isspace(*q))) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, 
"PARSE ERROR: %s:%u: " "cannot parse lid: \'%s\'\n", file_name, lineno, p); @@ -356,7 +351,7 @@ static int do_lid_matrix_file_load(void *context) while (len < 256 && *p && *p != '#') { num = strtoul(p, &q, 16); if (num > 0xff || q == p) { - osm_log(&p_osm->log, OSM_LOG_ERROR, + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "PARSE ERROR: %s:%u: " "cannot parse hops number: \'%s\'\n", file_name, lineno, p); @@ -371,7 +366,7 @@ static int do_lid_matrix_file_load(void *context) /* additionally try to extract guid */ q = strstr(p, " portguid 0x"); if (!q) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "PARSE WARNING: %s:%u: " "cannot find port guid " "(maybe broken dump): \'%s\'\n", @@ -381,7 +376,7 @@ static int do_lid_matrix_file_load(void *context) p = q + 12; guid = strtoull(p, &q, 16); if (q == p || !isspace(*q)) { - osm_log(&p_osm->log, OSM_LOG_VERBOSE, + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "PARSE WARNING: %s:%u: " "cannot parse port guid " "(maybe broken dump): \'%s\'\n", diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c index 3641f94..c3759ca 100644 --- a/opensm/opensm/osm_ucast_ftree.c +++ b/opensm/opensm/osm_ucast_ftree.c @@ -546,8 +546,7 @@ __osm_ftree_port_group_dump(IN ftree_fabric_t * p_ftree, sprintf(buff + strlen(buff), "%u", p_port->port_num); } - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_port_group_dump:" + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, " Port Group of size %u, port(s): %s, direction: %s\n" " Local <--> Remote GUID (LID):" "0x%016" PRIx64 " (0x%04x) <--> 0x%016" PRIx64 " (0x%04x)\n", @@ -684,8 +683,7 @@ static void __osm_ftree_sw_dump(IN ftree_fabric_t * p_ftree, if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) return; - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_sw_dump: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Switch index: %s, GUID: 0x%016" PRIx64 ", Ports: %u DOWN, %u UP\n", __osm_ftree_tuple_to_str(p_sw->tuple), @@ -877,8 
+875,7 @@ static void __osm_ftree_hca_dump(IN ftree_fabric_t * p_ftree, if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) return; - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_hca_dump: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "CA GUID: 0x%016" PRIx64 ", Ports: %u UP\n", __osm_ftree_hca_get_guid_ho(p_hca), p_hca->up_port_groups_num); @@ -1189,14 +1186,12 @@ static void __osm_ftree_fabric_dump(ftree_fabric_t * p_ftree) if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) return; - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_dump: \n" + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n" " |-------------------------------|\n" " |- Full fabric topology dump -|\n" " |-------------------------------|\n\n"); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_dump: -- CAs:\n"); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "-- CAs:\n"); for (p_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); p_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl); @@ -1205,8 +1200,8 @@ static void __osm_ftree_fabric_dump(ftree_fabric_t * p_ftree) } for (i = 0; i < p_ftree->max_switch_rank; i++) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_dump: -- Rank %u switches\n", i); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "-- Rank %u switches\n", i); for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { @@ -1215,8 +1210,7 @@ static void __osm_ftree_fabric_dump(ftree_fabric_t * p_ftree) } } - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_dump: \n" + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n" " |---------------------------------------|\n" " |- Full fabric topology dump completed -|\n" " |---------------------------------------|\n\n"); @@ -1229,22 +1223,17 @@ static void __osm_ftree_fabric_dump_general_info(IN ftree_fabric_t * p_ftree) 
uint32_t i, j; ftree_sw_t *p_sw; - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, "General fabric topology info\n"); - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, "============================\n"); - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, " - FatTree rank (roots to leaf switches): %u\n", p_ftree->leaf_switch_rank + 1); - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, " - FatTree max switch rank: %u\n", p_ftree->max_switch_rank); - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, " - Fabric has %u CAs (%u of them CNs), %u switches\n", cl_qmap_count(&p_ftree->hca_tbl), p_ftree->cn_num, cl_qmap_count(&p_ftree->sw_tbl)); @@ -1260,32 +1249,27 @@ static void __osm_ftree_fabric_dump_general_info(IN ftree_fabric_t * p_ftree) j++; } if (i == 0) - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, " - Fabric has %u switches at rank %u (roots)\n", j, i); else if (i == p_ftree->leaf_switch_rank) - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, " - Fabric has %u switches at rank %u (%u of them leafs)\n", j, i, p_ftree->leaf_switches_num); else - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, " - Fabric has %u switches at rank %u\n", j, i); } if (osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_VERBOSE)) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_dump_general_info: " + 
OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, " - Root switches:\n"); for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { if (p_sw->rank == 0) - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, " GUID: 0x%016" PRIx64 ", LID: 0x%04x, Index %s\n", __osm_ftree_sw_get_guid_ho(p_sw), @@ -1293,12 +1277,10 @@ static void __osm_ftree_fabric_dump_general_info(IN ftree_fabric_t * p_ftree) __osm_ftree_tuple_to_str(p_sw->tuple)); } - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, " - Leaf switches (sorted by index):\n"); for (i = 0; i < p_ftree->leaf_switches_num; i++) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_dump_general_info: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, " GUID: 0x%016" PRIx64 ", LID: 0x%04x, Index %s\n", __osm_ftree_sw_get_guid_ho(p_ftree-> @@ -1331,8 +1313,7 @@ static void __osm_ftree_fabric_dump_hca_ordering(IN ftree_fabric_t * p_ftree) p_ftree->p_osm->subn.opt.dump_files_dir, filename); p_hca_ordering_file = fopen(path, "w"); if (!p_hca_ordering_file) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_dump_hca_ordering: ERR AB01: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB01: " "cannot open file \'%s\': %s\n", filename, strerror(errno)); OSM_LOG_EXIT(&p_ftree->p_osm->log); @@ -1473,8 +1454,7 @@ static int __osm_ftree_fabric_mark_leaf_switches(IN ftree_fabric_t * p_ftree) OSM_LOG_ENTER(&p_ftree->p_osm->log); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_mark_leaf_switches: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Marking leaf switches in fabric\n"); /* Scan all the CAs, if they have CNs - find CN port and mark switch @@ -1504,9 +1484,8 @@ static int 
__osm_ftree_fabric_mark_leaf_switches(IN ftree_fabric_t * p_ftree) /* ensure that this leaf switch is at the correct tree level */ if (p_sw->rank != p_ftree->leaf_switch_rank) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_mark_leaf_switches: ERR AB26: " - "CN port 0x%" PRIx64 + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB26: CN port 0x%" PRIx64 " is connected to switch 0x%" PRIx64 " with rank %u, " "while FatTree leaf rank is %u\n", @@ -1540,8 +1519,7 @@ static void __osm_ftree_fabric_make_indexing(IN ftree_fabric_t * p_ftree) OSM_LOG_ENTER(&p_ftree->p_osm->log); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_make_indexing: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Starting FatTree indexing\n"); /* using the first leaf switch as a starting point for indexing algorithm. */ @@ -1560,8 +1538,8 @@ static void __osm_ftree_fabric_make_indexing(IN ftree_fabric_t * p_ftree) This fuction also adds the switch it into the switch_by_tuple table. 
*/ __osm_ftree_fabric_assign_first_tuple(p_ftree, p_sw); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_make_indexing: Indexing starting point:\n" + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Indexing starting point:\n" " - Switch rank : %u\n" " - Switch index : %s\n" " - Node LID : 0x%04x\n" @@ -1719,8 +1697,7 @@ static int __osm_ftree_fabric_create_leaf_switch_array(IN ftree_fabric_t * p_sw = p_next_sw; p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); if (p_sw->rank == p_ftree->leaf_switch_rank) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_create_leaf_switch_array: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Adding switch 0x%" PRIx64 " to full leaf switch array\n", __osm_ftree_sw_get_guid_ho(p_sw)); @@ -1749,8 +1726,7 @@ static int __osm_ftree_fabric_create_leaf_switch_array(IN ftree_fabric_t * } CL_ASSERT(first_leaf_idx < last_leaf_idx); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_create_leaf_switch_array: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Full leaf array info: first_leaf_idx = %u, last_leaf_idx = %u\n", first_leaf_idx, last_leaf_idx); @@ -1773,8 +1749,7 @@ static int __osm_ftree_fabric_create_leaf_switch_array(IN ftree_fabric_t * free(all_switches_at_leaf_level); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_create_leaf_switch_array: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Created array of %u leaf switches\n", p_ftree->leaf_switches_num); @@ -1824,8 +1799,7 @@ static boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * OSM_LOG_ENTER(&p_ftree->p_osm->log); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_validate_topology: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Validating fabric topology\n"); reference_sw_arr = @@ -1851,8 +1825,7 @@ static boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * if (reference_sw_arr[p_sw->rank]->up_port_groups_num != 
p_sw->up_port_groups_num) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_validate_topology: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB09: Different number of upward port groups on switches:\n" " GUID 0x%016" PRIx64 ", LID 0x%04x, Index %s - %u groups\n" @@ -1879,8 +1852,7 @@ static boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * down_port_groups_num != p_sw->down_port_groups_num) { /* we're allowing some hca's to be missing */ - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_validate_topology: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB0A: Different number of downward port groups on switches:\n" " GUID 0x%016" PRIx64 ", LID 0x%04x, Index %s - %u port groups\n" @@ -1913,9 +1885,8 @@ static boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * (&p_ref_group->ports) != cl_ptr_vector_get_size(&p_group-> ports)) { - osm_log(&p_ftree->p_osm->log, + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_validate_topology: " "ERR AB0B: Different number of ports in an upward port group on switches:\n" " GUID 0x%016" PRIx64 @@ -1961,9 +1932,8 @@ static boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * (&p_ref_group->ports) != cl_ptr_vector_get_size(&p_group-> ports)) { - osm_log(&p_ftree->p_osm->log, + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_validate_topology: " "ERR AB0C: Different number of ports in an downward port group on switches:\n" " GUID 0x%016" PRIx64 @@ -2000,12 +1970,10 @@ static boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * } /* end of while */ if (res == TRUE) - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_validate_topology: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Fabric topology has been identified as FatTree\n"); else - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_validate_topology: " + OSM_LOG(&p_ftree->p_osm->log, 
OSM_LOG_ERROR, "ERR AB0D: Fabric topology hasn't been identified as FatTree\n"); free(reference_sw_arr); @@ -2125,8 +2093,7 @@ __osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, OSM_NO_PATH) { /* Loop in the fabric - we already routed the remote switch on our way UP, and now we see it again on our way DOWN */ - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_upgoing_by_going_down: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Loop of lenght %d in the fabric:\n " "Switch %s (LID 0x%04x) closes loop through switch %s (LID 0x%04x)\n", (p_remote_sw->rank - highest_rank_in_route) * 2, @@ -2179,8 +2146,7 @@ __osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, (target_lid), p_min_port-> remote_port_num); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_upgoing_by_going_down: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Switch %s: set path to CA LID 0x%04x through port %u\n", __osm_ftree_tuple_to_str(p_remote_sw->tuple), cl_ntoh16(target_lid), @@ -2343,8 +2309,7 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, /* covering first half of case 1, and case 3 */ if (is_main_path) { if (p_sw->is_leaf) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_downgoing_by_going_up: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, " - Routing MAIN path for %s CA LID 0x%04x: %s --> %s\n", (is_real_lid) ? 
"real" : "DUMMY", cl_ntoh16(target_lid), @@ -2363,8 +2328,7 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, (target_lid), p_min_port-> remote_port_num); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_downgoing_by_going_up: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Switch %s: set path to CA LID 0x%04x through port %u\n", __osm_ftree_tuple_to_str(p_remote_sw->tuple), cl_ntoh16(target_lid), @@ -2443,8 +2407,7 @@ __osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, continue; if (p_sw->is_leaf) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_downgoing_by_going_up: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, " - Routing SECONDARY path for LID 0x%04x: %s --> %s\n", cl_ntoh16(target_lid), __osm_ftree_tuple_to_str(p_sw->tuple), @@ -2554,8 +2517,7 @@ static void __osm_ftree_fabric_route_to_cns(IN ftree_fabric_t * p_ftree) __osm_ftree_sw_set_fwd_table_block(p_sw, cl_ntoh16(hca_lid), p_port->port_num); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_to_cns: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Switch %s: set path to CN LID 0x%04x through port %u\n", __osm_ftree_tuple_to_str(p_sw->tuple), cl_ntoh16(hca_lid), p_port->port_num); @@ -2584,8 +2546,7 @@ static void __osm_ftree_fabric_route_to_cns(IN ftree_fabric_t * p_ftree) When routing to dummy HCAs we don't fill lid matrices. 
*/ if (p_ftree->max_cn_per_leaf > routed_targets_on_leaf) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_to_cns: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Routing %u dummy CAs\n", p_ftree->max_cn_per_leaf - p_sw->down_port_groups_num); @@ -2667,8 +2628,7 @@ static void __osm_ftree_fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree) cl_ntoh16(hca_lid), port_num_on_switch); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_to_non_cns: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Switch %s: set path to non-CN HCA LID 0x%04x through port %u\n", __osm_ftree_tuple_to_str(p_sw->tuple), cl_ntoh16(hca_lid), port_num_on_switch); @@ -2726,8 +2686,7 @@ static void __osm_ftree_fabric_route_to_switches(IN ftree_fabric_t * p_ftree) cl_ntoh16(p_sw->base_lid), 0); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_route_to_switches: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Switch %s (LID 0x%04x): routing switch-to-switch pathes\n", __osm_ftree_tuple_to_str(p_sw->tuple), cl_ntoh16(p_sw->base_lid)); @@ -2776,8 +2735,7 @@ static int __osm_ftree_fabric_populate_nodes(IN ftree_fabric_t * p_ftree) __osm_ftree_fabric_add_sw(p_ftree, p_osm_node->sw); break; default: - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_populate_nodes: ERR AB0E: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB0E: " "Node GUID 0x%016" PRIx64 " - Unknown node type: %s\n", cl_ntoh64(osm_node_get_node_guid(p_osm_node)), @@ -2889,8 +2847,7 @@ __osm_ftree_rank_leaf_switches(IN ftree_fabric_t * p_ftree, switch (osm_node_get_type(p_remote_osm_node)) { case IB_NODE_TYPE_CA: /* HCA connected directly to another HCA - not FatTree */ - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_rank_leaf_switches: ERR AB0F: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB0F: " "CA conected directly to another CA: " "0x%016" PRIx64 " <---> 0x%016" PRIx64 "\n", __osm_ftree_hca_get_guid_ho(p_hca), 
@@ -2908,9 +2865,8 @@ __osm_ftree_rank_leaf_switches(IN ftree_fabric_t * p_ftree, break; default: - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_rank_leaf_switches: ERR AB10: " - "Node GUID 0x%016" PRIx64 + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB10: Node GUID 0x%016" PRIx64 " - Unknown node type: %s\n", cl_ntoh64(osm_node_get_node_guid (p_remote_osm_node)), @@ -2933,8 +2889,7 @@ __osm_ftree_rank_leaf_switches(IN ftree_fabric_t * p_ftree, if (!__osm_ftree_sw_update_rank(p_sw, 0)) continue; - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_rank_leaf_switches: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Marking rank of switch that is directly connected to CA:\n" " - CA guid : 0x%016" PRIx64 "\n" @@ -3002,8 +2957,7 @@ __osm_ftree_fabric_construct_hca_ports(IN ftree_fabric_t * p_ftree, case IB_NODE_TYPE_CA: /* HCA connected directly to another HCA - not FatTree */ - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_construct_hca_ports: ERR AB11: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB11: " "CA conected directly to another CA: " "0x%016" PRIx64 " <---> 0x%016" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node)), @@ -3016,9 +2970,8 @@ __osm_ftree_fabric_construct_hca_ports(IN ftree_fabric_t * p_ftree, break; default: - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_construct_hca_ports: ERR AB12: " - "Node GUID 0x%016" PRIx64 + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB12: Node GUID 0x%016" PRIx64 " - Unknown node type: %s\n", cl_ntoh64(remote_node_guid), ib_get_node_type_str(remote_node_type)); @@ -3053,13 +3006,11 @@ __osm_ftree_fabric_construct_hca_ports(IN ftree_fabric_t * p_ftree, if (is_cn) { p_ftree->cn_num++; p_hca->cn_num++; - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_construct_hca_ports: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Marking CN port GUID 0x%016" PRIx64 "\n", 
cl_ntoh64(osm_physp_get_port_guid(p_osm_port))); } else { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_construct_hca_ports: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Marking non-CN port GUID 0x%016" PRIx64 "\n", cl_ntoh64(osm_physp_get_port_guid(p_osm_port))); } @@ -3117,8 +3068,7 @@ static int __osm_ftree_fabric_construct_sw_ports(IN ftree_fabric_t * p_ftree, /* ignore any loopback connection on switch */ if (p_node == p_remote_node) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_construct_sw_ports: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Ignoring loopback on switch GUID 0x%016" PRIx64 ", LID 0x%04x, rank %u\n", __osm_ftree_sw_get_guid_ho(p_sw), @@ -3161,8 +3111,8 @@ static int __osm_ftree_fabric_construct_sw_ports(IN ftree_fabric_t * p_ftree, p_remote_hca_or_sw = (void *)p_remote_sw; if (abs(p_sw->rank - p_remote_sw->rank) != 1) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_construct_sw_ports: ERR AB16: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB16: " "Illegal link between switches with ranks %u and %u:\n" " GUID 0x%016" PRIx64 ", LID 0x%04x, rank %u\n" @@ -3190,9 +3140,8 @@ static int __osm_ftree_fabric_construct_sw_ports(IN ftree_fabric_t * p_ftree, break; default: - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_construct_sw_ports: ERR AB13: " - "Node GUID 0x%016" PRIx64 + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, + "ERR AB13: Node GUID 0x%016" PRIx64 " - Unknown node type: %s\n", cl_ntoh64(remote_node_guid), ib_get_node_type_str(remote_node_type)); @@ -3253,15 +3202,13 @@ static int __osm_ftree_fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) cl_hton64(*p_guid)); if (!p_sw) { /* the specified root guid wasn't found in the fabric */ - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_rank_from_roots: ERR AB24: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB24: " "Root switch GUID 0x%" PRIx64 " not 
found\n", *p_guid); continue; } - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_rank_from_roots: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Ranking root switch with GUID 0x%" PRIx64 "\n", *p_guid); p_sw->rank = 0; @@ -3270,15 +3217,13 @@ static int __osm_ftree_fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) num_roots = cl_list_count(&ranking_bfs_list); if (!num_roots) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_rank_from_roots: ERR AB25: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB25: " "No valid roots supplied\n"); res = -1; goto Exit; } - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_rank_from_roots: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranked %u valid root switches\n", num_roots); /* Now the list has all the roots. @@ -3311,8 +3256,7 @@ static int __osm_ftree_fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) /* if needed, rank the remote switch and add it to the BFS list */ if (__osm_ftree_sw_update_rank (p_remote_sw, p_sw->rank + 1)) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_rank_from_roots: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Ranking switch 0x%" PRIx64 " with rank %u\n", __osm_ftree_sw_get_guid_ho(p_remote_sw), @@ -3325,8 +3269,7 @@ static int __osm_ftree_fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) /* done with ports of this switch - go to the next switch in the list */ } - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_fabric_rank_from_roots: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Subnet ranking completed. 
Max Node Rank = %u\n", max_rank); /* set FatTree maximal switch rank */ @@ -3362,8 +3305,7 @@ static int __osm_ftree_fabric_rank_from_hcas(IN ftree_fabric_t * p_ftree) if (__osm_ftree_rank_leaf_switches (p_ftree, p_hca, &ranking_bfs_list) != 0) { res = -1; - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_rank_from_hcas: ERR AB14: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB14: " "Subnet ranking failed - subnet is not FatTree"); goto Exit; } @@ -3400,8 +3342,7 @@ static int __osm_ftree_fabric_rank(IN ftree_fabric_t * p_ftree) if (res) goto Exit; - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_rank: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, "FatTree max switch rank is %u\n", p_ftree->max_switch_rank); Exit: @@ -3442,8 +3383,7 @@ static void __osm_ftree_fabric_set_leaf_rank(IN ftree_fabric_t * p_ftree) CL_ASSERT(p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_set_leaf_rank: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Selected CN port GUID 0x%" PRIx64 "\n", __osm_ftree_hca_get_guid_ho(p_hca)); @@ -3454,15 +3394,13 @@ static void __osm_ftree_fabric_set_leaf_rank(IN ftree_fabric_t * p_ftree) IB_NODE_TYPE_SWITCH); p_sw = p_hca->up_port_groups[i]->remote_hca_or_sw.p_sw; - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_fabric_set_leaf_rank: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Selected leaf switch GUID 0x%" PRIx64 ", rank %u\n", __osm_ftree_sw_get_guid_ho(p_sw), p_sw->rank); p_ftree->leaf_switch_rank = p_sw->rank; } - osm_log(&p_ftree->p_osm->log, OSM_LOG_INFO, - "__osm_ftree_fabric_set_leaf_rank: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, "FatTree leaf switch rank is %u\n", p_ftree->leaf_switch_rank); OSM_LOG_EXIT(&p_ftree->p_osm->log); } /* __osm_ftree_fabric_set_leaf_rank() */ @@ -3536,8 +3474,7 @@ static int __osm_ftree_fabric_read_guid_files(IN ftree_fabric_t * p_ftree) 
OSM_LOG_ENTER(&p_ftree->p_osm->log); if (__osm_ftree_fabric_roots_provided(p_ftree)) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_read_guid_files: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Fetching root nodes from file %s\n", p_ftree->p_osm->subn.opt.root_guid_file); @@ -3550,8 +3487,7 @@ static int __osm_ftree_fabric_read_guid_files(IN ftree_fabric_t * p_ftree) } if (!cl_list_count(&p_ftree->root_guid_list)) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_read_guid_files: ERR AB22: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB22: " "Root guids file has no valid guids\n"); status = -1; goto Exit; @@ -3563,8 +3499,7 @@ static int __osm_ftree_fabric_read_guid_files(IN ftree_fabric_t * p_ftree) cl_list_construct(&cn_guid_list); cl_list_init(&cn_guid_list, 10); - osm_log(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "__osm_ftree_read_guid_files: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "Fetching compute nodes from file %s\n", p_ftree->p_osm->subn.opt.cn_guid_file); @@ -3576,8 +3511,7 @@ static int __osm_ftree_fabric_read_guid_files(IN ftree_fabric_t * p_ftree) } if (!cl_list_count(&cn_guid_list)) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_ERROR, - "__osm_ftree_fabric_read_guid_files: ERR AB23: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB23: " "Compute node guids file has no valid guids\n"); status = -1; goto Exit; @@ -3634,14 +3568,12 @@ static int __osm_ftree_construct_fabric(IN void *context) goto Exit; } - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: \n" + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n" " |----------------------------------------|\n" " |- Starting FatTree fabric construction -|\n" " |----------------------------------------|\n\n"); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Populating FatTree Switch and CA tables\n"); if 
(__osm_ftree_fabric_populate_nodes(p_ftree) != 0) { osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, @@ -3651,8 +3583,7 @@ static int __osm_ftree_construct_fabric(IN void *context) goto Exit; } - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Reading guid files provided by user\n"); if (__osm_ftree_fabric_read_guid_files(p_ftree) != 0) { osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, @@ -3675,8 +3606,7 @@ static int __osm_ftree_construct_fabric(IN void *context) After that we will know only fabric max switch rank. We will be able to check leaf switches rank and the whole tree rank after filling ports and marking CNs. */ - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: Ranking FatTree\n"); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranking FatTree\n"); if (__osm_ftree_fabric_rank(p_ftree) != 0) { osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, "Failed ranking the tree - " @@ -3690,8 +3620,7 @@ static int __osm_ftree_construct_fabric(IN void *context) because we want the ports to have pointers to ftree_{sw,hca}_t objects, and we need the switches to be already ranked because that's how the port direction is determined. 
*/ - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Populating CA & switch ports\n"); if (__osm_ftree_fabric_populate_ports(p_ftree) != 0) { osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, @@ -3774,22 +3703,19 @@ static int __osm_ftree_construct_fabric(IN void *context) goto Exit; } - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Max LID in switch LFTs (in host order): 0x%04x\n", p_ftree->lft_max_lid_ho); Exit: if (status != 0) { - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Clearing FatTree Fabric data structures\n"); __osm_ftree_fabric_clear(p_ftree); } else p_ftree->fabric_built = TRUE; - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_construct_fabric: \n" + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n" " |--------------------------------------------------|\n" " |- Done constructing FatTree fabric (status = %d) -|\n" " |--------------------------------------------------|\n\n", @@ -3814,21 +3740,18 @@ static int __osm_ftree_do_routing(IN void *context) goto Exit; } - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_do_routing: " "Starting FatTree routing\n"); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Starting FatTree routing\n"); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_do_routing: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Filling switch forwarding tables for Compute Nodes\n"); __osm_ftree_fabric_route_to_cns(p_ftree); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_do_routing: " + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Filling switch forwarding tables for non-CN targets\n"); __osm_ftree_fabric_route_to_non_cns(p_ftree); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_do_routing: " + 
OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Filling switch forwarding tables for switch-to-switch pathes\n"); __osm_ftree_fabric_route_to_switches(p_ftree); @@ -3839,8 +3762,8 @@ static int __osm_ftree_do_routing(IN void *context) /* write out hca ordering file */ __osm_ftree_fabric_dump_hca_ordering(p_ftree); - osm_log(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "__osm_ftree_do_routing: " "FatTree routing is done\n"); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "FatTree routing is done\n"); Exit: OSM_LOG_EXIT(&p_ftree->p_osm->log); diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c index 43e6526..0735aed 100644 --- a/opensm/opensm/osm_ucast_lash.c +++ b/opensm/opensm/osm_ucast_lash.c @@ -135,8 +135,8 @@ static void connect_switches(lash_t * p_lash, int sw1, int sw2, int phy_port_1) p_lash->switches[sw1]->virtual_physical_port_table[num] = phy_port_1; p_lash->switches[sw1]->num_connections++; - osm_log(p_log, OSM_LOG_VERBOSE, - "connect_switches: " "LASH connect: %d, %d, %d\n", sw1, sw2, + OSM_LOG(p_log, OSM_LOG_VERBOSE, + "LASH connect: %d, %d, %d\n", sw1, sw2, phy_port_1); } @@ -921,8 +921,7 @@ static int init_lash_structures(lash_t * p_lash) Exit_Mem_Error: status = IB_ERROR; - osm_log(p_log, OSM_LOG_ERROR, - "lash_init_structures: ERR 4D01: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D01: " "Could not allocate required memory for LASH errno %d, errno %d for lack of memory\n", errno, ENOMEM); @@ -952,8 +951,7 @@ static int lash_core(lash_t * p_lash) switch_bitmap = (int *)malloc(num_switches * num_switches * sizeof(int)); if (!switch_bitmap) { - osm_log(p_log, OSM_LOG_ERROR, - "lash_core: ERR 4D04: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D04: " "Failed allocating switch_bitmap - out of memory\n"); goto Exit; } @@ -1080,29 +1078,26 @@ static int lash_core(lash_t * p_lash) } } - osm_log(p_log, OSM_LOG_INFO, - "lash_core: " "Lanes needed: %d, Balancing\n", lanes_needed); + OSM_LOG(p_log, OSM_LOG_INFO, + "Lanes needed: %d, Balancing\n", 
lanes_needed); for (i = 0; i < lanes_needed; i++) { - osm_log(p_log, OSM_LOG_INFO, - "lash_core: " "Lanes in layer %d: %d\n", i, - p_lash->num_mst_in_lane[i]); + OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n", + i, p_lash->num_mst_in_lane[i]); } balance_virtual_lanes(p_lash, lanes_needed); for (i = 0; i < lanes_needed; i++) { - osm_log(p_log, OSM_LOG_INFO, - "lash_core: " "Lanes in layer %d: %d\n", i, - p_lash->num_mst_in_lane[i]); + OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n", + i, p_lash->num_mst_in_lane[i]); } goto Exit; Error_Not_Enough_Lanes: status = IB_ERROR; - osm_log(p_log, OSM_LOG_ERROR, - "lash_core: ERR 4D02: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: " "Lane requirements (%d) exceed available lanes (%d)\n", p_lash->vl_min, lanes_needed); Exit: @@ -1147,8 +1142,7 @@ static void populate_fwd_tbls(lash_t * p_lash) p_dst_sw = get_osm_switch_from_lid(p_lash->p_osm, lid); if (p_dst_sw == NULL) { - osm_log(p_log, OSM_LOG_ERROR, - "populate_fwd_tbls: ERR 4D03: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D03: " "LASH fwd NULL Cannot find GUID 0x%016" PRIx64 " src lash id (%d), src lid no (0x%04X)\n", @@ -1157,8 +1151,7 @@ static void populate_fwd_tbls(lash_t * p_lash) uint8_t egress_port = find_port_from_lid(cl_hton16(lid), p_sw); p_osm->sm.ucast_mgr.lft_buf[lid] = egress_port; - osm_log(p_log, OSM_LOG_VERBOSE, - "populate_fwd_tbls: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "LASH fwd MY SRC SRC GUID 0x%016" PRIx64 " src lash id (%d), src lid no (0x%04X) src lash port (%d) " "DST GUID 0x%016" PRIx64 @@ -1179,8 +1172,7 @@ static void populate_fwd_tbls(lash_t * p_lash) p_osm->sm.ucast_mgr.lft_buf[lid] = physical_egress_port; - osm_log(p_log, OSM_LOG_VERBOSE, - "populate_fwd_tbls: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "LASH fwd SRC GUID 0x%016" PRIx64 " src lash id (%d), " "src lid no ( 0x%04X ) src lash port (%d) " @@ -1228,8 +1220,7 @@ static void osm_lash_process_switch(lash_t * p_lash, osm_switch_t * p_sw) connect_switches(p_lash, switch_a_lash_id, 
switch_b_lash_id, physical_port_a_num); - osm_log(p_log, OSM_LOG_VERBOSE, - "osm_lash_process_switch: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "LASH SUCCESS connected G 0x%016" PRIx64 " , lash_id(%u), P(%u) " " to G 0x%016" PRIx64 " , lash_id(%u) , P(%u)\n", @@ -1330,8 +1321,7 @@ static int discover_network_properties(lash_t * p_lash) p_lash->vl_min = vl_min; - osm_log(p_log, OSM_LOG_INFO, - "lash discover_network_properties: " + OSM_LOG(p_log, OSM_LOG_INFO, "min operational vl(%d) max_switches(%d)\n", p_lash->vl_min, p_lash->num_switches); return 0; diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c index 0813a23..27a206f 100644 --- a/opensm/opensm/osm_ucast_mgr.c +++ b/opensm/opensm/osm_ucast_mgr.c @@ -155,8 +155,7 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr, OSM_LOG_ENTER(p_mgr->p_log); if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_ucast_mgr_process_neighbor: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Node 0x%" PRIx64 ", remote node 0x%" PRIx64 ", port 0x%X, remote port 0x%X\n", cl_ntoh64(osm_node_get_node_guid(p_this_sw->p_node)), @@ -179,8 +178,7 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr, osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) { if (osm_switch_set_hops (p_this_sw, lid_ho, port_num, hops) != 0) - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_ucast_mgr_process_neighbor: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "cannot set hops for lid %u at switch 0x%" PRIx64 "\n", lid_ho, cl_ntoh64(osm_node_get_node_guid @@ -221,8 +219,7 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr, if (lids_per_port > 1) { remote_sys_guids = malloc(sizeof(uint64_t) * lids_per_port); if (remote_sys_guids == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_ucast_mgr_process_port: ERR 3A09: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A09: " "Cannot allocate array. 
Insufficient memory\n"); goto Exit; } @@ -231,8 +228,7 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr, remote_node_guids = malloc(sizeof(uint64_t) * lids_per_port); if (remote_node_guids == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_ucast_mgr_process_port: ERR 3A0A: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A0A: " "Cannot allocate array. Insufficient memory\n"); goto Exit; } @@ -245,8 +241,7 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr, /* If the lids are zero - then there was some problem with the initialization. Don't handle this port. */ if (min_lid_ho == 0 || max_lid_ho == 0) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_ucast_mgr_process_port: ERR 3A04: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A04: " "Port 0x%" PRIx64 " has LID 0. An initialization " "error occurred. Ignoring port\n", cl_ntoh64(osm_port_get_guid(p_port))); @@ -254,10 +249,8 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr, } if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_ucast_mgr_process_port: " - "Processing port 0x%" PRIx64 - ", LIDs [0x%X,0x%X]\n", + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, + "Processing port 0x%" PRIx64 ", LIDs [0x%X,0x%X]\n", cl_ntoh64(osm_port_get_guid(p_port)), min_lid_ho, max_lid_ho); } @@ -306,8 +299,7 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr, /* Up/Down routing can cause unreachable routes between some switches so we do not report that as an error in that case */ if (!p_routing_eng->build_lid_matrices) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_ucast_mgr_process_port: ERR 3A08: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A08: " "No path to get to LID 0x%X from switch 0x%" PRIx64 "\n", lid_ho, cl_ntoh64(node_guid)); @@ -315,14 +307,12 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr, p_mgr->p_subn->subnet_initialization_error = TRUE; } else - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - 
"__osm_ucast_mgr_process_port: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "No path to get to LID 0x%X from switch 0x%" PRIx64 "\n", lid_ho, cl_ntoh64(node_guid)); } else { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_ucast_mgr_process_port: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Routing LID 0x%X to port 0x%X" " for switch 0x%" PRIx64 "\n", lid_ho, port, cl_ntoh64(node_guid)); @@ -420,8 +410,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr, if (set_swinfo_require) { if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "osm_ucast_mgr_set_fwd_table: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Setting switch FT top to LID 0x%X\n", p_sw->max_lid_ho); } @@ -436,8 +425,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr, 0, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_ucast_mgr_set_fwd_table: ERR 3A06: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A06: " "Sending SwitchInfo attribute failed (%s)\n", ib_get_err_str(status)); } else @@ -461,8 +449,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr, continue; if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "osm_ucast_mgr_set_fwd_table: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Writing FT block %u\n", block_id_ho); } @@ -474,8 +461,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr, CL_DISP_MSGID_NONE, &context); if (status != IB_SUCCESS) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_ucast_mgr_set_fwd_table: ERR 3A05: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A05: " "Sending linear fwd. tbl. 
block failed (%s)\n", ib_get_err_str(status)); } else { @@ -506,8 +492,7 @@ __osm_ucast_mgr_process_tbl(IN cl_map_item_t * const p_map_item, CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH); if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_ucast_mgr_process_tbl: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Processing switch 0x%" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); } @@ -556,8 +541,7 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item, CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH); if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "__osm_ucast_mgr_process_neighbors: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Processing switch with GUID 0x%" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); } @@ -601,8 +585,7 @@ void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr) p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl; - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, - "osm_ucast_mgr_build_lid_matrices: " + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Starting switches' Min Hop Table Assignment\n"); /* @@ -654,8 +637,7 @@ void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr) __osm_ucast_mgr_process_neighbors, p_mgr); } - osm_log(p_mgr->p_log, OSM_LOG_DEBUG, - "osm_ucast_mgr_build_lid_matrices: " + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Min-hop propagated in %d steps\n", i); } } @@ -674,8 +656,7 @@ static int ucast_mgr_setup_all_switches(osm_subn_t * p_subn) p_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl); p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) if (osm_switch_prepare_path_rebuild(p_sw, lids)) { - osm_log(&p_subn->p_osm->log, OSM_LOG_ERROR, - "ucast_mgr_setup_all_switches: ERR 3A0B: " + OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "ERR 3A0B: " "cannot setup switch 0x%016" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid (p_sw->p_node))); @@ -702,8 +683,7 @@ 
osm_ucast_mgr_read_guid_file(IN osm_ucast_mgr_t * const p_mgr, guid_file = fopen(guid_file_name, "r"); if (guid_file == NULL) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_ucast_mgr_read_guid_file: ERR 3A13: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A13: " "Failed to open guid list file (%s)\n", guid_file_name); status = IB_NOT_FOUND; goto Exit; @@ -711,8 +691,7 @@ osm_ucast_mgr_read_guid_file(IN osm_ucast_mgr_t * const p_mgr, while (fgets(line, sizeof(line), guid_file)) { if (strcspn(line, " ,;.") != strlen(line)) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_ucast_mgr_read_guid_file: ERR 3A14: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A14: " "Poorly formatted guid in file (%s): %s\n", guid_file_name, line); status = IB_NOT_FOUND; @@ -734,8 +713,7 @@ osm_ucast_mgr_read_guid_file(IN osm_ucast_mgr_t * const p_mgr, /* check that the string is a number */ if (!(*p_guid) && (*endptr != '\0')) { - osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "osm_ucast_mgr_read_guid_file: ERR 3A15: " + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A15: " "Poorly formatted guid in file (%s): %s\n", guid_file_name, line); status = IB_NOT_FOUND; @@ -808,20 +786,17 @@ osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr) else p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP; - osm_log(p_mgr->p_log, OSM_LOG_INFO, - "osm_ucast_mgr_process: " + OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, "%s tables configured on all switches\n", osm_routing_engine_type_str(p_osm->routing_engine_used)); if (p_mgr->any_change) { signal = OSM_SIGNAL_DONE_PENDING; - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, - "osm_ucast_mgr_process: " + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "LFT Tables configured on all switches\n"); } else { signal = OSM_SIGNAL_DONE; - osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, - "osm_ucast_mgr_process: " + OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "No need to set any LFT Tables on any switches\n"); } diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c index 
3058038..76b94cb 100644 --- a/opensm/opensm/osm_ucast_updn.c +++ b/opensm/opensm/osm_ucast_updn.c @@ -140,8 +140,7 @@ __updn_bfs_by_node(IN osm_log_t * p_log, lid = cl_ntoh16(lid); osm_switch_set_hops(p_sw, lid, 0, 0); - osm_log(p_log, OSM_LOG_DEBUG, - "__updn_bfs_by_node: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Starting from switch - port GUID 0x%" PRIx64 " lid %u\n", cl_ntoh64(p_sw->p_node->node_info.port_guid), lid); @@ -186,8 +185,7 @@ __updn_bfs_by_node(IN osm_log_t * p_log, /* Check if this is a legal step : the only illegal step is going from DOWN to UP */ if ((current_dir == DOWN) && (next_dir == UP)) { - osm_log(p_log, OSM_LOG_DEBUG, - "__updn_bfs_by_node: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Avoiding move from 0x%016" PRIx64 " to 0x%016" PRIx64 "\n", cl_ntoh64(current_guid), @@ -207,8 +205,7 @@ __updn_bfs_by_node(IN osm_log_t * p_log, pn_rem, current_min_hop + 1); if (set_hop_return_value) { - osm_log(p_log, OSM_LOG_ERROR, - "__updn_bfs_by_node (less) ERR AA01: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AA01: " "Invalid value returned from set min hop is: %d\n", set_hop_return_value); } @@ -303,14 +300,12 @@ static cl_status_t updn_init(IN updn_t * const p_updn, IN osm_opensm_t * p_osm) goto Exit; /* For Debug Purposes ... 
*/ - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "updn_init: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "UPDN - Fetching root nodes from file %s\n", p_osm->subn.opt.root_guid_file); guid_iterator = cl_list_head(p_updn->p_root_nodes); while (guid_iterator != cl_list_end(p_updn->p_root_nodes)) { - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "updn_init: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Inserting GUID 0x%" PRIx64 " as root node\n", *((uint64_t *) cl_list_obj(guid_iterator))); guid_iterator = cl_list_next(guid_iterator); @@ -353,16 +348,14 @@ updn_subn_rank(IN unsigned num_guids, osm_get_switch_by_guid(&p_updn->p_osm->subn, cl_hton64(guid_list[idx])); if (!p_sw) { - osm_log(p_log, OSM_LOG_ERROR, - "updn_subn_rank: ERR AA05: " + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AA05: " "Root switch GUID 0x%" PRIx64 " not found\n", guid_list[idx]); continue; } u = p_sw->priv; - osm_log(p_log, OSM_LOG_DEBUG, - "updn_subn_rank: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Ranking root port GUID 0x%" PRIx64 "\n", guid_list[idx]); u->rank = 0; @@ -375,8 +368,7 @@ updn_subn_rank(IN unsigned num_guids, /* Go over all remote nodes and rank them (if not already visited) */ p_sw = u->sw; num_ports = p_sw->num_ports; - osm_log(p_log, OSM_LOG_DEBUG, - "updn_subn_rank: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Handling switch GUID 0x%" PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); for (port_num = 1; port_num < num_ports; port_num++) { @@ -403,8 +395,7 @@ updn_subn_rank(IN unsigned num_guids, &remote_u->list); } - osm_log(p_log, OSM_LOG_DEBUG, - "updn_subn_rank: " + OSM_LOG(p_log, OSM_LOG_DEBUG, "Rank of port GUID 0x%" PRIx64 " = %u\n", cl_ntoh64(port_guid), remote_u->rank); @@ -414,8 +405,7 @@ updn_subn_rank(IN unsigned num_guids, } /* Print Summary of ranking */ - osm_log(p_log, OSM_LOG_VERBOSE, - "updn_subn_rank: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Subnet ranking completed. 
Max Node Rank = %d\n", max_rank); OSM_LOG_EXIT(p_log); return 0; @@ -453,8 +443,7 @@ static int __osm_subn_set_up_down_min_hop_table(IN updn_t * p_updn) /* Go over all the switches in the subnet - for each init their Min Hop Table */ - osm_log(p_log, OSM_LOG_VERBOSE, - "__osm_subn_set_up_down_min_hop_table: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Init Min Hop Table of all switches [\n"); p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl); @@ -469,13 +458,11 @@ static int __osm_subn_set_up_down_min_hop_table(IN updn_t * p_updn) osm_switch_clear_hops(p_sw); } - osm_log(p_log, OSM_LOG_VERBOSE, - "__osm_subn_set_up_down_min_hop_table: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "Init Min Hop Table of all switches ]\n"); /* Now do the BFS for each port in the subnet */ - osm_log(p_log, OSM_LOG_VERBOSE, - "__osm_subn_set_up_down_min_hop_table: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "BFS through all port guids in the subnet [\n"); p_next_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl); @@ -485,8 +472,7 @@ static int __osm_subn_set_up_down_min_hop_table(IN updn_t * p_updn) __updn_bfs_by_node(p_log, p_subn, p_sw); } - osm_log(p_log, OSM_LOG_VERBOSE, - "__osm_subn_set_up_down_min_hop_table: " + OSM_LOG(p_log, OSM_LOG_VERBOSE, "BFS through all port guids in the subnet ]\n"); /* Cleanup */ OSM_LOG_EXIT(p_log); @@ -503,12 +489,10 @@ updn_build_lid_matrices(IN uint32_t num_guids, OSM_LOG_ENTER(&p_updn->p_osm->log); - osm_log(&p_updn->p_osm->log, OSM_LOG_VERBOSE, - "updn_build_lid_matrices: " + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_VERBOSE, "Ranking all port guids in the list\n"); if (num_guids == 0) { - osm_log(&p_updn->p_osm->log, OSM_LOG_ERROR, - "updn_build_lid_matrices: ERR AA0A: " + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR AA0A: " "No guids were provided or number of guids is 0\n"); status = -1; goto _exit; @@ -516,8 +500,7 @@ updn_build_lid_matrices(IN uint32_t num_guids, /* Check if it's not a switched subnet */ if 
(cl_is_qmap_empty(&p_updn->p_osm->subn.sw_guid_tbl)) { - osm_log(&p_updn->p_osm->log, OSM_LOG_ERROR, - "updn_build_lid_matrices: ERR AAOB: " + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_ERROR, "ERR AAOB: " "This is not a switched subnet, cannot perform UPDN algorithm\n"); status = -1; goto _exit; @@ -527,8 +510,7 @@ updn_build_lid_matrices(IN uint32_t num_guids, updn_subn_rank(num_guids, guid_list, p_updn); /* After multiple ranking need to set Min Hop Table by UpDn algorithm */ - osm_log(&p_updn->p_osm->log, OSM_LOG_VERBOSE, - "updn_build_lid_matrices: " + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_VERBOSE, "Setting all switches' Min Hop Table\n"); status = __osm_subn_set_up_down_min_hop_table(p_updn); @@ -576,9 +558,8 @@ static int __osm_updn_call(void *ctx) p_item = cl_qmap_next(p_item); p_sw->priv = create_updn_node(p_sw); if (!p_sw->priv) { - osm_log(&(p_updn->p_osm->log), OSM_LOG_ERROR, - "__osm_updn_call: ERR AA0C: " - " cannot create updn node\n"); + OSM_LOG(&(p_updn->p_osm->log), OSM_LOG_ERROR, "ERR AA0C: " + "cannot create updn node\n"); OSM_LOG_EXIT(&p_updn->p_osm->log); return -1; } @@ -595,15 +576,14 @@ static int __osm_updn_call(void *ctx) /* printf ("-V- after osm_updn_find_root_nodes_by_min_hop\n"); */ /* Only if there are assigned root nodes do the algorithm, otherwise perform do nothing */ if (p_updn->updn_ucast_reg_inputs.num_guids > 0) { - osm_log(&(p_updn->p_osm->log), OSM_LOG_DEBUG, - "__osm_updn_call: " "activating UPDN algorithm\n"); + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_DEBUG, + "activating UPDN algorithm\n"); ret = updn_build_lid_matrices(p_updn->updn_ucast_reg_inputs. num_guids, p_updn->updn_ucast_reg_inputs. 
guid_list, p_updn); } else { - osm_log(&p_updn->p_osm->log, OSM_LOG_INFO, - "__osm_updn_call: " + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_INFO, "disabling UPDN algorithm, no root nodes were found\n"); ret = 1; } @@ -650,8 +630,7 @@ static void __osm_updn_convert_list2array(IN updn_t * p_updn) } max_num = i; for (i = 0; i < max_num; i++) - osm_log(&p_updn->p_osm->log, OSM_LOG_DEBUG, - "__osm_updn_convert_list2array: " + OSM_LOG(&p_updn->p_osm->log, OSM_LOG_DEBUG, "Map GUID 0x%" PRIx64 " into UPDN array\n", p_updn->updn_ucast_reg_inputs.guid_list[i]); } @@ -677,23 +656,20 @@ static void __osm_updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn) OSM_LOG_ENTER(&p_osm->log); - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "__osm_updn_find_root_nodes_by_min_hop: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Current number of ports in the subnet is %d\n", cl_qmap_count(&p_osm->subn.port_guid_tbl)); cas_per_sw = malloc((IB_LID_UCAST_END_HO + 1) * sizeof(*cas_per_sw)); if (!cas_per_sw) { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "__osm_updn_find_root_nodes_by_min_hop: ERR AA14: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR AA14: " "cannot alloc mem for CAs per switch counter array\n"); goto _exit; } memset(cas_per_sw, 0, (IB_LID_UCAST_END_HO + 1) * sizeof(*cas_per_sw)); /* Find the Maximum number of CAs (and routers) for histogram normalization */ - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "__osm_updn_find_root_nodes_by_min_hop: " + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "Finding the number of CAs and storing them in cl_map\n"); p_next_port = (osm_port_t *) cl_qmap_head(&p_osm->subn.port_guid_tbl); while (p_next_port != @@ -707,8 +683,7 @@ static void __osm_updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn) continue; lid_ho = osm_node_get_base_lid(p_physp->p_node, 0); lid_ho = cl_ntoh16(lid_ho); - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "__osm_updn_find_root_nodes_by_min_hop: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Inserting GUID 0x%" PRIx64 ", sw lid: 0x%X into array\n", 
cl_ntoh64(osm_port_get_guid(p_port)), lid_ho); @@ -719,15 +694,13 @@ static void __osm_updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn) thd1 = cas_num * 0.9; thd2 = cas_num * 0.05; - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "__osm_updn_find_root_nodes_by_min_hop: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Found %u CAs and RTRs, %u SWs in the subnet. " "Thresholds are thd1 = %f && thd2 = %f\n", cas_num, cl_qmap_count(&p_osm->subn.sw_guid_tbl), thd1, thd2); p_next_sw = (osm_switch_t *) cl_qmap_head(&p_osm->subn.sw_guid_tbl); - osm_log(&p_osm->log, OSM_LOG_VERBOSE, - "__osm_updn_find_root_nodes_by_min_hop: " + OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE, "Passing through all switches to collect Min Hop info\n"); while (p_next_sw != (osm_switch_t *) cl_qmap_end(&p_osm->subn.sw_guid_tbl)) { @@ -745,8 +718,7 @@ static void __osm_updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn) max_lid_ho = p_sw->max_lid_ho; /* Get base lid of switch by retrieving port 0 lid of node pointer */ - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "__osm_updn_find_root_nodes_by_min_hop: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Passing through switch lid 0x%X\n", cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0))); for (lid_ho = 1; lid_ho <= max_lid_ho; lid_ho++) @@ -775,14 +747,12 @@ static void __osm_updn_find_root_nodes_by_min_hop(OUT updn_t * p_updn) *p_guid = cl_ntoh64(osm_node_get_node_guid (p_sw->p_node)); - osm_log(&p_osm->log, OSM_LOG_DEBUG, - "__osm_updn_find_root_nodes_by_min_hop: " + OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Inserting GUID 0x%" PRIx64 " as root node\n", *p_guid); cl_list_insert_tail(p_root_nodes_list, p_guid); } else { - osm_log(&p_osm->log, OSM_LOG_ERROR, - "__osm_updn_find_root_nodes_by_min_hop: ERR AA13: " + OSM_LOG(&p_osm->log, OSM_LOG_ERROR, "ERR AA13: " "No memory for p_guid\n"); } } diff --git a/opensm/opensm/osm_vl15intf.c b/opensm/opensm/osm_vl15intf.c index 5a5c7a3..faff2fa 100644 --- a/opensm/opensm/osm_vl15intf.c +++ b/opensm/opensm/osm_vl15intf.c @@ -89,8 +89,7 @@ static 
void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw) if (status == IB_SUCCESS) { if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG)) - osm_log(p_vl->p_log, OSM_LOG_DEBUG, - "__osm_vl15_poller: " + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, "%u QP0 MADs on wire, %u outstanding, " "%u unicasts sent, %u total sent\n", p_vl->p_stats->qp0_mads_outstanding_on_wire, @@ -100,8 +99,7 @@ static void vl15_send_mad(osm_vl15_t * p_vl, osm_madw_t * p_madw) return; } - osm_log(p_vl->p_log, OSM_LOG_ERROR, - "__osm_vl15_poller: ERR 3E03: " + OSM_LOG(p_vl->p_log, OSM_LOG_ERROR, "ERR 3E03: " "MAD send failed (%s)\n", ib_get_err_str(status)); /* @@ -151,8 +149,7 @@ static void __osm_vl15_poller(IN void *p_ptr) if (p_madw != (osm_madw_t *) cl_qlist_end(p_fifo)) { if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG)) - osm_log(p_vl->p_log, OSM_LOG_DEBUG, - "__osm_vl15_poller: " + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, "Servicing p_madw = %p\n", p_madw); if (osm_log_is_active(p_vl->p_log, OSM_LOG_FRAMES)) @@ -174,8 +171,7 @@ static void __osm_vl15_poller(IN void *p_ptr) status = cl_event_wait_on(&p_vl->signal, EVENT_NO_TIMEOUT, TRUE); if (status != CL_SUCCESS) { - osm_log(p_vl->p_log, OSM_LOG_ERROR, - "__osm_vl15_poller: ERR 3E02: " + OSM_LOG(p_vl->p_log, OSM_LOG_ERROR, "ERR 3E02: " "Event wait failed (%s)\n", CL_STATUS_MSG(status)); break; @@ -314,8 +310,8 @@ void osm_vl15_poll(IN osm_vl15_t * const p_vl) if (p_vl->p_stats->qp0_mads_outstanding_on_wire < (int32_t) p_vl->max_wire_smps) { if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG)) - osm_log(p_vl->p_log, OSM_LOG_DEBUG, - "osm_vl15_poll: " "Signalling poller thread\n"); + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "Signalling poller thread\n"); cl_event_signal(&p_vl->signal); } @@ -332,8 +328,8 @@ void osm_vl15_post(IN osm_vl15_t * const p_vl, IN osm_madw_t * const p_madw) CL_ASSERT(p_vl->state == OSM_VL15_STATE_READY); if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG)) - osm_log(p_vl->p_log, OSM_LOG_DEBUG, - "osm_vl15_post: " "Posting p_madw = 
0x%p\n", p_madw); + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, + "Posting p_madw = 0x%p\n", p_madw); /* Determine in which fifo to place the pending madw. @@ -347,8 +343,7 @@ void osm_vl15_post(IN osm_vl15_t * const p_vl, IN osm_madw_t * const p_madw) cl_spinlock_release(&p_vl->lock); if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG)) - osm_log(p_vl->p_log, OSM_LOG_DEBUG, - "osm_vl15_post: " + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, "%u QP0 MADs on wire, %u QP0 MADs outstanding\n", p_vl->p_stats->qp0_mads_outstanding_on_wire, p_vl->p_stats->qp0_mads_outstanding); @@ -378,8 +373,7 @@ osm_vl15_shutdown(IN osm_vl15_t * const p_vl, p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->ufifo); while (p_madw != (osm_madw_t *) cl_qlist_end(&p_vl->ufifo)) { if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG)) - osm_log(p_vl->p_log, OSM_LOG_DEBUG, - "osm_vl15_shutdown: " + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, "Releasing Response p_madw = %p\n", p_madw); osm_mad_pool_put(p_mad_pool, p_madw); @@ -391,8 +385,7 @@ osm_vl15_shutdown(IN osm_vl15_t * const p_vl, p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_vl->rfifo); while (p_madw != (osm_madw_t *) cl_qlist_end(&p_vl->rfifo)) { if (osm_log_is_active(p_vl->p_log, OSM_LOG_DEBUG)) - osm_log(p_vl->p_log, OSM_LOG_DEBUG, - "osm_vl15_shutdown: " + OSM_LOG(p_vl->p_log, OSM_LOG_DEBUG, "Releasing Request p_madw = %p\n", p_madw); osm_mad_pool_put(p_mad_pool, p_madw); diff --git a/opensm/opensm/osm_vl_arb_rcv.c b/opensm/opensm/osm_vl_arb_rcv.c index fac4f94..c45d805 100644 --- a/opensm/opensm/osm_vl_arb_rcv.c +++ b/opensm/opensm/osm_vl_arb_rcv.c @@ -99,8 +99,7 @@ void osm_vla_rcv_process(IN void *context, IN void *data) p_port = osm_get_port_by_guid(sm->p_subn, port_guid); if (!p_port) { cl_plock_release(sm->p_lock); - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_vla_rcv_process: ERR 3F06: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3F06: " "No port object for port with GUID 0x%" PRIx64 "\n\t\t\t\tfor parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", @@ 
-127,8 +126,7 @@ void osm_vla_rcv_process(IN void *context, IN void *data) the subnet. */ if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) { - osm_log(sm->p_log, OSM_LOG_VERBOSE, - "osm_vla_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Got GetResp(VLArb) block:%u port_num %u with GUID 0x%" PRIx64 " for parent node GUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", block_num, port_num, cl_ntoh64(port_guid), @@ -140,8 +138,7 @@ void osm_vla_rcv_process(IN void *context, IN void *data) If so, Ignore it. */ if (!p_physp) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_vla_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "Got invalid port number 0x%X\n", port_num); goto Exit; } @@ -151,8 +148,7 @@ void osm_vla_rcv_process(IN void *context, IN void *data) port_num, p_vla_tbl, OSM_LOG_DEBUG); if ((block_num < 1) || (block_num > 4)) { - osm_log(sm->p_log, OSM_LOG_ERROR, - "osm_vla_rcv_process: " + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "Got invalid block number 0x%X\n", block_num); goto Exit; } -- 1.5.4.1.122.gaa8d From infomail at yahcorpboard.com Sat Feb 16 16:20:08 2008 From: infomail at yahcorpboard.com (Yahoo Internet Award) Date: Sun, 17 Feb 2008 01:20:08 +0100 Subject: [ofa-general] Congratulations !!! Message-ID: <467518521@web.de> YAHOO INTERNET AWARD WINNING NOTIFICATION YAHOO INTERNET LOTTERY [http://mail.yahoo.com/config/login?/ym/Compose?To=arabtrans at arabtrans.com] CONGRATULATIONS! YOU WON $920,000.00! Yahoo! Mail gives members random cash prizes. Today, your account is randomly selected as the one of 12 top winner’s accounts who will get cash prizes from us. We are pleased to inform you that your E-mail ID has won prize money of (Nine Hundred and twenty Thousand US Dollars) (US$920,000.00) for the month of February 2008 Yahoo Lottery Winners Promotion which is organized by yahoo.messenger.com. YAHOOMail! Collects all the E-mail ID of the people that are online on internet, among the millions that subscribe to yahoo messenger and other related online programs. 
Twelve Winners are selected every month through electronic balloting System without the winner application. We are congratulating you for being one of the lucky selected Winners. All participants were selected through a computer electronic balloting system drawn from Nine Hundred Thousand E-mail addresses from Canada, Australia, America, Asia, Europe, Middle East, Africa and Oceania as part of our International Promotions Program which is conducted annually. This Lottery was promoted and sponsored by a conglomerate of some multinational companies as part of their social responsibility to the citizens in the communities where they have operational bases. Further, more your details (E-mail ID) falls within our Spanish representative office in Spain, as indicated in your play coupon and your prize of $920,000.00 USD will be released to you from this regional branch office in Spain. We hope with part of your prize, you will participate in our end of year high stakes for US$1.3 Billion International Draw. HOW TO CLAIM YOUR PRIZEThese are your identification numbers... Batch number.....................Lwh 09445Lotto number.......................Lwh09446Winning number...................Lwh09447 To begin your claims, kindly contact the Fiduciary Agency; EUROPEAN CONSULTS S.A.Contact Person: DR. DARLINGTON BASSMAN and MRS. VERONIKA PASTOR E-mail : euroconsorg at aim.com [mailto:euroconsorg at aim.com] [mailto:euroconsultinf at aim.com] [mailto:euroconsultinf at aim.com] [mailto:eurokonsultinf at aim.com] [mailto:eurokonsultinf at ozu.es]TEL: +34 699 273 971 FAX: +34 911 849 996 You are required to forward him with the following details: 1. FULL NAME2. COUNTRY OF ORIGIN3. PRESENT ADDRESS4. SEX5. DATE OF BIRTH6. AGE 7. OCCUPATION8. TELEPHONE NUMBER9. FAX NUMBER (IF ANY)10. MARITAL STATUS11. WINNING NUMBER, BATCH NUMBER AND LOTTO NUMBER12. THE MONTH YOU WON...13. AMOUNT WON... As soon as you contact the Agent, he will advise you on what to do in order to get your prize money. 
Congratulations once more!! For security reasons, we advice all winners to keep this information confidential from the public notice until your claim is processed and your prize released to you. This is part of our security protocol to avoid double claiming and unwarranted taking advantage of this programme by non-participant or unofficial personnel. Yours Sincerely,Dr. MOORE (PHD)LOTTO CO-ORDINATOR.**The Yahoo.com staff Der WEB.DE SmartSurfer hilft bis zu 70% Ihrer Onlinekosten zu sparen! *http://smartsurfer.web.de/?mc=100071&distributionid=000000000066* [http://smartsurfer.web.de/?mc=100071&distributionid=000000000066] -------------- next part -------------- An HTML attachment was scrubbed... URL: From BerniceGrace at osfashland.org Sat Feb 16 11:30:38 2008 From: BerniceGrace at osfashland.org (Marion Rubio) Date: Sat, 16 Feb 2008 23:30:38 +0400 Subject: [ofa-general] Re Sara Message-ID: <060301c8710d$3bb09180$b900a8c0@nombrecdc5bbbf> Hi This is Sara Our mutual friend named Nicole said your cute Can we chat and maybe exchange pictures? Email me at Sara at DoorwayPagePro.info I will respond with a picture and info right away Cant wait to hear from you Sara -------------- next part -------------- An HTML attachment was scrubbed... URL: From andrea at qumranet.com Sat Feb 16 19:01:20 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Sun, 17 Feb 2008 04:01:20 +0100 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> <20080215193719.262c03a1.akpm@linux-foundation.org> Message-ID: <20080217030120.GO11732@v2.random> On Sat, Feb 16, 2008 at 11:21:07AM -0800, Christoph Lameter wrote: > On Fri, 15 Feb 2008, Andrew Morton wrote: > > > What is the status of getting infiniband to use this facility? > > Well we are talking about this it seems. 
It seems the IB folks think allowing RDMA over virtual memory is not interesting, their argument seems to be that RDMA is only interesting on RAM (and they seem not interested in allowing RDMA over a ram+swap backed _virtual_ memory allocation). They just have to decide if ram+swap allocation for RDMA is useful or not. > > How important is this feature to KVM? > > Andrea can answer this. I think I already did in a separate email. > > That sucks big time. What do we need to do to make get the callback > > functions called in non-atomic context? I sure agree given I also asked to drop the lock param and enforce the invalidate_range_* to always be called in non-atomic context. > We would have to drop the inode_mmap_lock. Could be done with some minor > work. The invalidate may be deferred after releasing the lock, the lock may not have to be dropped to clean up the API (and make xpmem's life easier). > That is one implementation (XPmem does that). The other is to simply stop > all references when any invalidate_range is in progress (KVM and GRU do > that). KVM doesn't stop new references. It doesn't need to because it holds a reference on the page (GRU doesn't). KVM can invalidate the spte and flush the tlb only after the linux pte has been cleared and after the page has been released by the VM (because the page doesn't go in the freelist and it remains pinned for a little while, until the spte is dropped too inside invalidate_range_end). GRU has to invalidate _before_ the linux pte is cleared so it has to stop new references from being established in the invalidate_range_start/end critical section. > Andrea put this in to check the reference status of a page. It functions > like the accessed bit. In short each pte can have some spte associated to it. So whenever we do a ptep_clear_flush protected by the PT lock, we also have to run invalidate_page that will internally invoke a sort-of sptep_clear_flush protected by a kvm->mmu_lock (equivalent of page_table_lock/PT-lock).
sptes, just like ptes, map virtual addresses to physical addresses, so you can read/write to RAM either through a pte or through a spte. Just like it would be insane to have any requirement that ptep_clear_flush has to run in non-atomic context (forcing a conversion of the PT lock to a mutex), it's also weird to require the invalidate_page/age_page to run in atomic context. All troubles start with the xpmem requirements of having to schedule in its equivalent of the sptep_clear_flush because it's not a gigahertz-in-cpu thing but a gigabit thing where the network stack is involved with its own software linux driven skb memory allocations, schedules waiting for network I/O, etc... Imagine ptes allocated in a remote node, no surprise it brings a new set of problems (assuming it can work reliably during oom given its memory requirements in the try_to_unmap path, no page can ever be freed until the skbs have been allocated and sent and allocated again to receive the ack). Furthermore xpmem doesn't associate any pte to a spte, it associates a page_t to certain remote references, or it would be in trouble with invalidate_page that corresponds to ptep_clear_flush on a virtual address that exists thanks to the anon_vma/i_mmap lock held (and not thanks to the mmap_sem like in all invalidate_range calls). Christoph's patch is a mix of two entirely separate features. KVM can live with V7 just fine, but it's a lot more than what is needed by KVM. I don't think that invalidate_page/age_page must be allowed to sleep because invalidate_range also can sleep. You just have to ask yourself if the VM locks shall remain spinlocks, for the VM's own good (not for the mmu notifiers' good). It'd be bad to make the VM underperform with mutexes protecting tiny critical sections to please some mmu notifier user. But if they're spinlocks, then clearly invalidate_page/age_page based on virtual addresses can't sleep or the virtual address wouldn't make sense anymore by the time the spinlock is released.
> > This function looks like it was tossed in at the last minute. It's > > mysterious, undocumented, poorly commented, poorly named. A better name > > would be one which has some correlation with the return value. > > > > Because anyone who looks at some code which does > > > > if (mmu_notifier_age_page(mm, address)) > > ... > > > > has to go and reverse-engineer the implementation of > > mmu_notifier_age_page() to work out under which circumstances the "..." > > will be executed. But this should be apparent just from reading the callee > > implementation. > > > > This function *really* does need some documentation. What does it *mean* > > when the ->age_page() from some of the notifiers returned "1" and the > > ->age_page() from some other notifiers returned zero? Dunno. > > Andrea: Could you provide some more detail here? age_page is simply the ptep_clear_flush_young equivalent for sptes. It's meant to provide aging to the pages mapped by secondary mmus. Its return value is the same one of ptep_clear_flush_young but it represents the sptes associated with the pte, ptep_clear_flush_young instead only takes care of the pte itself. For KVM the below would be all that is needed, the fact invalidate_range can sleep and invalidate_page/age_page can't, is because their users are very different. With my approach the mmu notifiers callback are always protected by the PT lock (just like ptep_clear_flush and the other pte+tlb manglings) and they're called after the pte is cleared and before the VM reference on the page has been dropped. That makes it safe for GRU too, so for my initial approach _none_ of the callbacks was allowed to sleep, and that was a feature that allows GRU not to block its tlb miss interrupt with any further locking (the PT-lock taken by follow_page automatically serialized the GRU interrupt against the MMU notifiers and the linux page fault). 
For KVM the invalidate_pages of my patch is converted to invalidate_range_end because it doesn't matter for KVM if it's called after the PT lock has been dropped. In the try_to_unmap case invalidate_page is called from atomic context in Christoph's patch too, because a virtual address and in turn a pte and in turn certain sptes, can only exist thanks to the spinlocks taken by the VM. Changing the VM to make mmu notifiers sleepable in the try_to_unmap path sounds bad to me, especially given not even xpmem needs this. You can see how everything looks simpler and more symmetric by assuming the secondary mmu-references are established and dropped like ptes, like in the KVM case where in fact sptes are a pure cpu thing exactly like the ptes. XPMEM adds the requirement that sptes are in fact remote entities that are mangled by a message passing protocol over the network, it's the same as ptep_clear_flush being required to schedule and send skbs to be successful and allowing try_to_unmap to do its work. Same problem. No wonder the patch gets more complicated then.
Signed-off-by: Andrea Arcangeli diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -46,6 +46,7 @@ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ if (__young) \ flush_tlb_page(__vma, __address); \ + __young |= mmu_notifier_age_page((__vma)->vm_mm, __address); \ __young; \ }) #endif @@ -86,6 +87,7 @@ do { \ pte_t __pte; \ __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \ flush_tlb_page(__vma, __address); \ + mmu_notifier(invalidate_page, (__vma)->vm_mm, __address); \ __pte; \ }) #endif diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -712,6 +712,7 @@ static inline pte_t ptep_clear_flush(str { pte_t pte = *ptep; ptep_invalidate(address, ptep); + mmu_notifier(invalidate_page, vma->vm_mm, address); return pte; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -219,6 +220,8 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; + + struct mmu_notifier_head mmu_notifier; /* MMU notifier list */ }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 --- /dev/null +++ b/include/linux/mmu_notifier.h @@ -0,0 +1,132 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include +#include + +struct mmu_notifier; + +struct mmu_notifier_ops { + /* + * Called when nobody can register any more notifier in the mm + * and after the "mn" notifier has been disarmed already. 
+ */ + void (*release)(struct mmu_notifier *mn, + struct mm_struct *mm); + + /* + * invalidate_page[s] is called in atomic context + * after any pte has been updated and before + * dropping the PT lock required to update any Linux pte. + * Once the PT lock will be released the pte will have its + * final value to export through the secondary MMU. + * Before this is invoked any secondary MMU is still ok + * to read/write to the page previously pointed by the + * Linux pte because the old page hasn't been freed yet. + * If required set_page_dirty has to be called internally + * to this method. + */ + void (*invalidate_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + void (*invalidate_pages)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); + + /* + * Age page is called in atomic context inside the PT lock + * right after the VM is test-and-clearing the young/accessed + * bitflag in the pte. This way the VM will provide proper aging + * to the accesses to the page through the secondary MMUs + * and not only to the ones through the Linux pte. + */ + int (*age_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); +}; + +struct mmu_notifier { + struct hlist_node hlist; + const struct mmu_notifier_ops *ops; +}; + +#ifdef CONFIG_MMU_NOTIFIER + +struct mmu_notifier_head { + struct hlist_head head; + spinlock_t lock; +}; + +#include + +/* + * RCU is used to traverse the list. A quiescent period needs to pass + * before the notifier is guaranteed to be visible to all threads. + */ +extern void mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +/* + * RCU is used to traverse the list. A quiescent period needs to pass + * before the "struct mmu_notifier" can be freed. Alternatively it + * can be synchronously freed inside ->release when the list can't + * change anymore and nobody could possibly walk it. 
+ */ +extern void mmu_notifier_unregister(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void mmu_notifier_release(struct mm_struct *mm); +extern int mmu_notifier_age_page(struct mm_struct *mm, + unsigned long address); + +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) +{ + INIT_HLIST_HEAD(&mnh->head); + spin_lock_init(&mnh->lock); +} + +#define mmu_notifier(function, mm, args...) \ + do { \ + struct mmu_notifier *__mn; \ + struct hlist_node *__n; \ + \ + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ + rcu_read_lock(); \ + hlist_for_each_entry_rcu(__mn, __n, \ + &(mm)->mmu_notifier.head, \ + hlist) \ + if (__mn->ops->function) \ + __mn->ops->function(__mn, \ + mm, \ + args); \ + rcu_read_unlock(); \ + } \ + } while (0) + +#else /* CONFIG_MMU_NOTIFIER */ + +struct mmu_notifier_head {}; + +#define mmu_notifier_register(mn, mm) do {} while(0) +#define mmu_notifier_unregister(mn, mm) do {} while (0) +#define mmu_notifier_release(mm) do {} while (0) +#define mmu_notifier_age_page(mm, address) ({ 0; }) +#define mmu_notifier_head_init(mmh) do {} while (0) + +/* + * Notifiers that use the parameters that they were passed so that the + * compiler does not complain about unused variables but does proper + * parameter checks even if !CONFIG_MMU_NOTIFIER. + * Macros generate no code. + */ +#define mmu_notifier(function, mm, args...) 
\ + do { \ + if (0) { \ + struct mmu_notifier *__mn; \ + \ + __mn = (struct mmu_notifier *)(0x00ff); \ + __mn->ops->function(__mn, mm, args); \ + }; \ + } while (0) + +#endif /* CONFIG_MMU_NOTIFIER */ + +#endif /* _LINUX_MMU_NOTIFIER_H */ diff --git a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c +++ b/kernel/fork.c @@ -360,6 +360,7 @@ static struct mm_struct * mm_init(struct if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + mmu_notifier_head_init(&mm->mmu_notifier); return mm; } free_mm(mm); diff --git a/mm/Kconfig b/mm/Kconfig --- a/mm/Kconfig +++ b/mm/Kconfig @@ -193,3 +193,7 @@ config VIRT_TO_BUS config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS + +config MMU_NOTIFIER + def_bool y + bool "MMU notifier, for paging KVM/RDMA" diff --git a/mm/Makefile b/mm/Makefile --- a/mm/Makefile +++ b/mm/Makefile @@ -30,4 +30,5 @@ obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o diff --git a/mm/hugetlb.c b/mm/hugetlb.c --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -756,6 +756,7 @@ void __unmap_hugepage_range(struct vm_ar if (pte_none(pte)) continue; + mmu_notifier(invalidate_page, mm, address); page = pte_page(pte); if (pte_dirty(pte)) set_page_dirty(page); diff --git a/mm/memory.c b/mm/memory.c --- a/mm/memory.c +++ b/mm/memory.c @@ -494,6 +494,7 @@ static int copy_pte_range(struct mm_stru spinlock_t *src_ptl, *dst_ptl; int progress = 0; int rss[2]; + unsigned long start; again: rss[1] = rss[0] = 0; @@ -505,6 +506,7 @@ again: spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); arch_enter_lazy_mmu_mode(); + start = addr; do { /* * We are holding two locks at this point - either of them @@ -525,6 +527,8 @@ again: } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier(invalidate_pages, vma->vm_mm, start, addr); spin_unlock(src_ptl); 
pte_unmap_nested(src_pte - 1); add_mm_rss(dst_mm, rss[0], rss[1]); @@ -660,6 +664,7 @@ static unsigned long zap_pte_range(struc } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); + mmu_notifier(invalidate_page, mm, addr); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; @@ -1248,6 +1253,7 @@ static int remap_pte_range(struct mm_str { pte_t *pte; spinlock_t *ptl; + unsigned long start = addr; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) @@ -1259,6 +1265,7 @@ static int remap_pte_range(struct mm_str pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); + mmu_notifier(invalidate_pages, mm, start, addr); pte_unmap_unlock(pte - 1, ptl); return 0; } diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2044,6 +2044,7 @@ void exit_mmap(struct mm_struct *mm) vm_unacct_memory(nr_accounted); free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); + mmu_notifier_release(mm); /* * Walk the list again, actually closing and freeing it, diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c new file mode 100644 --- /dev/null +++ b/mm/mmu_notifier.c @@ -0,0 +1,73 @@ +/* + * linux/mm/mmu_notifier.c + * + * Copyright (C) 2008 Qumranet, Inc. + * Copyright (C) 2008 SGI + * Christoph Lameter + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include + +/* + * No synchronization. This function can only be called when only a single + * process remains that performs teardown. 
+ */ +void mmu_notifier_release(struct mm_struct *mm) +{ + struct mmu_notifier *mn; + struct hlist_node *n, *tmp; + + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { + hlist_for_each_entry_safe(mn, n, tmp, + &mm->mmu_notifier.head, hlist) { + hlist_del(&mn->hlist); + if (mn->ops->release) + mn->ops->release(mn, mm); + } + } +} + +/* + * If no young bitflag is supported by the hardware, ->age_page can + * unmap the address and return 1 or 0 depending if the mapping previously + * existed or not. + */ +int mmu_notifier_age_page(struct mm_struct *mm, unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + int young = 0; + + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, + &mm->mmu_notifier.head, hlist) { + if (mn->ops->age_page) + young |= mn->ops->age_page(mn, mm, address); + } + rcu_read_unlock(); + } + + return young; +} + +void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier.lock); + hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier.head); + spin_unlock(&mm->mmu_notifier.lock); +} +EXPORT_SYMBOL_GPL(mmu_notifier_register); + +void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier.lock); + hlist_del_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier.lock); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister); diff --git a/mm/mprotect.c b/mm/mprotect.c --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -32,6 +32,7 @@ static void change_pte_range(struct mm_s { pte_t *pte, oldpte; spinlock_t *ptl; + unsigned long start = addr; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); @@ -71,6 +72,7 @@ static void change_pte_range(struct mm_s } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); + mmu_notifier(invalidate_pages, mm, start, addr); pte_unmap_unlock(pte - 1, ptl); } From dwsfconstm at sfconst.com Sat Feb 16 19:37:44 2008 From: dwsfconstm 
at sfconst.com (Kris Frazier) Date: , 17 Feb 2008 11:37:44 +0800 Subject: [ofa-general] Get any soft you need without delays. Message-ID: <01c87159$82c47400$e741aa3c@dwsfconstm> The quickest and most convenient way to get software is to download it from our site. Low prices, fully functional and original programs only. Localized versions in all European languages! We provide help in installing software. You can ask any question and get a free of charge consultation. Guaranteed access to all updates! Friendly and professional service! http://geocities.com/arnoldcrane283/ The best software products at the best prices. From dwm at enoyolf.org Sat Feb 16 21:04:50 2008 From: dwm at enoyolf.org (Doug Maxey) Date: Sat, 16 Feb 2008 23:04:50 -0600 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <20080215193719.262c03a1.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> <20080215193719.262c03a1.akpm@linux-foundation.org> Message-ID: <1428.1203224690@bebe.enoyolf.org> On Fri, 15 Feb 2008 19:37:19 PST, Andrew Morton wrote: > Which other potential clients have been identified and how important it it > to those? The powerpc ehea utilizes its own mmu. Not sure about the importance to the driver. (But will investigate :) ++doug From a-amybro at acorncomm.com Sun Feb 17 00:12:27 2008 From: a-amybro at acorncomm.com (Leanne Holt) Date: , 17 Feb 2008 16:12:27 +0800 Subject: [ofa-general] I saw your pic Message-ID: <748145941.81504913504434@acorncomm.com> Hello! I am tired this evening. I am nice girl that would like to chat with you. Email me at Lovisa at TheDoorwayBeyond.info only, because I am using my friend's email to write this. 
I will reply with my pics From ogerlitz at voltaire.com Sun Feb 17 01:52:33 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Sun, 17 Feb 2008 11:52:33 +0200 Subject: [ofa-general] Re: [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: References: <47B44716.2010401@dev.mellanox.co.il><47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B48FF5.4040102@dev.mellanox.co.il> Message-ID: <47B803E1.80208@voltaire.com> Roland Dreier wrote: > If you're going to use vmap(), then you might as well use vmalloc(). I might have misled Eli here, sorry for that. > the issue is with consuming address space, which is very limited on > 32-bit systems (there is often less than 128 MB of vmalloc available > total). However in this case it is probably OK. I guess we don't > want to allocate these structures independently and take another > pointer deref for every send -- although I would be curious to know if > it actually costs much. So what's your suggestion? maybe we go with vmalloc on 64-bit systems and on 32-bit systems limit the user to what kmalloc can provide (which is about ~850 size rings with the tx_buf consuming 8*18 u64's + pointer)? Or From adoptable at calendarscript.com Sun Feb 17 02:45:12 2008 From: adoptable at calendarscript.com (Books Stuckel) Date: Sun, 17 Feb 2008 10:45:12 +0000 Subject: [ofa-general] dirigibles Message-ID: <7079086276.20080217104124@calendarscript.com> Heya, Real men! MMillions of people acrross the world have already tested THIS and ARE making their girlfriennds feel brand new sexual sennsations! YOU are the best in bed, aren't you ? Girls! Devellop your sexual relatioonship and get even MORE pleasurre! Make your bboyfriend a gift!http://maichienop.blogspot.com Replied the other. They saw her walking up and ter besold! she choked, after a time. I like ter the trouble.
It's a matter of an hour's work or deep windowsill over against the appletrees, and space of blue on the horizon looked like the rent plenty of ladies he could have had. Ah, but they we stand idle, and life is too short to be spent hajar, a spring near the road, between mogodor a thing?' said katherine 'i am sure i would not the huge lip of the crater to the southwest at wherever they are sent. Captain trench, major there was torture at the end of it. Sometimes upon the study table a trim little handbag of and study by myself, and i'm probably sloppy, out with noiseless wings to pounce on their unwary. -------------- next part -------------- An HTML attachment was scrubbed... URL: From vlad at lists.openfabrics.org Sun Feb 17 03:32:24 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 17 Feb 2008 03:32:24 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080217-0200 daily build status Message-ID: <20080217113224.4E035E2824A@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with 
linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Failed: Build failed on ia64 with linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type 
/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.15' make: *** [kernel] 
Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib] 
Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.18 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.17 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: 
/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** 
[kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.21.1 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:928: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:948: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.21.1_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.21.1_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.21.1' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.19 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.22 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:927: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:947: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.22_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.22_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.22_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.22' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At 
top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.23 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:927: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:947: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.23_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.23_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.23_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.23' make: *** [kernel] Error 2 
---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.24 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:932: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:952: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type 
/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.12 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.12_ppc64_check] Error 2 make[1]: Leaving directory 
`/home/vlad/kernel.org/ppc64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.13 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.13_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.14 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.14_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.16 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.16_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.15 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1178: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top 
level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1182: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.15_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.17 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.17_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed 
on ppc64 with linux-2.6.18 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.19 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.19_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1149: warning: initialization from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: At top level: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:1153: warning: initialization from incompatible pointer type make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.24 Log: /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:932: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c: In function 'ipoib_vfree': 
/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.c:952: error: implicit declaration of function 'vunmap' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_main.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080217-0200_linux-2.6.24_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.24' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From holt at sgi.com Sun Feb 17 04:24:38 2008 From: holt at sgi.com (Robin Holt) Date: Sun, 17 Feb 2008 06:24:38 -0600 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <20080217030120.GO11732@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> <20080215193719.262c03a1.akpm@linux-foundation.org> <20080217030120.GO11732@v2.random> Message-ID: <20080217122438.GB11391@sgi.com> On Sun, Feb 17, 2008 at 04:01:20AM +0100, Andrea Arcangeli wrote: > On Sat, Feb 16, 2008 at 11:21:07AM -0800, Christoph Lameter wrote: > > On Fri, 15 Feb 2008, Andrew Morton wrote: > > > > > What is the status of getting infiniband to use this facility? > > > > Well we are talking about this it seems. > > It seems the IB folks think allowing RDMA over virtual memory is not > interesting, their argument seem to be that RDMA is only interesting > on RAM (and they seem not interested in allowing RDMA over a ram+swap > backed _virtual_ memory allocation). They've just to decide if > ram+swap allocation for RDMA is useful or not. I don't think that is a completely fair characterization. 
It would be more fair to say that the changes required to their library/user api would be too significant to allow an adaptation to any scheme which allowed removal of physical memory below a virtual mapping. I agree with the IB folks when they say it is impossible with their current scheme. The fact that any consumer of their endpoint identifier can use any identifier without notifying the kernel prior to its use certainly makes any implementation under any scheme impossible. I guess we could possibly make things work for IB if we did some heavy work. Let's assume, instead of passing around the physical endpoint identifiers, they passed around a handle. In order for any IB endpoint to communicate, it would need to request the kernel translate a handle into an endpoint identifier. In order for the kernel to put a TLB entry into the process's address space allowing the process access to the _CARD_, it would need to ensure all the current endpoint identifiers for this process were "active" meaning we have verified with the other endpoint that all pages are faulted and TLB/PFN information is in the owning card's TLB/PFN tables. Once all of a process's endpoints are "active" we would drop in the PFN for the adapter into the page tables. Any time pages are being revoked from under an active handle, we would shoot-down the IB adapter card TLB entries for all the remote users of this handle and quiesce the card's state to ensure transfers are either complete or terminated. When there are no active transfers, we would respond back to the owner and they could complete the source process page table cleaning. Any time all of the pages for a handle can not be mapped from virtual to physical, the remote process would be SIGBUS'd instead of having its IB adapter TLB installed. This is essentially how XPMEM does it except we have the benefit of working on individual pages.
Again, not knowing what I am talking about, but under the assumption that MPI IB use is contained to a library, I would hope the changes could be contained under the MPI-to-IB library interface and would not need any changes at the MPI-user library interface. We do keep track of the virtual address ranges within a handle that are being used. I assume the IB folks will find that helpful as well. Otherwise, I think they could make things operate this way. XPMEM has the advantage of not needing to have virtual-to-physical at all times, but otherwise it is essentially the same. Thanks, Robin From batyushizenzen at free.fr Sun Feb 17 07:22:14 2008 From: batyushizenzen at free.fr (Galen Holloway) Date: , 17 Feb 2008 16:22:14 +0100 Subject: [ofa-general] Let the Size be a Subject of Your Pride Message-ID: <610703806.23466949716268@free.fr> Our VPXL has already helped million men to reach cock size they have dreamt about. Order our VPXL and your problems with cock size will become history.http://geocities.com/emanuelgreen647/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From kliteyn at mellanox.co.il Sun Feb 17 07:37:18 2008 From: kliteyn at mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 17 Feb 2008 17:37:18 +0200 Subject: [ofa-general] Re: [PATCH RFC] opensm: drop unused parameter in OSM_LOG_ENTER macro In-Reply-To: <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> Message-ID: <47B854AE.9060104@mellanox.co.il> Hal Rosenstock wrote: > Hi Sasha, > > On Sat, 2008-02-16 at 17:13 +0000, Sasha Khapyorsky wrote: > >> Hi Hal, >> >> On 07:37 Sat 16 Feb , Hal Rosenstock wrote: >> >>> This seems functionally equivalent as all functions supplied in second >>> parameter to OSM_LOG_ENTER are indeed the function being entered. >>> >> Correct, it is the same. The only downside I can see here is needs to >> update some ibutils/ibis files too. Of course I can supply the patch. >> >> >>>> #define OSM_LOG(log, level, fmt, arg...) osm_log(log, level, \ >>>> "%s: " fmt , __func__, ##arg) >>>> >>>> , and use this macro instead of osm_log() where function name should be >>>> logged? >>>> >>> Do all instances of osm_log use the function they are in ? For those >>> that do, this seems fine but I'm not sure all of them do. >>> >> Good point. And there is a good solution for this - those osm_log() >> calls where function name is used (or should be used) will be converted >> to OSM_LOG(), the rest will use osm_log() as usual. Sounds good? >> > > FWIW sounds fine to me. > I actually did something like this as part of the dynamic per-file verbosity level a while ago :) http://lists.openfabrics.org/pipermail/general/2006-August/024570.html So I'm all for it. 
-- Yevgeny > -- Hal > > >> Sasha >> > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From indian2415 at uwm.edu Sun Feb 17 08:13:10 2008 From: indian2415 at uwm.edu (indian2415 at uwm.edu) Date: Sun, 17 Feb 2008 18:13:10 +0200 Subject: [ofa-general] Energize your inner male! Message-ID: <002601c8717f$fd325a60$d4881ac9@cwdwe> Think about buying meds online? http://utmvv.planepound.com From info at sherith.net Sun Feb 17 09:11:18 2008 From: info at sherith.net (Fiona Milner) Date: Sun, 17 Feb 2008 11:11:18 -0600 Subject: [ofa-general] A Whole New Way to Surf the Web! Message-ID: <20080217171131.94068E60A0E@openfabrics.org> An HTML attachment was scrubbed... URL: From 9regiss at btinternet.com Sun Feb 17 10:22:24 2008 From: 9regiss at btinternet.com (Leland Conway) Date: Mon, 18 Feb 2008 02:22:24 +0800 Subject: [ofa-general] I find you interesting Message-ID: <01c871d5$18e9b000$542fdf3c@9regiss> Hello! I am tired this evening. I am nice girl that would like to chat with you. Email me at Linnea at TheHealCare.info only, because I am using my friend's email to write this. Hope you will like my pictures. From changquing.tang at hp.com Sun Feb 17 10:31:38 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Sun, 17 Feb 2008 18:31:38 +0000 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. Message-ID: HI: Here is the ibv_srq structure: struct ibv_srq { struct ibv_context *context; void *srq_context; struct ibv_pd *pd; uint32_t handle; pthread_mutex_t mutex; pthread_cond_t cond; uint32_t events_completed; uint32_t xrc_srq_num; struct ibv_xrc_domain *xrc_domain; struct ibv_cq *xrc_cq; }; On redhat 5 system, since it has a new pthread version, 'pthread_cond_t' is larger than on redhat 4 system. 
So if I compile the code on redhat 5 system, it won't run on redhat 4 system, and vice versa. --CQ From dwsaudiam at saudia.com Sun Feb 17 10:33:25 2008 From: dwsaudiam at saudia.com (Merlin Burrell) Date: , 17 Feb 2008 20:33:25 +0200 Subject: [ofa-general] you have nothing to lose, just a lot to gain! Message-ID: <01c871a4$584bd880$a6a28959@dwsaudiam> Over 700,000 Men around the world are already satisfied with the Quality and Effectiveness of VPXL and you could be also. A new and more sexually powerful man is only a few months away . Your online shopping is safe & secure with us... also very discreet and private with no indication of penis enlargement on the bottle, package or billing receipt. We offer a FULL MONEY BACK GUARANTEE if you are not completely satisfied with the results of VPXL , you have nothing to lose, just a lot to gain ! From denominatedi4178 at efile4less.com Sun Feb 17 10:56:16 2008 From: denominatedi4178 at efile4less.com (Olive Pitts) Date: , 17 Feb 2008 14:56:16 -0400 Subject: [ofa-general] Second month you will notice an increase in penis size of up to 1 inches Message-ID: <01c87175$3edf6800$78ceacc7@denominatedi4178> We offer a FULL MONEY BACK GUARANTEE if you are not completely satisfied with the results of Vpxl, you have nothing to lose, just a lot to gain! 
We would like to thank John from FL, USA and Dan from Australia for sending us before and after photos and also letting us show them on our website From sashak at voltaire.com Sun Feb 17 12:06:16 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 17 Feb 2008 20:06:16 +0000 Subject: [ofa-general] Re: [PATCH RFC] opensm: drop unused parameter in OSM_LOG_ENTER macro In-Reply-To: <47B854AE.9060104@mellanox.co.il> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> <47B854AE.9060104@mellanox.co.il> Message-ID: <20080217200616.GF32758@sashak.voltaire.com> On 17:37 Sun 17 Feb , Yevgeny Kliteynik wrote: >> >> FWIW sounds fine to me. >> > > I actually did something like this as part of the dynamic per-file > verbosity level a while ago :) > http://lists.openfabrics.org/pipermail/general/2006-August/024570.html > So I'm all for it. Great. Let's go with it then. Sasha From sashak at voltaire.com Sun Feb 17 12:12:48 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 17 Feb 2008 20:12:48 +0000 Subject: [ofa-general] [PATCH] ibutils: drop unused parameter in OSM_LOG_ENTER macro In-Reply-To: <20080216171358.GA18527@sashak.voltaire.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> Message-ID: <20080217201248.GG32758@sashak.voltaire.com> This follows named OpenSM patch (in master only): __func__ macro is used in the OSM_LOG_ENTER() to show an actual function name, so the second parameter is not really useful here. OTOH it makes it harder to grep over OpenSM source code, when searches are by function names it generates a lot of unrelated matches. Signed-off-by: Sasha Khapyorsky --- Oren! 
This patch is for 'master' only (OpenSM doesn't have this change in ofed_1_3 branch). Sasha ibis/src/ibbbm.c | 16 ++++++------ ibis/src/ibcr.c | 18 +++++++------- ibis/src/ibis.c | 2 +- ibis/src/ibis_gsi_mad_ctrl.c | 16 ++++++------ ibis/src/ibpm.c | 20 ++++++++-------- ibis/src/ibsac.c | 4 +- ibis/src/ibsm.c | 12 +++++----- ibis/src/ibvs.c | 52 +++++++++++++++++++++--------------------- 8 files changed, 70 insertions(+), 70 deletions(-) diff --git a/ibis/src/ibbbm.c b/ibis/src/ibbbm.c index 5457c72..53341ab 100644 --- a/ibis/src/ibbbm.c +++ b/ibis/src/ibbbm.c @@ -81,7 +81,7 @@ ibbbm_construct() { ibbbm_t* p_ibbbm; - OSM_LOG_ENTER( &(IbisObj.log), ibbbm_construct ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibbbm = malloc(sizeof(ibbbm_t)); if (p_ibbbm == NULL) @@ -105,7 +105,7 @@ void ibbbm_destroy( IN ibbbm_t* const p_ibbbm ) { - OSM_LOG_ENTER( &(IbisObj.log), ibbbm_destroy ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibbbm->state = IBBBM_STATE_INIT; cl_event_destroy(&p_ibbbm->wait_for_resp); @@ -121,7 +121,7 @@ ibbbm_init( { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER( &(IbisObj.log), ibbbm_init ); + OSM_LOG_ENTER(&(IbisObj.log)); cl_event_init(&p_ibbbm->wait_for_resp, FALSE); // FALSE: auto reset p_ibbbm->state = IBBBM_STATE_READY; @@ -141,7 +141,7 @@ __ibbbm_space_rcv_callback( /* HACK : how do we get the context from the mad itself ??? 
*/ ibbbm_t* p_ibbbm = (ibbbm_t*)context; - OSM_LOG_ENTER( &(IbisObj.log), __ibbbm_space_rcv_callback); + OSM_LOG_ENTER(&(IbisObj.log)); memcpy(&ibbbm_vpd_mad,p_madw->p_mad,sizeof(ib_bbm_vpd_t)); @@ -163,7 +163,7 @@ ibbbm_bind( { ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibbbm_bind); + OSM_LOG_ENTER(&(IbisObj.log)); status = ibis_gsi_mad_ctrl_bind( &(IbisObj.mad_ctrl), @@ -213,7 +213,7 @@ __ibbbm_vpd( uint64_t trans_id; uint16_t bm_sequence; - OSM_LOG_ENTER( &(IbisObj.log),__ibbbm_vpd ); + OSM_LOG_ENTER(&(IbisObj.log)); trans_id = ibis_get_tid(); bm_sequence =(uint16_t)cl_atomic_inc(&p_ibbbm->bm_sequence); @@ -289,7 +289,7 @@ ibbbm_read_vpd( uint64_t trans_id; cl_status_t wait_status; - OSM_LOG_ENTER( &(IbisObj.log), ibbbm_read_vpd ); + OSM_LOG_ENTER(&(IbisObj.log)); status = __ibbbm_vpd(p_ibbbm,lid,BBM_ATTR_READVPD,&trans_id,vpd_device_selector,bytes_num,offset,NULL); @@ -336,7 +336,7 @@ ibbbm_write_vpd( uint64_t trans_id; cl_status_t wait_status; - OSM_LOG_ENTER( &(IbisObj.log),ibbbm_write_vpd ); + OSM_LOG_ENTER(&(IbisObj.log)); status = __ibbbm_vpd(p_ibbbm,lid,BBM_ATTR_WRITEVPD,&trans_id,vpd_device_selector,bytes_num,offset,p_data); diff --git a/ibis/src/ibcr.c b/ibis/src/ibcr.c index 3d8654e..13e4f28 100644 --- a/ibis/src/ibcr.c +++ b/ibis/src/ibcr.c @@ -63,7 +63,7 @@ ibcr_construct() { ibcr_t* p_ibcr; - OSM_LOG_ENTER( &(IbisObj.log), ibcr_construct ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibcr = malloc(sizeof(ibcr_t)); if (p_ibcr == NULL) @@ -85,7 +85,7 @@ void ibcr_destroy( IN ibcr_t* const p_ibcr ) { - OSM_LOG_ENTER( &(IbisObj.log), ibcr_destroy ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibcr->state = IBCR_STATE_INIT; @@ -100,7 +100,7 @@ ibcr_init( { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER( &(IbisObj.log), ibcr_init ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibcr->state = IBCR_STATE_READY; @@ -117,7 +117,7 @@ ibcr_bind( { ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibcr_bind); + OSM_LOG_ENTER(&(IbisObj.log)); status = 
ibis_gsi_mad_ctrl_bind( &(IbisObj.mad_ctrl), @@ -174,7 +174,7 @@ __ibcr_prep_cr_mad( uint32_t attr_mod=0; uint16_t attr_id=0; - OSM_LOG_ENTER( &(IbisObj.log),__ibcr_prep_cr_mad ); + OSM_LOG_ENTER(&(IbisObj.log)); attr_mod = (((address & 0x00ff0000) << 8) | (0x01 << 16) | (address & 0xffff)); @@ -222,7 +222,7 @@ ibcr_read( ib_api_status_t status; osm_madw_t *p_madw_arr[1]; - OSM_LOG_ENTER( &(IbisObj.log), ibcr_read ); + OSM_LOG_ENTER(&(IbisObj.log)); /* prepare the mad */ __ibcr_prep_cr_mad( @@ -259,7 +259,7 @@ ibcr_write( ib_cr_space_t res_mad; osm_madw_t *p_madw_arr[1]; - OSM_LOG_ENTER( &(IbisObj.log),ibcr_write ); + OSM_LOG_ENTER(&(IbisObj.log)); __ibcr_prep_cr_mad( p_ibcr, @@ -302,7 +302,7 @@ ibcr_multi_read( uint8_t i; osm_madw_t *p_madw_arr[IBCR_MULTI_MAX]; - OSM_LOG_ENTER( &(IbisObj.log), ibcr_multi_read ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBCR_MULTI_MAX) { @@ -348,7 +348,7 @@ ibcr_multi_write( osm_madw_t *p_madw_arr[IBCR_MULTI_MAX]; ib_cr_space_t res_mads[IBCR_MULTI_MAX]; - OSM_LOG_ENTER( &(IbisObj.log),ibcr_multi_write ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBCR_MULTI_MAX) { diff --git a/ibis/src/ibis.c b/ibis/src/ibis.c index 5d3c3aa..0684b32 100644 --- a/ibis/src/ibis.c +++ b/ibis/src/ibis.c @@ -200,7 +200,7 @@ ibis_get_ports_status( ib_api_status_t status; ib_port_attr_t attr_array[GUID_ARRAY_SIZE]; - OSM_LOG_ENTER( &(IbisObj.log), ibis_get_ports_status); + OSM_LOG_ENTER(&(IbisObj.log)); *num_ports = GUID_ARRAY_SIZE; status = osm_vendor_get_all_port_attr( diff --git a/ibis/src/ibis_gsi_mad_ctrl.c b/ibis/src/ibis_gsi_mad_ctrl.c index a147642..356d33d 100644 --- a/ibis/src/ibis_gsi_mad_ctrl.c +++ b/ibis/src/ibis_gsi_mad_ctrl.c @@ -147,7 +147,7 @@ __ibis_gsi_mad_ctrl_disp_done_callback( { osm_madw_t* const p_madw = (osm_madw_t*)p_data; - OSM_LOG_ENTER( &(IbisObj.log), __ibis_gsi_mad_ctrl_disp_done_callback ); + OSM_LOG_ENTER(&(IbisObj.log)); CL_ASSERT( p_madw ); /* @@ -180,7 +180,7 @@ __ibis_gsi_mad_ctrl_process( cl_disp_reg_handle_t h_disp 
= CL_DISP_INVALID_HANDLE; uint8_t mgmt_class; - OSM_LOG_ENTER( &(IbisObj.log), __ibis_gsi_mad_ctrl_process ); + OSM_LOG_ENTER(&(IbisObj.log)); p_mad = osm_madw_get_mad_ptr( p_madw ); @@ -286,7 +286,7 @@ __ibis_gsi_mad_ctrl_rcv_callback( ibis_gsi_mad_ctrl_t* p_ctrl = (ibis_gsi_mad_ctrl_t*)bind_context; ib_mad_t* p_mad; - OSM_LOG_ENTER( p_ctrl->p_log, __ibis_gsi_mad_ctrl_rcv_callback ); + OSM_LOG_ENTER(p_ctrl->p_log); CL_ASSERT( p_madw ); @@ -330,7 +330,7 @@ __ibis_gsi_mad_ctrl_send_err_callback( ib_mad_t* p_mad; ibis_gsi_mad_ctrl_t* p_ctrl = (ibis_gsi_mad_ctrl_t*)bind_context; - OSM_LOG_ENTER( p_ctrl->p_log, __ibis_gsi_mad_ctrl_send_err_callback ); + OSM_LOG_ENTER(p_ctrl->p_log); // TODO . General call_back for errors. @@ -640,7 +640,7 @@ ibis_gsi_mad_ctrl_bind( osm_bind_info_t bind_info; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER( p_ctrl->p_log, ibis_gsi_mad_ctrl_bind ); + OSM_LOG_ENTER(p_ctrl->p_log); if( *p_h_bind != OSM_BIND_INVALID_HANDLE ) { @@ -707,7 +707,7 @@ ibis_gsi_mad_ctrl_set_class_attr_cb( ibis_gsi_cb_msg_pair_t *p_cb_msg_pair; cl_disp_reg_handle_t disp_reg_hdl; - OSM_LOG_ENTER( p_ctrl->p_log, ibis_gsi_mad_ctrl_set_class_attr_cb ); + OSM_LOG_ENTER(p_ctrl->p_log); mid = (cl_disp_msgid_t)cl_atomic_inc( &p_ctrl->msg_id); @@ -840,7 +840,7 @@ ibis_gsi_sync_mad_batch_callback( ibis_gsi_mad_ctrl_t* const p_ctrl = p_batch_ctx->p_ctrl; uint8_t *p_result; ib_mad_t *p_mad; - OSM_LOG_ENTER( p_ctrl->p_log, ibis_gsi_sync_mad_batch_callback); + OSM_LOG_ENTER(p_ctrl->p_log); /* obtain the lock */ cl_spinlock_acquire(&p_batch_ctx->lock); @@ -937,7 +937,7 @@ ibis_gsi_send_sync_mad_batch( cl_status_t wait_status; ib_api_status_t status; - OSM_LOG_ENTER( p_ctrl->p_log, ibis_gsi_send_sync_mad_batch); + OSM_LOG_ENTER(p_ctrl->p_log); /* initialize the batch context */ p_batch_ctx = __gsi_new_mad_batch_context(); diff --git a/ibis/src/ibpm.c b/ibis/src/ibpm.c index 0680deb..f7dd094 100644 --- a/ibis/src/ibpm.c +++ b/ibis/src/ibpm.c @@ -63,7 +63,7 @@ ibpm_construct() 
{ ibpm_t* p_ibpm; - OSM_LOG_ENTER( &(IbisObj.log), ibpm_construct ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibpm = malloc(sizeof(ibpm_t)); if (p_ibpm == NULL) @@ -83,7 +83,7 @@ void ibpm_destroy( IN ibpm_t* const p_ibpm ) { - OSM_LOG_ENTER( &(IbisObj.log), ibpm_destroy ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibpm->state = IBPM_STATE_INIT; OSM_LOG_EXIT( &(IbisObj.log) ); @@ -97,7 +97,7 @@ ibpm_init( { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER( &(IbisObj.log), ibpm_init ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibpm->state = IBPM_STATE_INIT; OSM_LOG_EXIT( &(IbisObj.log) ); @@ -113,7 +113,7 @@ ibpm_bind( { ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibpm_bind); + OSM_LOG_ENTER(&(IbisObj.log)); status = ibis_gsi_mad_ctrl_bind( &(IbisObj.mad_ctrl), @@ -163,7 +163,7 @@ __ibpm_prep_port_counter_mad( osm_mad_addr_t mad_addr; osm_madw_t *p_madw; - OSM_LOG_ENTER( &(IbisObj.log), __ibpm_prep_port_counter_mad ); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "__ibpm_prep_port_counter_mad: " @@ -212,7 +212,7 @@ ibpm_get_counters( osm_madw_t *p_madw_arr[1]; ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibpm_get_counters ); + OSM_LOG_ENTER(&(IbisObj.log)); /* prepare the mad */ __ibpm_prep_port_counter_mad( @@ -252,7 +252,7 @@ ibpm_get_multi_counters( unsigned int i; ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibpm_get_multi_counters ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBPM_MULTI_MAX) { @@ -303,7 +303,7 @@ ibpm_get_multi_counters_extended( unsigned int i; ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibpm_get_multi_counters ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBPM_MULTI_MAX) { @@ -352,7 +352,7 @@ ibpm_clr_all_counters( osm_madw_t *p_madw_arr[1]; ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log),ibpm_clr_all_counters ); + OSM_LOG_ENTER(&(IbisObj.log)); /* prepare the mad */ __ibpm_prep_port_counter_mad( @@ -398,7 +398,7 @@ ibpm_clr_all_multi_counters( ib_api_status_t status; uint8_t 
i; - OSM_LOG_ENTER( &(IbisObj.log),ibpm_clr_all_multi_counters ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBPM_MULTI_MAX) { diff --git a/ibis/src/ibsac.c b/ibis/src/ibsac.c index b8845a6..bdbfd8a 100644 --- a/ibis/src/ibsac.c +++ b/ibis/src/ibsac.c @@ -255,7 +255,7 @@ ibsac_query_res_cb( IN osmv_query_res_t * p_rec ) ( ibsac_req_context_t * ) p_rec->query_context; ibis_t *const p_ibis = p_ctxt->p_ibis; - OSM_LOG_ENTER( &p_ibis->log, ibsac_query_res_cb ); + OSM_LOG_ENTER(&p_ibis->log); p_ctxt->result = *p_rec; @@ -290,7 +290,7 @@ ibsac_query( IN ibis_t * const p_ibis, ibsac_req_context_t context; ibsac_req_context_t *p_context = &context; - OSM_LOG_ENTER( &p_ibis->log, ibsac_query ); + OSM_LOG_ENTER(&p_ibis->log); if( osm_log_is_active( &p_ibis->log, OSM_LOG_DEBUG ) ) { diff --git a/ibis/src/ibsm.c b/ibis/src/ibsm.c index eacd210..26fbbec 100644 --- a/ibis/src/ibsm.c +++ b/ibis/src/ibsm.c @@ -63,7 +63,7 @@ ibsm_construct() { ibsm_t* p_ibsm; - OSM_LOG_ENTER( &(IbisObj.log), ibsm_construct ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibsm = malloc(sizeof(ibsm_t)); if (p_ibsm == NULL) @@ -83,7 +83,7 @@ void ibsm_destroy( IN ibsm_t* const p_ibsm ) { - OSM_LOG_ENTER( &(IbisObj.log), ibsm_destroy ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibsm->state = IBSM_STATE_INIT; OSM_LOG_EXIT( &(IbisObj.log) ); @@ -97,7 +97,7 @@ ibsm_init( { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER( &(IbisObj.log), ibsm_init ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibsm->state = IBSM_STATE_INIT; OSM_LOG_EXIT( &(IbisObj.log) ); @@ -113,7 +113,7 @@ ibsm_bind( { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER( &(IbisObj.log), ibsm_bind); + OSM_LOG_ENTER(&(IbisObj.log)); /* no need to bind the Directed Route class as it will automatically be handled by the osm_vendor_bind if asked for LID route */ @@ -238,7 +238,7 @@ ibsm_send_mad_by_lid ( ib_smp_t response_mad = {0}; ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibsm_send_mad_by_lid ); + OSM_LOG_ENTER(&(IbisObj.log)); 
osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "ibsm_send_mad_by_lid: " @@ -312,7 +312,7 @@ ibsm_send_mad_by_dr( ib_smp_t *p_smp; ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibsm_send_mad_by_dr ); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "ibsm_send_mad_by_dr: " diff --git a/ibis/src/ibvs.c b/ibis/src/ibvs.c index 2857278..b81ef78 100644 --- a/ibis/src/ibvs.c +++ b/ibis/src/ibvs.c @@ -63,7 +63,7 @@ ibvs_construct() { ibvs_t* p_ibvs; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_construct ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibvs = malloc(sizeof(ibvs_t)); if (p_ibvs == NULL) @@ -85,7 +85,7 @@ void ibvs_destroy( IN ibvs_t* const p_ibvs ) { - OSM_LOG_ENTER( &(IbisObj.log), ibvs_destroy ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibvs->state = IBVS_STATE_INIT; @@ -100,7 +100,7 @@ ibvs_init( { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_init ); + OSM_LOG_ENTER(&(IbisObj.log)); p_ibvs->state = IBVS_STATE_READY; @@ -117,7 +117,7 @@ ibvs_bind( { ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_bind); + OSM_LOG_ENTER(&(IbisObj.log)); status = ibis_gsi_mad_ctrl_bind( &(IbisObj.mad_ctrl), @@ -291,7 +291,7 @@ __ibvs_prep_ext_port_access_mad( osm_madw_t *p_madw; uint8_t i,dword_size; - OSM_LOG_ENTER( &(IbisObj.log),__ibvs_prep_ext_port_access_mad); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "__ibvs_prep_ext_port_access_mad: " @@ -362,7 +362,7 @@ ibvs_cpu_read( osm_madw_t *p_madw_arr[1]; ib_api_status_t status; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_cpu_read); + OSM_LOG_ENTER(&(IbisObj.log)); __ibvs_prep_ext_port_access_mad( p_ibvs, @@ -405,7 +405,7 @@ ibvs_cpu_write( osm_madw_t *p_madw_arr[1]; ib_vs_t res_mad; - OSM_LOG_ENTER( &(IbisObj.log),ibvs_cpu_write ); + OSM_LOG_ENTER(&(IbisObj.log)); __ibvs_prep_ext_port_access_mad( p_ibvs, @@ -452,7 +452,7 @@ ibvs_i2c_read( ib_api_status_t status; osm_madw_t *p_madw_arr[1]; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_i2c_read ); + 
OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "ibvs_i2c_read: " @@ -505,7 +505,7 @@ ibvs_multi_i2c_read( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; uint16_t i; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_multi_i2c_read ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -567,7 +567,7 @@ ibvs_i2c_write( osm_madw_t *p_madw_arr[1]; ib_vs_t res_mad; - OSM_LOG_ENTER( &(IbisObj.log),ibvs_i2c_write ); + OSM_LOG_ENTER(&(IbisObj.log)); __ibvs_prep_ext_port_access_mad( p_ibvs, @@ -613,7 +613,7 @@ ibvs_multi_i2c_write( uint8_t i; osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; - OSM_LOG_ENTER( &(IbisObj.log),ibvs_multi_i2c_write ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -684,7 +684,7 @@ ibvs_gpio_read( ib_api_status_t status; osm_madw_t *p_madw_arr[1]; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_gpio_read ); + OSM_LOG_ENTER(&(IbisObj.log)); __ibvs_prep_ext_port_access_mad( p_ibvs, @@ -728,7 +728,7 @@ ibvs_gpio_write( osm_madw_t *p_madw_arr[1]; ib_vs_t res_mad; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_gpio_write ); + OSM_LOG_ENTER(&(IbisObj.log)); __ibvs_prep_ext_port_access_mad( p_ibvs, @@ -772,7 +772,7 @@ __ibvs_prep_sw_reset_mad( osm_mad_addr_t mad_addr; osm_madw_t *p_madw; - OSM_LOG_ENTER( &(IbisObj.log),__ibvs_prep_sw_reset_mad); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "__ibvs_prep_sw_reset_mad: " @@ -806,7 +806,7 @@ ibvs_multi_sw_reset( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; ib_vs_t res_mads[IBVS_MULTI_MAX]; - OSM_LOG_ENTER( &(IbisObj.log),ibvs_multi_sw_reset ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -869,7 +869,7 @@ __ibvs_prep_flash_access_mad( osm_madw_t *p_madw; uint8_t i,dword_size; - OSM_LOG_ENTER( &(IbisObj.log),__ibvs_prep_flash_access_mad); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "__ibvs_prep_flash_access_mad: " @@ -920,7 +920,7 @@ ibvs_multi_flash_open( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; uint8_t i; - OSM_LOG_ENTER( &(IbisObj.log), 
ibvs_multi_flash_read ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -972,7 +972,7 @@ ibvs_multi_flash_close( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; uint8_t i; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_multi_flash_read ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -1025,7 +1025,7 @@ ibvs_multi_flash_set_bank( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; uint8_t i; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_multi_flash_read ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -1077,7 +1077,7 @@ ibvs_multi_flash_erase( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; uint8_t i; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_multi_flash_read ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -1129,7 +1129,7 @@ ibvs_multi_flash_read( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; uint8_t i; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_multi_flash_read ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -1182,7 +1182,7 @@ ibvs_multi_flash_write( osm_madw_t *p_madw_arr[IBVS_MULTI_MAX]; ib_vs_t res_mads[IBVS_MULTI_MAX]; - OSM_LOG_ENTER( &(IbisObj.log),ibvs_multi_flash_write ); + OSM_LOG_ENTER(&(IbisObj.log)); if (num > IBVS_MULTI_MAX) { @@ -1246,7 +1246,7 @@ ibvs_mirror_read( ib_api_status_t status; osm_madw_t *p_madw_arr[1]; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_mirror_read ); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "ibvs_mirror_read: " @@ -1296,7 +1296,7 @@ ibvs_mirror_write( osm_madw_t *p_madw_arr[1]; ib_vs_t res_mad; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_mirror_write ); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "ibvs_mirror_write: " @@ -1348,7 +1348,7 @@ ibvs_plft_map_get( osm_madw_t *p_madw_arr[1]; uint32_t attr_mod = 0; - OSM_LOG_ENTER( &(IbisObj.log), ibvs_plft_map_get ); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "ibvs_plft_map_get: " @@ -1405,7 +1405,7 @@ ibvs_general_info_get( ib_api_status_t status; osm_madw_t *p_madw_arr[1]; - 
OSM_LOG_ENTER( &(IbisObj.log), ibvs_general_info_get ); + OSM_LOG_ENTER(&(IbisObj.log)); osm_log(&(IbisObj.log), OSM_LOG_DEBUG, "ibvs_general_info_get: " -- 1.5.4.1.122.gaa8d From dwplazasam at plazasa.com Sun Feb 17 13:19:00 2008 From: dwplazasam at plazasa.com (Lilia Mcdaniel) Date: , 17 Feb 2008 22:19:00 +0100 Subject: [ofa-general] CanadianPharmacy makes quality medications affordable. Message-ID: <01c871b3$189baf80$429dbf4d@dwplazasam> The information given below is for those who�re looking for some reliable online drugstore. Canadian Pharmacy is a known and trustful Canadian online drugstore which provides customers with a variety of high-quality low cost generic medications. You can rely on Canadian Pharmacy experience and excellent reputation. Canadian Pharmacy has friendly and helpful customer care staff and you can always contact them with any question. Large selection of meds and easy secure ordering process which ensures safety and confidentiality of your information. http://geocities.com/gabriellewong530/ We hope this information will help you to make the right choice. Tom Acosta From info at donauschwaben83.de Sun Feb 17 13:27:38 2008 From: info at donauschwaben83.de (Jasper Sylvester) Date: , 17 Feb 2008 23:27:38 +0200 Subject: [ofa-general] Wir wissen was Frauen wollern Message-ID: <01c871bc$aec51100$ce386755@info> Haben Sie endlich wieder Spass am Leben! Preise die keine Konkurrenz kennen - Diskrete Verpackung und Zahlung - Kein peinlicher Arz tbesuch erforderlich - Kein langes Warten - Auslieferung innerhalb von 2-3 Tagen - Bequem und diskret online bestellen. - Visa verifizierter Onlineshop - Kostenlose, arztliche Telefon-Beratung - keine versteckte Kosten Originalmedikamente Ciiaaaaaalis... 10 Pack. 21,00 Euro Viiaaaagra... 10 Pack. 
11,00 Euro Klicken Sie HIER und Sie erhalten vier Dosen umsonst http://coldchick.com (bitte warten Sie einen Moment bis die Seite vollstandig geladen ist) -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Sun Feb 17 15:16:35 2008 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 17 Feb 2008 15:16:35 -0800 Subject: [ofa-general] Re: RDMA/nes: Fix vlan support References: <200802160300.m1G30aP8030038@velma.neteffect.com> Message-ID: Thanks, applied. Overall this is very clean (one of the best formatted patches I've gotten in a while ;), but scripts/checkpatch.pl does complain: WARNING: __func__ should be used instead of gcc specific __FUNCTION__ #83: FILE: drivers/infiniband/hw/nes/nes_nic.c:1504: + nes_debug(NES_DBG_NETDEV, "%s: %s\n", __FUNCTION__, netdev->name); I edited this by hand when I applied, but to partially answer your question from your other email, one thing to do when submitting patches is to run them through checkpatch.pl and consider the output. You don't have to fix everything it flags, but at least think about whether there's a good reason not to fix it (and there often is a good reason to ignore the false positives it flags). - R. From dwplayalindam at playalinda.com Sun Feb 17 15:16:44 2008 From: dwplayalindam at playalinda.com (Bryan Hickman) Date: Mon, 18 Feb 2008 07:16:44 +0800 Subject: [ofa-general] Purchase software at surprisingly low prices! Message-ID: <01c871fe$3717d600$dae3c9dd@dwplayalindam> Brilliant opportunity to get software right at the same time you need it without waiting for a CD to be delivered. Just pay money and download your soft. Low prices, discounts and special offers! Most popular localized software in German, French, Italian, Spanish, English and many other languages of the world! We are glad to help you to install your software. Feel free to ask questions and receive highly professional consultations. 
If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/norbertpeterson738/ Buy, download and install right now! From nickpiggin at yahoo.com.au Sun Feb 17 17:51:27 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Mon, 18 Feb 2008 12:51:27 +1100 Subject: [ofa-general] Re: [patch 3/6] mmu_notifier: invalidate_page callbacks In-Reply-To: <20080215193736.9d6e7da3.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080215064932.918191502@sgi.com> <20080215193736.9d6e7da3.akpm@linux-foundation.org> Message-ID: <200802181251.28813.nickpiggin@yahoo.com.au> On Saturday 16 February 2008 14:37, Andrew Morton wrote: > On Thu, 14 Feb 2008 22:49:02 -0800 Christoph Lameter wrote: > > Two callbacks to remove individual pages as done in rmap code > > > > invalidate_page() > > > > Called from the inner loop of rmap walks to invalidate pages. > > > > age_page() > > > > Called for the determination of the page referenced status. > > > > If we do not care about page referenced status then an age_page callback > > may be be omitted. PageLock and pte lock are held when either of the > > functions is called. > > The age_page mystery shallows. BTW. can this callback be called mmu_notifier_clear_flush_young? To match the core VM. From gstreiff at NetEffect.com Sun Feb 17 19:09:57 2008 From: gstreiff at NetEffect.com (Glenn Streiff) Date: Sun, 17 Feb 2008 21:09:57 -0600 Subject: [ofa-general] RE: RDMA/nes: Fix vlan support In-Reply-To: Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFB3@venom2> > Thanks, applied. 
Overall this is very clean (one of the best > formatted patches I've gotten in a while ;), but scripts/checkpatch.pl > does complain: > > WARNING: __func__ should be used instead of gcc specific > __FUNCTION__ > #83: FILE: drivers/infiniband/hw/nes/nes_nic.c:1504: > + nes_debug(NES_DBG_NETDEV, "%s: %s\n", > __FUNCTION__, netdev->name); > > I edited this by hand when I applied, but to partially answer your > question from your other email, one thing to do when submitting > patches is to run them through checkpatch.pl and consider the output. > You don't have to fix everything it flags, but at least think about > whether there's a good reason not to fix it (and there often is a good > reason to ignore the false positives it flags). > > - R. > Will do. Thanks for the feedback. Glenn From dwsbcitym at sbcity.org Sun Feb 17 20:27:05 2008 From: dwsbcitym at sbcity.org (Errol Kline) Date: Mon, 18 Feb 2008 12:27:05 +0800 Subject: [ofa-general] Want to be a hero in bed? Message-ID: <01c87229$92a6a700$eea2133a@dwsbcitym> Are U Tired with erectile dysfunction? Enhance your sexual life now! Want to be ready for sex in few minutes? Reproductive and ED problems solution http://geocities.com/alishagentry357/ We are verified by VISA. Confidential purchase. From a.frank8 at laposte.net Sun Feb 17 22:00:47 2008 From: a.frank8 at laposte.net (a.frank8) Date: Mon, 18 Feb 2008 07:00:47 +0100 (CET) Subject: [ofa-general] Good news from Fed ex Express Message-ID: <16502829.3506251203314447051.JavaMail.www@wwinf8402> Plot 8,Ahmed Way, Victoria Island,Lagos Nigeria. Customers Service Hours--Monday To Saturday: Office Hours Monday To Saturday: Attention, Attention, Attention. FedEx provides access to a growing global market place through a network of supply chain, transportation, and business and related information services. 
The FedEx courier Service Company is hereby passing an essential message to all our valuable customers to be very careful while presenting their Receivers/Residential Address to avoid wrong delivery. Authoritatively, this is the FedEx courier service company mailing you in respect of your parcel that was brought to this company to be delivered to you by one Mrs. Sandra Williams. Before the delivery protocols commenced, there was a misunderstanding between you and the National Insurance Corporation of Nigeria (NICON) over the Insurance Certificate which caused the delay of you receiving your parcel for the past one year. Meanwhile we are happy to inform you that the FedEx has finalized everything with the (NICON) and subsequently approved yours among the 24 valuable parcels’s to be couriered after the released of the parcels from the (NICON) We are happy to inform you once again that your parcel that contains a cheque worth $1.3 Million is among the 24 parcel’s listed which is now in our office and also with your name as the receiver despise that we lost your private residential address which is an indication that you can now re-send your residential address back to the FedEx company where your parcel can be delivered to you without hesitation. Reply via below: fedexexpressservice at y7mail.com Meanwhile remember that the sender of this parcel Mrs. Sandra Williams still owes this company the sum of $85 before the incident occued. Know you that we have spent some money in the process by recovering back your parcel. We once again appreciate your patronage. Without hesitation you are to pay for just the balance left by your sender via western union money transfer so that your parcel can be delivered to your residential address before it attracts demurrage. Your parcel is not just an ordinary parcel but with a huge amount and I think you understand what I mean by accumulating a demurrage? 
Which you will not allow to happen to the recovery parcel that almost gone if not for the love that the good God has for you. I urge you to make hay while the sun shines. We assure you that your parcel will arrive your country within 3 days as soon as this company receive your full residential address and the balance left by you and the tracking number of your parcel will be sent to you via e-mail immediately so that you can track it yourself to see whether we are competent in the discharge of our duties. FedEx courier Service Company do hereby inform all their customers to be at alert especially with allsorts of scam mails that might be coming to you Globally. Be careful with their e-mails so that your parcel will not be in danger with their evil planes. YOU ARE TO CONFIRM YOUR INTEREST FOR PAYMENT INFORMATION IMMEDIATELY. FedEx is one of the world's great success stories, the start-up that revolutionized the delivery of packages and information. In the past 30 years, we've grown up and grown into a diverse family of companies’ -a FedEx that's bigger, stronger, better than ever. ATTENTION: FEDEX COURIER MANAGING DIRECTOR Thanks for your Co-operation. Yours Affectionately. Mr. ANDREW FRANK For the Ikeja Branch Manager FedEx Express Services Lagos -Nigeria . Créez votre adresse électronique prénom.nom at laposte.net 1 Go d'espace de stockage, anti-spam et anti-virus intégrés. -------------- next part -------------- An HTML attachment was scrubbed... URL: From tziporet at dev.mellanox.co.il Sun Feb 17 22:53:22 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 18 Feb 2008 08:53:22 +0200 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. 
In-Reply-To: References: Message-ID: <47B92B62.7080902@mellanox.co.il> Tang, Changqing wrote: > HI: > Here is the ibv_srq structure: > > struct ibv_srq { > struct ibv_context *context; > void *srq_context; > struct ibv_pd *pd; > uint32_t handle; > > pthread_mutex_t mutex; > pthread_cond_t cond; > uint32_t events_completed; > > uint32_t xrc_srq_num; > struct ibv_xrc_domain *xrc_domain; > struct ibv_cq *xrc_cq; > }; > > On redhat 5 system, since it has a new pthread version, 'pthread_cond_t' is larger > than on redhat 4 system. > > So if I compile the code on redhat 5 system, it won't run on redhat 4 system, and > vice versa. > > > I guess we need to add this to the RN, or anyone has a solution for this? Tziporet From tziporet at dev.mellanox.co.il Sun Feb 17 23:03:32 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 18 Feb 2008 09:03:32 +0200 Subject: [ofa-general] [ANNOUCE] dapl-1.2.5 and dapl-2.0.7 released In-Reply-To: References: Message-ID: <47B92DC4.8010106@mellanox.co.il> Davis, Arlin R wrote: > There are new releases for dapl 1.2 and 2.0 available on the > OFA download page and in my git tree. > > md5sum: db0e27ed9389de8f748660f3b582bc29 dapl-1.2.5.tar.gz > md5sum: c0947ab91a518913776c1fe5aadb79cd dapl-2.0.7.tar.gz > > Vlad, please pull both releases into OFED 1.3 RC5 and > include the following packages: > > dapl-1.2.5-1 > dapl-devel-1.2.5-1 > dapl-2.0.7-1 > dapl-utils-2.0.7-1 > dapl-devel-2.0.7-1 > dapl-debuginfo-2.0.7-1 > > Tags: v1 - dapl-1.2.5-1, ofed_1_3-v1 > v2 - dapl-2.0.7-1, ofed_1_3-v2 > > See http://www.openfabrics.org/downloads/dapl/README.html for details. > > Arlin, Can you send update for uDAPL release notes in OFED 1.3 The uDAPL_release_notes.txt is in my git tree: git://git.openfabrics.org/~tziporet/docs.git branch ofed_1_3. 
Thanks, Tziporet From eli at dev.mellanox.co.il Sun Feb 17 23:31:55 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Mon, 18 Feb 2008 09:31:55 +0200 Subject: [ofa-general] [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B4C223.3010200@us.ibm.com> References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B49B8E.7070607@us.ibm.com> <47B4A3A4.2060402@dev.mellanox.co.il> <47B4C223.3010200@us.ibm.com> Message-ID: <4e6a6b3c0802172331w766fd2a4kaa23d2b95f7c9f92@mail.gmail.com> On 2/15/08, David Wilder wrote: > > I saw that :) > You need to make the same change in three more places: > Ok, now CM should have these allocations too. From jackm at dev.mellanox.co.il Mon Feb 18 00:10:53 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Mon, 18 Feb 2008 10:10:53 +0200 Subject: [ofa-general] Re: [PATCH]: mlx4: move table_find from fmr_alloc to fmr_enable In-Reply-To: References: <200802141341.29577.jackm@dev.mellanox.co.il> Message-ID: <200802181010.54231.jackm@dev.mellanox.co.il> On Thursday 14 February 2008 20:48, Roland Dreier wrote: > by the way, it seems we never release ICM table entries when freeing > MPTs. Does the patch below make sense to you? > > diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c > index 679dfdb..3ffce7a 100644 > --- a/drivers/net/mlx4/mr.c > +++ b/drivers/net/mlx4/mr.c > @@ -287,6 +287,8 @@ void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) > (dev->caps.num_mpts - 1)); > if (err) > mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err); > + > + mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key)); > } > > mlx4_mtt_cleanup(dev, &mr->mtt); > I'm nervous about freeing ICM table entries when HW2SW fails (hw may still access the memory). How about the patch below? (P.S., please note the priv-> in the patch).
- Jack --- Index: ofed_kernel/drivers/net/mlx4/mr.c =================================================================== --- ofed_kernel.orig/drivers/net/mlx4/mr.c 2008-02-18 09:49:28.000000000 +0200 +++ ofed_kernel/drivers/net/mlx4/mr.c 2008-02-18 10:10:21.956745000 +0200 @@ -287,6 +287,9 @@ void mlx4_mr_free(struct mlx4_dev *dev, (dev->caps.num_mpts - 1)); if (err) mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err); + else + mlx4_table_put(dev, &priv->mr_table.dmpt_table, + key_to_hw_index(mr->key); } mlx4_mtt_cleanup(dev, &mr->mtt); From 9biemx.svt0 at korea.com Mon Feb 18 02:52:13 2008 From: 9biemx.svt0 at korea.com (Jeremiah Shirley) Date: Mon, 18 Feb 2008 18:52:13 +0800 Subject: [ofa-general] You told me that you will reply back Message-ID: <01c8725f$5f83ec80$d56582de@9biemx.svt0> Hello! I am bored tonight. I am nice girl that would like to chat with you. Email me at Linnea at TheHealCare.info only, because I am using my friend's email to write this. To see my pics From vlad at lists.openfabrics.org Mon Feb 18 03:08:00 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 18 Feb 2008 03:08:00 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080218-0200 daily build status Message-ID: <20080218110801.0C87EE601AA@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 
Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From andrea at qumranet.com Mon Feb 18 04:17:15 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Mon, 18 Feb 2008 13:17:15 +0100 Subject: [ofa-general] Re: [PATCH] KVM swapping with MMU Notifiers V7 In-Reply-To: 
<20080216030817.965ff1f7.akpm@linux-foundation.org> References: <20080215064859.384203497@sgi.com> <20080216104827.GI11732@v2.random> <20080216030817.965ff1f7.akpm@linux-foundation.org> Message-ID: <20080218121715.GR11732@v2.random> On Sat, Feb 16, 2008 at 03:08:17AM -0800, Andrew Morton wrote: > On Sat, 16 Feb 2008 11:48:27 +0100 Andrea Arcangeli wrote: > > > +void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, > > + struct mm_struct *mm, > > + unsigned long start, unsigned long end, > > + int lock) > > +{ > > + for (; start < end; start += PAGE_SIZE) > > + kvm_mmu_notifier_invalidate_page(mn, mm, start); > > +} > > + > > +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { > > + .invalidate_page = kvm_mmu_notifier_invalidate_page, > > + .age_page = kvm_mmu_notifier_age_page, > > + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, > > +}; > > So this doesn't implement ->invalidate_range_start(). Correct. range_start is needed by subsystems that don't pin the pages (so they've to drop the secondary mmu mappings on the physical page before the page is released by the linux VM). > By what means does it prevent new mappings from being established in the > range after core mm has tried to call ->invalidate_rande_start()? > mmap_sem, I assume? No, populate range only takes the mmap_sem in read mode and the kvm page fault also is of course taking it only in read mode. What makes it safe, is that invalidate_range_end is called _after_ the linux pte is clear. The kvm page fault, if it triggers, it will call into get_user_pages again to re-establish the linux pte _before_ establishing the spte. It's the same reason why it's safe to flush the tlb after clearing the linux pte. sptes are like a secondary tlb. > > + /* set userspace_addr atomically for kvm_hva_to_rmapp */ > > + spin_lock(&kvm->mmu_lock); > > + memslot->userspace_addr = userspace_addr; > > + spin_unlock(&kvm->mmu_lock); > > are you sure? 
kvm_unmap_hva() and kvm_age_hva() read ->userspace_addr a > single time and it doesn't immediately look like there's a need to take the > lock here? gcc will always write it with a movq but this is to be C-specs-compliant and because this is by far not a performance critical path I thought it was simpler than some other atomic move in a single insn. From andrea at qumranet.com Mon Feb 18 04:35:51 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Mon, 18 Feb 2008 13:35:51 +0100 Subject: [ofa-general] Re: [PATCH] KVM swapping with MMU Notifiers V7 In-Reply-To: <20080216115138.GA11391@sgi.com> References: <20080215064859.384203497@sgi.com> <20080216104827.GI11732@v2.random> <20080216115138.GA11391@sgi.com> Message-ID: <20080218123551.GS11732@v2.random> On Sat, Feb 16, 2008 at 05:51:38AM -0600, Robin Holt wrote: > I am doing this in xpmem with a stack-based structure in the function > calling get_user_pages. That structure describes the start and > end address of the range we are doing the get_user_pages on. If an > invalidate_range_begin comes in while we are off to the kernel doing > the get_user_pages, the invalidate_range_begin marks that structure > indicating an invalidate came in. When the get_user_pages gets the > structures relocked, it checks that flag (really a generation counter) > and if it is set, retries the get_user_pages. After 3 retries, it > returns -EAGAIN and the fault is started over from the remote side. A seqlock sounds a good optimization for the non-swapping fast path, a per-VM-guest seqlock number can allow us to know when we need to worry to call get_user_pages a second time, but won't be really a retry like in 99% of seqlock usages for the reader side, but just a second get_user_pages to trigger a minor fault. 
Then if the page is different in the second run, we'll really retry (so not in function of the seqlock but in function of the get_user_pages page array), and there's no risk of livelocks because get_user_pages returning a different page won't be the common case. The seqlock should be increased first before the invalidate and a second time once the invalidate is over. From dwsnnm at snn.no Mon Feb 18 04:41:22 2008 From: dwsnnm at snn.no (Shanna Knapp) Date: Mon, 18 Feb 2008 20:41:22 +0800 Subject: [ofa-general] Medications that you need. Message-ID: <01c8726e$9f060500$b9c92a3a@dwsnnm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Same quality! Save your money, buy pills immediately! http://geocities.com/jonasshields321/ We provide confidential and secure purchase! From timhadeen33 at gmail.com Mon Feb 18 05:17:02 2008 From: timhadeen33 at gmail.com (Tim Hadeen) Date: Mon, 18 Feb 2008 07:17:02 -0600 Subject: [ofa-general] RDMA Channel State Message-ID: Hello, We have server (Initiator) connected to Target via IB switch. We use add_target to add the targets on the initiator. SM is running on switch. After issuing add_target, we see RDMA channel ACTIVE, but after that even if we shut down the initiator, we still see RDMA channel Active. Is it due to Switch? What should we expect? Is there any way to remove the target on initiator other than host shutdown? Thanks Tim -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Mon Feb 18 05:24:08 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 05:24:08 -0800 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. References: Message-ID: > Here is the ibv_srq structure: > > struct ibv_srq { ... > pthread_cond_t cond; > On redhat 5 system, since it has a new pthread version, 'pthread_cond_t' is larger > than on redhat 4 system. Yikes... 
I don't see any way to handle this without breaking the libibverbs ABI for all existing binaries, since we have to move pthread_cond_t out of all exposed structures.... Any ideas?? - R. From rdreier at cisco.com Mon Feb 18 06:04:09 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 06:04:09 -0800 Subject: [ofa-general] Move man pages from man1 into -utils package References: <20082151257.VlKtl1Q6lnjta4gM@cisco.com> Message-ID: Move the man pages for the executables that are shipped with the -utils package from the -devel package into the -utils package itself. Signed-off-by: Roland Dreier --- One more patch for librdmacm packaging... librdmacm.spec.in | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/librdmacm.spec.in b/librdmacm.spec.in index 41673ab..cd49ea9 100644 --- a/librdmacm.spec.in +++ b/librdmacm.spec.in @@ -59,13 +59,13 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/lib*.so %{_libdir}/*.a %{_includedir}/* -%{_mandir}/man1/* %{_mandir}/man3/* %{_mandir}/man7/* %files utils %defattr(-,root,root,-) %{_bindir}/* +%{_mandir}/man1/* %changelog -- 1.5.3.8 From changquing.tang at hp.com Mon Feb 18 07:15:01 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Mon, 18 Feb 2008 15:15:01 +0000 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. In-Reply-To: References: Message-ID: Without using XRC fields, everything seems to work OK. --CQ > -----Original Message----- > From: Roland Dreier [mailto:rdreier at cisco.com] > Sent: Monday, February 18, 2008 7:24 AM > To: Tang, Changqing > Cc: general at lists.openfabrics.org > Subject: Re: [ofa-general] Another XRC binary compatable > issue for different pthread version. > > > Here is the ibv_srq structure: > > > > struct ibv_srq { > ... > > pthread_cond_t cond; > > > On redhat 5 system, since it has a new pthread version, > 'pthread_cond_t' is larger > than on redhat 4 system. > > Yikes... 
I don't see any way to handle this without breaking > the libibverbs ABI for all existing binaries, since we have > to move pthread_cond_t out of all exposed structures.... > > Any ideas?? > > - R. > From hartlch14 at gmail.com Mon Feb 18 07:19:07 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Mon, 18 Feb 2008 10:19:07 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> References: <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> <20080216185043.GB18527@sashak.voltaire.com> <1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> Message-ID: Thanks for the tips on the commands - that was my next question. > Is the rest of the subnet pure DDR and a mix of SDR/DDR ? The rest of the network is a mixture of SDR/DDR. Here is what we have: ]# ibnodes -V Ca : 0x0005ad00001dc9e4 ports 2 " HCA-1" Ca : 0x0005ad00001dc970 ports 2 " HCA-1" Ca : 0x0020c28001067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0020c28002067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0020c28003067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0020c28004067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0030487a06600000 ports 1 "Linux62 HCA-1" Ca : 0x0030487a226a0000 ports 1 "linux64 HCA-1" Ca : 0x0030487a071e0000 ports 1 "Linux61 HCA-1" Ca : 0x0002c9020023bb14 ports 1 "Linux60 HCA-1" Ca : 0x0030487a2a480000 ports 1 "Linux63 HCA-1" Switch : 0x000b8cffff00441c ports 24 "MT47396 Infiniscale-III Mellanox Technologies" base port 0 lid 3 lmc 0 Switch : 0x0005ad0000094076 ports 24 "Topspin Switch" enhanced port 0 lid 15 lmc 0 The first two nodes in the list are the SDR blades off of the Topspin switch. The Linux boxes are all Mellanox DDR HCAs and the IB900 interfaces are SDR. The Linux and IB900 ports are all connected to the Mallanox switch. 
The Topspin switch is connected to the Mellanox switch. > Does it work "right" with OpenSM off/using the vendor SM ? No, it does not work with only the SM in the Topspin switch running. That was the initial configuration we had. Then I started OpenSM on one of the Linux boxes and that did not work any better. I also tried disconnecting the TMS IB900 so there were only DDR devices on the subnet and that did not make a difference either. It seems like the SM in the Topspin switch only understands SDR maybe. There does not appear to be an option for turning off the SM in the Topspin switch, however you can change its priority relative to other SMs on the subnet. That is what I did and now the OpenSM is the master. I didn't see anything in the opensm.conf file that indicates that OpenSM has a concept of priority. Is there some way to force it to always be the master? Is there some advantage or disadvantage to running multiple copies of OpenSM on the subnet? If you have multiple switches connected as we do, should some of the default settings on opensm.conf be changed? In particular, should REASSIGN_LIDS be set to "yes"? Chuck -------------- next part -------------- An HTML attachment was scrubbed... URL: From glebn at voltaire.com Mon Feb 18 07:20:32 2008 From: glebn at voltaire.com (Gleb Natapov) Date: Mon, 18 Feb 2008 17:20:32 +0200 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. In-Reply-To: References: Message-ID: <20080218152031.GJ21651@minantech.com> On Mon, Feb 18, 2008 at 03:15:01PM +0000, Tang, Changqing wrote: > > Without using XRC fields, everything seems to work OK. > It only seems so. Access to events_completed should also be problematic.
> --CQ > > > > -----Original Message----- > > From: Roland Dreier [mailto:rdreier at cisco.com] > > Sent: Monday, February 18, 2008 7:24 AM > > To: Tang, Changqing > > Cc: general at lists.openfabrics.org > > Subject: Re: [ofa-general] Another XRC binary compatable > > issue for different pthread version. > > > > > Here is the ibv_srq structure: > > > > > > struct ibv_srq { > > ... > > > pthread_cond_t cond; > > > > > On redhat 5 system, since it has a new pthread version, > > 'pthread_cond_t' is larger > than on redhat 4 system. > > > > Yikes... I don't see any way to handle this without breaking > > the libibverbs ABI for all existing binaries, since we have > > to move pthread_cond_t out of all exposed structures.... > > > > Any ideas?? > > > > - R. > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -- Gleb. From changquing.tang at hp.com Mon Feb 18 07:29:09 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Mon, 18 Feb 2008 15:29:09 +0000 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. In-Reply-To: <20080218152031.GJ21651@minantech.com> References: <20080218152031.GJ21651@minantech.com> Message-ID: Any application code access events_completed field ? HP-MPI does not. If no user code access 'mutex' 'cond' and 'events_completed', I suggest to put the XRC fields in the middle of this structure. --CQ > -----Original Message----- > From: Gleb Natapov [mailto:glebn at voltaire.com] > Sent: Monday, February 18, 2008 9:21 AM > To: Tang, Changqing > Cc: Roland Dreier; general at lists.openfabrics.org > Subject: Re: [ofa-general] Another XRC binary compatable > issue for different pthread version. 
> > On Mon, Feb 18, 2008 at 03:15:01PM +0000, Tang, Changqing wrote: > > > > Without using XRC fields, everything seems to work OK. > > > It's only seems so. Access to events_completed should be also > problematic. > > > --CQ > > > > > > > -----Original Message----- > > > From: Roland Dreier [mailto:rdreier at cisco.com] > > > Sent: Monday, February 18, 2008 7:24 AM > > > To: Tang, Changqing > > > Cc: general at lists.openfabrics.org > > > Subject: Re: [ofa-general] Another XRC binary compatable > issue for > > > different pthread version. > > > > > > > Here is the ibv_srq structure: > > > > > > > > struct ibv_srq { > > > ... > > > > pthread_cond_t cond; > > > > > > > On redhat 5 system, since it has a new pthread version, > > > 'pthread_cond_t' is larger > than on redhat 4 system. > > > > > > Yikes... I don't see any way to handle this without breaking the > > > libibverbs ABI for all existing binaries, since we have to move > > > pthread_cond_t out of all exposed structures.... > > > > > > Any ideas?? > > > > > > - R. > > > > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > -- > Gleb. > From dwraymuckm at raymuck.de Mon Feb 18 07:47:33 2008 From: dwraymuckm at raymuck.de (Wilbur Hutchison) Date: Mon, 18 Feb 2008 16:47:33 +0100 Subject: [ofa-general] What is Generic Medication? Message-ID: <01c8724d$f5168080$46a84e58@dwraymuckm> What is Generic Medication? A generic drug is identical, or bioequivalent to a brand name drug in dosage form, safety, strength, route of administration, quality, performance characteristics and intended use. Although generic drugs are chemically identical to their branded counterparts, they are typically sold at substantial discounts from the branded price. 
Generic drugs save consumers an estimated $8 to $10 billion a year at retail pharmacies. http://geocities.com/juniorhansen944/ From diplomatology at che-usa.com Tue Feb 19 00:08:15 2008 From: diplomatology at che-usa.com (Holly Newton) Date: Tue, 19 Feb 2008 00:08:15 -0800 Subject: [ofa-general] Quark Xpress Passporte 7.3 for XP, Vis+a 79. Retail 716 ^save 2833^ Message-ID: <000a01c87246$939ee380$0100007f@dtfgg> autodesk architectural desktop 2006 - 119 kmt software officeready 4 pro cdmenupro 6.23 biz edition - 39 autodesk architectural studio 3.0 - 39 view ^buycheapmicrosoft .com^ !n Web Browser Take off ^ before you view !n Web Browser 2003 microsoft office professional with business contact manager for outlook - 69 intuit quickbooks premier edition 2007 - 79 kmt software officeready 4 pro roxio digitalmedia studio deluxe suite 7.0 - 49 From sweitzen at cisco.com Mon Feb 18 09:12:36 2008 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Mon, 18 Feb 2008 09:12:36 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com><1203107626.26729.210.camel@hrosenstock-ws.xsigo.com><1203109449.26729.219.camel@hrosenstock-ws.xsigo.com><20080216185043.GB18527@sashak.voltaire.com><1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> Message-ID: Chuck, the SM in the Topspin/Cisco switch can be turned off from CLI, Web UI, or Element Manager UI. There was a SM bug last year where DDR throughput was not as high as it should be sometimes, what does "show version" display from the Topspin/Cisco switch CLI? 
Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems ________________________________ From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Chuck Hartley Sent: Monday, February 18, 2008 7:19 AM To: Hal Rosenstock Cc: OpenFabrics General Subject: Re: [ofa-general] Performance of UDAPL RDMA vs IB verbs Thanks for the tips on the commands - that was my next question. > Is the rest of the subnet pure DDR and a mix of SDR/DDR ? The rest of the network is a mixture of SDR/DDR. Here is what we have: ]# ibnodes -V Ca : 0x0005ad00001dc9e4 ports 2 " HCA-1" Ca : 0x0005ad00001dc970 ports 2 " HCA-1" Ca : 0x0020c28001067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0020c28002067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0020c28003067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0020c28004067759 ports 1 "IB900 TMS Infiniband interface" Ca : 0x0030487a06600000 ports 1 "Linux62 HCA-1" Ca : 0x0030487a226a0000 ports 1 "linux64 HCA-1" Ca : 0x0030487a071e0000 ports 1 "Linux61 HCA-1" Ca : 0x0002c9020023bb14 ports 1 "Linux60 HCA-1" Ca : 0x0030487a2a480000 ports 1 "Linux63 HCA-1" Switch : 0x000b8cffff00441c ports 24 "MT47396 Infiniscale-III Mellanox Technologies" base port 0 lid 3 lmc 0 Switch : 0x0005ad0000094076 ports 24 "Topspin Switch" enhanced port 0 lid 15 lmc 0 The first two nodes in the list are the SDR blades off of the Topspin switch. The Linux boxes are all Mellanox DDR HCAs and the IB900 interfaces are SDR. The Linux and IB900 ports are all connected to the Mallanox switch. The Topspin switch is connected to the Mellanox switch. > Does it work "right" with OpenSM off/using the vendor SM ? No, it does not work with only the SM in the Topspin switch running. That was the initial configuration we had. Then I started OpenSM on one of the Linux boxes and that did not work any better. 
I also tried disconnection the TMS IB900 so there was only DDR devices on the subnet and that did not make a difference either. It seems like the SM in the Topspin switch only understands SDR maybe. There does not appear to be an option for turning off the SM in the Topspin switch, however you can change its priority relative to other SM on the subnet. That is what I did and now the OpenSM is the master. I didn't see anything in the opensm.conf file that indicates that OpenSM has a concept of priority. Is there some way to force it to always be the master? Is there some advantage or disadvantage to running multiple copies of OpenSM on the subnet? If you have multiple switches connected as we do, should some of the default settings on opensm.conf be changed? In particular, should REASSIGN_LIDS be set to "yes"? Chuck -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Mon Feb 18 09:19:13 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 09:19:13 -0800 Subject: [ofa-general] [PATCH] RDMA/nes: Fix possible array overrun Message-ID: In nes_create_qp(), the test if (nesqp->mmap_sq_db_index > NES_MAX_USER_WQ_REGIONS) { is used to error out if the db_index is too large; however, if the test doesn't trigger, then the index is used as nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; and mmap_nesqp is declared as struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; which leads to an array overrun if the index is exactly equal to NES_MAX_USER_WQ_REGIONS. Fix this by bailing out if the index is greater than or equal to NES_MAX_USER_WQ_REGIONS. This was spotted by the Coverity checker (CID 2162). Signed-off-by: Roland Dreier --- Glenn, if this looks good to you, just ack it and I will merge it upstream. 
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index ffd4b42..4dafbe1 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1337,7 +1337,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", nespd->mmap_db_index); */ - if (nesqp->mmap_sq_db_index > NES_MAX_USER_WQ_REGIONS) { + if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { nes_debug(NES_DBG_QP, "db index > max user regions, failing create QP\n"); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); From 9v2l4dfpb0 at newyorkcity.com Mon Feb 18 10:06:10 2008 From: 9v2l4dfpb0 at newyorkcity.com (Corina Givens) Date: Tue, 19 Feb 2008 02:06:10 +0800 Subject: [ofa-general] I was looking for you Message-ID: <829783143.65691909792849@newyorkcity.com> Hello! I am tired tonight. I am nice girl that would like to chat with you. Email me at Ingela at ThePaganDoorway.info only, because I am using my friend's email to write this. Would you mind if I share some of my pictures with you? 
From hartlch14 at gmail.com Mon Feb 18 09:31:43 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Mon, 18 Feb 2008 12:31:43 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> <20080216185043.GB18527@sashak.voltaire.com> <1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> Message-ID: Scott- Here is what we have: Cisco BladeCenterH> show version ================================================================================ System Version Information ================================================================================ system-version : Cisco-BCH TopspinOS 2.6.0 releng #195 06/01/2006 01:32:43 (UUID:00000000-0000-0000-0005-AD0000094076) On Feb 18, 2008 12:12 PM, Scott Weitzenkamp (sweitzen) wrote: > Chuck, the SM in the Topspin/Cisco switch can be turned off from CLI, Web > UI, or Element Manager UI. > > There was a SM bug last year where DDR throughput was not as high as it > should be sometimes, what does "show version" display from the Topspin/Cisco > switch CLI? > > > Scott Weitzenkamp > SQA and Release Manager > Server Virtualization Business Unit > Cisco Systems > > > ------------------------------ > *From:* general-bounces at lists.openfabrics.org [mailto: > general-bounces at lists.openfabrics.org] *On Behalf Of *Chuck Hartley > *Sent:* Monday, February 18, 2008 7:19 AM > *To:* Hal Rosenstock > *Cc:* OpenFabrics General > *Subject:* Re: [ofa-general] Performance of UDAPL RDMA vs IB verbs > > Thanks for the tips on the commands - that was my next question. > > > Is the rest of the subnet pure DDR and a mix of SDR/DDR ? > > The rest of the network is a mixture of SDR/DDR. 
Here is what we have: > > ]# ibnodes -V > Ca : 0x0005ad00001dc9e4 ports 2 " HCA-1" > Ca : 0x0005ad00001dc970 ports 2 " HCA-1" > Ca : 0x0020c28001067759 ports 1 "IB900 TMS Infiniband interface" > Ca : 0x0020c28002067759 ports 1 "IB900 TMS Infiniband interface" > Ca : 0x0020c28003067759 ports 1 "IB900 TMS Infiniband interface" > Ca : 0x0020c28004067759 ports 1 "IB900 TMS Infiniband interface" > Ca : 0x0030487a06600000 ports 1 "Linux62 HCA-1" > Ca : 0x0030487a226a0000 ports 1 "linux64 HCA-1" > Ca : 0x0030487a071e0000 ports 1 "Linux61 HCA-1" > Ca : 0x0002c9020023bb14 ports 1 "Linux60 HCA-1" > Ca : 0x0030487a2a480000 ports 1 "Linux63 HCA-1" > Switch : 0x000b8cffff00441c ports 24 "MT47396 Infiniscale-III Mellanox > Technologies" base port 0 lid 3 lmc 0 > Switch : 0x0005ad0000094076 ports 24 "Topspin Switch" enhanced port 0 lid > 15 lmc 0 > > The first two nodes in the list are the SDR blades off of the Topspin > switch. The Linux boxes are all Mellanox DDR HCAs and the IB900 interfaces > are SDR. The Linux and IB900 ports are all connected to the Mallanox > switch. The Topspin switch is connected to the Mellanox switch. > > > > Does it work "right" with OpenSM off/using the vendor SM ? > > No, it does not work with only the SM in the Topspin switch running. That > was the initial configuration we had. Then I started OpenSM on one of the > Linux boxes and that did not work any better. I also tried disconnection > the TMS IB900 so there was only DDR devices on the subnet and that did not > make a difference either. It seems like the SM in the Topspin switch only > understands SDR maybe. There does not appear to be an option for turning > off the SM in the Topspin switch, however you can change its priority > relative to other SM on the subnet. That is what I did and now the OpenSM > is the master. > > I didn't see anything in the opensm.conf file that indicates that OpenSM > has a concept of priority. Is there some way to force it to always be the > master? 
Is there some advantage or disadvantage to running multiple copies > of OpenSM on the subnet? If you have multiple switches connected as we do, > should some of the default settings on opensm.conf be changed? In > particular, should REASSIGN_LIDS be set to "yes"? > > Chuck > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From gstreiff at NetEffect.com Mon Feb 18 09:52:12 2008 From: gstreiff at NetEffect.com (Glenn Streiff) Date: Mon, 18 Feb 2008 11:52:12 -0600 Subject: [ofa-general] [PATCH] RDMA/nes: Fix possible array overrun Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFBF@venom2> In nes_create_qp(), the test if (nesqp->mmap_sq_db_index > NES_MAX_USER_WQ_REGIONS) { is used to error out if the db_index is too large; however, if the test doesn't trigger, then the index is used as nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp; and mmap_nesqp is declared as struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; which leads to an array overrun if the index is exactly equal to NES_MAX_USER_WQ_REGIONS. Fix this by bailing out if the index is greater than or equal to NES_MAX_USER_WQ_REGIONS. This was spotted by the Coverity checker (CID 2162). Signed-off-by: Roland Dreier Acked-by: Glenn Streiff --- Roland, Here's my ack. Thanks for catching this. Will look into Coverity. 
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index ffd4b42..4dafbe1 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1337,7 +1337,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", nespd->mmap_db_index); */ - if (nesqp->mmap_sq_db_index > NES_MAX_USER_WQ_REGIONS) { + if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { nes_debug(NES_DBG_QP, "db index > max user regions, failing create QP\n"); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); From dwreacontractm at reacontract.com Mon Feb 18 10:20:47 2008 From: dwreacontractm at reacontract.com (Beatriz Bledsoe) Date: Mon, 18 Feb 2008 14:20:47 -0400 Subject: [ofa-general] Show your close ones you care, quit smoking today Message-ID: <795352604.43277346597230@reacontract.com> An HTML attachment was scrubbed... URL: From michael.brooks at qlogic.com Mon Feb 18 10:28:15 2008 From: michael.brooks at qlogic.com (Michael Brooks) Date: Mon, 18 Feb 2008 12:28:15 -0600 Subject: [ofa-general] [PATCH] 1.3-rc4 leak in SA notification patch Message-ID: Commit 2aec5c602c6a44e2a3a173339a9ab94549658e4b on 2008-02-05 modified the SA notification patch to use alloc_mad() from within sa_query.c:ib_sa_informinfo_query(), but didn't also modify the function to use the associated free_mad() call on exit, resulting in a lost kref_put() on the AH structure. 
Patch follows: diff --git a/kernel_patches/fixes/sean_local_sa_1_notifications.patch b/kernel_patches/fixes/sean_local_sa_1_notifications.patch index 9d272ff..e710897 100644 --- a/kernel_patches/fixes/sean_local_sa_1_notifications.patch +++ b/kernel_patches/fixes/sean_local_sa_1_notifications.patch @@ -1061,7 +1061,7 @@ Index: ofa_1_3_dev_kernel/drivers/infiniband/core/sa_query.c +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); -+ ib_free_send_mad(query->sa_query.mad_buf); ++ free_mad(&query->sa_query); +err1: + kfree(query); + return ret; From taylor at hpc.ufl.edu Mon Feb 18 10:36:57 2008 From: taylor at hpc.ufl.edu (Charles Taylor) Date: Mon, 18 Feb 2008 13:36:57 -0500 Subject: [ofa-general] Re: [Lustre-discuss] kernel-2.6.18-8.1.14 + lustre 1.6.4.2 + OFED 1.2 In-Reply-To: <1C8E7B2D-9300-405C-A33A-8011764FE93F@iges.org> References: <164072F1-DCA2-4563-87EE-ACE815052E4C@hpc.ufl.edu> <1C8E7B2D-9300-405C-A33A-8011764FE93F@iges.org> Message-ID: Turns out that when you build your patched kernel, you DO NOT want to change the EXTRAVERSION string in the Makefile such that it does not include ".el5." somewhere in the string. If you do that, some of the OFED configure scripts pick some patch files that are for a different (vanilla) kernel. So once we put ".el5." back into our version string, things worked as expected. It sure seems like we should be able to set "EXTRAVERSION" to anything we want without breaking builds i.e., there should be a more reliable way of figuring out which kernel you are building against. Sigh, Charlie Taylor UF HPC Center On Feb 18, 2008, at 11:35 AM, Aaron Knister wrote: > > On Feb 18, 2008, at 11:20 AM, Charles Taylor wrote: > >> >> We seemed to have it a stumbling block when building with the above >> (supported) versions. Our process... >> >> 1. Start with stock rhel5 2.6.18-8.1.14 source tree >> 2. Configure InfiniBand support out of the the kernel (we will build >> OFED separately). >> 3. 
Apply the 1.6.4.2 kernel patches to the kernel source. >> 4. Build the kernel. >> 5. Build OFED 1.2 against the patched kernel >> 6. Build Lustre using the patched Kernel+OFED 1.2 >> >> This worked fine for 1.6.3 but for 1.6.4.2, after patching the kernel >> and trying to build OFED 1.2, we get the error below. Now, we find >> references to this problem all over the place but nothing specific to >> building lustre. Since Lustre 1.6.4.2 is supposed to support OFED >> 1.2, it seems that OFED should build against a kernel patched for >> lustre 1.6.4.2 but apparently it does not. If we use the un- >> patched kernel (i.e. do not apply the lustre 1.6.4.2 kernel patches >> to it), OFED 1.2 builds without a problem. We are looking for an >> easy #define way out of this hole right now but if there is a known >> solution to this, we would love to know about it. >> >> If there is a better way to do this, we are open to that as well. >> >> Thanks, >> >> charlie taylor >> uf hpc center >> >> >> = >> = >> ===================================================================== >> = >> =========================================================== >> >> gcc -Wp,-MD,/root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ >> ulp/ipoib/.ipoib_fs.o.d -nostdinc -isystem /usr/lib/gcc/x86_64- >> redhat-linux/4.1.1/include -D__KERNEL__ \ >> -I/root/lustre/1.6.4.2/ofa_kernel-1.2/kernel_addons/backport/2.6.18/ >> include/ \ >> -I/root/lustre/1.6.4.2/ofa_kernel-1.2/include \ >> -I/root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/include \ >> -Iinclude \ >> \ >> -include include/linux/autoconf.h \ >> -include /root/lustre/1.6.4.2/ofa_kernel-1.2/include/linux/ >> autoconf.h \ >> -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict- >> aliasing -fno-common -Wstrict-prototypes -Wundef -Werror-implicit- >> function-declaration -Os -mtune=generic -m64 -mno-red-zone - >> mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno- >> asynchronous-unwind-tables -funit-at-a-time -mno-sse 
-mno-mmx -mno- >> sse2 -mno-3dnow -fomit-frame-pointer -g -fno-stack-protector - >> Wdeclaration-after-statement -Wno-pointer-sign -I/root/lustre/ >> 1.6.4.2/ofa_kernel-1.2/include -I/root/lustre/1.6.4.2/ofa_kernel-1.2/ >> drivers/infiniband/include -I/root/lustre/1.6.4.2/ >> ofa_kernel-1.2/drivers/infiniband/ulp/ipoib -I/root/lustre/ >> 1.6.4.2/ofa_kernel-1.2/drivers/infiniband/debug -I/root/lustre/ >> 1.6.4.2/ofa_kernel-1.2/drivers/infiniband/hw/cxgb3/core -I/root/ >> lustre/1.6.4.2/ofa_kernel-1.2/drivers/net/cxgb3 -I/root/lustre/ >> 1.6.4.2/ofa_kernel-1.2/drivers/net/rds -DMODULE -D"KBUILD_STR(s) >> =#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_fs)" - >> D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /root/lustre/1.6.4.2/ >> ofa_kernel-1.2/drivers/infiniband/ulp/ipoib/.tmp_ipoib_fs.o /root/ >> lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ulp/ipoib/ipoib_fs.c >> /root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ulp/ipoib/ >> ipoib_fs.c: In function ‘ipoib_mcg_open’: >> /root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ulp/ipoib/ >> ipoib_fs.c:144: error: ‘struct inode’ has no member named ‘u’ >> /root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ulp/ipoib/ >> ipoib_fs.c: In function ‘ipoib_path_open’: >> /root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ulp/ipoib/ >> ipoib_fs.c:250: error: ‘struct inode’ has no member named ‘u’ >> make[4]: *** [/root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ >> ulp/ipoib/ipoib_fs.o] Error 1 >> make[3]: *** [/root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband/ >> ulp/ipoib] Error 2 >> make[2]: *** [/root/lustre/1.6.4.2/ofa_kernel-1.2/drivers/infiniband] >> Error 2 >> make[1]: *** [_module_/root/lustre/1.6.4.2/ofa_kernel-1.2] Error 2 >> make[1]: Leaving directory `/root/lustre/1.6.4.2/linux-2.6.18' >> make: *** [kernel] Error 2 >> >> >> >> _______________________________________________ >> Lustre-discuss mailing list >> Lustre-discuss at lists.lustre.org >> 
http://lists.lustre.org/mailman/listinfo/lustre-discuss > > Aaron Knister > Associate Systems Analyst > Center for Ocean-Land-Atmosphere Studies > > (301) 595-7000 > aaron at iges.org > > > > From brie at mail.com Mon Feb 18 10:41:36 2008 From: brie at mail.com (Urban Mcgowan) Date: Mon, 18 Feb 2008 19:41:36 +0100 Subject: [ofa-general] Improve Quality of Life Increasing Size Message-ID: <135477709.90910144762495@mail.com> Among many other similar products and methods a VPXL system is known to give great results and to be the safest one. Order VPXL and have a pleasure of being confident. http://geocities.com/mercedescabrera88/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Mon Feb 18 11:48:48 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 11:48:48 -0800 Subject: [ofa-general] Re: [PATCH]: mlx4: move table_find from fmr_alloc to fmr_enable In-Reply-To: <200802181010.54231.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Mon, 18 Feb 2008 10:10:53 +0200") References: <200802141341.29577.jackm@dev.mellanox.co.il> <200802181010.54231.jackm@dev.mellanox.co.il> Message-ID: > I'm nervous about freeing ICM table entries when HW2SW fails (hw may still access the > memory. Hmm, from that point of view, the current code is risky because it frees the MTT entries even if HW2SW_MPT fails. Maybe the below: > (P.S., please note the priv-> in the patch). 
heh, I compiled my tree this time, and I even put both closing parenthesis in "key_to_hw_index(mr->key));" ;) diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index 79b317b..a185c44 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -285,8 +285,13 @@ void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); - if (err) + if (err) { mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err); + return; + } + + mlx4_table_put(dev, &priv->mr_table.dmpt_table, + key_to_hw_index(mr->key)); } mlx4_mtt_cleanup(dev, &mr->mtt); From rdreier at cisco.com Mon Feb 18 11:53:01 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 11:53:01 -0800 Subject: [ofa-general] Re: [PATCH] libibverbs/examples: Fixes some issues in the examples files In-Reply-To: <200710101126.18284.dotanb@dev.mellanox.co.il> (Dotan Barak's message of "Wed, 10 Oct 2007 11:26:18 +0200") References: <200710101126.18284.dotanb@dev.mellanox.co.il> Message-ID: thanks, applied at last From rdreier at cisco.com Mon Feb 18 12:35:16 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 12:35:16 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get a few post-2.6.25-rc2 fixes, mostly for the new nes driver: Chien Tung (1): RDMA/nes: Fix VLAN support Glenn Streiff (1): RDMA/nes: Fix MAC interrupt erroneously masked on ifdown Li Zefan (1): IB: Fix return value in ib_device_register_sysfs() Roland Dreier (1): RDMA/nes: Fix possible array overrun drivers/infiniband/core/sysfs.c | 4 ++- drivers/infiniband/hw/nes/nes_nic.c | 62 +++++++++++++++++++++++--------- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 3 files changed, 48 
insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c864ef7..5a4b2e6 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -686,8 +686,10 @@ int ib_device_register_sysfs(struct ib_device *device) device->ports_parent = kobject_create_and_add("ports", kobject_get(&class_dev->kobj)); - if (!device->ports_parent) + if (!device->ports_parent) { + ret = -ENOMEM; goto err_put; + } if (device->node_type == RDMA_NODE_IB_SWITCH) { ret = add_port(device, 0); diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index b6cc265..eee77da 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -148,14 +148,15 @@ static int nes_netdev_open(struct net_device *netdev) struct nes_device *nesdev = nesvnic->nesdev; int ret; int i; - struct nes_vnic *first_nesvnic; + struct nes_vnic *first_nesvnic = NULL; u32 nic_active_bit; u32 nic_active; + struct list_head *list_pos, *list_temp; assert(nesdev != NULL); - first_nesvnic = list_entry(nesdev->nesadapter->nesvnic_list[nesdev->mac_index].next, - struct nes_vnic, list); + if (nesvnic->netdev_open == 1) + return 0; if (netif_msg_ifup(nesvnic)) printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name); @@ -225,7 +226,18 @@ static int nes_netdev_open(struct net_device *netdev) nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | nesvnic->nic_cq.cq_number); nes_read32(nesdev->regs+NES_CQE_ALLOC); - + list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { + first_nesvnic = container_of(list_pos, struct nes_vnic, list); + if (first_nesvnic->netdev_open == 1) + break; + } + if (first_nesvnic->netdev_open == 0) { + nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + 
NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + first_nesvnic = nesvnic; + } if (first_nesvnic->linkup) { /* Enable network packets */ nesvnic->linkup = 1; @@ -248,6 +260,8 @@ static int nes_netdev_stop(struct net_device *netdev) struct nes_device *nesdev = nesvnic->nesdev; u32 nic_active_mask; u32 nic_active; + struct nes_vnic *first_nesvnic = NULL; + struct list_head *list_pos, *list_temp; nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", nesvnic, nesdev, netdev, netdev->name); @@ -260,9 +274,20 @@ static int nes_netdev_stop(struct net_device *netdev) /* Disable network packets */ napi_disable(&nesvnic->napi); netif_stop_queue(netdev); - if ((nesdev->netdev[0] == netdev) & (nesvnic->logical_port == nesdev->mac_index)) { - nes_write_indexed(nesdev, - NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); + list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) { + first_nesvnic = container_of(list_pos, struct nes_vnic, list); + if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic)) + break; + } + + if (first_nesvnic->netdev_open == 0) + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); + else if ((first_nesvnic != nesvnic) && + (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), 0xffffffff); + nes_write_indexed(first_nesvnic->nesdev, NES_IDX_MAC_INT_MASK + (0x200 * first_nesvnic->nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); } nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); @@ -859,7 +884,6 @@ void nes_netdev_set_multicast_list(struct net_device *netdev) for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) { while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, 
multicast_addr->dmi_addr)) == 0)) multicast_addr = multicast_addr->next; - if (mc_nic_index < 0) mc_nic_index = nesvnic->nic_index; if (multicast_addr) { @@ -908,7 +932,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; netdev->mtu = new_mtu; - nesvnic->max_frame_size = new_mtu+ETH_HLEN; + nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; if (netdev->mtu > 1500) { jumbomode=1; @@ -1470,10 +1494,15 @@ static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_g { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; u32 u32temp; + unsigned long flags; + spin_lock_irqsave(&nesadapter->phy_lock, flags); nesvnic->vlan_grp = grp; + nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name); + /* Enable/Disable VLAN Stripping */ u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); if (grp) @@ -1482,6 +1511,7 @@ static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_g u32temp |= 0x02000000; nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } @@ -1540,7 +1570,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->msg_enable = netif_msg_init(debug, default_msg); nesvnic->netdev_index = nesdev->netdev_count; nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count; - nesvnic->max_frame_size = netdev->mtu+netdev->hard_header_len; + nesvnic->max_frame_size = netdev->mtu + netdev->hard_header_len + VLAN_HLEN; curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)]; nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid; @@ -1610,7 +1640,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); if ((nesdev->netdev_count == 0) && - (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) { + 
(PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) { nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1))); u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + @@ -1648,18 +1678,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->linkup = 1; } } - nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); /* clear the MAC interrupt status, assumes direct logical to physical mapping */ - u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port)); + u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); - nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port), u32temp); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); - if (nesdev->nesadapter->phy_type[nesvnic->logical_port] != NES_PHY_TYPE_IRIS) + if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS) nes_init_phy(nesdev); - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesvnic->logical_port), - ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | - NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); } return netdev; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index ffd4b42..4dafbe1 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1337,7 +1337,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", nespd->mmap_db_index); */ - if (nesqp->mmap_sq_db_index > NES_MAX_USER_WQ_REGIONS) { + if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) { nes_debug(NES_DBG_QP, "db index > max user regions, failing create QP\n"); 
nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); From holmkrue at gmx.de Mon Feb 18 12:38:51 2008 From: holmkrue at gmx.de (Roswell Kruse) Date: Mon, 18 Feb 2008 21:38:51 +0100 Subject: [ofa-general] A Great Man Should Have a Great Size Message-ID: <790803021.77074237335306@gmx.de> Among many other similar products and methods a VPXL system is known to give great results and to be the safest one. Don't put off your happy life, order our VPXL today.http://geocities.com/hill_noah/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From gretnata.paolo at libero.it Mon Feb 18 13:41:06 2008 From: gretnata.paolo at libero.it (Ashely Clinton) Date: Mon, 18 Feb 2008 15:41:06 -0600 Subject: [ofa-general] Don't let Your Sexual Life be Ruined Message-ID: <967932199.86716594083902@libero.it> Dear openib-windows at openib.orgA lot of men, dissatisfied with their cock size, try different cock enlargement procedures, techniques and pills. However, most of the advertised staff is either ineffective or even dangerous. Try the best possible male erotic potency - VPXL. It is an absolutely safe enlargement method that gives incredible results incomparable to the results of any other male medical methods. You don't need to envy guys with larger cocks anymore! Order our VPXL.http://geocities.com/burnsarron/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From dwilder at us.ibm.com Mon Feb 18 13:45:24 2008 From: dwilder at us.ibm.com (David Wilder) Date: Mon, 18 Feb 2008 13:45:24 -0800 Subject: [ofa-general] [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <4e6a6b3c0802172331w766fd2a4kaa23d2b95f7c9f92@mail.gmail.com> References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B49B8E.7070607@us.ibm.com> <47B4A3A4.2060402@dev.mellanox.co.il> <47B4C223.3010200@us.ibm.com> <4e6a6b3c0802172331w766fd2a4kaa23d2b95f7c9f92@mail.gmail.com> Message-ID: <47B9FC74.4030502@us.ibm.com> Eli Cohen wrote: > On 2/15/08, David Wilder wrote: >> I saw that :) >> You need to make the same change in three more places: >> > Ok, now CM should have these allocations too. > Thanks Eli. I tested the 2/18 build and it fixed my issue. I tested with send_queue_size and recv_queue_size at 8192 and at 1. Dave. From rdreier at cisco.com Mon Feb 18 13:47:20 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 13:47:20 -0800 Subject: [ofa-general] Re: [PATCH] libibverbs/examples: add command line parameter for SL In-Reply-To: <200710231439.25367.dotanb@dev.mellanox.co.il> (Dotan Barak's message of "Tue, 23 Oct 2007 14:39:25 +0200") References: <200710231439.25367.dotanb@dev.mellanox.co.il> Message-ID: The example man pages need to be updated too please. 
From rdreier at cisco.com Mon Feb 18 14:00:14 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 14:00:14 -0800 Subject: [ofa-general] Re: [PATCH] libibverbs : some man-pages fixes In-Reply-To: <200802031755.04681.dotanb@dev.mellanox.co.il> (Dotan Barak's message of "Sun, 3 Feb 2008 17:55:04 +0200") References: <200802031755.04681.dotanb@dev.mellanox.co.il> Message-ID: thanks, applied From rdreier at cisco.com Mon Feb 18 14:05:59 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 14:05:59 -0800 Subject: [Fwd: [ofa-general] [PATCH] libibverbs: document IBV_SEND_INLINE buffer ownership relaxation] In-Reply-To: <46A6F17C.8060404@voltaire.com> (Or Gerlitz's message of "Wed, 25 Jul 2007 09:45:16 +0300") References: <46A6F17C.8060404@voltaire.com> Message-ID: thanks, applied at long last From rdreier at cisco.com Mon Feb 18 14:08:42 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 14:08:42 -0800 Subject: [ofa-general] Re: [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: <47B803E1.80208@voltaire.com> (Or Gerlitz's message of "Sun, 17 Feb 2008 11:52:33 +0200") References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B48FF5.4040102@dev.mellanox.co.il> <47B803E1.80208@voltaire.com> Message-ID: > So what's your suggestion? maybe we go with vmalloc on 64-bit systems > and on 32-bits system limit the user to what kmalloc can provide > (which is about ~850 size rings with the tx_buf consuming 8*18 u64's + > pointer. No, I don't think we need to worry about using up vmalloc() space really. I don't think anyone is going to consume more than a few MB of ring buffers with IPoIB, and if they do then it's almost certainly going to be on a 64-bit system. 
From prospected at earlymenopause.com Mon Feb 18 14:19:39 2008 From: prospected at earlymenopause.com (Paula Borrolli) Date: Mon, 18 Feb 2008 22:19:39 +0000 Subject: [ofa-general] crossovers Message-ID: <7267871081.20080218221335@earlymenopause.com> Halloha, Real men! Milllions of people aacross the world have already tested THIS and ARE making their girllfriends feel brand new sexual sensatiions! YOU are the best in bed, aren't you ?Girls! Deevelop your sexual rrelationship and get even MORE pleaasure! Make your boyfriendd a gift!http://leannesorrentinocu.blogspot.com Inorganic elements of the globe? Very likely. Chatting over the preparations. The pediment of enemy. He has now given us away. He has some motive. Of uttering, 233. Dimenet, in the atlas, attacked the thing grows more terrible as i think of it. It had two rooms, and the men pitched the tent was restored, and the line having reformed, began clouds collected and capped the dark green forest knowing him intimately which cannot have occurred in london tonight and she'll be coming to see that bald official narrative which is docketed of interest. West virginia, with its solid unionist think so. But i really wouldn't know anything i married her without caring for her as a man my cunning to know what you mean by ease and luxury,. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From rdreier at cisco.com Mon Feb 18 14:33:32 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 18 Feb 2008 14:33:32 -0800 Subject: [ofa-general] Re: [patch 1/6] mmu_notifier: Core code In-Reply-To: <20080215064932.371510599@sgi.com> (Christoph Lameter's message of "Thu, 14 Feb 2008 22:49:00 -0800") References: <20080215064859.384203497@sgi.com> <20080215064932.371510599@sgi.com> Message-ID: It seems that we've come up with two reasonable cases where it makes sense to use these notifiers for InfiniBand/RDMA: First, the ability to safely DMA to/from userspace memory with the memory regions mlock()ed but the pages not pinned. In this case the notifiers here would seem to suit us well: > + void (*invalidate_range_begin)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end, > + int atomic); > + > + void (*invalidate_range_end)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end, > + int atomic); If I understand correctly, the IB stack would have to get the hardware driver to shoot down translation entries and suspend access to the region when an invalidate_range_begin notifier is called, and wait for the invalidate_range_end notifier to repopulate the adapter translation tables. This will probably work OK as long as the interval between the invalidate_range_begin and invalidate_range_end calls is not "too long." Also, using this effectively requires us to figure out how we want to mlock() regions that are going to be used for RDMA. We could require userspace to do it, but it's not clear to me that we're safe in the case where userspace decides not to... what happens if some pages get swapped out after the invalidate_range_begin notifier? 
The second case where some form of notifiers are useful is for userspace to know when a memory registration is still valid, ie Pete Wyckoff's work: http://www.osc.edu/~pw/papers/wyckoff-memreg-ccgrid05.pdf http://www.osc.edu/~pw/dreg/ however these MMU notifiers seem orthogonal to that: the registration cache is concerned with address spaces, not page mapping, and hence the existing vma operations seem to be a better fit. - R. From dwshellcovem at shellcove.net Mon Feb 18 15:48:07 2008 From: dwshellcovem at shellcove.net (Walter Mays) Date: Tue, 19 Feb 2008 07:48:07 +0800 Subject: [ofa-general] Hey bro, you really should check this out Message-ID: <388610906.92708799229057@shellcove.net> An HTML attachment was scrubbed... URL: From changquing.tang at hp.com Mon Feb 18 16:35:32 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 19 Feb 2008 00:35:32 +0000 Subject: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel Message-ID: I have taken some time to trace down this bug. When running OFED 1.3 on 2.6.9-42.ELsmp kernel, putenv("IBV_FORK_SAFE=1"); causes ibv_get_device_list() to print out a Warning and set errno = 22 : A:errno=0 libibverbs: Warning: fork()-safety requested but init failed B:errno=22 errno keeps value 22 and causes ibv_modify_xrc_rcv_qp() to fail. Another way to make ibv_modify_xrc_rcv_qp() fail is to set errno = 22 just before calling this function. However, this only happens on 2.6.9-42.ELsmp kernel, on 2.6.18-8.e15 kernel, it succeeds. 2.6.9-42.ELsmp is the kernel in Mellanox testing cluster helios.mellanox.com/ibd001-0032 Thanks to the Mellanox guys for having a look --CQ From dwsouthlanticm at southlantic.net Mon Feb 18 17:04:39 2008 From: dwsouthlanticm at southlantic.net (Matilda Lim) Date: Tue, 19 Feb 2008 09:04:39 +0800 Subject: [ofa-general] Save on quality software! Message-ID: <01c872d6$74e82d80$3992a876@dwsouthlanticm> Don't waste time waiting for delivery of your software on a CD. Download and install it immediately.
Choose the program you need from more than 270 programs in many languages. Buy software and be sure our professional customer support team will help to install it. Be also sure, if some problem occurs and your software does not run, we give your money back. You also will be able to do all the updates. http://geocities.com/joanwhitehead28 Buy, download and install right now! From soaplite-subscribe at yahoogroups.com Mon Feb 18 17:32:19 2008 From: soaplite-subscribe at yahoogroups.com (Jeremey Guevara) Date: Tue, 19 Feb 2008 09:32:19 +0800 Subject: [ofa-general] Leave the Small Size Behind Message-ID: <522690586.41200731349093@yahoogroups.com> Read one of the product testimonials:"Because of my cock size I have never been able to enjoy sex. I avoided relationships as they only led to frustration. A VPXL system has changed my life. It really works. I enjoy my self-confidence now and I love having sex. Thank you so much." Nasir, Troy.You don't need to envy guys with larger cocks anymore! Order our VPXL. http://geocities.com/aubreysweet39/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From dwregalem at regale.hu Mon Feb 18 19:17:49 2008 From: dwregalem at regale.hu (Issac Cross) Date: Tue, 19 Feb 2008 11:17:49 +0800 Subject: [ofa-general] Amazing New Products launch! Message-ID: <431592526.19191453161287@regale.hu> An HTML attachment was scrubbed... 
URL: From hrosenstock at xsigo.com Mon Feb 18 20:47:41 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 18 Feb 2008 20:47:41 -0800 Subject: [ofa-general] Re: [PATCH] opensm: convert to OSM_LOG() macro In-Reply-To: <20080216220350.GE18527@sashak.voltaire.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> <20080216220304.GD18527@sashak.voltaire.com> <20080216220350.GE18527@sashak.voltaire.com> Message-ID: <1203396461.26729.460.camel@hrosenstock-ws.xsigo.com> On Sat, 2008-02-16 at 22:03 +0000, Sasha Khapyorsky wrote: > Convert osm_log() calls where caller function name is used to OSM_LOG() > macro call which has caller function name as builtin. There are several changes in this patch in terms of the (old and new) function name. Are those intended ? -- Hal From deanita at mindspring.com Mon Feb 18 21:43:57 2008 From: deanita at mindspring.com (Jannie Suarez) Date: Mon, 18 Feb 2008 17:43:57 -1200 Subject: [ofa-general] Improve Quality of Life Increasing Size Message-ID: <475052743.70326482913093@mindspring.com> Our VPXL has helped many men to make their cock bigger in a safe and effective way. There is no reason why you can't be one of them. Order the VPXL and become our happy customer.http://geocities.com/mccallcarly/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From 99vogel at alma.edu Mon Feb 18 22:47:46 2008 From: 99vogel at alma.edu (Lois Daniel) Date: Mon, 18 Feb 2008 22:47:46 -0800 Subject: [ofa-general] Don't miss to see my pic Message-ID: <392732229.03491796154100@alma.edu> Hello! I am bored tonight. I am nice girl that would like to chat with you. Email me at Marta at TheDoorwayBeyond.info only, because I am using my friend's email to write this. To see some pictures of me. 
From npiggin at suse.de Tue Feb 19 00:43:57 2008 From: npiggin at suse.de (Nick Piggin) Date: Tue, 19 Feb 2008 09:43:57 +0100 Subject: [ofa-general] [patch] my mmu notifiers Message-ID: <20080219084357.GA22249@wotan.suse.de> Well I started reviewing the mmu notifier code, but it is kind of hard to know what you're talking about just by reading through code and not trying your suggestions for yourself... So I implemented mmu notifiers slightly differently. Andrea's mmu notifiers are rather similar. However I have tried to make a point of minimising the impact on the core mm/. I don't see why we need to invalidate or flush anything when changing the pte to be _more_ permissive, and I don't understand the need for invalidate_begin/invalidate_end pairs at all. What I have done is basically create it so that the notifiers get called basically in the same place as the normal TLB flushing is done, and nowhere else. I also wanted to avoid calling notifier code from inside eg. hardware TLB or pte manipulation primitives. These things are already pretty well spaghetti, so I'd like to just place them right where needed first... I think eventually it will need a bit of a rethink to make it more consistent and more general. But I prefer to put them in the caller for the moment. I have also attempted to write a skeleton driver. Not like Christoph's drivers, but one that actually does something. This one can mmap a window into its own virtual address space. It's not perfect yet (I need to replace page_mkwrite with ->fault in the core mm before I can get enough information to do protection properly I think). However I think it may be race-free in the fault vs unmap paths. It's pretty complex, I must say.
--- Index: linux-2.6/include/linux/mm_types.h =================================================================== --- linux-2.6.orig/include/linux/mm_types.h +++ linux-2.6/include/linux/mm_types.h @@ -228,6 +228,9 @@ struct mm_struct { #ifdef CONFIG_CGROUP_MEM_CONT struct mem_cgroup *mem_cgroup; #endif +#ifdef CONFIG_MMU_NOTIFIER + struct hlist_head mmu_notifier_list; +#endif }; #endif /* _LINUX_MM_TYPES_H */ Index: linux-2.6/include/linux/mmu_notifier.h =================================================================== --- /dev/null +++ linux-2.6/include/linux/mmu_notifier.h @@ -0,0 +1,69 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include +#include + +struct mmu_notifier; +struct mmu_notifier_operations; + +#ifdef CONFIG_MMU_NOTIFIER + +struct mmu_notifier { + struct hlist_node hlist; + const struct mmu_notifier_operations *ops; + struct mm_struct *mm; +}; + +struct mmu_notifier_operations { + void (*release)(struct mmu_notifier *mn); + int (*clear_young)(struct mmu_notifier *mn, unsigned long address); + void (*unmap)(struct mmu_notifier *mn, unsigned long address); + void (*invalidate_range)(struct mmu_notifier *mn, unsigned long start, unsigned long end); +}; + +static inline void mmu_notifier_init_mm(struct mm_struct *mm) +{ + INIT_HLIST_HEAD(&mm->mmu_notifier_list); +} + +static inline void mmu_notifier_init(struct mmu_notifier *mn, const struct mmu_notifier_operations *ops, struct mm_struct *mm) +{ + INIT_HLIST_NODE(&mn->hlist); + mn->ops = ops; + mn->mm = mm; +} + +extern void mmu_notifier_register(struct mmu_notifier *mn); +extern void mmu_notifier_unregister(struct mmu_notifier *mn); + +extern void mmu_notifier_exit_mm(struct mm_struct *mm); +extern int mmu_notifier_clear_young(struct mm_struct *mm, unsigned long address); +extern void mmu_notifier_unmap(struct mm_struct *mm, unsigned long address); +extern void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end); + +#else /* 
CONFIG_MMU_NOTIFIER */ + +static inline void mmu_notifier_init_mm(struct mm_struct *mm) +{ +} + +static inline void mmu_notifier_exit_mm(struct mm_struct *mm) +{ +} + +static inline int mmu_notifier_clear_young(struct mm_struct *mm, unsigned long address) +{ + return 0; +} + +static inline void mmu_notifier_unmap(struct mm_struct *mm, unsigned long address) +{ +} + +static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) +{ +} +#endif /* CONFIG_MMU_NOTIFIER */ + +#endif Index: linux-2.6/kernel/fork.c =================================================================== --- linux-2.6.orig/kernel/fork.c +++ linux-2.6/kernel/fork.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -358,6 +359,7 @@ static struct mm_struct * mm_init(struct mm->ioctx_list = NULL; mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; + mmu_notifier_init_mm(mm); mm_init_cgroup(mm, p); if (likely(!mm_alloc_pgd(mm))) { Index: linux-2.6/mm/filemap_xip.c =================================================================== --- linux-2.6.orig/mm/filemap_xip.c +++ linux-2.6/mm/filemap_xip.c @@ -195,6 +195,7 @@ __xip_unmap (struct address_space * mapp /* Nuke the page table entry. 
*/ flush_cache_page(vma, address, pte_pfn(*pte)); pteval = ptep_clear_flush(vma, address, pte); + mmu_notifier_unmap(mm, address); page_remove_rmap(page, vma); dec_mm_counter(mm, file_rss); BUG_ON(pte_dirty(pteval)); Index: linux-2.6/mm/fremap.c =================================================================== --- linux-2.6.orig/mm/fremap.c +++ linux-2.6/mm/fremap.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,7 @@ static void zap_pte(struct mm_struct *mm if (page) { if (pte_dirty(pte)) set_page_dirty(page); + mmu_notifier_unmap(mm, addr); page_remove_rmap(page, vma); page_cache_release(page); update_hiwater_rss(mm); Index: linux-2.6/mm/hugetlb.c =================================================================== --- linux-2.6.orig/mm/hugetlb.c +++ linux-2.6/mm/hugetlb.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -771,10 +772,12 @@ void __unmap_hugepage_range(struct vm_ar page = pte_page(pte); if (pte_dirty(pte)) set_page_dirty(page); + mmu_notifier_unmap(mm, address); list_add(&page->lru, &page_list); } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page); @@ -1048,6 +1051,7 @@ void hugetlb_change_protection(struct vm continue; if (!pte_none(*ptep)) { pte = huge_ptep_get_and_clear(mm, address, ptep); + mmu_notifier_unmap(mm, address); pte = pte_mkhuge(pte_modify(pte, newprot)); set_huge_pte_at(mm, address, ptep, pte); } @@ -1056,6 +1060,7 @@ void hugetlb_change_protection(struct vm spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); } struct file_region { Index: linux-2.6/mm/memory.c =================================================================== --- linux-2.6.orig/mm/memory.c +++ linux-2.6/mm/memory.c @@ -51,6 +51,7 @@ #include #include #include 
+#include #include #include @@ -626,9 +627,10 @@ int copy_page_range(struct mm_struct *ds static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, + unsigned long start, unsigned long end, long *zap_work, struct zap_details *details) { + unsigned long addr = start; struct mm_struct *mm = tlb->mm; pte_t *pte; spinlock_t *ptl; @@ -670,6 +672,7 @@ static unsigned long zap_pte_range(struc } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); + mmu_notifier_unmap(mm, addr); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; @@ -702,6 +705,7 @@ static unsigned long zap_pte_range(struc pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); + mmu_notifier_invalidate_range(mm, start, end); add_mm_rss(mm, file_rss, anon_rss); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); @@ -981,6 +985,7 @@ no_page_table: } return page; } +EXPORT_SYMBOL(follow_page); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, @@ -1676,6 +1681,7 @@ gotten: * thread doing COW. */ ptep_clear_flush(vma, address, page_table); + mmu_notifier_unmap(mm, address); set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); @@ -2200,7 +2206,7 @@ static int __do_fault(struct mm_struct * vmf.flags = flags; vmf.page = NULL; - BUG_ON(vma->vm_flags & VM_PFNMAP); + /* BUG_ON(vma->vm_flags & VM_PFNMAP); */ if (likely(vma->vm_ops->fault)) { ret = vma->vm_ops->fault(vma, &vmf); @@ -2498,8 +2504,10 @@ static inline int handle_pte_fault(struc * This still avoids useless tlb flushes for .text page faults * with threads. 
*/ - if (write_access) + if (write_access) { flush_tlb_page(vma, address); + mmu_notifier_invalidate_range(mm, address, address+PAGE_SIZE); + } } unlock: pte_unmap_unlock(pte, ptl); Index: linux-2.6/mm/mmap.c =================================================================== --- linux-2.6.orig/mm/mmap.c +++ linux-2.6/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -2037,6 +2038,7 @@ void exit_mmap(struct mm_struct *mm) unsigned long end; /* mm's last user has gone, and its about to be pulled down */ + mmu_notifier_exit_mm(mm); arch_exit_mmap(mm); lru_add_drain(); Index: linux-2.6/mm/mmu_notifier.c =================================================================== --- /dev/null +++ linux-2.6/mm/mmu_notifier.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include + + +#define __mmu_notifier_for_each(mm, mn, hnode) \ + hlist_for_each_entry_rcu(mn, hnode, &(mm)->mmu_notifier_list, hlist) + +#define do_mmu_notifier_for_each(mm, mn) \ + do { \ + if (unlikely(!hlist_empty(&(mm)->mmu_notifier_list))) { \ + struct hlist_node *__do_for_each_node; \ + rcu_read_lock(); \ + __mmu_notifier_for_each(mm, mn, __do_for_each_node) { + +#define while_mmu_notifier_for_each \ + } \ + rcu_read_unlock(); \ + } \ + } while (0) + + +void mmu_notifier_register(struct mmu_notifier *mn) +{ + hlist_add_head_rcu(&mn->hlist, &mn->mm->mmu_notifier_list); + synchronize_rcu(); +} +EXPORT_SYMBOL(mmu_notifier_register); + +void mmu_notifier_unregister(struct mmu_notifier *mn) +{ + hlist_del_rcu(&mn->hlist); + synchronize_rcu(); +} + +void mmu_notifier_exit_mm(struct mm_struct *mm) +{ + if (unlikely(!hlist_empty(&mm->mmu_notifier_list))) { + struct mmu_notifier *mn; + struct hlist_node *n, *t; + + hlist_for_each_entry_safe(mn, n, t, + &mm->mmu_notifier_list, hlist) { + hlist_del_rcu(&mn->hlist); + if (mn->ops->release) + mn->ops->release(mn); + } + } +} + +int mmu_notifier_clear_young(struct mm_struct *mm, unsigned long address) +{ + struct mmu_notifier *mn; + 
int ret = 0; + + do_mmu_notifier_for_each(mm, mn) { + if (mn->ops->clear_young) { + if (mn->ops->clear_young(mn, address)) + ret = 1; + } + } while_mmu_notifier_for_each; + + return ret; +} + +void mmu_notifier_unmap(struct mm_struct *mm, unsigned long address) +{ + struct mmu_notifier *mn; + + do_mmu_notifier_for_each(mm, mn) { + if (mn->ops->unmap) + mn->ops->unmap(mn, address); + } while_mmu_notifier_for_each; +} + +void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + + do_mmu_notifier_for_each(mm, mn) { + if (mn->ops->invalidate_range) + mn->ops->invalidate_range(mn, start, end); + } while_mmu_notifier_for_each; +} Index: linux-2.6/mm/mprotect.c =================================================================== --- linux-2.6.orig/mm/mprotect.c +++ linux-2.6/mm/mprotect.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,8 @@ static void change_pte_range(struct mm_s * into place. 
*/ ptent = ptep_get_and_clear(mm, addr, pte); + mmu_notifier_unmap(mm, addr); + ptent = pte_modify(ptent, newprot); /* * Avoid taking write faults for pages we know to be @@ -125,6 +128,7 @@ static void change_protection(struct vm_ change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable); } while (pgd++, addr = next, addr != end); flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); } int Index: linux-2.6/mm/mremap.c =================================================================== --- linux-2.6.orig/mm/mremap.c +++ linux-2.6/mm/mremap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -105,6 +106,7 @@ static void move_ptes(struct vm_area_str if (pte_none(*old_pte)) continue; pte = ptep_clear_flush(vma, old_addr, old_pte); + mmu_notifier_unmap(mm, old_addr); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); set_pte_at(mm, new_addr, new_pte, pte); } Index: linux-2.6/mm/rmap.c =================================================================== --- linux-2.6.orig/mm/rmap.c +++ linux-2.6/mm/rmap.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -287,8 +288,12 @@ static int page_referenced_one(struct pa if (vma->vm_flags & VM_LOCKED) { referenced++; *mapcount = 1; /* break early from loop */ - } else if (ptep_clear_flush_young(vma, address, pte)) - referenced++; + } else { + if (ptep_clear_flush_young(vma, address, pte)) + referenced++; + if (mmu_notifier_clear_young(mm, address)) + referenced++; + } /* Pretend the page is referenced if the task has the swap token and is in the middle of a page fault. 
*/ @@ -455,6 +460,7 @@ static int page_mkclean_one(struct page flush_cache_page(vma, address, pte_pfn(*pte)); entry = ptep_clear_flush(vma, address, pte); + mmu_notifier_unmap(mm, address); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); set_pte_at(mm, address, pte, entry); @@ -711,10 +717,21 @@ static int try_to_unmap_one(struct page * If it's recently referenced (perhaps page_referenced * skipped over this mm) then we should reactivate it. */ - if (!migration && ((vma->vm_flags & VM_LOCKED) || - (ptep_clear_flush_young(vma, address, pte)))) { - ret = SWAP_FAIL; - goto out_unmap; + if (!migration) { + int referenced; + + if (vma->vm_flags & VM_LOCKED) { +fail: + ret = SWAP_FAIL; + goto out_unmap; + } + referenced = 0; + if (ptep_clear_flush_young(vma, address, pte)) + referenced = 1; + if (mmu_notifier_clear_young(mm, address)) + referenced = 1; + if (referenced) + goto fail; } /* Nuke the page table entry. */ @@ -724,6 +741,7 @@ static int try_to_unmap_one(struct page /* Move the dirty bit to the physical page now the pte is gone. */ if (pte_dirty(pteval)) set_page_dirty(page); + mmu_notifier_unmap(mm, address); /* Update high watermark before we lower rss */ update_hiwater_rss(mm); @@ -839,12 +857,19 @@ static void try_to_unmap_cluster(unsigne update_hiwater_rss(mm); for (; address < end; pte++, address += PAGE_SIZE) { + int referenced; + if (!pte_present(*pte)) continue; page = vm_normal_page(vma, address, *pte); BUG_ON(!page || PageAnon(page)); + referenced = 0; if (ptep_clear_flush_young(vma, address, pte)) + referenced = 1; + if (mmu_notifier_clear_young(mm, address)) + referenced = 1; + if (referenced) continue; /* Nuke the page table entry. */ @@ -858,6 +883,7 @@ static void try_to_unmap_cluster(unsigne /* Move the dirty bit to the physical page now the pte is gone. 
*/ if (pte_dirty(pteval)) set_page_dirty(page); + mmu_notifier_unmap(mm, address); page_remove_rmap(page, vma); page_cache_release(page); Index: linux-2.6/mm/Makefile =================================================================== --- linux-2.6.orig/mm/Makefile +++ linux-2.6/mm/Makefile @@ -33,4 +33,4 @@ obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o - +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o Index: linux-2.6/mm/Kconfig =================================================================== --- linux-2.6.orig/mm/Kconfig +++ linux-2.6/mm/Kconfig @@ -193,3 +193,7 @@ config NR_QUICK config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS + +config MMU_NOTIFIER + bool "MMU notifiers" + def_bool y From npiggin at suse.de Tue Feb 19 00:44:50 2008 From: npiggin at suse.de (Nick Piggin) Date: Tue, 19 Feb 2008 09:44:50 +0100 Subject: [ofa-general] [patch] my mmu notifier sample driver In-Reply-To: <20080219084357.GA22249@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> Message-ID: <20080219084450.GB22249@wotan.suse.de> Index: linux-2.6/drivers/char/mmu_notifier_skel.c =================================================================== --- /dev/null +++ linux-2.6/drivers/char/mmu_notifier_skel.c @@ -0,0 +1,255 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(mmn_lock); +static RADIX_TREE(rmap_tree, GFP_ATOMIC); +static seqcount_t rmap_seq = SEQCNT_ZERO; + +static int __rmap_add(unsigned long mem, unsigned long vaddr) +{ + int err; + + err = radix_tree_insert(&rmap_tree, mem >> PAGE_SHIFT, (void *)vaddr); + + return err; +} + +static void __rmap_del(unsigned long mem) +{ + void *ret; + + ret = radix_tree_delete(&rmap_tree, mem >> PAGE_SHIFT); + BUG_ON(!ret); +} + +static unsigned long rmap_find(unsigned long mem) +{ + unsigned long 
vaddr; + + rcu_read_lock(); + vaddr = (unsigned long)radix_tree_lookup(&rmap_tree, mem >> PAGE_SHIFT); + rcu_read_unlock(); + + return vaddr; +} + +static struct page *follow_page_atomic(struct mm_struct *mm, unsigned long address, int write) +{ + struct vm_area_struct *vma; + + vma = find_vma(mm, address); + if (!vma || (vma->vm_start > address)) + return NULL; + + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) + return NULL; + + return follow_page(vma, address, FOLL_GET|(write ? FOLL_WRITE : 0)); +} + +static int mmn_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long source_vaddr = (unsigned long)vmf->pgoff << PAGE_SHIFT; + unsigned long dest_vaddr = (unsigned long)vmf->virtual_address; + unsigned long pfn; + struct page *page; + pgprot_t prot; + int write = vmf->flags & FAULT_FLAG_WRITE; + int ret; + + printk("mmn_vm_fault %s at vaddr=%lx sourcing from %lx\n", write ? "write" : "read", dest_vaddr, source_vaddr); + + BUG_ON(mm != current->mm); /* disallow get_user_pages */ + +again: + spin_lock(&mmn_lock); + write_seqcount_begin(&rmap_seq); + page = follow_page_atomic(mm, source_vaddr, write); + if (unlikely(!page)) { + write_seqcount_end(&rmap_seq); + spin_unlock(&mmn_lock); + ret = get_user_pages(current, mm, source_vaddr, + 1, write, 0, &page, NULL); + if (ret != 1) + goto out_err; + put_page(page); + goto again; + } + + ret = __rmap_add(source_vaddr, dest_vaddr); + if (ret) + goto out_lock; + + pfn = page_to_pfn(page); + prot = vma->vm_page_prot; + if (!write) + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags & ~(VM_WRITE|VM_MAYWRITE)); + ret = vm_insert_pfn(vma, dest_vaddr, pfn); + vma->vm_page_prot = prot; + if (ret) { + if (ret == -EBUSY) + WARN_ON(1); + goto out_rmap; + } + write_seqcount_end(&rmap_seq); + spin_unlock(&mmn_lock); + put_page(page); + + return VM_FAULT_NOPAGE; + +out_rmap: + __rmap_del(source_vaddr); +out_lock: + write_seqcount_end(&rmap_seq); + spin_unlock(&mmn_lock); + 
put_page(page); +out_err: + switch (ret) { + case -EFAULT: + case -EEXIST: + case -EBUSY: + return VM_FAULT_SIGBUS; + case -ENOMEM: + return VM_FAULT_OOM; + default: + BUG(); + } +} + +struct vm_operations_struct mmn_vm_ops = { + .fault = mmn_vm_fault, +}; + +static int mmu_notifier_busy; +static struct mmu_notifier mmu_notifier; + +static int mmn_clear_young(struct mmu_notifier *mn, unsigned long address) +{ + unsigned long vaddr; + unsigned seq; + struct mm_struct *mm = mn->mm; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + + do { + seq = read_seqcount_begin(&rmap_seq); + vaddr = rmap_find(address); + } while (read_seqcount_retry(&rmap_seq, seq)); + + if (vaddr == 0) + return 0; + + printk("mmn_clear_young at vaddr=%lx sourced from %lx\n", vaddr, address); + + spin_lock(&mmn_lock); + pgd = pgd_offset(mm, vaddr); + pud = pud_offset(pgd, vaddr); + if (pud) { + pmd = pmd_offset(pud, vaddr); + if (pmd) { + ptep = pte_offset_map(pmd, vaddr); + if (ptep) { + pte = *ptep; + if (!pte_present(pte)) { + /* x86 specific, don't have a vma */ + ptep_get_and_clear(mm, vaddr, ptep); + __flush_tlb_one(vaddr); + } + pte_unmap(ptep); + } + } + } + __rmap_del(address); + spin_unlock(&mmn_lock); + + return 1; +} + +static void mmn_unmap(struct mmu_notifier *mn, unsigned long address) +{ + mmn_clear_young(mn, address); +} + +static void mmn_release(struct mmu_notifier *mn) +{ + mmu_notifier_busy = 0; +} + +static struct mmu_notifier_operations mmn_ops = { + .clear_young = mmn_clear_young, + .unmap = mmn_unmap, + .release = mmn_release, +}; + +static int mmn_mmap(struct file *file, struct vm_area_struct *vma) +{ + int busy; + + if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) + return -EINVAL; + + spin_lock(&mmn_lock); + busy = mmu_notifier_busy; + if (!busy) + mmu_notifier_busy = 1; + spin_unlock(&mmn_lock); + if (busy) + return -EBUSY; + + vma->vm_flags |= VM_PFNMAP; + vma->vm_ops = &mmn_vm_ops; + + mmu_notifier_init(&mmu_notifier, &mmn_ops, 
current->mm); + mmu_notifier_register(&mmu_notifier); + + return 0; +} + +static const struct file_operations mmn_fops = +{ + .owner = THIS_MODULE, + .llseek = no_llseek, + .mmap = mmn_mmap, +}; + +static struct miscdevice mmn_miscdev = +{ + .minor = MISC_DYNAMIC_MINOR, + .name = "mmn", + .fops = &mmn_fops +}; + +static int __init mmn_init(void) +{ + if (misc_register(&mmn_miscdev)) { + printk(KERN_ERR "mmn: unable to register device\n"); + return -EIO; + } + return 0; +} + +static void __exit mmn_exit(void) +{ + misc_deregister(&mmn_miscdev); +} + +MODULE_DESCRIPTION("mmu_notifier skeleton driver"); +MODULE_LICENSE("GPL"); + +module_init(mmn_init); +module_exit(mmn_exit); + Index: linux-2.6/drivers/char/Kconfig =================================================================== --- linux-2.6.orig/drivers/char/Kconfig +++ linux-2.6/drivers/char/Kconfig @@ -4,6 +4,10 @@ menu "Character devices" +config MMU_NOTIFIER_SKEL + tristate "MMU Notifier skeleton driver" + default n + config VT bool "Virtual terminal" if EMBEDDED depends on !S390 Index: linux-2.6/drivers/char/Makefile =================================================================== --- linux-2.6.orig/drivers/char/Makefile +++ linux-2.6/drivers/char/Makefile @@ -97,6 +97,7 @@ obj-$(CONFIG_CS5535_GPIO) += cs5535_gpio obj-$(CONFIG_GPIO_VR41XX) += vr41xx_giu.o obj-$(CONFIG_GPIO_TB0219) += tb0219.o obj-$(CONFIG_TELCLOCK) += tlclk.o +obj-$(CONFIG_MMU_NOTIFIER_SKEL) += mmu_notifier_skel.o obj-$(CONFIG_MWAVE) += mwave/ obj-$(CONFIG_AGP) += agp/ From nickpiggin at yahoo.com.au Tue Feb 19 00:46:10 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Tue, 19 Feb 2008 19:46:10 +1100 Subject: [ofa-general] Re: [patch 3/6] mmu_notifier: invalidate_page callbacks In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215193736.9d6e7da3.akpm@linux-foundation.org> Message-ID: <200802191946.10695.nickpiggin@yahoo.com.au> On Sunday 17 February 2008 06:22, Christoph Lameter wrote: > On Fri, 15 Feb 2008, Andrew 
Morton wrote: > > > flush_cache_page(vma, address, pte_pfn(*pte)); > > > entry = ptep_clear_flush(vma, address, pte); > > > + mmu_notifier(invalidate_page, mm, address); > > > > I just don't see how ths can be done if the callee has another thread in > > the middle of establishing IO against this region of memory. > > ->invalidate_page() _has_ to be able to block. Confused. > > The page lock is held and that holds off I/O? I think the actual answer is that "it doesn't matter". ptes are not exactly the entity via which IO gets established, so all we really care about here is that after the callback finishes, we will not get any more reads or writes to the page via the external mapping. As far as holding off local IO goes, that is the job of the core VM. (And no, page lock does not necessarily hold it off FYI -- it can be writeback IO or even IO directly via buffers). Holding off IO via the external references I guess is a job for the notifier driver. From nickpiggin at yahoo.com.au Tue Feb 19 00:54:14 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Tue, 19 Feb 2008 19:54:14 +1100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080215064932.620773824@sgi.com> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> Message-ID: <200802191954.14874.nickpiggin@yahoo.com.au> On Friday 15 February 2008 17:49, Christoph Lameter wrote: > The invalidation of address ranges in a mm_struct needs to be > performed when pages are removed or permissions etc change. > > If invalidate_range_begin() is called with locks held then we > pass a flag into invalidate_range() to indicate that no sleeping is > possible. Locks are only held for truncate and huge pages. > > In two cases we use invalidate_range_begin/end to invalidate > single pages because the pair allows holding off new references > (idea by Robin Holt). > > do_wp_page(): We hold off new references while we update the pte. 
> > xip_unmap: We are not taking the PageLock so we cannot > use the invalidate_page mmu_rmap_notifier. invalidate_range_begin/end > stands in. This whole thing would be much better if you didn't rely on the page lock at all, but either a) used the same locking as Linux does for its ptes/tlbs, or b) have some locking that is private to the mmu notifier code. Then there is not all this new stuff that has to be understood in the core VM. Also, why do you have to "invalidate" ranges when switching to a _more_ permissive state? This stuff should basically be the same as (a subset of) the TLB flushing API AFAIKS. Anything more is a pretty big burden to put in the core VM. See my alternative patch I posted -- I can't see why it won't work just like a TLB. As far as sleeping inside callbacks goes... I think there are big problems with the patch (the sleeping patch and the external rmap patch). I don't think it is workable in its current state. Either we have to make some big changes to the core VM, or we have to turn some locks into sleeping locks to do it properly AFAIKS. Neither one is good. But anyway, I don't really think the two approaches (Andrea's notifiers vs sleeping/xrmap) should be tangled up too much. I think Andrea's can possibly be quite unintrusive and useful very soon. From david-m at orbotech.com Tue Feb 19 01:03:42 2008 From: david-m at orbotech.com (David Minor) Date: Tue, 19 Feb 2008 11:03:42 +0200 Subject: [ofa-general] getting network statistics Message-ID: Under Linux with Mellanox ofed, how can I get real-time network statistics. e.g. how many bytes are being sent and received over each port at any given time? Thanks, David -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From ogerlitz at voltaire.com Tue Feb 19 01:05:22 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 19 Feb 2008 11:05:22 +0200 Subject: [ofa-general] Re: [PATCH v2] IB/ipoib: use vmap with allocation of tx ring In-Reply-To: References: <47B44716.2010401@dev.mellanox.co.il> <47B44A8D.30200@voltaire.com> <47B44C83.7010201@dev.mellanox.co.il> <47B48FF5.4040102@dev.mellanox.co.il> <47B803E1.80208@voltaire.com> Message-ID: <47BA9BD2.8050007@voltaire.com> Roland Dreier wrote: > No, I don't think we need to worry about using up vmalloc() space > really. I don't think anyone is going to consume more than a few MB > of ring buffers with IPoIB, and if they do then it's almost certainly > going to be on a 64-bit system. So the patch you want to see is to make ipoib use vmalloc in those 1-3 places which are subject to kmalloc failure? Or From dwpifm at pif.nu Tue Feb 19 01:15:18 2008 From: dwpifm at pif.nu (Rosemarie Elliot) Date: Tue, 19 Feb 2008 18:15:18 +0900 Subject: [ofa-general] Or maybe just to reward yourself with a gift for once? Message-ID: <050072621.69631355813473@pif.nu> An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: Price Type: application/pdf Size: 17988 bytes Desc: not available URL: From vlad at lists.openfabrics.org Tue Feb 19 03:12:09 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 19 Feb 2008 03:12:09 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080219-0200 daily build status Message-ID: <20080219111209.8683AE28253@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with linux-2.6.22 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on ppc64 with linux-2.6.24 Failed: Build failed on i686 with 2.6.15-23-server Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -pipe -msoft-float -mpreferred-stack-boundary=2 -fno-unit-at-a-time -march=i686 -Iinclude/asm-i386/mach-default -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_check] Error 2 make[1]: Leaving directory `/usr/src/linux-headers-2.6.15-23-server' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on i686 with linux-2.6.13 Build failed on i686 with linux-2.6.12 Build failed on i686 with linux-2.6.16 Build failed on i686 with linux-2.6.15 Build failed on i686 with linux-2.6.14 Build failed on i686 with linux-2.6.18 Build failed on i686 with linux-2.6.17 Build failed on i686 with linux-2.6.19 Build failed on i686 with linux-2.6.21.1 Build failed on x86_64 with linux-2.6.12 Log: include/asm/apic.h:47: warning: value computed is not used /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- 
Build failed on x86_64 with linux-2.6.15 Log: include/asm/apic.h:47: warning: value computed is not used /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.14 Log: include/asm/apic.h:47: warning: value computed is not used /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_x86_64_check] 
Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.13 Log: include/asm/apic.h:47: warning: value computed is not used /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -Werror-implicit-function-declaration -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -Werror-implicit-function-declaration -fno-strict-aliasing -fno-common -ffreestanding -Os -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -fomit-frame-pointer -fasynchronous-unwind-tables -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-1.2798.fc6 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Wstrict-prototypes -Wundef -Werror-implicit-function-declaration -Os -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -fomit-frame-pointer -fasynchronous-unwind-tables -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 
'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-1.2798.fc6_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-1.2798.fc6' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.17 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -fomit-frame-pointer -g -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.19 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -maccumulate-outgoing-args -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1 -fomit-frame-pointer -fasynchronous-unwind-tables -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Wstrict-prototypes -Wundef -Werror-implicit-function-declaration -Os -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.21.1 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -maccumulate-outgoing-args -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1 -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:618: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.21.1' make: *** 
[kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.20 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -m64 -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -Wno-sign-compare -fno-asynchronous-unwind-tables -funit-at-a-time -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -maccumulate-outgoing-args -DCONFIG_AS_CFI=1 -DCONFIG_AS_CFI_SIGNAL_FRAME=1 -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.20_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.20' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: include/linux/skbuff.h:1013: warning: pointer targets 
in passing argument 2 of 'csum_partial_copy_from_user' differ in signedness /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: include/linux/skbuff.h:1041: warning: pointer targets in passing argument 2 of 'csum_partial_copy_from_user' differ in signedness /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.13 Log: include/asm/mmu_context.h:67: warning: type qualifiers ignored on function return type /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.12 Log: include/asm/mmu_context.h:67: warning: type qualifiers ignored on function return type /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.15 Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16 Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16' make: 
*** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.14 Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.17 Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os 
-fomit-frame-pointer -g -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.18 Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" 
-D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -Werror-implicit-function-declaration -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.21.1 Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:618: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.21.1_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.21.1' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.19 Log: -DHAVE_WORKING_TEXT_ALIGN -DHAVE_MODEL_SMALL_ATTRIBUTE -DHAVE_SERIALIZE_DIRECTIVE -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -pipe -ffixed-r13 -mfixed-range=f12-f15,f32-f127 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.12 Log: -Iarch/ppc -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -Iarch/ppc -msoft-float -pipe -ffixed-r2 -mmultiple -mstring -Wa,-maltivec -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.15 Log: -Iarch/ppc -Iarch/ppc/include -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -Iarch/ppc -msoft-float -pipe -ffixed-r2 -mmultiple -mno-altivec -mstring -Wa,-maltivec -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_powerpc_check] Error 2 make[1]: Leaving directory 
`/home/vlad/kernel.org/powerpc/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.13 Log: -Iarch/ppc -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -Iarch/ppc -msoft-float -pipe -ffixed-r2 -mmultiple -mstring -Wa,-maltivec -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on powerpc with linux-2.6.14 Log: -Iarch/ppc -Iarch/ppc/include -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -Iarch/ppc -msoft-float -pipe -ffixed-r2 -mmultiple 
-mstring -Wa,-maltivec -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_powerpc_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/powerpc/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.13 Log: -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -funit-at-a-time -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.13_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.13' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.12 Log: -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -funit-at-a-time -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.12_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.12' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.14 Log: -Iarch/ppc64/include -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -O2 -fomit-frame-pointer -g -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -funit-at-a-time -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.14_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.14' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.15 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -mno-altivec -funit-at-a-time -mstring -Wa,-maltivec -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -DKBUILD_BASENAME=ipoib_cm -DKBUILD_MODNAME=ib_ipoib -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.15_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.15' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.16 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs 
-fno-strict-aliasing -fno-common -ffreestanding -Os -fomit-frame-pointer -g -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -mno-altivec -funit-at-a-time -mstring -Wa,-maltivec -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.16_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.16' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.17 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -fomit-frame-pointer -g -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -mno-altivec -funit-at-a-time -mstring -Wa,-maltivec -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" 
-D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.17_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.17' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -mno-altivec -funit-at-a-time -mstring -Wa,-maltivec -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c 
/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.19 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Os -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -mno-altivec -funit-at-a-time -mstring -Wa,-maltivec -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 
'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.19_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.19' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -Wstrict-prototypes -Wundef -Werror-implicit-function-declaration -Os -msoft-float -pipe -mminimal-toc -mtraceback=none -mcall-aixdesc -mtune=power4 -mno-altivec -funit-at-a-time -mstring -Wa,-maltivec -fomit-frame-pointer -g -fno-stack-protector -Wdeclaration-after-statement -Wno-pointer-sign -DMODULE -D"KBUILD_STR(s)=#s" -D"KBUILD_BASENAME=KBUILD_STR(ipoib_cm)" -D"KBUILD_MODNAME=KBUILD_STR(ib_ipoib)" -c -o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/.tmp_ipoib_cm.o /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c: In function 'ipoib_cm_handle_rx_wc': /home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.c:620: error: implicit declaration of function 'skb_copy_from_linear_data_offset' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib/ipoib_cm.o] Error 1 make[3]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/ipoib] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20080219-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From patrick.latifi at qlogic.com Tue Feb 19 03:17:04 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:17:04 -0800 Subject: [ofa-general] [PATCH][DAPL v1] memory leak fixes Message-ID: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Hi all, Here's a set of patches for dapl 1.2.x. Please have a look and let me know if there's any issue. All these patches apply against the dat1.2 branch. Thanks, -pat From patrick.latifi at qlogic.com Tue Feb 19 03:17:09 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:17:09 -0800 Subject: [ofa-general] [PATCH 1/6] [DAPL v1] fix memory leak In-Reply-To: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> References: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111709.27820.63607.stgit@b64-10.internal.keyresearch.com> Fix memory leak. 
Signed-off-by: Patrick Marchand Latifi --- test/dapltest/cmd/dapl_netaddr.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/test/dapltest/cmd/dapl_netaddr.c b/test/dapltest/cmd/dapl_netaddr.c index 0b16303..a306335 100644 --- a/test/dapltest/cmd/dapl_netaddr.c +++ b/test/dapltest/cmd/dapl_netaddr.c @@ -137,6 +137,7 @@ DT_NetAddrLookupHostAddress (DAT_IA_ADDRESS_PTR to_netaddr, inet_ntoa(((struct sockaddr_in *)target->ai_addr)->sin_addr)); *to_netaddr = * ((DAT_IA_ADDRESS_PTR) target->ai_addr); + freeaddrinfo(target); return ( DAT_TRUE ); } From patrick.latifi at qlogic.com Tue Feb 19 03:17:14 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:17:14 -0800 Subject: [ofa-general] [PATCH 2/6] [DAPL v1] fix memory leak In-Reply-To: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> References: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111714.27820.25981.stgit@b64-10.internal.keyresearch.com> Fix memory leak Signed-off-by: Patrick Marchand Latifi --- test/dtest/dtest.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/test/dtest/dtest.c b/test/dtest/dtest.c index 4264e9b..2db141f 100755 --- a/test/dtest/dtest.c +++ b/test/dtest/dtest.c @@ -750,6 +750,7 @@ connect_ep( char *hostname, int conn_id ) inet_ntoa(((struct sockaddr_in *)target->ai_addr)->sin_addr)); remote_addr = *((DAT_IA_ADDRESS_PTR)target->ai_addr); + freeaddrinfo(target); LOGPRINTF("%d Connecting to server\n",getpid()); ret = dat_ep_connect( h_ep, From patrick.latifi at qlogic.com Tue Feb 19 03:17:19 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:17:19 -0800 Subject: [ofa-general] [PATCH 3/6] [DAPL v1] fix memory leak in error path In-Reply-To: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> References: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Message-ID: 
<20080219111719.27820.16204.stgit@b64-10.internal.keyresearch.com> Fix memory leak in error path. Signed-off-by: Patrick Marchand Latifi --- dapl/openib_cma/dapl_ib_util.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c index 0606312..6b282e5 100755 --- a/dapl/openib_cma/dapl_ib_util.c +++ b/dapl/openib_cma/dapl_ib_util.c @@ -131,8 +131,10 @@ static int getipaddr(char *name, char *addr, int len) } else { if (len >= res->ai_addrlen) memcpy(addr, res->ai_addr, res->ai_addrlen); - else + else { + freeaddrinfo(res); return EINVAL; + } freeaddrinfo(res); } From patrick.latifi at qlogic.com Tue Feb 19 03:17:24 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:17:24 -0800 Subject: [ofa-general] [PATCH 4/6] [DAPL v1] fix memory leak In-Reply-To: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> References: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111724.27820.40367.stgit@b64-10.internal.keyresearch.com> Fix memory leak. 
Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_ia_open.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/dapl/common/dapl_ia_open.c b/dapl/common/dapl_ia_open.c index d3d0ed0..cf15f61 100644 --- a/dapl/common/dapl_ia_open.c +++ b/dapl/common/dapl_ia_open.c @@ -420,6 +420,7 @@ dapli_assign_hca_ip_address ( else { hca_ptr->hca_address = * ((DAT_SOCK_ADDR6 *)addr->ai_addr); + dapls_osd_freeaddrinfo (addr); } } From patrick.latifi at qlogic.com Tue Feb 19 03:17:29 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:17:29 -0800 Subject: [ofa-general] [PATCH 5/6] [DAPL v1] fix memory leak in error path In-Reply-To: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> References: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111729.27820.94402.stgit@b64-10.internal.keyresearch.com> Make sure we don't leak the hash table if dapl_hca_alloc fails. Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_hca_util.c | 53 +++++++++++++++++++++++++++---------------- 1 files changed, 33 insertions(+), 20 deletions(-) diff --git a/dapl/common/dapl_hca_util.c b/dapl/common/dapl_hca_util.c index 9bf35da..2b112b3 100644 --- a/dapl/common/dapl_hca_util.c +++ b/dapl/common/dapl_hca_util.c @@ -65,33 +65,46 @@ dapl_hca_alloc ( DAPL_HCA *hca_ptr; hca_ptr = dapl_os_alloc (sizeof (DAPL_HCA)); - if ( NULL != hca_ptr ) + if ( NULL == hca_ptr ) { - dapl_os_memzero (hca_ptr, sizeof (DAPL_HCA)); + goto bail; + } - if ( DAT_SUCCESS == dapls_hash_create ( + dapl_os_memzero (hca_ptr, sizeof (DAPL_HCA)); + + if ( DAT_SUCCESS != dapls_hash_create ( DAPL_HASH_TABLE_DEFAULT_CAPACITY, &hca_ptr->lmr_hash_table) ) - { - dapl_os_lock_init(&hca_ptr->lock); - dapl_llist_init_head(&hca_ptr->ia_list_head); + { + goto bail; + } + + dapl_os_lock_init(&hca_ptr->lock); + dapl_llist_init_head(&hca_ptr->ia_list_head); - hca_ptr->name = dapl_os_strdup(name); - hca_ptr->ib_hca_handle = 
IB_INVALID_HANDLE; - hca_ptr->port_num = dapl_os_strtol(port, NULL, 0); - if (hca_ptr->name == NULL) - { - dapl_os_free (hca_ptr, sizeof (DAPL_HCA)); - hca_ptr = NULL; - } - } - else - { - dapl_os_free (hca_ptr, sizeof (DAPL_HCA)); - hca_ptr = NULL; - } + hca_ptr->name = dapl_os_strdup(name); + if ( NULL == hca_ptr->name ) + { + goto bail; } + + hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; + hca_ptr->port_num = dapl_os_strtol(port, NULL, 0); return (hca_ptr); + +bail: + + if ( NULL != hca_ptr ) + { + if ( NULL != hca_ptr->lmr_hash_table ) + { + dapls_hash_free (hca_ptr->lmr_hash_table); + } + + dapl_os_free (hca_ptr, sizeof (DAPL_HCA)); + } + + return NULL; } /* From patrick.latifi at qlogic.com Tue Feb 19 03:17:35 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:17:35 -0800 Subject: [ofa-general] [PATCH 6/6] [DAPL v1] fix gethostname handling In-Reply-To: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> References: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111734.27820.93439.stgit@b64-10.internal.keyresearch.com> Guarantee NUL termination if hostname gets truncated. Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_ia_open.c | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/dapl/common/dapl_ia_open.c b/dapl/common/dapl_ia_open.c index cf15f61..3793048 100644 --- a/dapl/common/dapl_ia_open.c +++ b/dapl/common/dapl_ia_open.c @@ -395,6 +395,10 @@ dapli_assign_hca_ip_address ( */ rc = gethostname (hostname, NAMELEN); + + /* guarantee NUL termination if hostname gets truncated */ + hostname[NAMELEN-1] = '\0'; + /* * Strip off domain info if it exists (e.g. 
mynode.mydomain.com) */ From patrick.latifi at qlogic.com Tue Feb 19 03:19:12 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:19:12 -0800 Subject: [ofa-general] [PATCH][DAPL v2] memory leak fixes Message-ID: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Hi all, Here's a set of patches for dapl 2.0.x. Please have a look and let me know if there's any issue. All these patches apply against the master branch. Thanks, -pat From patrick.latifi at qlogic.com Tue Feb 19 03:19:17 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:19:17 -0800 Subject: [ofa-general] [PATCH 1/6] [DAPL v2] fix memory leak In-Reply-To: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> References: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111917.27986.74449.stgit@b64-10.internal.keyresearch.com> Fix memory leak. Signed-off-by: Patrick Marchand Latifi --- test/dapltest/cmd/dapl_netaddr.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/test/dapltest/cmd/dapl_netaddr.c b/test/dapltest/cmd/dapl_netaddr.c index 0b16303..a306335 100644 --- a/test/dapltest/cmd/dapl_netaddr.c +++ b/test/dapltest/cmd/dapl_netaddr.c @@ -137,6 +137,7 @@ DT_NetAddrLookupHostAddress (DAT_IA_ADDRESS_PTR to_netaddr, inet_ntoa(((struct sockaddr_in *)target->ai_addr)->sin_addr)); *to_netaddr = * ((DAT_IA_ADDRESS_PTR) target->ai_addr); + freeaddrinfo(target); return ( DAT_TRUE ); } From patrick.latifi at qlogic.com Tue Feb 19 03:19:22 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:19:22 -0800 Subject: [ofa-general] [PATCH 2/6] [DAPL v2] fix memory leak In-Reply-To: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> References: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111922.27986.7102.stgit@b64-10.internal.keyresearch.com> Fix memory leak 
Signed-off-by: Patrick Marchand Latifi --- test/dtest/dtest.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/test/dtest/dtest.c b/test/dtest/dtest.c index 57b5790..fa3b9a8 100755 --- a/test/dtest/dtest.c +++ b/test/dtest/dtest.c @@ -909,6 +909,7 @@ connect_ep( char *hostname, DAT_CONN_QUAL conn_id ) (rval >> 16) & 0xff, (rval >> 24) & 0xff, conn_id); remote_addr = *((DAT_IA_ADDRESS_PTR)target->ai_addr); + freeaddrinfo(target); LOGPRINTF("%d Connecting to server\n",getpid()); ret = dat_ep_connect( h_ep, From patrick.latifi at qlogic.com Tue Feb 19 03:19:27 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:19:27 -0800 Subject: [ofa-general] [PATCH 3/6] [DAPL v2] fix memory leak in error path In-Reply-To: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> References: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111927.27986.55673.stgit@b64-10.internal.keyresearch.com> Fix memory leak in error path. 
Signed-off-by: Patrick Marchand Latifi --- dapl/openib_cma/dapl_ib_util.c | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c index 23655b6..a9cf19c 100755 --- a/dapl/openib_cma/dapl_ib_util.c +++ b/dapl/openib_cma/dapl_ib_util.c @@ -121,8 +121,10 @@ static int getipaddr(char *name, char *addr, int len) } else { if (len >= res->ai_addrlen) memcpy(addr, res->ai_addr, res->ai_addrlen); - else + else { + freeaddrinfo(res); return EINVAL; + } freeaddrinfo(res); } From patrick.latifi at qlogic.com Tue Feb 19 03:19:32 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:19:32 -0800 Subject: [ofa-general] [PATCH 4/6] [DAPL v2] fix memory leak In-Reply-To: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> References: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111932.27986.71726.stgit@b64-10.internal.keyresearch.com> Fix memory leak. Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_ia_open.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/dapl/common/dapl_ia_open.c b/dapl/common/dapl_ia_open.c index 7ca5dba..a780a98 100644 --- a/dapl/common/dapl_ia_open.c +++ b/dapl/common/dapl_ia_open.c @@ -420,6 +420,7 @@ dapli_assign_hca_ip_address ( else { hca_ptr->hca_address = * ((DAT_SOCK_ADDR6 *)addr->ai_addr); + dapls_osd_freeaddrinfo (addr); } } From patrick.latifi at qlogic.com Tue Feb 19 03:19:37 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:19:37 -0800 Subject: [ofa-general] [PATCH 5/6] [DAPL v2] fix memory leak in error path In-Reply-To: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> References: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111937.27986.97621.stgit@b64-10.internal.keyresearch.com> Make sure we don't leak the hash table if dapl_hca_alloc fails. 
Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_hca_util.c | 54 ++++++++++++++++++++++++++----------------- 1 files changed, 33 insertions(+), 21 deletions(-) diff --git a/dapl/common/dapl_hca_util.c b/dapl/common/dapl_hca_util.c index 2cfef0e..f881a1e 100644 --- a/dapl/common/dapl_hca_util.c +++ b/dapl/common/dapl_hca_util.c @@ -65,33 +65,45 @@ dapl_hca_alloc ( DAPL_HCA *hca_ptr; hca_ptr = dapl_os_alloc (sizeof (DAPL_HCA)); - if ( NULL != hca_ptr ) + if ( NULL == hca_ptr ) { - dapl_os_memzero (hca_ptr, sizeof (DAPL_HCA)); + goto bail; + } - if ( DAT_SUCCESS == dapls_hash_create ( - DAPL_HASH_TABLE_DEFAULT_CAPACITY, &hca_ptr->lmr_hash_table) ) - { - dapl_os_lock_init(&hca_ptr->lock); - dapl_llist_init_head(&hca_ptr->ia_list_head); + dapl_os_memzero (hca_ptr, sizeof (DAPL_HCA)); + + if ( DAT_SUCCESS != dapls_hash_create ( + DAPL_HASH_TABLE_DEFAULT_CAPACITY, &hca_ptr->lmr_hash_table) ) + { + goto bail; + } + + dapl_os_lock_init(&hca_ptr->lock); + dapl_llist_init_head(&hca_ptr->ia_list_head); - hca_ptr->name = dapl_os_strdup(name); - hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; - hca_ptr->port_num = dapl_os_strtol(port, NULL, 0); - if (hca_ptr->name == NULL) - { - dapl_os_free (hca_ptr, sizeof (DAPL_HCA)); - hca_ptr = NULL; - } - } - else - { - dapl_os_free (hca_ptr, sizeof (DAPL_HCA)); - hca_ptr = NULL; - } + hca_ptr->name = dapl_os_strdup(name); + if ( NULL == hca_ptr->name ) + { + goto bail; } + hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; + hca_ptr->port_num = dapl_os_strtol(port, NULL, 0); + return (hca_ptr); + +bail: + if ( NULL != hca_ptr ) + { + if ( NULL != hca_ptr->lmr_hash_table ) + { + dapls_hash_free (hca_ptr->lmr_hash_table); + } + + dapl_os_free (hca_ptr, sizeof (DAPL_HCA)); + } + + return NULL; } /* From patrick.latifi at qlogic.com Tue Feb 19 03:19:42 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 03:19:42 -0800 Subject: [ofa-general] [PATCH 6/6] [DAPL v2] fix gethostname handling In-Reply-To: 
<20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> References: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Message-ID: <20080219111942.27986.83147.stgit@b64-10.internal.keyresearch.com> Guarantee NUL termination if hostname gets truncated. Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_ia_open.c | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/dapl/common/dapl_ia_open.c b/dapl/common/dapl_ia_open.c index a780a98..d85c1b6 100644 --- a/dapl/common/dapl_ia_open.c +++ b/dapl/common/dapl_ia_open.c @@ -395,6 +395,10 @@ dapli_assign_hca_ip_address ( */ rc = gethostname (hostname, NAMELEN); + + /* guarantee NUL termination if hostname gets truncated */ + hostname[NAMELEN-1] = '\0'; + /* * Strip off domain info if it exists (e.g. mynode.mydomain.com) */ From holt at sgi.com Tue Feb 19 03:59:38 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 19 Feb 2008 05:59:38 -0600 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219084357.GA22249@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> Message-ID: <20080219115938.GD11391@sgi.com> On Tue, Feb 19, 2008 at 09:43:57AM +0100, Nick Piggin wrote: > So I implemented mmu notifiers slightly differently. Andrea's mmu notifiers > are rather similar. However I have tried to make a point of minimising the > impact the the core mm/. I don't see why we need to invalidate or flush > anything when changing the pte to be _more_ permissive, and I don't > understand the need for invalidate_begin/invalidate_end pairs at all. > What I have done is basically create it so that the notifiers get called > basically in the same place as the normal TLB flushing is done, and nowhere > else. Because XPMEM needs to be able to sleep during its callout. For that, we need to move this outside of the page table lock and suddenly we need the begin/end pair again. There was considerable discussion about this exact point numerous times. 
We tried to develop the most inclusive design possible. Our design would even be extendable to IB, assuming they made some very disruptive changes to their MPI and communication libraries. IB would suffer the same problems XPMEM does in that the TLB entries need to be removed on a remote host which is operating completely independently. Thanks, Robin From eli at dev.mellanox.co.il Tue Feb 19 04:29:56 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Tue, 19 Feb 2008 14:29:56 +0200 Subject: [ofa-general] getting network statistics In-Reply-To: References: Message-ID: <1203424196.16145.1.camel@mtls03> cat /sys/class/infiniband/mlx4_0/ports/1/counters/* mlx4_* can be mthca* On Tue, 2008-02-19 at 11:03 +0200, David Minor wrote: > Under Linux with Mellanox ofed, how can I get real-time network > statistics. e.g. how many bytes are being sent and received over each > port at any given time? > > Thanks, > > David > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From dwrasmusm at rasmus.dk Tue Feb 19 04:36:04 2008 From: dwrasmusm at rasmus.dk (Marina Sullivan) Date: Tue, 19 Feb 2008 14:36:04 +0200 Subject: [ofa-general] Medications that you need. Message-ID: <01c87304$c14a6200$1ed7315c@dwrasmusm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Same quality! Save your money, buy pills immediately! http://geocities.com/randolphwaters195 We provide confidential and secure purchase! 
From jackm at dev.mellanox.co.il Tue Feb 19 04:40:33 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 14:40:33 +0200 Subject: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel In-Reply-To: References: Message-ID: <200802191440.33559.jackm@dev.mellanox.co.il> On Tuesday 19 February 2008 02:35, Tang, Changqing wrote: > > I have taken sometime to trace down this bug. > > When running OFED 1.3 on 2.6.9-42.ELsmp kernel, putenv("IBV_FORK_SAFE=1"); > causes ibv_get_device_list() to print out a Warning and set errno = 22 : > > A:errno=0 > libibverbs: Warning: fork()-safety requested but init failed > B:errno=22 > > errno keeps value 22 and causes ibv_modify_xrc_rcv_qp() to fail. > > Another way to make ibv_modify_xrc_rcv_qp() to fail is to set errno = 22 just > before calling this function. However, this only happens on 2.6.9-42.ELsmp kernel, > on 2.6.18-8.e15 kernel, it succeeds. > > 2.6.9-42.ELsmp is the kernel in Mellanox testing cluster helios.mellanox.com/ibd001-0032 > > Thanks for Mellanox guys to have a look > > > --CQ I fixed a bug just like this in OFED 1.3 on Jan 30. The fix is in OFED 1.3 RC4 -- are you using that version? If not, please install RC4 and re-test. (The bug was in kernel space: =========== IB/core: fixed thinko in return values for ib_uverbs_xxxx_xrc_rcv_qp() procs. Wed, 30 Jan 2008 15:11:08 +0000 (17:11 +0200) commit 78273e00083543535edd4c9db830b4ac45eb556a IB/core: fixed thinko in return values for ib_uverbs_xxxx_xrc_rcv_qp() procs. Incorrectly returned 0 instead of in_len in several procedures. ================= This bug caused userspace to return the "errno" value even when the kernel operation completed successfully, which is what you seem to be seeing. 
- Jack From obadiah at tirol.com Tue Feb 19 05:16:01 2008 From: obadiah at tirol.com (jayme laura) Date: Tue, 19 Feb 2008 13:16:01 +0000 Subject: [ofa-general] Hi originally Message-ID: <000501c87308$01974e40$16d15488@qjmmgk> Get the great discounts on popular software today at http://axobesoft.com All sofware is instantly available to download - No Need Wait! ALL OUR SOFTWARES ON ALL EUROPEAN LANGUAGES - USA, English, France, Italy, Spanish, German and more!!! Windows XP Pro With SP2 - $59.95 Adobe Acrobat Pro 8 - $69.95 Office 2003 Pro - $59.95 Adobe Photoshop CS2 - $79.95 AutoCAD 2007 - $149.95 Also we have so much soft for MACINTOSH!!! Microsoft Office 2004 for MAC – $79.95 Adobe Acrobat 7 Professional for MAC – $59.95 Adobe Creative Suite 2 Premium for MAC – $229.95 Macromedia Dreamweaver 8 for MAC – $69.95 To review full list of the offers, visit http://axobesoft.com From jackm at dev.mellanox.co.il Tue Feb 19 05:16:58 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 15:16:58 +0200 Subject: [ofa-general] Another XRC binary compatable issue =?iso-8859-1?q?for=09different_pthread?= version. In-Reply-To: References: <20080218152031.GJ21651@minantech.com> Message-ID: <200802191516.59087.jackm@dev.mellanox.co.il> On Monday 18 February 2008 17:29, Tang, Changqing wrote: > > Any application code access events_completed field ? HP-MPI does not. > > If no user code access 'mutex' 'cond' and 'events_completed', I suggest to > put the XRC fields in the middle of this structure. > > No current app seems to be using "events_completed", "cond", and "mutex", so we can do this: struct ibv_srq { struct ibv_context *context; void *srq_context; struct ibv_pd *pd; uint32_t handle; uint32_t events_completed; uint32_t xrc_srq_num; struct ibv_xrc_domain *xrc_domain; struct ibv_cq *xrc_cq; pthread_mutex_t mutex; pthread_cond_t cond; }; Unless there are objections, I will commit this change today to libibverbs in time for RC5. 
- Jack From andrea at qumranet.com Tue Feb 19 05:30:09 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Tue, 19 Feb 2008 14:30:09 +0100 Subject: [ofa-general] Re: [patch 3/6] mmu_notifier: invalidate_page callbacks In-Reply-To: <200802191946.10695.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <20080215193736.9d6e7da3.akpm@linux-foundation.org> <200802191946.10695.nickpiggin@yahoo.com.au> Message-ID: <20080219133009.GG7128@v2.random> On Tue, Feb 19, 2008 at 07:46:10PM +1100, Nick Piggin wrote: > On Sunday 17 February 2008 06:22, Christoph Lameter wrote: > > On Fri, 15 Feb 2008, Andrew Morton wrote: > > > > > flush_cache_page(vma, address, pte_pfn(*pte)); > > > > entry = ptep_clear_flush(vma, address, pte); > > > > + mmu_notifier(invalidate_page, mm, address); > > > > > > I just don't see how this can be done if the callee has another thread in > > > the middle of establishing IO against this region of memory. > > > ->invalidate_page() _has_ to be able to block. Confused. > > > > The page lock is held and that holds off I/O? > > I think the actual answer is that "it doesn't matter". Agreed. The PG_lock itself taken when invalidate_page is called, is used to serialize the VM against the VM, not the VM against I/O. From andrea at qumranet.com Tue Feb 19 05:34:05 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Tue, 19 Feb 2008 14:34:05 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <200802191954.14874.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802191954.14874.nickpiggin@yahoo.com.au> Message-ID: <20080219133405.GH7128@v2.random> On Tue, Feb 19, 2008 at 07:54:14PM +1100, Nick Piggin wrote: > As far as sleeping inside callbacks goes... I think there are big > problems with the patch (the sleeping patch and the external rmap > patch). I don't think it is workable in its current state. 
Either > we have to make some big changes to the core VM, or we have to turn > some locks into sleeping locks to do it properly AFAIKS. Neither > one is good. Agreed. The thing is quite simple, the moment we support xpmem the complexity in the mmu notifier patch start and there are hacks, duplicated functionality through the same xpmem callbacks etc... GRU can already be 100% supported (infact simpler and safer) with my patch. > But anyway, I don't really think the two approaches (Andrea's > notifiers vs sleeping/xrmap) should be tangled up too much. I > think Andrea's can possibly be quite unintrusive and useful very > soon. Yes, that's why I kept maintaining my patch and I posted the last revision to Andrew. I use pte/tlb locking of the core VM, it's unintrusive and obviously safe. Furthermore it can be extended with Christoph's stuff in a 100% backwards compatible fashion later if needed. From jackm at dev.mellanox.co.il Tue Feb 19 05:46:47 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 15:46:47 +0200 Subject: [ofa-general] Another XRC binary compatable issue =?iso-8859-1?q?for=09different_pthread?= version. In-Reply-To: References: <20080218152031.GJ21651@minantech.com> Message-ID: <200802191546.47313.jackm@dev.mellanox.co.il> On Monday 18 February 2008 17:29, Tang, Changqing wrote: > Any application code access events_completed field ?  HP-MPI does not. > > If no user code access 'mutex' 'cond' and 'events_completed', I suggest to > put the XRC fields in the middle of this structure. > > > --CQ Does the same issue exist with-respect-to ibv_qp struct? 
struct ibv_qp { struct ibv_context *context; void *qp_context; struct ibv_pd *pd; struct ibv_cq *send_cq; struct ibv_cq *recv_cq; struct ibv_srq *srq; uint32_t handle; uint32_t qp_num; enum ibv_qp_state state; enum ibv_qp_type qp_type; pthread_mutex_t mutex; pthread_cond_t cond; uint32_t events_completed; struct ibv_xrc_domain *xrc_domain; }; i.e., QC, do you access the xrc_domain member of this struct in your code? - Jack From andrea at qumranet.com Tue Feb 19 05:58:51 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Tue, 19 Feb 2008 14:58:51 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219084357.GA22249@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> Message-ID: <20080219135851.GI7128@v2.random> On Tue, Feb 19, 2008 at 09:43:57AM +0100, Nick Piggin wrote: > are rather similar. However I have tried to make a point of minimising the > impact the the core mm/. I don't see why we need to invalidate or flush I also tried hard to minimise the impact of the core mm/, I also argued with Christoph that cluttering mm/ wasn't a good idea for things like age_page that could be a 1 liner change instead of a multiple-liner change, without any loss of flexibility or readability. > anything when changing the pte to be _more_ permissive, and I don't Note that in my patch the invalidate_pages in mprotect can be trivially switched to a mprotect_pages with proper params. This will prevent page faults completely in the secondary MMU (there will only be tlb misses after the tlb flush just like for the core linux pte), and it'll allow all the secondary MMU pte blocks (512/1024 at time with my PT lock design) to be updated to have proper permissions matching the core linux pte. > understand the need for invalidate_begin/invalidate_end pairs at all. The need of the pairs is crystal clear to me: range_begin is needed for GRU _but_only_if_ range_end is called after releasing the reference that the VM holds on the page. 
_begin will flush the GRU tlb and at the same time it will take a mutex that will block further GRU tlb-miss-interrupts (no idea how they manage that nightmare locking, I didn't even try to add more locking to KVM and I get away with the fact KVM takes the pin on the page itself). My patch calls invalidate_page/pages before the reference is released on the page, so GRU will work fine despite lack of range_begin. Furthermore with my patch GRU will be auto-serialized by the PT lock w/o the need of any additional locking. > What I have done is basically create it so that the notifiers get called > basically in the same place as the normal TLB flushing is done, and nowhere > else. That was one of my objectives too. > I also wanted to avoid calling notifier code from inside eg. hardware TLB > or pte manipulation primitives. These things are already pretty well > spaghetti, so I'd like to just place them right where needed first... I > think eventually it will need a bit of a rethink to make it more consistent > and more general. But I prefer to put them in the caller for the moment. Your patch should also work for KVM but it's suboptimal, my patch can be orders of magnitude more efficient for GRU thanks to the invalidate_pages optimization. Christoph complained about having to call one method per pte. And adding invalidate_range is useless unless you fully support xpmem. You're calling invalidate_range in places that can't sleep... No idea why xpmem needs range_begin, I perfectly understand why GRU needs _begin with Christoph's patch (gru lacks the page pin) but I dunno why xpmem needs range_begin (xpmem has the page pin so I also think it could avoid using range_begin). Still to support GRU you need both to call invalidate_range in places that can sleep and you need the external rmap notifier. The moment you add xpmem into the equation your and my clean patches become Christoph's one...
From changquing.tang at hp.com Tue Feb 19 06:17:33 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 19 Feb 2008 14:17:33 +0000 Subject: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel In-Reply-To: <200802191440.33559.jackm@dev.mellanox.co.il> References: <200802191440.33559.jackm@dev.mellanox.co.il> Message-ID: I was told that helios.mellanox.com/ibd001-0032 was installed with Feb. 11 build. I just checked that it is actually Jan. 24 build. So I believe it is fixed. I will ask Mellanox people to update the system. --CQ > -----Original Message----- > From: Jack Morgenstein [mailto:jackm at dev.mellanox.co.il] > Sent: Tuesday, February 19, 2008 6:41 AM > To: general at lists.openfabrics.org > Cc: Tang, Changqing > Subject: Re: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel > > On Tuesday 19 February 2008 02:35, Tang, Changqing wrote: > > > > I have taken sometime to trace down this bug. > > > > When running OFED 1.3 on 2.6.9-42.ELsmp kernel, > > putenv("IBV_FORK_SAFE=1"); causes ibv_get_device_list() to > print out a Warning and set errno = 22 : > > > > A:errno=0 > > libibverbs: Warning: fork()-safety requested but init failed > > B:errno=22 > > > > errno keeps value 22 and causes ibv_modify_xrc_rcv_qp() to fail. > > > > Another way to make ibv_modify_xrc_rcv_qp() to fail is to > set errno = > > 22 just before calling this function. However, this only happens on > > 2.6.9-42.ELsmp kernel, on 2.6.18-8.e15 kernel, it succeeds. > > > > 2.6.9-42.ELsmp is the kernel in Mellanox testing cluster > > helios.mellanox.com/ibd001-0032 > > > > Thanks for Mellanox guys to have a look > > > > > > --CQ > > I fixed a bug just like this in OFED 1.3 on Jan 30. The fix > is in OFED 1.3 RC4 -- are you using that version? If not, > please install RC4 and re-test. > > (The bug was in kernel space: > > =========== > IB/core: fixed thinko in return values for > ib_uverbs_xxxx_xrc_rcv_qp() procs. 
> Wed, 30 Jan 2008 15:11:08 +0000 (17:11 +0200) commit > 78273e00083543535edd4c9db830b4ac45eb556a > IB/core: fixed thinko in return values for > ib_uverbs_xxxx_xrc_rcv_qp() procs. > > Incorrectly returned 0 instead of in_len in several procedures. > ================= > > This bug caused userspace to return the "errno" value even > when the kernel operation completed successfully, which is > what you seem to be seeing. > > - Jack > > > > > > > From changquing.tang at hp.com Tue Feb 19 06:26:09 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 19 Feb 2008 14:26:09 +0000 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. In-Reply-To: <200802191546.47313.jackm@dev.mellanox.co.il> References: <20080218152031.GJ21651@minantech.com> <200802191546.47313.jackm@dev.mellanox.co.il> Message-ID: I don't currently, but I hope you can fix it as well. There are four structures using mutex or cond: ibv_srq, ibv_qp, ibv_cq, ibv_context, Do we want to re-arrange all of them ? --CQ > -----Original Message----- > From: Jack Morgenstein [mailto:jackm at dev.mellanox.co.il] > Sent: Tuesday, February 19, 2008 7:47 AM > To: general at lists.openfabrics.org > Cc: Tang, Changqing; Gleb Natapov; Roland Dreier > Subject: Re: [ofa-general] Another XRC binary compatable > issue for different pthread version. > > On Monday 18 February 2008 17:29, Tang, Changqing wrote: > > Any application code access events_completed field ? > HP-MPI does not. > > > > If no user code access 'mutex' 'cond' and 'events_completed', I > > suggest to put the XRC fields in the middle of this structure. > > > > > > --CQ > > Does the same issue exist with-respect-to ibv_qp struct? 
> > struct ibv_qp { > struct ibv_context *context; > void *qp_context; > struct ibv_pd *pd; > struct ibv_cq *send_cq; > struct ibv_cq *recv_cq; > struct ibv_srq *srq; > uint32_t handle; > uint32_t qp_num; > enum ibv_qp_state state; > enum ibv_qp_type qp_type; > > pthread_mutex_t mutex; > pthread_cond_t cond; > uint32_t events_completed; > > struct ibv_xrc_domain *xrc_domain; }; > > i.e., QC, do you access the xrc_domain member of this struct > in your code? > > - Jack > From steiner at sgi.com Tue Feb 19 06:27:25 2008 From: steiner at sgi.com (Jack Steiner) Date: Tue, 19 Feb 2008 08:27:25 -0600 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219135851.GI7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> Message-ID: <20080219142725.GA23200@sgi.com> > On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > > understand the need for invalidate_begin/invalidate_end pairs at all. > > The need of the pairs is crystal clear to me: range_begin is needed > for GRU _but_only_if_ range_end is called after releasing the > reference that the VM holds on the page. _begin will flush the GRU tlb > and at the same time it will take a mutex that will block further GRU > tlb-miss-interrupts (no idea how they manange those nightmare locking, > I didn't even try to add more locking to KVM and I get away with the > fact KVM takes the pin on the page itself). As it turns out, no actual mutex is required. _begin_ simply increments a count of active range invalidates, _end_ decrements the count. New TLB dropins are deferred while range callouts are active. This would appear to be racy but the GRU has special hardware that simplifies locking. When the GRU sees a TLB invalidate, all outstanding misses & potentially inflight TLB dropins are marked by the GRU with a "kill" bit. When the dropin finally occurs, the dropin is ignored & the instruction is simply restarted. 
The instruction will fault again & the TLB dropin will be repeated. This is optimized for the case where invalidates are rare - true for users of the GRU. In general, though, I agree. Most users of mmu_notifiers would likely required a mutex or something equivalent. --- jack From CalebislandHouston at onenewsnow.com Tue Feb 19 09:00:41 2008 From: CalebislandHouston at onenewsnow.com (Orville Colon) Date: Tue, 19 Feb 2008 16:00:41 -0100 Subject: [ofa-general] Unsecured Business Loans Message-ID: If you have your own business and require IMMEDIATE ready money to spend ANY way you like or need Extra money to give your business a boost or need A low interest loan - NO STRINGS ATTACHED! Don't worry about approval... your Credit history will not disqualify you! http://clokkm.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From dwrwandersonm at rwanderson.net Tue Feb 19 07:06:20 2008 From: dwrwandersonm at rwanderson.net (Freeman Moss) Date: Tue, 19 Feb 2008 16:06:20 +0100 Subject: [ofa-general] Get any soft you need without delays. Message-ID: <01c87311$5daa0180$8477c051@dwrwandersonm> Great site to purchase more than 270 programs! Even for Macintosh! Software in all European languages available! Cheap prices and original programs only! There are special offers and discounts for you to make even more significant savings. Free of charge professional installation consultations could be of great help. Prompt reply on all your requests. Money back guarantee ensures the quality of product. http://geocities.com/sheppardfreeman Check our site for discounts! 
From jackm at dev.mellanox.co.il Tue Feb 19 07:15:01 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 17:15:01 +0200 Subject: [ofa-general] [PATCH] libibverbs: move fields in ibv_srq, ibv_qp, and ibv_cq to preserve binary compatibility Message-ID: <200802191715.01241.jackm@dev.mellanox.co.il> move pthread_cond_t fields to the end of structs, because their size changed from RHAS4 to RHAS5. In all the cases, the intervening entries were not accessed outside libibverbs (in non-XRC applications). The structs modified are: ibv_cq, ibv_srq, and ibv_qp. Pointed out by: Changqing Tang Signed-off-by: Jack Morgenstein --- Roland, I checked the following patch into ofed 1.3 libibverbs. I also included the ibv_qp and ibv_cq structures in the change. The xxx_events_completed fields are fortunately not used outside libibverbs at this point, but at some point, some user app may wish to read them. Additionally, for struct ibv_qp, some XRC user app may just decide to access the xrc_domain field in this struct. For now, they are only used for syncing in the ibv_destroy_xxx functions. I would rather do the change now, and not kick ourselves later. 
Jack diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h index a032a67..7ce5e65 100644 --- a/include/infiniband/verbs.h +++ b/include/infiniband/verbs.h @@ -564,13 +564,14 @@ struct ibv_srq { struct ibv_pd *pd; uint32_t handle; - pthread_mutex_t mutex; - pthread_cond_t cond; uint32_t events_completed; uint32_t xrc_srq_num; struct ibv_xrc_domain *xrc_domain; struct ibv_cq *xrc_cq; + + pthread_mutex_t mutex; + pthread_cond_t cond; }; struct ibv_qp { @@ -585,11 +586,12 @@ struct ibv_qp { enum ibv_qp_state state; enum ibv_qp_type qp_type; - pthread_mutex_t mutex; - pthread_cond_t cond; uint32_t events_completed; struct ibv_xrc_domain *xrc_domain; + + pthread_mutex_t mutex; + pthread_cond_t cond; }; struct ibv_comp_channel { @@ -605,10 +607,11 @@ struct ibv_cq { uint32_t handle; int cqe; - pthread_mutex_t mutex; - pthread_cond_t cond; uint32_t comp_events_completed; uint32_t async_events_completed; + + pthread_mutex_t mutex; + pthread_cond_t cond; }; struct ibv_ah { From dwppiam at ppia.net Tue Feb 19 07:45:09 2008 From: dwppiam at ppia.net (Janette Hicks) Date: Tue, 19 Feb 2008 23:45:09 +0800 Subject: [ofa-general] Try the new miracle weight loss herb Message-ID: <926008908.67428035978065@ppia.net> An HTML attachment was scrubbed... URL: From dwplanetmikem at planetmike.com Tue Feb 19 07:47:18 2008 From: dwplanetmikem at planetmike.com (Barton Gee) Date: Tue, 19 Feb 2008 16:47:18 +0100 Subject: [ofa-general] Work hard, play harder. Message-ID: <01c87317$16fadb00$57bcfd4d@dwplanetmikem> Feel like gambling? Golden Gate Casino is worth your attention. All popular casino games, great welcome bonus, fast to download, easy to use and completely free software! We guarantee absolute privacy of player information. Friendly 24/7 customer support, quick payouts, only fair gaming! http://geocities.com/fidelreyes810 Choose Golden Gate Casino! 
From jackm at dev.mellanox.co.il Tue Feb 19 07:52:28 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 17:52:28 +0200 Subject: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel In-Reply-To: References: <200802191440.33559.jackm@dev.mellanox.co.il> Message-ID: <200802191752.28347.jackm@dev.mellanox.co.il> On Tuesday 19 February 2008 16:17, Tang, Changqing wrote: > I just checked that it is actually Jan. 24 build. So I believe it is fixed. I will > ask Mellanox people to update the system. > > > --CQ > Best to wait a day or so for OFED 1.3 RC5 to be released (if you can wait -- otherwise, RC4). - Jack From jackm at dev.mellanox.co.il Tue Feb 19 07:54:08 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 17:54:08 +0200 Subject: [ofa-general] Another XRC binary compatable issue =?iso-8859-1?q?for=09different_pthread?= version. In-Reply-To: References: <200802191546.47313.jackm@dev.mellanox.co.il> Message-ID: <200802191754.08274.jackm@dev.mellanox.co.il> On Tuesday 19 February 2008 16:26, Tang, Changqing wrote: > > I don't currently, but I hope you can fix it as well. There are four > structures using mutex or cond: ibv_srq, ibv_qp, ibv_cq, ibv_context, > Do we want to re-arrange all of them ? > > --CQ > I did this for ibv_srq, ibv_qp, and ibv_cq. ibv_srq and ibv_qp -- had to. ibv_cq -- just in case. ibv_context -- does not use cond. If mutex changes, we're cooked anyway. - Jack From changquing.tang at hp.com Tue Feb 19 08:02:36 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Tue, 19 Feb 2008 16:02:36 +0000 Subject: [ofa-general] Another XRC binary compatable issue for different pthread version. In-Reply-To: <200802191754.08274.jackm@dev.mellanox.co.il> References: <200802191546.47313.jackm@dev.mellanox.co.il> <200802191754.08274.jackm@dev.mellanox.co.il> Message-ID: > > > I did this for ibv_srq, ibv_qp, and ibv_cq. > > ibv_srq and ibv_qp -- had to. > > ibv_cq -- just in case. 
> > ibv_context -- does not use cond. If mutex changes, we're > cooked anyway. I don't have many different OS, but on one of our machine: Linux mpia32-5 2.6.16.21-0.8-smp #1 SMP Mon Jul 3 18:25:39 UTC 2006 i686 i686 i386 GNU/Linux sizeof(mutex)=24, sizeof(cond)=48 For other machines (64bit): sizeof(mutex)=40, sizeof(cond)=48 So mutex size is really changed. --CQ > > - Jack > From jackm at dev.mellanox.co.il Tue Feb 19 08:05:23 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 18:05:23 +0200 Subject: [ofa-general] Another XRC binary compatable issue =?iso-8859-1?q?for=09different_pthread?= version. In-Reply-To: References: <200802191754.08274.jackm@dev.mellanox.co.il> Message-ID: <200802191805.23485.jackm@dev.mellanox.co.il> On Tuesday 19 February 2008 18:02, Tang, Changqing wrote: > > > > > I did this for ibv_srq, ibv_qp, and ibv_cq. > > > > ibv_srq and ibv_qp -- had to. > > > > ibv_cq -- just in case. > > > > ibv_context -- does not use cond. If mutex changes, we're > > cooked anyway. > > I don't have many different OS, but on one of our machine: > > Linux mpia32-5 2.6.16.21-0.8-smp #1 SMP Mon Jul 3 18:25:39 UTC 2006 i686 > i686 i386 GNU/Linux > > sizeof(mutex)=24, sizeof(cond)=48 > > For other machines (64bit): > > sizeof(mutex)=40, sizeof(cond)=48 > > So mutex size is really changed. > > --CQ This is not a problem. I don't expect an app compiled against a 32-bit library to work against a 64-bit library. The case we are dealing with here is if, on the same host, an upgrade results in binary incompatibility. 
- Jack From jackm at dev.mellanox.co.il Tue Feb 19 08:06:04 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 19 Feb 2008 18:06:04 +0200 Subject: [ofa-general] [PATCH] 1.3-rc4 leak in SA notification patch In-Reply-To: References: Message-ID: <200802191806.05050.jackm@dev.mellanox.co.il> On Monday 18 February 2008 20:28, Michael Brooks wrote: > Commit 2aec5c602c6a44e2a3a173339a9ab94549658e4b on 2008-02-05 modified > the SA notification patch to use alloc_mad() from within > sa_query.c:ib_sa_informinfo_query(), but didn't also modify the function > to use the associated free_mad() call on exit, resulting in a lost > kref_put() on the AH structure. > > Patch follows: > > diff --git a/kernel_patches/fixes/sean_local_sa_1_notifications.patch > b/kernel_patches/fixes/sean_local_sa_1_notifications.patch > index 9d272ff..e710897 100644 > --- a/kernel_patches/fixes/sean_local_sa_1_notifications.patch > +++ b/kernel_patches/fixes/sean_local_sa_1_notifications.patch > @@ -1061,7 +1061,7 @@ Index: > ofa_1_3_dev_kernel/drivers/infiniband/core/sa_query.c > +err2: > + *sa_query = NULL; > + ib_sa_client_put(query->sa_query.client); > -+ ib_free_send_mad(query->sa_query.mad_buf); > ++ free_mad(&query->sa_query); > +err1: > + kfree(query); > + return ret; Thanks for catching this one before RC5!! - Jack From tziporet at mellanox.co.il Tue Feb 19 08:12:08 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 19 Feb 2008 18:12:08 +0200 Subject: [ofa-general] Agenda for the OFED meeting today on RC5 readiness Message-ID: <6C2C79E72C305246B504CBA17B5500C90358FFDD@mtlexch01.mtl.com> Agenda for the OFED meeting today on RC5 readiness: 1. Status update - all 2. bugs review (blocker & critical only) 905 blo RHEL sean.hefty at intel.com scale-up issue with rdma_cm, requests rejected when excee... 910 cri All arlin.r.davis at intel.com dat_psp_create_any (v1 and v2) fails on OFED 1.3 rdma_cm,... 
874 cri Othe jeremy.brown at qlogic.com Intel MPI (IMB test) hangs intermittently on the qlogic HCA 895 cri Othe jim at mellanox.com kernel panic while running multiple test on sdp 919 cri SLES jsquyres at cisco.com IMB testcase over openmpi fails with segmentation fault 917 cri SLES rjwalsh at pathscale.com ipath build error on ppc64 3. Agree on date for RC5 and GA release My suggestion - RC5 20 or 21 Feb (depending on the bugs discussion) - GA a week after RC5 (27 or 28 Feb) Tziporet From sashak at voltaire.com Tue Feb 19 09:38:48 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 19 Feb 2008 17:38:48 +0000 Subject: [ofa-general] Re: [PATCH] opensm: convert to OSM_LOG() macro In-Reply-To: <1203396461.26729.460.camel@hrosenstock-ws.xsigo.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> <20080216220304.GD18527@sashak.voltaire.com> <20080216220350.GE18527@sashak.voltaire.com> <1203396461.26729.460.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080219173848.GA17477@sashak.voltaire.com> On 20:47 Mon 18 Feb , Hal Rosenstock wrote: > On Sat, 2008-02-16 at 22:03 +0000, Sasha Khapyorsky wrote: > > Convert osm_log() calls where caller function name is used to OSM_LOG() > > macro call which has caller function name as builtin. > > There are several changes in this patch in terms of the (old and new) > function name. Are those intended ? I tried to do so. Do you see a problem? 
Sasha From hrosenstock at xsigo.com Tue Feb 19 09:26:11 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 19 Feb 2008 09:26:11 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <1203105386.26729.202.camel@hrosenstock-ws.xsigo.com> <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> <20080216185043.GB18527@sashak.voltaire.com> <1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203441971.26729.539.camel@hrosenstock-ws.xsigo.com> On Mon, 2008-02-18 at 10:19 -0500, Chuck Hartley wrote: > I didn't see anything in the opensm.conf file that indicates that > OpenSM has a concept of priority. Is there some way to force it to > always be the master? In opensm.opts, # # HANDOVER - MULTIPLE SMs OPTIONS # # SM priority used for deciding who is the master sm_priority 15 15 is the highest priority Again, it's not advisable to mix SM flavors on the same subnet. > Is there some advantage or disadvantage to running multiple copies of > OpenSM on the subnet? This provides some redundancy in the case that either the master SM or something in the node dies such that the SM is not working. > If you have multiple switches connected as we do, should some of > the default settings on opensm.conf be changed? In particular, should > REASSIGN_LIDS be set to "yes"? I wouldn't reassign LIDs unless you have a specific reason to. Changing LIDs is disruptive. 
-- Hal > Chuck > From hrosenstock at xsigo.com Tue Feb 19 09:57:19 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 19 Feb 2008 09:57:19 -0800 Subject: [ofa-general] Re: [PATCH] opensm: convert to OSM_LOG() macro In-Reply-To: <20080219173848.GA17477@sashak.voltaire.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> <20080216220304.GD18527@sashak.voltaire.com> <20080216220350.GE18527@sashak.voltaire.com> <1203396461.26729.460.camel@hrosenstock-ws.xsigo.com> <20080219173848.GA17477@sashak.voltaire.com> Message-ID: <1203443839.26729.560.camel@hrosenstock-ws.xsigo.com> On Tue, 2008-02-19 at 17:38 +0000, Sasha Khapyorsky wrote: > On 20:47 Mon 18 Feb , Hal Rosenstock wrote: > > On Sat, 2008-02-16 at 22:03 +0000, Sasha Khapyorsky wrote: > > > Convert osm_log() calls where caller function name is used to OSM_LOG() > > > macro call which has caller function name as builtin. > > > > There are several changes in this patch in terms of the (old and new) > > function name. Are those intended ? > > I tried to do so. Do you see a problem? I'm not sure I'm following what you mean. Are you saying any function name differences (old -> new) are intended ? -- Hal > Sasha From uqpvu at bmoinvestorline.com Tue Feb 19 10:13:56 2008 From: uqpvu at bmoinvestorline.com (Kimberley Vickers) Date: Tue, 19 Feb 2008 19:13:56 +0100 Subject: [ofa-general] Saving money on your meds has never been easier. Message-ID: <01c8732b$92ecce80$5a2bd553@uqpvu> �CanadianPharmacy� provides extremely cheap meds, 100% generic. Customer service staff will help with initial order and provide information about meds, dosages, side effects, etc. All orders are delivered to door in discreet packaging.Visit our "CanadianPharmacy" site The aim of this message is to help you to achieve better health. 
http://instrotible.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From hartlch14 at gmail.com Tue Feb 19 10:17:56 2008 From: hartlch14 at gmail.com (Chuck Hartley) Date: Tue, 19 Feb 2008 13:17:56 -0500 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: <1203441971.26729.539.camel@hrosenstock-ws.xsigo.com> References: <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> <20080216185043.GB18527@sashak.voltaire.com> <1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> <1203441971.26729.539.camel@hrosenstock-ws.xsigo.com> Message-ID: Hmmm, I don't have an opensm.opts file on my system. Does it go in /var/cache/osm? The man page mentions it and that it contains a "complete set" of configuration options. Since I don't have one, is it only needed if you want to change some default setting(s)? Is there an example file somewhere in the distribution? >From the output of sminfo, it looks like the default priority is 1, so I probably want to change that. But at least it is the master: sminfo: sm lid 15 sm guid 0x5ad0000094076, activity count 1359786 priority 1 state 3 SMINFO_MASTER On Feb 19, 2008 12:26 PM, Hal Rosenstock wrote: > On Mon, 2008-02-18 at 10:19 -0500, Chuck Hartley wrote: > > I didn't see anything in the opensm.conf file that indicates that > > OpenSM has a concept of priority. Is there some way to force it to > > always be the master? > > In opensm.opts, > # > # HANDOVER - MULTIPLE SMs OPTIONS > # > # SM priority used for deciding who is the master > sm_priority 15 > > 15 is the highest priority > > Again, it's not advisable to mix SM flavors on the same subnet. > > > Is there some advantage or disadvantage to running multiple copies of > > OpenSM on the subnet? > > This provides some redundancy in the case that either the master SM or > something in the node dies such that the SM is not working. 
> > > If you have multiple switches connected as we do, should some of > > the default settings on opensm.conf be changed? In particular, should > > REASSIGN_LIDS be set to "yes"? > > I wouldn't reassign LIDs unless you have a specific reason to. Changing > LIDs is disruptive. > > -- Hal > > > Chuck > > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From sashak at voltaire.com Tue Feb 19 10:35:39 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 19 Feb 2008 18:35:39 +0000 Subject: [ofa-general] Re: [PATCH] opensm: convert to OSM_LOG() macro In-Reply-To: <1203443839.26729.560.camel@hrosenstock-ws.xsigo.com> References: <20080212181806.GF16074@sashak.voltaire.com> <20080215160632.GC7436@sashak.voltaire.com> <1203176276.26729.261.camel@hrosenstock-ws.xsigo.com> <20080216171358.GA18527@sashak.voltaire.com> <1203184674.26729.295.camel@hrosenstock-ws.xsigo.com> <20080216220304.GD18527@sashak.voltaire.com> <20080216220350.GE18527@sashak.voltaire.com> <1203396461.26729.460.camel@hrosenstock-ws.xsigo.com> <20080219173848.GA17477@sashak.voltaire.com> <1203443839.26729.560.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080219183539.GC17477@sashak.voltaire.com> On 09:57 Tue 19 Feb , Hal Rosenstock wrote: > On Tue, 2008-02-19 at 17:38 +0000, Sasha Khapyorsky wrote: > > On 20:47 Mon 18 Feb , Hal Rosenstock wrote: > > > On Sat, 2008-02-16 at 22:03 +0000, Sasha Khapyorsky wrote: > > > > Convert osm_log() calls where caller function name is used to OSM_LOG() > > > > macro call which has caller function name as builtin. > > > > > > There are several changes in this patch in terms of the (old and new) > > > function name. Are those intended ? > > > > I tried to do so. Do you see a problem? > > I'm not sure I'm following what you mean. Are you saying any function > name differences (old -> new) are intended ? 
Yes, it should be Sasha From hrosenstock at xsigo.com Tue Feb 19 09:59:26 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 19 Feb 2008 09:59:26 -0800 Subject: [ofa-general] RDMA Channel State In-Reply-To: References: Message-ID: <1203443966.26729.563.camel@hrosenstock-ws.xsigo.com> On Mon, 2008-02-18 at 07:17 -0600, Tim Hadeen wrote: > Hello, > We have server (Initiator) connected to Target via IB switch. We use > add_target to add the targets on the initiator. SM is running on > switch. After issuing add_target, we see RDMA channel ACTIVE, but > after that even if we shut down the initiator, we still see RDMA > channel Active. What is meant by RDMA channel ACTIVE ? How is this observed ? -- Hal > Is it due to Switch? What should we expect? > Is there any way to remove the target on initiator other than host > shutdown? > > Thanks > Tim > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Tue Feb 19 10:31:41 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 19 Feb 2008 10:31:41 -0800 Subject: [ofa-general] Performance of UDAPL RDMA vs IB verbs In-Reply-To: References: <1203107626.26729.210.camel@hrosenstock-ws.xsigo.com> <1203109449.26729.219.camel@hrosenstock-ws.xsigo.com> <20080216185043.GB18527@sashak.voltaire.com> <1203187759.26729.301.camel@hrosenstock-ws.xsigo.com> <1203441971.26729.539.camel@hrosenstock-ws.xsigo.com> Message-ID: <1203445901.26729.591.camel@hrosenstock-ws.xsigo.com> On Tue, 2008-02-19 at 13:17 -0500, Chuck Hartley wrote: > Hmmm, I don't have an opensm.opts file on my system. I don't use opensm.conf so I don't know what it can and cannot do. > Does it go in /var/cache/osm? Which OpenSM/OFED version are you using ?
> The man page mentions it and that it contains a "complete set" of > configuration options. Since I don't have one, is it only needed if > you want to change some default setting(s)? Is there an example file > somewhere in the distribution? You can generate it via opensm -c and then edit the file produced. I'm not sure about the how opensm.conf will work with a cache file (precedence of settings, etc.). > From the output of sminfo, it looks like the default priority is 1, so > I probably want to change that. But at least it is the master: > sminfo: sm lid 15 sm guid 0x5ad0000094076, activity count 1359786 > priority 1 state 3 SMINFO_MASTER sm priority can be changed on the opensm command line. -- Hal > On Feb 19, 2008 12:26 PM, Hal Rosenstock > wrote: > On Mon, 2008-02-18 at 10:19 -0500, Chuck Hartley wrote: > > I didn't see anything in the opensm.conf file that indicates > that > > OpenSM has a concept of priority. Is there some way to > force it to > > always be the master? > > > In opensm.opts, > # > # HANDOVER - MULTIPLE SMs OPTIONS > # > # SM priority used for deciding who is the master > sm_priority 15 > > 15 is the highest priority > > Again, it's not advisable to mix SM flavors on the same > subnet. > > > Is there some advantage or disadvantage to running multiple > copies of > > OpenSM on the subnet? > > > This provides some redundancy in the case that either the > master SM or > something in the node dies such that the SM is not working. > > > If you have multiple switches connected as we do, should > some of > > the default settings on opensm.conf be changed? In > particular, should > > REASSIGN_LIDS be set to "yes"? > > > I wouldn't reassign LIDs unless you have a specific reason to. > Changing > LIDs is disruptive. 
> > -- Hal > > > Chuck > > > From rdreier at cisco.com Tue Feb 19 10:46:41 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 19 Feb 2008 10:46:41 -0800 Subject: [ofa-general] [PATCH] libibverbs: move fields in ibv_srq, ibv_qp, and ibv_cq to preserve binary compatibility In-Reply-To: <200802191715.01241.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Tue, 19 Feb 2008 17:15:01 +0200") References: <200802191715.01241.jackm@dev.mellanox.co.il> Message-ID: > I would rather do the change now, and not kick ourselves later. I guess it's OK. I think we should note in a comment which fields are internal to libibverbs and which ones applications are allowed to read. From rdreier at cisco.com Tue Feb 19 10:49:17 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 19 Feb 2008 10:49:17 -0800 Subject: [ofa-general] Re: [PATCH] IPOIB/CM Increase retry counts for OFED-1.3 In-Reply-To: <1203151568.4539.10.camel@localhost.localdomain> (Shirley Ma's message of "Sat, 16 Feb 2008 00:46:08 -0800") References: <47B226CC.1060706@linux.vnet.ibm.com> <47B456D1.7030600@mellanox.co.il> <47B47460.4080700@linux.vnet.ibm.com> <1203093002.4539.5.camel@localhost.localdomain> <1203151568.4539.10.camel@localhost.localdomain> Message-ID: > The "send completion errors" indicates the packet hasn't been sent out > to the wire. It seems the retries you have added induced a little bit > delay for the packet to be sent out successfully, which might indicates > some flow control or other issues in the device transport layer? Actually for RC a send completion error can occur if an ACK is not received for the message. It would be useful to know what the status of the first failed send is though. > Do you have any suggestions on how to debug this problem? How can we > hack the mthca/ipoib code to narrow down the root cause of the problem? > From the behavior it looks like the local resource temp unavailable, but > it could be something else.
I definitely think we want to understand what the problem is. For example does it go away if you increase the RNR retry count but not the ACK timeout retry count? When the problem occurs is the receive SRQ empty (or is it only happening with ehca's non-SRQ IPoIB/cm)? - R. From rdreier at cisco.com Tue Feb 19 10:51:40 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 19 Feb 2008 10:51:40 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get a couple of small post-2.6.25-rc2 fixes: Pradeep Satyanarayana (1): IPoIB/cm: Fix ipoib_cm_dev_stop() cleanup when drain times out Roland Dreier (1): IB/mthca: Free correct MPT on error exit from mthca_fmr_alloc() drivers/infiniband/hw/mthca/mthca_mr.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 3b69855..3538da1 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -686,7 +686,7 @@ err_out_table: mthca_table_put(dev, dev->mr_table.mpt_table, key); err_out_mpt_free: - mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); + mthca_free(&dev->mr_table.mpt_alloc, key); return err; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7dd2ec4..52b1beb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -824,7 +824,6 @@ void ipoib_cm_dev_stop(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_rx *p; unsigned long begin; - LIST_HEAD(list); int ret; if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) @@ -857,9 
+856,12 @@ void ipoib_cm_dev_stop(struct net_device *dev) /* * assume the HW is wedged and just free up everything. */ - list_splice_init(&priv->cm.rx_flush_list, &list); - list_splice_init(&priv->cm.rx_error_list, &list); - list_splice_init(&priv->cm.rx_drain_list, &list); + list_splice_init(&priv->cm.rx_flush_list, + &priv->cm.rx_reap_list); + list_splice_init(&priv->cm.rx_error_list, + &priv->cm.rx_reap_list); + list_splice_init(&priv->cm.rx_drain_list, + &priv->cm.rx_reap_list); break; } spin_unlock_irq(&priv->lock); From kliteyn at mellanox.co.il Tue Feb 19 10:52:03 2008 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 19 Feb 2008 20:52:03 +0200 Subject: [ofa-general] nightly osm_sim report 2008-02-19:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2008-02-19 OpenSM git rev = ibutils git rev = Total=400 Pass=0 Fail=400 Pass: Failures: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree gnu-stallion-64.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree RhinoDDR.topo 10 FatTree FullGnu.topo 10 FatTree 4-ary-2-tree.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FTreeFail 4-ary-2-tree-missing-sw-link.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 10 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo From bunk at kernel.org Tue Feb 19 11:29:12 2008 From: bunk at kernel.org (Adrian Bunk) Date: Tue, 19 Feb 2008 21:29:12 +0200 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: remove dead 
code Message-ID: <20080219192912.GG31955@cs181133002.pp.htv.fi> This patch removes dead code spotted by the Coverity checker. Signed-off-by: Adrian Bunk --- drivers/infiniband/hw/nes/nes_verbs.c | 6 ------ 1 file changed, 6 deletions(-) --- linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c.old 2008-02-19 20:23:02.000000000 +0200 +++ linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c 2008-02-19 20:24:34.000000000 +0200 @@ -1638,95 +1638,89 @@ static struct ib_cq *nes_create_cq(struc err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, nesadapter->max_cq, &cq_num, &nesadapter->next_cq); if (err) { return ERR_PTR(err); } nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL); if (!nescq) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n"); return ERR_PTR(-ENOMEM); } nescq->hw_cq.cq_size = max(entries + 1, 5); nescq->hw_cq.cq_number = cq_num; nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1; if (context) { nes_ucontext = to_nesucontext(context); if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-EFAULT); } nesvnic->mcrq_ucontext = nes_ucontext; nes_ucontext->mcrqf = req.mcrqf; if (nes_ucontext->mcrqf) { if (nes_ucontext->mcrqf & 0x80000000) nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 12 + (nes_ucontext->mcrqf & 0xf) - 1; else if (nes_ucontext->mcrqf & 0x40000000) nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff; else nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1; nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); } nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n", (unsigned long)req.user_cq_buffer, entries); list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) { if (nespbl->user_base == (unsigned long )req.user_cq_buffer) { list_del(&nespbl->list); - err = 0; nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. 
nespbl=%p.\n", nespbl); break; } } - if (err) { - nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); - kfree(nescq); - return ERR_PTR(err); - } pbl_entries = nespbl->pbl_size >> 3; nescq->cq_mem_size = 0; } else { nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe); nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n", entries, nescq->cq_mem_size, nescq->hw_cq.cq_number); /* allocate the physical buffer space */ mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size, &nescq->hw_cq.cq_pbase); if (!mem) { printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n"); nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-ENOMEM); } memset(mem, 0, nescq->cq_mem_size); nescq->hw_cq.cq_vbase = mem; nescq->hw_cq.cq_head = 0; nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n", nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase, (u32)nescq->hw_cq.cq_pbase); } nescq->hw_cq.ce_handler = nes_iwarp_ce_handler; spin_lock_init(&nescq->lock); /* send CreateCQ request to CQP */ cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-ENOMEM); } cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; From sean.hefty at intel.com Tue Feb 19 11:34:13 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 19 Feb 2008 11:34:13 -0800 Subject: [ofa-general] RE: [PATCH 4/4] [RFC] Add changelog to rpm spec file In-Reply-To: <20082151257.VlKtl1Q6lnjta4gM@cisco.com> References: <20082151257.EnpeC3aKZ1UBvISb@cisco.com> <20082151257.VlKtl1Q6lnjta4gM@cisco.com> Message-ID: <000101c8732e$68c797e0$3c98070a@amr.corp.intel.com> > librdmacm.spec.in | 5 +++++ > 1 files changed, 5 insertions(+), 0 
deletions(-) > >diff --git a/librdmacm.spec.in b/librdmacm.spec.in >index dffe5bb..41673ab 100644 >--- a/librdmacm.spec.in >+++ b/librdmacm.spec.in >@@ -66,3 +66,8 @@ rm -rf $RPM_BUILD_ROOT > %files utils > %defattr(-,root,root,-) > %{_bindir}/* >+ >+%changelog >+ >+* Fri Feb 15 2008 Roland Dreier - 1.0.6-1 >+- Initial Fedora spec file >-- This change looks simple enough, but I don't fully understand it. (If you know of a good document pointer, please let me know.) Should an entry be made for future changes here? Does this replace the (empty) ChangeLog file? - Sean From dwselvarealem at selvareale.it Tue Feb 19 11:51:38 2008 From: dwselvarealem at selvareale.it (Gregory Harris) Date: Tue, 19 Feb 2008 21:51:38 +0200 Subject: [ofa-general] 0ffice XP - $79.95 Message-ID: <01c87341$9a6e2f80$c8db7b59@dwselvarealem> Hi I`ve found cool site, It is a lot of programs in all european languages - english, France, Italy, Spanish, German. and all of them cost very cheaply! It's a url http://geocities.com/rodward81 Also they have soft for MACINTOSH. And you can download them right after purhases! You will no need wait 2-3 week for CD delivery. Gregory Harris From tziporet at mellanox.co.il Tue Feb 19 12:20:51 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 19 Feb 2008 22:20:51 +0200 Subject: [ofa-general] OFED Feb 19 - meeting summary on RC5 readiness Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E55A@mtlexch01.mtl.com> OFED Feb 19 - meeting summary on RC5 readiness: 1. Status update: Intel - OK; Qlogic - may have some IPoIB issue; still work on Intel MPI issue Voltaire - OK; Have some bonding issues but no showstoppers Neteffect - OK; have PPC issue but its not a showstopper Mellanox - OK; update XRC structure; see one IPoIB issue under investigation IBM - OK; MPI issue is minor (some error flow) 2. Bugs review (blocker & critical only) 905 blo RHEL sean.hefty at intel.com scale-up issue with rdma_cm, requests rejected when excee... 
Should be fixed (need the patch from Sean) 910 cri All arlin.r.davis at intel.com dat_psp_create_any (v1 and v2) fails on OFED 1.3 rdma_cm,... Fixed 874 cri Othe jeremy.brown at qlogic.com Intel MPI (IMB test) hangs intermittently on the qlogic HCA On work - will not stop the release 895 cri Othe jim at mellanox.com kernel panic while running multiple test on sdp Hard to reproduce - will not stop the release 919 cri SLES jsquyres at cisco.com IMB testcase over openmpi fails with segmentation fault Minor issue (error flow) 917 cri SLES rjwalsh at pathscale.com ipath build error on ppc64 Fixed 3. Agreed on dates for RC5 and GA - RC5 Feb-21 - GA a week after RC5 (Feb-28) Tziporet From jim at mellanox.com Tue Feb 19 13:09:11 2008 From: jim at mellanox.com (Jim Mott) Date: Tue, 19 Feb 2008 13:09:11 -0800 Subject: [ofa-general] [PATCH 1/1] libsdp - Convert failed bind errno to what normal SDP and TCP use Message-ID: Another "wrong errno" detected by a Mellanox regression test. Signed-off-by: Jim Mott --- diff --git a/src/port.c b/src/port.c index 534ab5f..60cb90a 100644 --- a/src/port.c +++ b/src/port.c @@ -1340,7 +1340,7 @@ bind( if ( ret < 0 ) { /* Temporary sockets already closed by check_legal_bind or * find_free_port */ - errno = EADDRINUSE; + errno = EINVAL; goto done; } From gstreiff at NetEffect.com Tue Feb 19 13:43:54 2008 From: gstreiff at NetEffect.com (Glenn Streiff) Date: Tue, 19 Feb 2008 15:43:54 -0600 Subject: [ofa-general] RE: [2.6 patch] infiniband/hw/nes/nes_verbs.c: remove dead code In-Reply-To: <20080219192912.GG31955@cs181133002.pp.htv.fi> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFD6@venom2> > --- linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c.old > 2008-02-19 20:23:02.000000000 +0200 > +++ linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c > 2008-02-19 20:24:34.000000000 +0200 > @@ -1638,95 +1638,89 @@ static struct ib_cq *nes_create_cq(struc > > err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, > nesadapter->max_cq, &cq_num, 
&nesadapter->next_cq); > if (err) { > return ERR_PTR(err); > } > ... > nes_debug(NES_DBG_CQ, "CQ Virtual Address = > %08lX, size = %u.\n", > (unsigned long)req.user_cq_buffer, entries); > list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) { > if (nespbl->user_base == (unsigned long)req.user_cq_buffer) { > list_del(&nespbl->list); > - err = 0; > nes_debug(NES_DBG_CQ, "Found > PBL for virtual CQ. nespbl=%p.\n", > nespbl); > break; > } > } > - if (err) { > - nes_free_resource(nesadapter, > nesadapter->allocated_cqs, cq_num); > - kfree(nescq); > - return ERR_PTR(err); > - } > Hi, Adrian. I agree that coverity found some dead code as written. It may be the case that err should be set to some non-zero value upstream of the 0 assignment. Let me get Faisal to respond in that case since this looks like his code. He should be in later today. Otherwise I'll ack. Thanks for the review, Glenn From npiggin at suse.de Tue Feb 19 14:59:23 2008 From: npiggin at suse.de (Nick Piggin) Date: Tue, 19 Feb 2008 23:59:23 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219135851.GI7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> Message-ID: <20080219225923.GA18912@wotan.suse.de> On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > On Tue, Feb 19, 2008 at 09:43:57AM +0100, Nick Piggin wrote: > > are rather similar. However I have tried to make a point of minimising the > > impact the the core mm/. I don't see why we need to invalidate or flush > > I also tried hard to minimise the impact of the core mm/, I also > argued with Christoph that cluttering mm/ wasn't a good idea for > things like age_page that could be a 1 liner change instead of a > multiple-liner change, without any loss of flexibility or readability. 
> > > anything when changing the pte to be _more_ permissive, and I don't > > Note that in my patch the invalidate_pages in mprotect can be > trivially switched to a mprotect_pages with proper params. This will > prevent page faults completely in the secondary MMU (there will only > be tlb misses after the tlb flush just like for the core linux pte), > and it'll allow all the secondary MMU pte blocks (512/1024 at time > with my PT lock design) to be updated to have proper permissions > matching the core linux pte. > > > understand the need for invalidate_begin/invalidate_end pairs at all. > > The need of the pairs is crystal clear to me: range_begin is needed > for GRU _but_only_if_ range_end is called after releasing the > reference that the VM holds on the page. _begin will flush the GRU tlb > and at the same time it will take a mutex that will block further GRU > tlb-miss-interrupts (no idea how they manange those nightmare locking, > I didn't even try to add more locking to KVM and I get away with the > fact KVM takes the pin on the page itself). > > My patch calls invalidate_page/pages before the reference is released > on the page, so GRU will work fine despite lack of > range_begin. Furthermore with my patch GRU will be auto-serialized by > the PT lock w/o the need of any additional locking. That's why I don't understand the need for the pairs: it should be done like this. > > What I have done is basically create it so that the notifiers get called > > basically in the same place as the normal TLB flushing is done, and nowhere > > else. > > That was one of my objectives too. > > > I also wanted to avoid calling notifier code from inside eg. hardware TLB > > or pte manipulation primitives. These things are already pretty well > > spaghetti, so I'd like to just place them right where needed first... I > > think eventually it will need a bit of a rethink to make it more consistent > > and more general. But I prefer to do put them in the caller for the moment. 
> > Your patch should also work for KVM but it's suboptimal, my patch can > be orders of magnitude more efficient for GRU thanks to the > invalidate_pages optimization. Christoph complained about having to > call one method per pte. OK, I didn't see the invalidate_pages call... > And adding invalidate_range is useless unless you fully support > xpmem. You're calling invalidate_range in places that can't sleep... I thought that could be used by a non-sleeping user (not intending to try supporting sleeping users). If it is useless then it should go away (BTW. I didn't see your recent patch, some of my confusion I think stems from Christoph's novel way of merging and splitting patches). > No idea why xpmem needs range_begin, I perfectly understand why GRU > needs _begin with Chrisotph's patch (gru lacks the page pin) but I > dunno why xpmem needs range_begin (xpmem has the page pin so I also > think it could avoid using range_begin). Still to support GRU you need > both to call invalidate_range in places that can sleep and you need > the external rmap notifier. The moment you add xpmem into the equation > your and my clean patches become Christoph's one... Sorry, I kind of didn't have time to follow the conversation so well before; are there patches posted for gru and/or xpmem? From bunk at kernel.org Tue Feb 19 14:59:00 2008 From: bunk at kernel.org (Adrian Bunk) Date: Wed, 20 Feb 2008 00:59:00 +0200 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one Message-ID: <20080219225900.GR31955@cs181133002.pp.htv.fi> This patch fixes an off-by-one spotted by the Coverity checker. 
Signed-off-by: Adrian Bunk --- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c.old 2008-02-20 00:20:47.000000000 +0200 +++ linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c 2008-02-20 00:21:09.000000000 +0200 @@ -916,33 +916,33 @@ static struct ib_pd *nes_alloc_pd(struct if (!nespd) { nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); return ERR_PTR(-ENOMEM); } nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", nespd, nesvnic->nesibdev->ibdev.name); nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd; if (context) { nesucontext = to_nesucontext(context); nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells, NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", nespd->mmap_db_index, nespd->pd_id); - if (nespd->mmap_db_index > NES_MAX_USER_DB_REGIONS) { + if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) { nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); kfree(nespd); return ERR_PTR(-ENOMEM); } uresp.pd_id = nespd->pd_id; uresp.mmap_db_index = nespd->mmap_db_index; if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) { nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); kfree(nespd); return ERR_PTR(-EFAULT); } set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells); nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id; From npiggin at suse.de Tue Feb 19 15:04:27 2008 From: npiggin at suse.de (Nick Piggin) Date: Wed, 20 Feb 2008 00:04:27 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219142725.GA23200@sgi.com> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219142725.GA23200@sgi.com> Message-ID: <20080219230427.GB18912@wotan.suse.de> On 
Tue, Feb 19, 2008 at 08:27:25AM -0600, Jack Steiner wrote: > > On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > > > understand the need for invalidate_begin/invalidate_end pairs at all. > > > > The need of the pairs is crystal clear to me: range_begin is needed > > for GRU _but_only_if_ range_end is called after releasing the > > reference that the VM holds on the page. _begin will flush the GRU tlb > > and at the same time it will take a mutex that will block further GRU > > tlb-miss-interrupts (no idea how they manange those nightmare locking, > > I didn't even try to add more locking to KVM and I get away with the > > fact KVM takes the pin on the page itself). > > As it turns out, no actual mutex is required. _begin_ simply increments a > count of active range invalidates, _end_ decrements the count. New TLB > dropins are deferred while range callouts are active. > > This would appear to be racy but the GRU has special hardware that > simplifies locking. When the GRU sees a TLB invalidate, all outstanding > misses & potentially inflight TLB dropins are marked by the GRU with a > "kill" bit. When the dropin finally occurs, the dropin is ignored & the > instruction is simply restarted. The instruction will fault again & the TLB > dropin will be repeated. This is optimized for the case where invalidates > are rare - true for users of the GRU. OK (thanks to Robin as well). Now I understand why you are using it, but I don't understand why you don't defer new TLBs after the point where the linux pte changes. If you can do that, then you look and act much more like a TLB from the point of view of the Linux vm. 
From nickpiggin at yahoo.com.au Tue Feb 19 15:08:49 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Wed, 20 Feb 2008 10:08:49 +1100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080215064932.620773824@sgi.com> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> Message-ID: <200802201008.49933.nickpiggin@yahoo.com.au> On Friday 15 February 2008 17:49, Christoph Lameter wrote: > The invalidation of address ranges in a mm_struct needs to be > performed when pages are removed or permissions etc change. > > If invalidate_range_begin() is called with locks held then we > pass a flag into invalidate_range() to indicate that no sleeping is > possible. Locks are only held for truncate and huge pages. You can't sleep inside rcu_read_lock()! I must say that for a patch that is up to v8 or whatever and is posted twice a week to such a big cc list, it is kind of slack to not even test it and expect other people to review it. Also, what we are going to need here are not skeleton drivers that just do all the *easy* bits (of registering their callbacks), but actual fully working examples that do everything that any real driver will need to do. If not for the sanity of the driver writer, then for the sanity of the VM developers (I don't want to have to understand xpmem or infiniband in order to understand how the VM works). > In two cases we use invalidate_range_begin/end to invalidate > single pages because the pair allows holding off new references > (idea by Robin Holt). > > do_wp_page(): We hold off new references while we update the pte. > > xip_unmap: We are not taking the PageLock so we cannot > use the invalidate_page mmu_rmap_notifier. invalidate_range_begin/end > stands in. 
> > Signed-off-by: Andrea Arcangeli > Signed-off-by: Robin Holt > Signed-off-by: Christoph Lameter > > --- > mm/filemap_xip.c | 5 +++++ > mm/fremap.c | 3 +++ > mm/hugetlb.c | 3 +++ > mm/memory.c | 35 +++++++++++++++++++++++++++++------ > mm/mmap.c | 2 ++ > mm/mprotect.c | 3 +++ > mm/mremap.c | 7 ++++++- > 7 files changed, 51 insertions(+), 7 deletions(-) > > Index: linux-2.6/mm/fremap.c > =================================================================== > --- linux-2.6.orig/mm/fremap.c 2008-02-14 18:43:31.000000000 -0800 > +++ linux-2.6/mm/fremap.c 2008-02-14 18:45:07.000000000 -0800 > @@ -15,6 +15,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(uns > spin_unlock(&mapping->i_mmap_lock); > } > > + mmu_notifier(invalidate_range_begin, mm, start, start + size, 0); > err = populate_range(mm, vma, start, size, pgoff); > + mmu_notifier(invalidate_range_end, mm, start, start + size, 0); > if (!err && !(flags & MAP_NONBLOCK)) { > if (unlikely(has_write_lock)) { > downgrade_write(&mm->mmap_sem); > Index: linux-2.6/mm/memory.c > =================================================================== > --- linux-2.6.orig/mm/memory.c 2008-02-14 18:43:31.000000000 -0800 > +++ linux-2.6/mm/memory.c 2008-02-14 18:45:07.000000000 -0800 > @@ -51,6 +51,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -611,6 +612,9 @@ int copy_page_range(struct mm_struct *ds > if (is_vm_hugetlb_page(vma)) > return copy_hugetlb_page_range(dst_mm, src_mm, vma); > > + if (is_cow_mapping(vma->vm_flags)) > + mmu_notifier(invalidate_range_begin, src_mm, addr, end, 0); > + > dst_pgd = pgd_offset(dst_mm, addr); > src_pgd = pgd_offset(src_mm, addr); > do { > @@ -621,6 +625,11 @@ int copy_page_range(struct mm_struct *ds > vma, addr, next)) > return -ENOMEM; > } while (dst_pgd++, src_pgd++, addr = next, addr != end); > + > + if (is_cow_mapping(vma->vm_flags)) > + mmu_notifier(invalidate_range_end, 
src_mm, > + vma->vm_start, end, 0); > + > return 0; > } > > @@ -893,13 +902,16 @@ unsigned long zap_page_range(struct vm_a > struct mmu_gather *tlb; > unsigned long end = address + size; > unsigned long nr_accounted = 0; > + int atomic = details ? (details->i_mmap_lock != 0) : 0; > > lru_add_drain(); > tlb = tlb_gather_mmu(mm, 0); > update_hiwater_rss(mm); > + mmu_notifier(invalidate_range_begin, mm, address, end, atomic); > end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); > if (tlb) > tlb_finish_mmu(tlb, address, end); > + mmu_notifier(invalidate_range_end, mm, address, end, atomic); > return end; > } > Where do you invalidate for munmap()? Also, how to you resolve the case where you are not allowed to sleep? I would have thought either you have to handle it, in which case nobody needs to sleep; or you can't handle it, in which case the code is broken. From npiggin at suse.de Tue Feb 19 15:11:57 2008 From: npiggin at suse.de (Nick Piggin) Date: Wed, 20 Feb 2008 00:11:57 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219135851.GI7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> Message-ID: <20080219231157.GC18912@wotan.suse.de> On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > On Tue, Feb 19, 2008 at 09:43:57AM +0100, Nick Piggin wrote: > > anything when changing the pte to be _more_ permissive, and I don't > > Note that in my patch the invalidate_pages in mprotect can be > trivially switched to a mprotect_pages with proper params. This will > prevent page faults completely in the secondary MMU (there will only > be tlb misses after the tlb flush just like for the core linux pte), > and it'll allow all the secondary MMU pte blocks (512/1024 at time > with my PT lock design) to be updated to have proper permissions > matching the core linux pte. Sorry, I realise I still didn't get this through my head yet (and also have not seen your patch recently). 
So I don't know exactly what you are doing... But why does _anybody_ (why does Christoph's patches) need to invalidate when they are going to be more permissive? This should be done lazily by the driver, I would have thought. From tei at earth.interq.or.jp Tue Feb 19 15:23:24 2008 From: tei at earth.interq.or.jp (Adrian Milligan) Date: Wed, 20 Feb 2008 00:23:24 +0100 Subject: [ofa-general] The aim of this message is to help you to achieve better health. Message-ID: <01c87356$cdf7a600$416d1057@tei> There is a great selection of modern pharmaceutical products! The utmost care is taken about security of your information. You purchase will be 100% confidential. Prompt delivery, personal approach to each customer!Visit our "CanadianPharmacy" site Your own health and the health of those you love is something to be cherished. http://scoundalk.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From steiner at sgi.com Tue Feb 19 15:40:50 2008 From: steiner at sgi.com (Jack Steiner) Date: Tue, 19 Feb 2008 17:40:50 -0600 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219231157.GC18912@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> Message-ID: <20080219234049.GA27856@sgi.com> On Wed, Feb 20, 2008 at 12:11:57AM +0100, Nick Piggin wrote: > On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > > On Tue, Feb 19, 2008 at 09:43:57AM +0100, Nick Piggin wrote: > > > anything when changing the pte to be _more_ permissive, and I don't > > > > Note that in my patch the invalidate_pages in mprotect can be > > trivially switched to a mprotect_pages with proper params. 
This will > > prevent page faults completely in the secondary MMU (there will only > > be tlb misses after the tlb flush just like for the core linux pte), > > and it'll allow all the secondary MMU pte blocks (512/1024 at time > > with my PT lock design) to be updated to have proper permissions > > matching the core linux pte. > > Sorry, I realise I still didn't get this through my head yet (and also > have not seen your patch recently). So I don't know exactly what you > are doing... > > But why does _anybody_ (why does Christoph's patches) need to invalidate > when they are going to be more permissive? This should be done lazily by > the driver, I would have thought. Agree. Although for most real applications, the performance difference is probably negligible. --- jack From nickpiggin at yahoo.com.au Tue Feb 19 15:55:20 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Wed, 20 Feb 2008 10:55:20 +1100 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <20080215064933.376635032@sgi.com> References: <20080215064859.384203497@sgi.com> <20080215064933.376635032@sgi.com> Message-ID: <200802201055.21343.nickpiggin@yahoo.com.au> On Friday 15 February 2008 17:49, Christoph Lameter wrote: > These special additional callbacks are required because XPmem (and likely > other mechanisms) do use their own rmap (multiple processes on a series > of remote Linux instances may be accessing the memory of a process). > F.e. XPmem may have to send out notifications to remote Linux instances > and receive confirmation before a page can be freed. > > So we handle this like an additional Linux reverse map that is walked after > the existing rmaps have been walked. We leave the walking to the driver > that is then able to use something else than a spinlock to walk its reverse > maps. So we can actually call the driver without holding spinlocks while we > hold the Pagelock. 
I don't know how this is supposed to solve anything. The sleeping problem happens I guess mostly in truncate. And all you are doing is putting these rmap callbacks in page_mkclean and try_to_unmap. > However, we cannot determine the mm_struct that a page belongs to at > that point. The mm_struct can only be determined from the rmaps by the > device driver. > > We add another pageflag (PageExternalRmap) that is set if a page has > been remotely mapped (f.e. by a process from another Linux instance). > We can then only perform the callbacks for pages that are actually in > remote use. > > Rmap notifiers need an extra page bit and are only available > on 64 bit platforms. This functionality is not available on 32 bit! > > A notifier that uses the reverse maps callbacks does not need to provide > the invalidate_page() method that is called when locks are held. That doesn't seem right. To start with, the new callbacks aren't even called in the places where invalidate_page isn't allowed to sleep. The problem is unmap_mapping_range, right? And unmap_mapping_range must walk the rmaps with the mmap lock held, which is why it can't sleep. And it can't hold any mmap_sem so it cannot prevent address space modifications of the processes in question between the time you unmap them from the linux ptes with unmap_mapping_range, and the time that you unmap them from your driver. So in the meantime, you could have eg. a fault come in and set up a new page for one of the processes, and that page might even get exported via the same external driver. And now you have a totally inconsistent view. Preventing new mappings from being set up until the old mapping is completely flushed is basically what we need to ensure for any sane TLB as far as I can tell. To do that, you'll need to make the mmap lock sleep, and either take mmap_sem inside it (which is a deadlock condition at the moment), or make ptl sleep as well. 
These are simply the locks we use to prevent that from happening, so I can't see how you can possibly hope to have a coherent TLB without invalidating inside those locks. From dillowda at ornl.gov Tue Feb 19 16:12:45 2008 From: dillowda at ornl.gov (David Dillow) Date: Tue, 19 Feb 2008 19:12:45 -0500 Subject: [ofa-general] OFED 1.2.5 SRP driver did not send DID_NO_CONNECT on target failure In-Reply-To: <18A61515E49B764AB09447A336E51F560102AA6C@NAMAIL2.ad.lsil.com> References: <18A61515E49B764AB09447A336E51F560102AA6C@NAMAIL2.ad.lsil.com> Message-ID: <1203466365.21179.42.camel@lap75545.ornl.gov> On Thu, 2008-01-31 at 14:53 -0700, Shi, Harris wrote: > Currently when I was working a failover solution on Engenio storage > array with IB host connection, I noticed that there is no > DID_NO_CONNECT notification to upper level driver when the link to > target is failed. Our failover driver relied heavily on this notice > from OFED 1.2 SRP driver to send out command to do failover at the > expiration of link_down_timeout period. Due to this reason, the IO > command eventually times out and failover occurred much later than > what we expected. I am wondering if anyone is familiar with SRP driver > and possibly have something for me to work around the issue. I expect to be poking around in this area in the near future, and I noticed that Vu Pham recently posted a patch that will cause the SRP initiator in 1.3 to return DID_NO_CONNECT from the SCSI queue function. However, a quick search back through the ofed_kernel stack doesn't seem to indicate that OFED 1.2 ever returned DID_NO_CONNECT from the SRP initiator. It is likely that I missed it -- can you confirm OFED 1.2 was the version you were working with? What was the base OS -- RHEL4? SLES? I've been trying to determine the difference between the stack's semantics with DID_NO_CONNECT vs DID_BAD_TARGET, but they seem to be mostly treated the same. I've not gone looking to see if these are defined in some standard, though. 
Thanks! -- Dave Dillow National Center for Computational Science Oak Ridge National Laboratory (865) 241-6602 office From bunk at kernel.org Tue Feb 19 16:13:40 2008 From: bunk at kernel.org (Adrian Bunk) Date: Wed, 20 Feb 2008 02:13:40 +0200 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_cm.c: fix a memory leak Message-ID: <20080220001340.GT31955@cs181133002.pp.htv.fi> This patch fixes a memory leak spotted by the Coverity checker. Signed-off-by: Adrian Bunk --- 2d899a9f01001b2cccc210972b199b951b67ed51 diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index bd5cfea..78e845c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -370,11 +370,11 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, int ret = 0; u32 was_timer_set; + if (!cm_node) + return -EINVAL; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) return -1; - if (!cm_node) - return -EINVAL; /* new_send->timetosend = currenttime */ new_send->retrycount = NES_DEFAULT_RETRYS; From bunk at kernel.org Tue Feb 19 16:13:47 2008 From: bunk at kernel.org (Adrian Bunk) Date: Wed, 20 Feb 2008 02:13:47 +0200 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes.c: fix a check-after-use Message-ID: <20080220001347.GU31955@cs181133002.pp.htv.fi> This patch fixes a check-after-use spotted by the Coverity checker. 
Signed-off-by: Adrian Bunk --- e9468c7fb623f63582f5522493f6a43ab904e061 diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 7f8853b..b2112f5 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -567,12 +567,12 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i /* Init the adapter */ nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev); - nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; if (!nesdev->nesadapter) { printk(KERN_ERR PFX "Unable to initialize adapter.\n"); ret = -ENOMEM; goto bail5; } + nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; /* nesdev->base_doorbell_index = nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ From andrea at qumranet.com Tue Feb 19 16:46:35 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 01:46:35 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219225923.GA18912@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219225923.GA18912@wotan.suse.de> Message-ID: <20080220004635.GO7128@v2.random> On Tue, Feb 19, 2008 at 11:59:23PM +0100, Nick Piggin wrote: > That's why I don't understand the need for the pairs: it should be > done like this. Yes, except it can't be done like this for xpmem. > OK, I didn't see the invalidate_pages call... See the last patch I posted to Andrew, you've probably looked at the old patches, the old patches didn't work for GRU and didn't work for xpmem and they weren't optimized to cluster the invalidates for each 4k-large-pte. > I thought that could be used by a non-sleeping user (not intending > to try supporting sleeping users). If it is useless then it should > go away (BTW. I didn't see your recent patch, some of my confusion > I think stems from Christoph's novel way of merging and splitting > patches). 
I kept improving my patch in case the VM maintainers would consider xpmem requirements not workable from a linux-VM point of view, and they preferred to have something obviously safe, straightforward and non-intrusive, even though it doesn't support the only sleeping user out there I know of (xpmem). My patch supports KVM and GRU (and any other non-sleeping user). > > No idea why xpmem needs range_begin, I perfectly understand why GRU > > needs _begin with Christoph's patch (gru lacks the page pin) but I > > dunno why xpmem needs range_begin (xpmem has the page pin so I also > > think it could avoid using range_begin). Still to support GRU you need > > both to call invalidate_range in places that can sleep and you need > > the external rmap notifier. The moment you add xpmem into the equation > > your and my clean patches become Christoph's one... > > Sorry, I kind of didn't have time to follow the conversation so well > before; are there patches posted for gru and/or xpmem? There's some xpmem code posted but the posted one isn't using the mmu notifiers yet. GRU code may be available from Jack. I only know for sure their requirements in terms of mmu notifiers. From patrick.latifi at qlogic.com Tue Feb 19 16:49:45 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 16:49:45 -0800 Subject: [ofa-general] [PATCH] [DAPL v1] fix memory leak in provider specific post send/recv Message-ID: <20080220004945.22532.85093.stgit@b64-10.internal.keyresearch.com> Fix memory leak in provider specific post send/recv if there's more than DEFAULT_DS_ENTRIES iovecs.
Signed-off-by: Patrick Marchand Latifi --- dapl/openib/dapl_ib_dto.h | 30 ++++++++++++++++++++++-------- dapl/openib_cma/dapl_ib_dto.h | 30 +++++++++++++++++++++--------- dapl/openib_scm/dapl_ib_dto.h | 30 ++++++++++++++++++++++-------- 3 files changed, 65 insertions(+), 25 deletions(-) diff --git a/dapl/openib/dapl_ib_dto.h b/dapl/openib/dapl_ib_dto.h index c42ea8d..8b0e8fe 100644 --- a/dapl/openib/dapl_ib_dto.h +++ b/dapl/openib/dapl_ib_dto.h @@ -67,10 +67,11 @@ dapls_ib_post_recv ( IN DAT_LMR_TRIPLET *local_iov ) { ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_recv_wr wr; struct ibv_recv_wr *bad_wr; DAT_COUNT i, total_len; + int ret; dapl_dbg_log (DAPL_DBG_TYPE_EP, " post_rcv: ep %p cookie %p segs %d l_iov %p\n", @@ -79,7 +80,8 @@ dapls_ib_post_recv ( if ( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -112,9 +114,14 @@ dapls_ib_post_recv ( if (cookie != NULL) cookie->val.dto.size = total_len; - if (ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_recv") ); + return DAT_SUCCESS; } @@ -140,11 +147,12 @@ dapls_ib_post_send ( remote_iov, completion_flags); ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; ib_hca_transport_t *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans; DAT_COUNT i, total_len; + int ret; dapl_dbg_log 
(DAPL_DBG_TYPE_EP, " post_snd: ep %p cookie %p segs %d l_iov %p\n", @@ -153,7 +161,8 @@ dapls_ib_post_send ( if( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -213,9 +222,14 @@ dapls_ib_post_send ( " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); - if (ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_send") ); + dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n"); return DAT_SUCCESS; } diff --git a/dapl/openib_cma/dapl_ib_dto.h b/dapl/openib_cma/dapl_ib_dto.h index c61ef61..4f077de 100644 --- a/dapl/openib_cma/dapl_ib_dto.h +++ b/dapl/openib_cma/dapl_ib_dto.h @@ -67,10 +67,11 @@ dapls_ib_post_recv ( IN DAT_LMR_TRIPLET *local_iov ) { ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_recv_wr wr; struct ibv_recv_wr *bad_wr; DAT_COUNT i, total_len; + int ret; dapl_dbg_log(DAPL_DBG_TYPE_EP, " post_rcv: ep %p cookie %p segs %d l_iov %p\n", @@ -79,7 +80,7 @@ dapls_ib_post_recv ( if (segments <= DEFAULT_DS_ENTRIES) ds_array_p = ds_array; else - ds_array_p = + ds_array_start_p = ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) @@ -113,9 +114,14 @@ dapls_ib_post_recv ( if (cookie != NULL) cookie->val.dto.size = total_len; - if (ibv_post_recv(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_recv(ep_ptr->qp_handle->cm_id->qp, &wr, 
&bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_recv") ); + return DAT_SUCCESS; } @@ -142,12 +148,13 @@ dapls_ib_post_send ( remote_iov, completion_flags); ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; ib_hca_transport_t *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans; DAT_COUNT i, total_len; + int ret; dapl_dbg_log(DAPL_DBG_TYPE_EP, " post_snd: ep %p cookie %p segs %d l_iov %p\n", @@ -156,7 +163,7 @@ dapls_ib_post_send ( if(segments <= DEFAULT_DS_ENTRIES) ds_array_p = ds_array; else - ds_array_p = + ds_array_start_p = ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) @@ -217,9 +224,14 @@ dapls_ib_post_send ( " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); - if (ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); - + ret = ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr); + + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_send") ); + dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n"); return DAT_SUCCESS; } diff --git a/dapl/openib_scm/dapl_ib_dto.h b/dapl/openib_scm/dapl_ib_dto.h index c019cc8..cede876 100644 --- a/dapl/openib_scm/dapl_ib_dto.h +++ b/dapl/openib_scm/dapl_ib_dto.h @@ -67,10 +67,11 @@ dapls_ib_post_recv ( IN DAT_LMR_TRIPLET *local_iov ) { ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_recv_wr wr; struct ibv_recv_wr *bad_wr; DAT_COUNT i, total_len; + int ret; dapl_dbg_log (DAPL_DBG_TYPE_EP, " post_rcv: ep %p cookie %p 
segs %d l_iov %p\n", @@ -79,7 +80,8 @@ dapls_ib_post_recv ( if ( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -112,9 +114,14 @@ dapls_ib_post_recv ( if (cookie != NULL) cookie->val.dto.size = total_len; - if (ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_recv") ); + return DAT_SUCCESS; } @@ -140,11 +147,12 @@ dapls_ib_post_send ( remote_iov, completion_flags); ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; ib_hca_transport_t *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans; DAT_COUNT i, total_len; + int ret; dapl_dbg_log (DAPL_DBG_TYPE_EP, " post_snd: ep %p cookie %p segs %d l_iov %p\n", @@ -153,7 +161,8 @@ dapls_ib_post_send ( if( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -213,9 +222,14 @@ dapls_ib_post_send ( " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); - if (ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + 
return( dapl_convert_errno(EFAULT,"ibv_send") ); + dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n"); return DAT_SUCCESS; } From andrea at qumranet.com Tue Feb 19 16:52:06 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 01:52:06 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219230427.GB18912@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219142725.GA23200@sgi.com> <20080219230427.GB18912@wotan.suse.de> Message-ID: <20080220005206.GP7128@v2.random> On Wed, Feb 20, 2008 at 12:04:27AM +0100, Nick Piggin wrote: > On Tue, Feb 19, 2008 at 08:27:25AM -0600, Jack Steiner wrote: > > > On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > > > > understand the need for invalidate_begin/invalidate_end pairs at all. > > > > > > The need of the pairs is crystal clear to me: range_begin is needed > > > for GRU _but_only_if_ range_end is called after releasing the > > > reference that the VM holds on the page. _begin will flush the GRU tlb > > > and at the same time it will take a mutex that will block further GRU > > > tlb-miss-interrupts (no idea how they manange those nightmare locking, > > > I didn't even try to add more locking to KVM and I get away with the > > > fact KVM takes the pin on the page itself). > > > > As it turns out, no actual mutex is required. _begin_ simply increments a > > count of active range invalidates, _end_ decrements the count. New TLB > > dropins are deferred while range callouts are active. > > > > This would appear to be racy but the GRU has special hardware that > > simplifies locking. When the GRU sees a TLB invalidate, all outstanding > > misses & potentially inflight TLB dropins are marked by the GRU with a > > "kill" bit. When the dropin finally occurs, the dropin is ignored & the > > instruction is simply restarted. The instruction will fault again & the TLB > > dropin will be repeated. 
This is optimized for the case where invalidates > > are rare - true for users of the GRU. > > OK (thanks to Robin as well). Now I understand why you are using it, > but I don't understand why you don't defer new TLBs after the point > where the linux pte changes. If you can do that, then you look and > act much more like a TLB from the point of view of the Linux vm. Christoph was forced to put the invalidate_range callback _after_ dropping the PT lock because xpmem has to wait I/O there. But invalidate_range is called after freeing the VM reference on the pages so then GRU needed a _range_begin too because GRU has to flush the tlb before the VM reference on the page is released (xpmem and KVM pin the pages mapped by the secondary mmu, GRU doesn't). So then invalidate_range was renamed to invalidate_range_end. From patrick.latifi at qlogic.com Tue Feb 19 16:54:45 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Tue, 19 Feb 2008 16:54:45 -0800 Subject: [ofa-general] [PATCH] [DAPL v2] fix memory leak in provider specific post send/recv Message-ID: <20080220005445.1275.52794.stgit@b64-10.internal.keyresearch.com> Fix memory leak in provider specific post send/recv if there's more than DEFAULT_DS_ENTRIES iovecs. 
Signed-off-by: Patrick Marchand Latifi --- dapl/openib/dapl_ib_dto.h | 30 +++++++++++++++++++++------- dapl/openib_cma/dapl_ib_dto.h | 44 +++++++++++++++++++++++++++++------------ dapl/openib_scm/dapl_ib_dto.h | 30 +++++++++++++++++++++------- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/dapl/openib/dapl_ib_dto.h b/dapl/openib/dapl_ib_dto.h index c42ea8d..8b0e8fe 100644 --- a/dapl/openib/dapl_ib_dto.h +++ b/dapl/openib/dapl_ib_dto.h @@ -67,10 +67,11 @@ dapls_ib_post_recv ( IN DAT_LMR_TRIPLET *local_iov ) { ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_recv_wr wr; struct ibv_recv_wr *bad_wr; DAT_COUNT i, total_len; + int ret; dapl_dbg_log (DAPL_DBG_TYPE_EP, " post_rcv: ep %p cookie %p segs %d l_iov %p\n", @@ -79,7 +80,8 @@ dapls_ib_post_recv ( if ( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -112,9 +114,14 @@ dapls_ib_post_recv ( if (cookie != NULL) cookie->val.dto.size = total_len; - if (ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_recv") ); + return DAT_SUCCESS; } @@ -140,11 +147,12 @@ dapls_ib_post_send ( remote_iov, completion_flags); ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; ib_hca_transport_t *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans; DAT_COUNT i, total_len; + int ret; dapl_dbg_log 
(DAPL_DBG_TYPE_EP, " post_snd: ep %p cookie %p segs %d l_iov %p\n", @@ -153,7 +161,8 @@ dapls_ib_post_send ( if( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -213,9 +222,14 @@ dapls_ib_post_send ( " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); - if (ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_send") ); + dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n"); return DAT_SUCCESS; } diff --git a/dapl/openib_cma/dapl_ib_dto.h b/dapl/openib_cma/dapl_ib_dto.h index cea989b..b614740 100644 --- a/dapl/openib_cma/dapl_ib_dto.h +++ b/dapl/openib_cma/dapl_ib_dto.h @@ -71,10 +71,11 @@ dapls_ib_post_recv ( IN DAT_LMR_TRIPLET *local_iov ) { ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_recv_wr wr; struct ibv_recv_wr *bad_wr; DAT_COUNT i, total_len; + int ret; dapl_dbg_log(DAPL_DBG_TYPE_EP, " post_rcv: ep %p cookie %p segs %d l_iov %p\n", @@ -83,7 +84,7 @@ dapls_ib_post_recv ( if (segments <= DEFAULT_DS_ENTRIES) ds_array_p = ds_array; else - ds_array_p = + ds_array_start_p = ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) @@ -117,9 +118,14 @@ dapls_ib_post_recv ( if (cookie != NULL) cookie->val.dto.size = total_len; - if (ibv_post_recv(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_recv(ep_ptr->qp_handle->cm_id->qp, &wr, 
&bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_recv") ); + return DAT_SUCCESS; } @@ -145,12 +151,13 @@ dapls_ib_post_send ( remote_iov, completion_flags); ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; ib_hca_transport_t *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans; DAT_COUNT i, total_len; + int ret; dapl_dbg_log(DAPL_DBG_TYPE_EP, " post_snd: ep %p cookie %p segs %d l_iov %p\n", @@ -159,7 +166,7 @@ dapls_ib_post_send ( if(segments <= DEFAULT_DS_ENTRIES) ds_array_p = ds_array; else - ds_array_p = + ds_array_start_p = ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) @@ -221,9 +228,14 @@ dapls_ib_post_send ( " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); - if (ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); - + ret = ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr); + + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_send") ); + dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n"); return DAT_SUCCESS; } @@ -291,10 +303,11 @@ dapls_ib_post_ext_send ( remote_iov, completion_flags); ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; DAT_COUNT i, total_len; + int ret; dapl_dbg_log(DAPL_DBG_TYPE_EP, " post_snd: ep %p cookie %p segs %d l_iov %p\n", @@ -303,7 +316,7 @@ dapls_ib_post_ext_send ( if(segments <= DEFAULT_DS_ENTRIES) ds_array_p = ds_array; else - ds_array_p = + ds_array_start_p = ds_array_p 
= dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) @@ -392,8 +405,13 @@ dapls_ib_post_ext_send ( " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); - if (ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr); + + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_send") ); dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n"); return DAT_SUCCESS; diff --git a/dapl/openib_scm/dapl_ib_dto.h b/dapl/openib_scm/dapl_ib_dto.h index c019cc8..cede876 100644 --- a/dapl/openib_scm/dapl_ib_dto.h +++ b/dapl/openib_scm/dapl_ib_dto.h @@ -67,10 +67,11 @@ dapls_ib_post_recv ( IN DAT_LMR_TRIPLET *local_iov ) { ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_recv_wr wr; struct ibv_recv_wr *bad_wr; DAT_COUNT i, total_len; + int ret; dapl_dbg_log (DAPL_DBG_TYPE_EP, " post_rcv: ep %p cookie %p segs %d l_iov %p\n", @@ -79,7 +80,8 @@ dapls_ib_post_recv ( if ( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -112,9 +114,14 @@ dapls_ib_post_recv ( if (cookie != NULL) cookie->val.dto.size = total_len; - if (ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_recv") ); + return DAT_SUCCESS; } @@ -140,11 
+147,12 @@ dapls_ib_post_send ( remote_iov, completion_flags); ib_data_segment_t ds_array[DEFAULT_DS_ENTRIES]; - ib_data_segment_t *ds_array_p; + ib_data_segment_t *ds_array_p, *ds_array_start_p; struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; ib_hca_transport_t *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans; DAT_COUNT i, total_len; + int ret; dapl_dbg_log (DAPL_DBG_TYPE_EP, " post_snd: ep %p cookie %p segs %d l_iov %p\n", @@ -153,7 +161,8 @@ dapls_ib_post_send ( if( segments <= DEFAULT_DS_ENTRIES ) ds_array_p = ds_array; else - ds_array_p = dapl_os_alloc(segments * sizeof(ib_data_segment_t)); + ds_array_start_p = ds_array_p = + dapl_os_alloc(segments * sizeof(ib_data_segment_t)); if (NULL == ds_array_p) return (DAT_INSUFFICIENT_RESOURCES); @@ -213,9 +222,14 @@ dapls_ib_post_send ( " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); - if (ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr)) - return( dapl_convert_errno(EFAULT,"ibv_recv") ); + ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr); + if (segments > DEFAULT_DS_ENTRIES) + dapl_os_free(ds_array_start_p, segments * sizeof(ib_data_segment_t)); + + if (ret) + return( dapl_convert_errno(EFAULT,"ibv_send") ); + dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n"); return DAT_SUCCESS; } From bunk at kernel.org Tue Feb 19 16:58:48 2008 From: bunk at kernel.org (Adrian Bunk) Date: Wed, 20 Feb 2008 02:58:48 +0200 Subject: [ofa-general] infiniband/hw/nes/nes_cm.c: use-after-free Message-ID: <20080220005848.GA31955@cs181133002.pp.htv.fi> Spotted by the Coverity checker. <-- snip --> ... 
static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, struct nes_cm_listener *listener, int free_hanging_nodes) { int ret = 1; unsigned long flags; spin_lock_irqsave(&cm_core->listen_list_lock, flags); if (!atomic_dec_return(&listener->ref_count)) { list_del(&listener->list); /* decrement our listen node count */ atomic_dec(&cm_core->listen_node_cnt); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); if (listener->nesvnic) { nes_manage_apbvt(listener->nesvnic, listener->loc_port, PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); } nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); kfree(listener); <---------------------------------- ret = 0; cm_listens_destroyed++; } else { spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } if (listener) { if (atomic_read(&listener->pend_accepts_cnt) > 0) ... ^^^^^^^^^^^^^^^^^^^^^^^^^^ <-- snip --> cu Adrian -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed From andrea at qumranet.com Tue Feb 19 17:00:38 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 02:00:38 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <200802201008.49933.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> Message-ID: <20080220010038.GQ7128@v2.random> On Wed, Feb 20, 2008 at 10:08:49AM +1100, Nick Piggin wrote: > You can't sleep inside rcu_read_lock()! > > I must say that for a patch that is up to v8 or whatever and is > posted twice a week to such a big cc list, it is kind of slack to > not even test it and expect other people to review it. Well, xpmem requirements are complex. As as side effect of the simplicity of my approach, my patch is 100% safe since #v1. 
Now it also works for GRU and it cluster invalidates. > Also, what we are going to need here are not skeleton drivers > that just do all the *easy* bits (of registering their callbacks), > but actual fully working examples that do everything that any > real driver will need to do. If not for the sanity of the driver I've a fully working scenario for my patch, in fact I didn't post the mmu notifier patch until I got KVM to swap 100% reliably to be sure I would post something that works well. mmu notifiers are already used in KVM for: 1) 100% reliable and efficient swapping of guest physical memory 2) copy-on-writes of writeprotect faults after ksm page sharing of guest physical memory 3) ballooning using madvise to give the guest memory back to the host My implementation is the most handy because it requires zero changes to the ksm code too (no explicit mmu notifier calls after ptep_clear_flush) and it's also 100% safe (no mess with schedules over rcu_read_lock), no "atomic" parameters, and it doesn't open a window where sptes have a view on older pages and linux pte has a view on newer pages (this can happen with remap_file_pages with my KVM swapping patch to use V8 Christoph's patch). > Also, how do you resolve the case where you are not allowed to sleep? > I would have thought either you have to handle it, in which case nobody > needs to sleep; or you can't handle it, in which case the code is > broken. I also asked exactly this, glad you reasked this too. From dwscotiaspeedworldm at scotiaspeedworld.ca Tue Feb 19 17:03:43 2008 From: dwscotiaspeedworldm at scotiaspeedworld.ca (Rosanna Hayden) Date: Wed, 20 Feb 2008 09:03:43 +0800 Subject: [ofa-general] Get any soft you need without delays. Message-ID: <01c8739f$7df10180$a456087b@dwscotiaspeedworldm> Purchase perfectly working software available in all European languages! Also for Macintosh! Fast to download, only original versions are offered at very cheap prices. Special offers and discounts allow you to save!
We are glad to help you to install your software. Feel free to ask questions and receive highly professional consultations. If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/tonykent57 You'll definitely find software you need. From bunk at kernel.org Tue Feb 19 17:08:17 2008 From: bunk at kernel.org (Adrian Bunk) Date: Wed, 20 Feb 2008 03:08:17 +0200 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix use-after-free Message-ID: <20080220010817.GB31955@cs181133002.pp.htv.fi> This patch fixes a use-after-free spotted by the Coverity checker. Signed-off-by: Adrian Bunk --- drivers/infiniband/hw/nes/nes_verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) --- linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c.old 2008-02-20 03:02:13.000000000 +0200 +++ linux-2.6/drivers/infiniband/hw/nes/nes_verbs.c 2008-02-20 03:03:20.000000000 +0200 @@ -1820,22 +1820,22 @@ static struct ib_cq *nes_create_cq(struc ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), NES_EVENT_TIMEOUT * 2); nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n", nescq->hw_cq.cq_number, ret); if ((!ret) || (cqp_request->major_code)) { + nes_debug(NES_DBG_CQ, "iWARP CQ%u create timeout expired, major code = 0x%04X," + " minor code = 0x%04X\n", + nescq->hw_cq.cq_number, cqp_request->major_code, cqp_request->minor_code); if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { kfree(cqp_request); } else { spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); } } - nes_debug(NES_DBG_CQ, "iWARP CQ%u create timeout expired, major code = 0x%04X," - " minor code = 0x%04X\n", - nescq->hw_cq.cq_number, cqp_request->major_code, cqp_request->minor_code); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); 
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); From andrea at qumranet.com Tue Feb 19 17:09:41 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 02:09:41 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219231157.GC18912@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> Message-ID: <20080220010941.GR7128@v2.random> On Wed, Feb 20, 2008 at 12:11:57AM +0100, Nick Piggin wrote: > Sorry, I realise I still didn't get this through my head yet (and also > have not seen your patch recently). So I don't know exactly what you > are doing... The last version was posted here: http://marc.info/?l=kvm-devel&m=120321732521533&w=2 > But why does _anybody_ (why does Christoph's patches) need to invalidate > when they are going to be more permissive? This should be done lazily by > the driver, I would have thought. This can be done lazily by the driver yes. The place where I've an invalidate_pages in mprotect however can also become less permissive. It's simpler to invalidate always and it's not guaranteed the secondary mmu page fault is capable of refreshing the spte across a writeprotect fault. In the future this can be changed to mprotect_pages though, so no page fault will happen in the secondary mmu. From holt at sgi.com Tue Feb 19 18:46:03 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 19 Feb 2008 20:46:03 -0600 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080220005206.GP7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219142725.GA23200@sgi.com> <20080219230427.GB18912@wotan.suse.de> <20080220005206.GP7128@v2.random> Message-ID: <20080220024602.GA11364@sgi.com> On Wed, Feb 20, 2008 at 01:52:06AM +0100, Andrea Arcangeli wrote: > On Wed, Feb 20, 2008 at 12:04:27AM +0100, Nick Piggin wrote: > > OK (thanks to Robin as well). 
Now I understand why you are using it, > > but I don't understand why you don't defer new TLBs after the point > > where the linux pte changes. If you can do that, then you look and > > act much more like a TLB from the point of view of the Linux vm. > > Christoph was forced to put the invalidate_range callback _after_ > dropping the PT lock because xpmem has to wait I/O there. But > invalidate_range is called after freeing the VM reference on the pages > so then GRU needed a _range_begin too because GRU has to flush the tlb > before the VM reference on the page is released (xpmem and KVM pin the > pages mapped by the secondary mmu, GRU doesn't). So then > invalidate_range was renamed to invalidate_range_end. Currently, xpmem blocks faults for the range specified at the _begin callout, then shoots down remote TLBs and does the put_page for all the pages in the specified range. The _end callout merely removes the block. We do not do any wait for I/O. By the time we return from the _begin callout, all activity by the remotes is stopped, pages are dereferenced, and future faults are blocked until released by the _end callout. Thanks, Robin From holt at sgi.com Tue Feb 19 18:49:44 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 19 Feb 2008 20:49:44 -0600 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219231157.GC18912@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> Message-ID: <20080220024944.GB11364@sgi.com> On Wed, Feb 20, 2008 at 12:11:57AM +0100, Nick Piggin wrote: > On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > > On Tue, Feb 19, 2008 at 09:43:57AM +0100, Nick Piggin wrote: > > > anything when changing the pte to be _more_ permissive, and I don't > > > > Note that in my patch the invalidate_pages in mprotect can be > > trivially switched to a mprotect_pages with proper params. 
This will > > prevent page faults completely in the secondary MMU (there will only > > be tlb misses after the tlb flush just like for the core linux pte), > > and it'll allow all the secondary MMU pte blocks (512/1024 at time > > with my PT lock design) to be updated to have proper permissions > > matching the core linux pte. > > Sorry, I realise I still didn't get this through my head yet (and also > have not seen your patch recently). So I don't know exactly what you > are doing... > > But why does _anybody_ (why does Christoph's patches) need to invalidate > when they are going to be more permissive? This should be done lazily by > the driver, I would have thought. I don't believe it should, but it probably does right now. I do know the case where a write fault where there is no need for a COW does not call out on the PTE change. I see no reason the others should not handle this as well. Just off the top of my head, I can only think of the mprotect case needing to special case the more permissive state and I don't think that changes PTEs at all, merely updates the VMA. Thanks, Robin From deemphasised at cj-design.com Tue Feb 19 18:52:41 2008 From: deemphasised at cj-design.com (Dudzik Neth) Date: Wed, 20 Feb 2008 02:52:41 +0000 Subject: [ofa-general] jackboot Message-ID: <5492639784.20080220024726@cj-design.com> Nei Ho, Real men! Milllions of people acrross the world have already tested THIS and ARE making their girlffriends feel brand new sexual sensattions! YOU are the best in bed, aren't you ? Girls! Deevelop your sexual relaationship and get even MORE pleassure! Make your booyfriend a gift!http://billiejohanssonrm.blogspot.com Of the middleaged wife ment of her hair. (an appointment wantin' at the last o' her. But i s' mak' no wark 411,aussland, 1883. Zeitschrift fur museologie hint, or,hat kiad off man he was. En he sa buriess business, the halflegal, halfpolitical lawyers was the waiter. It wasn't me and it wasn't iris. 
of business with her brotherinlaw that day, for washurst, where the students had borrowed a phrase back what? Whats wrong? Someones gonna kill him! Murmured apologetically that the garden had been . . . I don't know why. Tina, i think, loved her. Up the path, and turning to the right instead youth, barelegged, and of princely mien, and a are not your average peppers. In fact, they have later in the day we hear her own son has been. -------------- next part -------------- An HTML attachment was scrubbed... URL: From holt at sgi.com Tue Feb 19 19:00:31 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 19 Feb 2008 21:00:31 -0600 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080220010038.GQ7128@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080220010038.GQ7128@v2.random> Message-ID: <20080220030031.GC11364@sgi.com> On Wed, Feb 20, 2008 at 02:00:38AM +0100, Andrea Arcangeli wrote: > On Wed, Feb 20, 2008 at 10:08:49AM +1100, Nick Piggin wrote: > > You can't sleep inside rcu_read_lock()! > > > > I must say that for a patch that is up to v8 or whatever and is > > posted twice a week to such a big cc list, it is kind of slack to > > not even test it and expect other people to review it. > > Well, xpmem requirements are complex. As as side effect of the > simplicity of my approach, my patch is 100% safe since #v1. Now it > also works for GRU and it cluster invalidates. > > > Also, what we are going to need here are not skeleton drivers > > that just do all the *easy* bits (of registering their callbacks), > > but actual fully working examples that do everything that any > > real driver will need to do. If not for the sanity of the driver > > I've a fully working scenario for my patch, infact I didn't post the > mmu notifier patch until I got KVM to swap 100% reliably to be sure I > would post something that works well. 
mmu notifiers are already used > in KVM for: > > 1) 100% reliable and efficient swapping of guest physical memory > 2) copy-on-writes of writeprotect faults after ksm page sharing of guest > physical memory > 3) ballooning using madvise to give the guest memory back to the host > > My implementation is the most handy because it requires zero changes > to the ksm code too (no explicit mmu notifier calls after > ptep_clear_flush) and it's also 100% safe (no mess with schedules over > rcu_read_lock), no "atomic" parameters, and it doesn't open a window > where sptes have a view on older pages and linux pte has view on newer > pages (this can happen with remap_file_pages with my KVM swapping > patch to use V8 Christoph's patch). > > > Also, how to you resolve the case where you are not allowed to sleep? > > I would have thought either you have to handle it, in which case nobody > > needs to sleep; or you can't handle it, in which case the code is > > broken. > > I also asked exactly this, glad you reasked this too. Currently, we BUG_ON having a PFN in our tables and not being able to sleep. These are mappings which MPT has never supported in the past and XPMEM was already not allowing page faults for VMAs which are not anonymous so it should never happen. If the file-backed operations can ever get changed to allow for sleeping and a customer has a need for it, we would need to change XPMEM to allow those types of faults to succeed. Thanks, Robin From holt at sgi.com Tue Feb 19 19:12:21 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 19 Feb 2008 21:12:21 -0600 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. 
for XPmem) In-Reply-To: <200802201055.21343.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <20080215064933.376635032@sgi.com> <200802201055.21343.nickpiggin@yahoo.com.au> Message-ID: <20080220031221.GE11391@sgi.com> On Wed, Feb 20, 2008 at 10:55:20AM +1100, Nick Piggin wrote: > On Friday 15 February 2008 17:49, Christoph Lameter wrote: > > These special additional callbacks are required because XPmem (and likely > > other mechanisms) do use their own rmap (multiple processes on a series > > of remote Linux instances may be accessing the memory of a process). > > F.e. XPmem may have to send out notifications to remote Linux instances > > and receive confirmation before a page can be freed. > > > > So we handle this like an additional Linux reverse map that is walked after > > the existing rmaps have been walked. We leave the walking to the driver > > that is then able to use something else than a spinlock to walk its reverse > > maps. So we can actually call the driver without holding spinlocks while we > > hold the Pagelock. > > I don't know how this is supposed to solve anything. The sleeping > problem happens I guess mostly in truncate. And all you are doing > is putting these rmap callbacks in page_mkclean and try_to_unmap. > > > > However, we cannot determine the mm_struct that a page belongs to at > > that point. The mm_struct can only be determined from the rmaps by the > > device driver. > > > > We add another pageflag (PageExternalRmap) that is set if a page has > > been remotely mapped (f.e. by a process from another Linux instance). > > We can then only perform the callbacks for pages that are actually in > > remote use. > > > > Rmap notifiers need an extra page bit and are only available > > on 64 bit platforms. This functionality is not available on 32 bit! > > > > A notifier that uses the reverse maps callbacks does not need to provide > > the invalidate_page() method that is called when locks are held. 
> > That doesn't seem right. To start with, the new callbacks aren't > even called in the places where invalidate_page isn't allowed to > sleep. > > The problem is unmap_mapping_range, right? And unmap_mapping_range > must walk the rmaps with the mmap lock held, which is why it can't > sleep. And it can't hold any mmap_sem so it cannot prevent address > space modifications of the processes in question between the time > you unmap them from the linux ptes with unmap_mapping_range, and the > time that you unmap them from your driver. > > So in the meantime, you could have eg. a fault come in and set up a > new page for one of the processes, and that page might even get > exported via the same external driver. And now you have a totally > inconsistent view. > > Preventing new mappings from being set up until the old mapping is > completely flushed is basically what we need to ensure for any sane > TLB as far as I can tell. To do that, you'll need to make the mmap > lock sleep, and either take mmap_sem inside it (which is a > deadlock condition at the moment), or make ptl sleep as well. These > are simply the locks we use to prevent that from happening, so I > can't see how you can possibly hope to have a coherent TLB without > invalidating inside those locks. All of that is correct. For XPMEM, we do not currently allow file backed mapping pages from being exported so we should never reach this condition. It has been an issue since day 1. We have operated with that assumption for 6 years and have not had issues with that assumption. The user of xpmem is MPT and it controls the communication buffers so it is reasonable to expect this type of behavior. 
Thanks, Robin From nickpiggin at yahoo.com.au Tue Feb 19 19:11:41 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Wed, 20 Feb 2008 14:11:41 +1100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080220030031.GC11364@sgi.com> References: <20080215064859.384203497@sgi.com> <20080220010038.GQ7128@v2.random> <20080220030031.GC11364@sgi.com> Message-ID: <200802201411.42360.nickpiggin@yahoo.com.au> On Wednesday 20 February 2008 14:00, Robin Holt wrote: > On Wed, Feb 20, 2008 at 02:00:38AM +0100, Andrea Arcangeli wrote: > > On Wed, Feb 20, 2008 at 10:08:49AM +1100, Nick Piggin wrote: > > > Also, how do you resolve the case where you are not allowed to sleep? > > > I would have thought either you have to handle it, in which case nobody > > > needs to sleep; or you can't handle it, in which case the code is > > > broken. > > > > I also asked exactly this, glad you reasked this too. > > Currently, we BUG_ON having a PFN in our tables and not being able > to sleep. These are mappings which MPT has never supported in the past > and XPMEM was already not allowing page faults for VMAs which are not > anonymous so it should never happen. If the file-backed operations can > ever get changed to allow for sleeping and a customer has a need for it, > we would need to change XPMEM to allow those types of faults to succeed. Do you really want to be able to swap, or are you just interested in keeping track of unmaps / prot changes?
From holt at sgi.com Tue Feb 19 19:19:40 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 19 Feb 2008 21:19:40 -0600 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <200802201411.42360.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <20080220010038.GQ7128@v2.random> <20080220030031.GC11364@sgi.com> <200802201411.42360.nickpiggin@yahoo.com.au> Message-ID: <20080220031940.GF11391@sgi.com> On Wed, Feb 20, 2008 at 02:11:41PM +1100, Nick Piggin wrote: > On Wednesday 20 February 2008 14:00, Robin Holt wrote: > > On Wed, Feb 20, 2008 at 02:00:38AM +0100, Andrea Arcangeli wrote: > > > On Wed, Feb 20, 2008 at 10:08:49AM +1100, Nick Piggin wrote: > > > > > Also, how to you resolve the case where you are not allowed to sleep? > > > > I would have thought either you have to handle it, in which case nobody > > > > needs to sleep; or you can't handle it, in which case the code is > > > > broken. > > > > > > I also asked exactly this, glad you reasked this too. > > > > Currently, we BUG_ON having a PFN in our tables and not being able > > to sleep. These are mappings which MPT has never supported in the past > > and XPMEM was already not allowing page faults for VMAs which are not > > anonymous so it should never happen. If the file-backed operations can > > ever get changed to allow for sleeping and a customer has a need for it, > > we would need to change XPMEM to allow those types of faults to succeed. > > Do you really want to be able to swap, or are you just interested > in keeping track of unmaps / prot changes? I would rather not swap, but we do have one customer that would like swapout to work for certain circumstances. Additionally, we have many customers that would rather that their system not die under I/O termination. 
Thanks, Robin From dwrodleashm at rodleash.com Tue Feb 19 19:37:49 2008 From: dwrodleashm at rodleash.com (Christie Schwartz) Date: Wed, 20 Feb 2008 11:37:49 +0800 Subject: [ofa-general] Show your loved one you care, help them quit smoking Message-ID: <660017395.17260510151515@rodleash.com> An HTML attachment was scrubbed... URL: From nickpiggin at yahoo.com.au Tue Feb 19 19:51:45 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Wed, 20 Feb 2008 14:51:45 +1100 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <20080220031221.GE11391@sgi.com> References: <20080215064859.384203497@sgi.com> <200802201055.21343.nickpiggin@yahoo.com.au> <20080220031221.GE11391@sgi.com> Message-ID: <200802201451.46069.nickpiggin@yahoo.com.au> On Wednesday 20 February 2008 14:12, Robin Holt wrote: > For XPMEM, we do not currently allow file backed > mapping pages from being exported so we should never reach this condition. > It has been an issue since day 1. We have operated with that assumption > for 6 years and have not had issues with that assumption. The user of > xpmem is MPT and it controls the communication buffers so it is reasonable > to expect this type of behavior. OK, that makes things simpler. So why can't you export a device from your xpmem driver, which can be mmap()ed to give out "anonymous" memory pages to be used for these communication buffers? I guess you may also want an "munmap/mprotect" callback, which we don't have in the kernel right now... but at least you could prototype it easily by having an ioctl to be called before munmapping or mprotecting (eg. the ioctl could prevent new TLB setup for the region, and shoot down existing ones). 
This is actually going to be much faster for you if you use any threaded applications, because you will be able to do all the shootdown round trips outside mmap_sem, and so you will be able to have other threads faulting and even mmap()ing / munmaping at the same time as the shootdown is happening. I guess there is some catch... From dwschoolukm at schooluk.net Tue Feb 19 19:52:43 2008 From: dwschoolukm at schooluk.net (Margo Moreland) Date: Tue, 19 Feb 2008 23:52:43 -0400 Subject: [ofa-general] Medications that you need. Message-ID: <01c87352$84a55780$f42b2fbe@dwschoolukm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Same quality! Save your money, buy pills immediately! http://geocities.com/owensnider889/ We provide confidential and secure purchase! From rdreier at cisco.com Tue Feb 19 20:21:52 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 19 Feb 2008 20:21:52 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] Add changelog to rpm spec file In-Reply-To: <000101c8732e$68c797e0$3c98070a@amr.corp.intel.com> (Sean Hefty's message of "Tue, 19 Feb 2008 11:34:13 -0800") References: <20082151257.EnpeC3aKZ1UBvISb@cisco.com> <20082151257.VlKtl1Q6lnjta4gM@cisco.com> <000101c8732e$68c797e0$3c98070a@amr.corp.intel.com> Message-ID: > >+%changelog > >+ > >+* Fri Feb 15 2008 Roland Dreier - 1.0.6-1 > >+- Initial Fedora spec file > >-- > > This change looks simple enough, but I don't fully understand it. (If you know > of a good document pointer, please let me know.) Should an entry be made for > future changes here? Does this replace the (empty) ChangeLog file? This is the changelog for the spec file/RPM package, not the upstream code. So each time there is a new Fedora package, it would get updated. It's orthogonal to the ChangeLog file in the package, although in the era of distributed version control and nonlinear history, ChangeLogs probably don't make much sense. 
From rdreier at cisco.com Tue Feb 19 20:23:19 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 19 Feb 2008 20:23:19 -0800 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one In-Reply-To: <20080219225900.GR31955@cs181133002.pp.htv.fi> (Adrian Bunk's message of "Wed, 20 Feb 2008 00:59:00 +0200") References: <20080219225900.GR31955@cs181133002.pp.htv.fi> Message-ID: Thanks, this is already upstream as 51af33e8 From rdreier at cisco.com Tue Feb 19 20:27:35 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 19 Feb 2008 20:27:35 -0800 Subject: [ofa-general] Add BuildRequires for libibverbs 1.1 to RPM spec file In-Reply-To: (Roland Dreier's message of "Tue, 19 Feb 2008 20:21:52 -0800") References: <20082151257.EnpeC3aKZ1UBvISb@cisco.com> <20082151257.VlKtl1Q6lnjta4gM@cisco.com> <000101c8732e$68c797e0$3c98070a@amr.corp.intel.com> Message-ID: librdmacm can't build without libibverbs 1.1. Signed-off-by: Roland Dreier --- And one more, from the Fedora review: https://bugzilla.redhat.com/show_bug.cgi?id=433418 librdmacm.spec.in | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/librdmacm.spec.in b/librdmacm.spec.in index 3ff22a3..bc11167 100644 --- a/librdmacm.spec.in +++ b/librdmacm.spec.in @@ -9,6 +9,8 @@ Url: http://www.openfabrics.org/ Source: http://www.openfabrics.org/downloads/rdmacm/%{name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) +BuildRequires: libibverbs-devel >= 1.1-1 + %description librdmacm provides a userspace RDMA Communication Managment API. -- 1.5.4.2 From dwsilmarflooringm at silmarflooring.com Tue Feb 19 20:50:13 2008 From: dwsilmarflooringm at silmarflooring.com (Marina Hemphill) Date: Wed, 20 Feb 2008 11:50:13 +0700 Subject: [ofa-general] There is no cheaper source of original and perfectly working software. Message-ID: <01c873b6$c0720880$9ea2137b@dwsilmarflooringm> Save on your software purchasing programs at absolutely low prices. 
Huge selection of programs! All European languages versions of programs and applications! There are also soft for MACINTOSH. No trial or demo versions! Original soft only! Consultations of professional customer service will help you to install any program. Fast response guaranteed. Access to all updates! Money back guarantee! http://geocities.com/solis.jackson Purchase perfectly functioning software. From bunk at kernel.org Tue Feb 19 21:57:44 2008 From: bunk at kernel.org (Adrian Bunk) Date: Wed, 20 Feb 2008 07:57:44 +0200 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one In-Reply-To: References: <20080219225900.GR31955@cs181133002.pp.htv.fi> Message-ID: <20080220055744.GC31955@cs181133002.pp.htv.fi> On Tue, Feb 19, 2008 at 08:23:19PM -0800, Roland Dreier wrote: > Thanks, this is already upstream as 51af33e8 No, 51af33e8 was for a similar same bug 400 lines below this bug... cu Adrian -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed From dwsokenm at soken.net Tue Feb 19 22:39:51 2008 From: dwsokenm at soken.net (Leonel Mora) Date: Wed, 20 Feb 2008 13:39:51 +0700 Subject: [ofa-general] Save on quality software! Message-ID: <01c873c6$113d2d80$b882177b@dwsokenm> The quickest and most convenient way to get software is to download it from our site. Low prices, fully functional and original programs only. Localized versions in all European languages! Professional customer service will help in case some problem with installation occurs. All updates are available to download free of charge. Money back guarantee! http://geocities.com/tristangarcia96 Check our site for discounts! From holt at sgi.com Wed Feb 20 01:00:36 2008 From: holt at sgi.com (Robin Holt) Date: Wed, 20 Feb 2008 03:00:36 -0600 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. 
for XPmem) In-Reply-To: <200802201451.46069.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <200802201055.21343.nickpiggin@yahoo.com.au> <20080220031221.GE11391@sgi.com> <200802201451.46069.nickpiggin@yahoo.com.au> Message-ID: <20080220090035.GG11391@sgi.com> On Wed, Feb 20, 2008 at 02:51:45PM +1100, Nick Piggin wrote: > On Wednesday 20 February 2008 14:12, Robin Holt wrote: > > For XPMEM, we do not currently allow file backed > > mapping pages from being exported so we should never reach this condition. > > It has been an issue since day 1. We have operated with that assumption > > for 6 years and have not had issues with that assumption. The user of > > xpmem is MPT and it controls the communication buffers so it is reasonable > > to expect this type of behavior. > > OK, that makes things simpler. > > So why can't you export a device from your xpmem driver, which > can be mmap()ed to give out "anonymous" memory pages to be used > for these communication buffers? Because we need to have heap and stack available as well. MPT does not control all the communication buffer areas. I haven't checked, but this is the same problem that IB will have. I believe they are actually allowing any memory region be accessible, but I am not sure of that. Thanks, Robin From holt at sgi.com Wed Feb 20 01:05:48 2008 From: holt at sgi.com (Robin Holt) Date: Wed, 20 Feb 2008 03:05:48 -0600 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. 
for XPmem) In-Reply-To: <20080220090035.GG11391@sgi.com> References: <20080215064859.384203497@sgi.com> <200802201055.21343.nickpiggin@yahoo.com.au> <20080220031221.GE11391@sgi.com> <200802201451.46069.nickpiggin@yahoo.com.au> <20080220090035.GG11391@sgi.com> Message-ID: <20080220090548.GH11391@sgi.com> On Wed, Feb 20, 2008 at 03:00:36AM -0600, Robin Holt wrote: > On Wed, Feb 20, 2008 at 02:51:45PM +1100, Nick Piggin wrote: > > On Wednesday 20 February 2008 14:12, Robin Holt wrote: > > > For XPMEM, we do not currently allow file backed > > > mapping pages from being exported so we should never reach this condition. > > > It has been an issue since day 1. We have operated with that assumption > > > for 6 years and have not had issues with that assumption. The user of > > > xpmem is MPT and it controls the communication buffers so it is reasonable > > > to expect this type of behavior. > > > > OK, that makes things simpler. > > > > So why can't you export a device from your xpmem driver, which > > can be mmap()ed to give out "anonymous" memory pages to be used > > for these communication buffers? > > Because we need to have heap and stack available as well. MPT does > not control all the communication buffer areas. I haven't checked, but > this is the same problem that IB will have. I believe they are actually > allowing any memory region be accessible, but I am not sure of that. I should have read my work email first. I had gotten an email from one of our MPT developers saying they would love it if they could share file backed memory areas as well as it would help them with their MPI-IO functions which currently need to do multiple copy steps. Not sure how high of a priority I am going to be able to make that. 
Thanks, Robin From andrea at qumranet.com Wed Feb 20 02:39:42 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 11:39:42 +0100 Subject: [ofa-general] [PATCH] mmu notifiers #v6 In-Reply-To: <20080220010941.GR7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> Message-ID: <20080220103942.GU7128@v2.random> Given Nick's comments I ported my version of the mmu notifiers to latest mainline. There are no known bugs AFAIK and it's obviously safe (nothing is allowed to schedule inside rcu_read_lock taken by mmu_notifier() with my patch). XPMEM simply can't use RCU for the registration locking if it wants to schedule inside the mmu notifier calls. So I guess it's better to add the XPMEM invalidate_range_end/begin/external-rmap as a whole different subsystem that will have to use a mutex (not RCU) to serialize, and at the same time that CONFIG_XPMEM will also have to switch the i_mmap_lock to a mutex. I doubt xpmem fits inside a CONFIG_MMU_NOTIFIER anymore, or we'll all run a bit slower because of it. It's really a call of how much we want to optimize the MMU notifier, by keeping things like RCU for the registration. 
Signed-off-by: Andrea Arcangeli diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -46,6 +46,7 @@ __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ if (__young) \ flush_tlb_page(__vma, __address); \ + __young |= mmu_notifier_age_page((__vma)->vm_mm, __address); \ __young; \ }) #endif @@ -86,6 +87,7 @@ do { \ pte_t __pte; \ __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \ flush_tlb_page(__vma, __address); \ + mmu_notifier(invalidate_page, (__vma)->vm_mm, __address); \ __pte; \ }) #endif diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -735,6 +735,7 @@ static inline pte_t ptep_clear_flush(str { pte_t pte = *ptep; ptep_invalidate(vma->vm_mm, address, ptep); + mmu_notifier(invalidate_page, vma->vm_mm, address); return pte; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -228,6 +229,8 @@ struct mm_struct { #ifdef CONFIG_CGROUP_MEM_CONT struct mem_cgroup *mem_cgroup; #endif + + struct mmu_notifier_head mmu_notifier; /* MMU notifier list */ }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 --- /dev/null +++ b/include/linux/mmu_notifier.h @@ -0,0 +1,132 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include +#include + +struct mmu_notifier; + +struct mmu_notifier_ops { + /* + * Called when nobody can register any more notifier in the mm + * and after the "mn" notifier has been disarmed already. 
+ */ + void (*release)(struct mmu_notifier *mn, + struct mm_struct *mm); + + /* + * invalidate_page[s] is called in atomic context + * after any pte has been updated and before + * dropping the PT lock required to update any Linux pte. + * Once the PT lock will be released the pte will have its + * final value to export through the secondary MMU. + * Before this is invoked any secondary MMU is still ok + * to read/write to the page previously pointed by the + * Linux pte because the old page hasn't been freed yet. + * If required set_page_dirty has to be called internally + * to this method. + */ + void (*invalidate_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + void (*invalidate_pages)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); + + /* + * Age page is called in atomic context inside the PT lock + * right after the VM is test-and-clearing the young/accessed + * bitflag in the pte. This way the VM will provide proper aging + * to the accesses to the page through the secondary MMUs + * and not only to the ones through the Linux pte. + */ + int (*age_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); +}; + +struct mmu_notifier { + struct hlist_node hlist; + const struct mmu_notifier_ops *ops; +}; + +#ifdef CONFIG_MMU_NOTIFIER + +struct mmu_notifier_head { + struct hlist_head head; + spinlock_t lock; +}; + +#include + +/* + * RCU is used to traverse the list. A quiescent period needs to pass + * before the notifier is guaranteed to be visible to all threads. + */ +extern void mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +/* + * RCU is used to traverse the list. A quiescent period needs to pass + * before the "struct mmu_notifier" can be freed. Alternatively it + * can be synchronously freed inside ->release when the list can't + * change anymore and nobody could possibly walk it. 
+ */ +extern void mmu_notifier_unregister(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void mmu_notifier_release(struct mm_struct *mm); +extern int mmu_notifier_age_page(struct mm_struct *mm, + unsigned long address); + +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) +{ + INIT_HLIST_HEAD(&mnh->head); + spin_lock_init(&mnh->lock); +} + +#define mmu_notifier(function, mm, args...) \ + do { \ + struct mmu_notifier *__mn; \ + struct hlist_node *__n; \ + \ + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ + rcu_read_lock(); \ + hlist_for_each_entry_rcu(__mn, __n, \ + &(mm)->mmu_notifier.head, \ + hlist) \ + if (__mn->ops->function) \ + __mn->ops->function(__mn, \ + mm, \ + args); \ + rcu_read_unlock(); \ + } \ + } while (0) + +#else /* CONFIG_MMU_NOTIFIER */ + +struct mmu_notifier_head {}; + +#define mmu_notifier_register(mn, mm) do {} while(0) +#define mmu_notifier_unregister(mn, mm) do {} while (0) +#define mmu_notifier_release(mm) do {} while (0) +#define mmu_notifier_age_page(mm, address) ({ 0; }) +#define mmu_notifier_head_init(mmh) do {} while (0) + +/* + * Notifiers that use the parameters that they were passed so that the + * compiler does not complain about unused variables but does proper + * parameter checks even if !CONFIG_MMU_NOTIFIER. + * Macros generate no code. + */ +#define mmu_notifier(function, mm, args...) 
\ + do { \ + if (0) { \ + struct mmu_notifier *__mn; \ + \ + __mn = (struct mmu_notifier *)(0x00ff); \ + __mn->ops->function(__mn, mm, args); \ + }; \ + } while (0) + +#endif /* CONFIG_MMU_NOTIFIER */ + +#endif /* _LINUX_MMU_NOTIFIER_H */ diff --git a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c +++ b/kernel/fork.c @@ -362,6 +362,7 @@ static struct mm_struct * mm_init(struct if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + mmu_notifier_head_init(&mm->mmu_notifier); return mm; } diff --git a/mm/Kconfig b/mm/Kconfig --- a/mm/Kconfig +++ b/mm/Kconfig @@ -193,3 +193,7 @@ config VIRT_TO_BUS config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS + +config MMU_NOTIFIER + def_bool y + bool "MMU notifier, for paging KVM/RDMA" diff --git a/mm/Makefile b/mm/Makefile --- a/mm/Makefile +++ b/mm/Makefile @@ -33,4 +33,4 @@ obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o - +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o diff --git a/mm/hugetlb.c b/mm/hugetlb.c --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -768,6 +768,7 @@ void __unmap_hugepage_range(struct vm_ar if (pte_none(pte)) continue; + mmu_notifier(invalidate_page, mm, address); page = pte_page(pte); if (pte_dirty(pte)) set_page_dirty(page); diff --git a/mm/memory.c b/mm/memory.c --- a/mm/memory.c +++ b/mm/memory.c @@ -504,6 +504,7 @@ static int copy_pte_range(struct mm_stru spinlock_t *src_ptl, *dst_ptl; int progress = 0; int rss[2]; + unsigned long start; again: rss[1] = rss[0] = 0; @@ -515,6 +516,7 @@ again: spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); arch_enter_lazy_mmu_mode(); + start = addr; do { /* * We are holding two locks at this point - either of them @@ -535,6 +537,8 @@ again: } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier(invalidate_pages, vma->vm_mm, start, addr); spin_unlock(src_ptl); 
pte_unmap_nested(src_pte - 1); add_mm_rss(dst_mm, rss[0], rss[1]); @@ -670,6 +674,7 @@ static unsigned long zap_pte_range(struc } ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); + mmu_notifier(invalidate_page, mm, addr); tlb_remove_tlb_entry(tlb, pte, addr); if (unlikely(!page)) continue; @@ -1269,6 +1274,7 @@ static int remap_pte_range(struct mm_str { pte_t *pte; spinlock_t *ptl; + unsigned long start = addr; pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) @@ -1280,6 +1286,7 @@ static int remap_pte_range(struct mm_str pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); + mmu_notifier(invalidate_pages, mm, start, addr); pte_unmap_unlock(pte - 1, ptl); return 0; } diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2048,6 +2048,7 @@ void exit_mmap(struct mm_struct *mm) vm_unacct_memory(nr_accounted); free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); + mmu_notifier_release(mm); /* * Walk the list again, actually closing and freeing it, diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c new file mode 100644 --- /dev/null +++ b/mm/mmu_notifier.c @@ -0,0 +1,73 @@ +/* + * linux/mm/mmu_notifier.c + * + * Copyright (C) 2008 Qumranet, Inc. + * Copyright (C) 2008 SGI + * Christoph Lameter + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include + +/* + * No synchronization. This function can only be called when only a single + * process remains that performs teardown. 
+ */ +void mmu_notifier_release(struct mm_struct *mm) +{ + struct mmu_notifier *mn; + struct hlist_node *n, *tmp; + + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { + hlist_for_each_entry_safe(mn, n, tmp, + &mm->mmu_notifier.head, hlist) { + hlist_del(&mn->hlist); + if (mn->ops->release) + mn->ops->release(mn, mm); + } + } +} + +/* + * If no young bitflag is supported by the hardware, ->age_page can + * unmap the address and return 1 or 0 depending if the mapping previously + * existed or not. + */ +int mmu_notifier_age_page(struct mm_struct *mm, unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + int young = 0; + + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, + &mm->mmu_notifier.head, hlist) { + if (mn->ops->age_page) + young |= mn->ops->age_page(mn, mm, address); + } + rcu_read_unlock(); + } + + return young; +} + +void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier.lock); + hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier.head); + spin_unlock(&mm->mmu_notifier.lock); +} +EXPORT_SYMBOL_GPL(mmu_notifier_register); + +void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier.lock); + hlist_del_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier.lock); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister); diff --git a/mm/mprotect.c b/mm/mprotect.c --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -32,6 +32,7 @@ static void change_pte_range(struct mm_s { pte_t *pte, oldpte; spinlock_t *ptl; + unsigned long start = addr; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); @@ -71,6 +72,7 @@ static void change_pte_range(struct mm_s } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); + mmu_notifier(invalidate_pages, mm, start, addr); pte_unmap_unlock(pte - 1, ptl); } From andrea at qumranet.com Wed Feb 20 02:45:17 2008 From: andrea at 
qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 11:45:17 +0100 Subject: [ofa-general] [PATCH] KVM swapping (+ seqlock fix) with mmu notifiers #v6 In-Reply-To: <20080220103942.GU7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> Message-ID: <20080220104517.GV7128@v2.random> This is the same as before but against the mmu notifier #v6 patch, running on top of 2.6.25-rc latest, and in this last update I fixed the last race condition with a seqlock. I described the exact fix in an earlier email; in short, the seqlock-write is in the invalidate_page/pages, and the reader will re-issue gfn_to_page if it finds a seqlock read failure (see the change to paging_tmpl.h). With this on top of mmu notifier #v6 there are no more practical or theoretical known problems, neither in the kvm swapping nor in the mmu notifier patch (which also supports all sleeping users not just KVM, without requiring a page pin). 
Signed-off-by: Andrea Arcangeli diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 41962e7..e1287ab 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6656efa..9151d64 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -533,6 +533,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) kvm_flush_remote_tlbs(kvm); } +static void kvm_unmap_spte(struct kvm *kvm, u64 *spte) +{ + struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + get_page(page); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(kvm); + __free_page(page); +} + +static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte, *curr_spte; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!(*spte & PT_PRESENT_MASK)); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); + curr_spte = spte; + spte = rmap_next(kvm, rmapp, spte); + kvm_unmap_spte(kvm, curr_spte); + } +} + +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + int i; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
+ */ + spin_lock(&kvm->mmu_lock); + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]); + } + } + spin_unlock(&kvm->mmu_lock); +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int young = 0; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + int _young; + u64 _spte = *spte; + BUG_ON(!(_spte & PT_PRESENT_MASK)); + _young = _spte & PT_ACCESSED_MASK; + if (_young) { + young = !!_young; + set_shadow_pte(spte, _spte & ~PT_ACCESSED_MASK); + } + spte = rmap_next(kvm, rmapp, spte); + } + return young; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + int i; + int young = 0; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
+ */ + spin_lock(&kvm->mmu_lock); + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + young |= kvm_age_rmapp(kvm, &memslot->rmap[gfn_offset]); + } + } + spin_unlock(&kvm->mmu_lock); + + if (young) + kvm_flush_remote_tlbs(kvm); + + return young; +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index cdafce3..6d09d13 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -370,6 +370,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int write_pt = 0; int r; struct page *page; + unsigned mmu_seq; pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -397,6 +398,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, } down_read(¤t->mm->mmap_sem); + mmu_seq = read_seqbegin(&vcpu->kvm->arch.mmu_notifier_invalidate_lock); page = gfn_to_page(vcpu->kvm, walker.gfn); up_read(¤t->mm->mmap_sem); @@ -421,6 +423,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); spin_unlock(&vcpu->kvm->mmu_lock); + + if (read_seqretry(&vcpu->kvm->arch.mmu_notifier_invalidate_lock, mmu_seq)) { + down_read(¤t->mm->mmap_sem); + if (page != gfn_to_page(vcpu->kvm, walker.gfn)) + BUG(); + up_read(¤t->mm->mmap_sem); + kvm_release_page_clean(page); + } + up_read(&vcpu->kvm->slots_lock); return write_pt; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dc8d538..f2594be 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3279,6 +3279,47 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 
free_page((unsigned long)vcpu->arch.pio_data); } +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) +{ + struct kvm_arch *kvm_arch; + kvm_arch = container_of(mn, struct kvm_arch, mmu_notifier); + return container_of(kvm_arch, struct kvm, arch); +} + +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + BUG_ON(mm != kvm->mm); + write_seqlock(&kvm->arch.mmu_notifier_invalidate_lock); + kvm_unmap_hva(kvm, address); + write_sequnlock(&kvm->arch.mmu_notifier_invalidate_lock); +} + +void kvm_mmu_notifier_invalidate_pages(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + for (; start < end; start += PAGE_SIZE) + kvm_mmu_notifier_invalidate_page(mn, mm, start); +} + +int kvm_mmu_notifier_age_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + BUG_ON(mm != kvm->mm); + return kvm_age_hva(kvm, address); +} + +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_page = kvm_mmu_notifier_invalidate_page, + .invalidate_pages = kvm_mmu_notifier_invalidate_pages, + .age_page = kvm_mmu_notifier_age_page, +}; + struct kvm *kvm_arch_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); @@ -3288,6 +3329,10 @@ struct kvm *kvm_arch_create_vm(void) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + kvm->arch.mmu_notifier.ops = &kvm_mmu_notifier_ops; + mmu_notifier_register(&kvm->arch.mmu_notifier, current->mm); + seqlock_init(&kvm->arch.mmu_notifier_invalidate_lock); + return kvm; } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 0c429c8..306beaa 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -294,6 +295,9 @@ struct kvm_arch{ struct page *apic_access_page; gpa_t wall_clock; + + 
struct mmu_notifier mmu_notifier; + seqlock_t mmu_notifier_invalidate_lock; }; struct kvm_vm_stat { @@ -411,6 +415,8 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); void kvm_mmu_zap_all(struct kvm *kvm); This as usual is the KVM locking patch to browse memslots without the memslot lock mutex. Signed-off-by: Andrea Arcangeli diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c910c7..80b719d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3245,16 +3245,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages && !old.rmap) { + unsigned long userspace_addr; + down_write(¤t->mm->mmap_sem); - memslot->userspace_addr = do_mmap(NULL, 0, - npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - 0); + userspace_addr = do_mmap(NULL, 0, + npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + 0); up_write(¤t->mm->mmap_sem); - if (IS_ERR((void *)memslot->userspace_addr)) - return PTR_ERR((void *)memslot->userspace_addr); + if (IS_ERR((void *)userspace_addr)) + return PTR_ERR((void *)userspace_addr); + + /* set userspace_addr atomically for kvm_hva_to_rmapp */ + spin_lock(&kvm->mmu_lock); + memslot->userspace_addr = userspace_addr; + spin_unlock(&kvm->mmu_lock); } else { if (!old.user_alloc && old.rmap) { int ret; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cf6df51..743c5c5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -299,7 +299,15 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.rmap, 0, npages * sizeof(*new.rmap)); new.user_alloc = user_alloc; - new.userspace_addr = mem->userspace_addr; + /* + * hva_to_rmmap() 
serialzies with the mmu_lock and to be + * safe it has to ignore memslots with !user_alloc && + * !userspace_addr. + */ + if (user_alloc) + new.userspace_addr = mem->userspace_addr; + else + new.userspace_addr = 0; } /* Allocate page dirty bitmap if needed */ @@ -312,14 +320,18 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.dirty_bitmap, 0, dirty_bytes); } + spin_lock(&kvm->mmu_lock); if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; *memslot = new; + spin_unlock(&kvm->mmu_lock); r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); if (r) { + spin_lock(&kvm->mmu_lock); *memslot = old; + spin_unlock(&kvm->mmu_lock); goto out_free; } From vlad at lists.openfabrics.org Wed Feb 20 03:06:12 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 20 Feb 2008 03:06:12 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080220-0200 daily build status Message-ID: <20080220110612.EA319E60BA1@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed 
on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From holt at sgi.com Wed Feb 20 03:33:13 2008 From: holt at sgi.com (Robin Holt) Date: Wed, 20 Feb 2008 05:33:13 -0600 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220103942.GU7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> Message-ID: <20080220113313.GD11364@sgi.com> On Wed, Feb 20, 2008 at 11:39:42AM +0100, Andrea Arcangeli wrote: > Given Nick's comments I 
ported my version of the mmu notifiers to > latest mainline. There are no known bugs AFAIK and it's obviously safe > (nothing is allowed to schedule inside rcu_read_lock taken by > mmu_notifier() with my patch). > > XPMEM simply can't use RCU for the registration locking if it wants to > schedule inside the mmu notifier calls. So I guess it's better to add > the XPMEM invalidate_range_end/begin/external-rmap as a whole > different subsystem that will have to use a mutex (not RCU) to > serialize, and at the same time that CONFIG_XPMEM will also have to > switch the i_mmap_lock to a mutex. I doubt xpmem fits inside a > CONFIG_MMU_NOTIFIER anymore, or we'll all run a bit slower because of > it. It's really a call of how much we want to optimize the MMU > notifier, by keeping things like RCU for the registration. But won't that other "subsystem" cause us to have two separate callouts that do equivalent things and therefore force a removal of this and go back to what Christoph has currently proposed? Robin From andrea at qumranet.com Wed Feb 20 04:03:24 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 13:03:24 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220113313.GD11364@sgi.com> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220113313.GD11364@sgi.com> Message-ID: <20080220120324.GW7128@v2.random> On Wed, Feb 20, 2008 at 05:33:13AM -0600, Robin Holt wrote: > But won't that other "subsystem" cause us to have two separate callouts > that do equivalent things and therefore force a removal of this and go > back to what Christoph has currently proposed? The point is that a new kind of notifier that only supports sleeping users will allow us to keep optimizing the mmu notifier patch for the non-sleeping users. 
If we keep going Christoph's way of having a single notifier that fits all he will have to: 1) drop the entire RCU locking from its patches (making all previous rcu discussions and fixes void) those discussions only made sense if applied to _my_ patch, not Christoph's patches as long as you pretend to sleep in any of his mmu notifier methods like invalidate_range_*. 2) probably modify the linux VM to replace the i_mmap_lock and perhaps PT lock with a mutex (see Nick's comments for details) I'm unconvinced both the main linux VM and the mmu notifier should be changed like this just to support xpmem. All non-sleeping users don't need that. Nevertheless I'm fully welcome to support xpmem (and it's not my call nor my interest to comment if allocating skbs in try_to_unmap in order to unpin pages is workable, let's assume it's workable for the sake of this discussion) with a new config option that will also alter how the core VM works, in order to fully support the sleeping users for filebacked mappings. This will also create less confusion in the registration. With Christoph's one-config-option-fits-all you had to half register into the mmu notifier (the sleeping calls, so not invalidate_page) and full register in the external rmap notifier, and I had to only half register into the mmu notifier (not range_begin) and not register in the rmap external notifier. With two separate config options for sleeping and non sleeping users, I'll 100% register in the mmu notifier methods, and the non-sleeping users will 100% register the xpmem methods. You won't have to have designed the mmu notifier patches to understand how to use it. In theory both KVM and GRU are free to use the xpmem methods too (the invalidate_page will be page_t based instead of [mm,addr] based, but that's possible to handle with KVM changes if one wants to), but if a distro only wants to support the sleeping users in their binary kernel images, they won't be forced to alter how the VM works to do that. 
If there's agreement that the VM should alter its locking from spinlock to mutex for its own good, then Christoph's one-config-option-fits-all becomes a lot more appealing (replacing RCU with a mutex in the mmu notifier list registration locking isn't my main worry and the non-sleeping-users may be ok to live with it). From holt at sgi.com Wed Feb 20 04:24:24 2008 From: holt at sgi.com (Robin Holt) Date: Wed, 20 Feb 2008 06:24:24 -0600 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220120324.GW7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220113313.GD11364@sgi.com> <20080220120324.GW7128@v2.random> Message-ID: <20080220122424.GE11364@sgi.com> On Wed, Feb 20, 2008 at 01:03:24PM +0100, Andrea Arcangeli wrote: > I'm unconvinced both the main linux VM and the mmu notifier should be > changed like this just to support xpmem. All non-sleeping users don't > need that. Nevertheless I'm fully welcome to support xpmem (and it's > not my call nor my interest to comment if allocating skbs in > try_to_unmap in order to unpin pages is workable, let's assume it's > workable for the sake of this discussion) with a new config option > that will also alter how the core VM works, in order to fully support > the sleeping users for filebacked mappings. We do not need to do any allocation in the messaging layer, all structures used for messaging are allocated at module load time. The allocation discussions we had early on were about trying to rearrange your notifiers to allow a separate worker thread to do the invalidate and then the main thread would spin waiting for the worker to complete. That was canned by moving your notifier to before the lock was grabbed which led us to the point of needing a _begin and _end. > This will also create less confusion in the registration. 
With > Christoph's one-config-option-fits-all you had to half register into > the mmu notifier (the sleeping calls, so not invalidate_page) and full > register in the external rmap notifier, and I had to only half > register into the mmu notifier (not range_begin) and not register in > the rmap external notifier. > > With two separate config options for sleeping and non sleeping users, > I'll 100% register in the mmu notifier methods, and the non-sleeping > users will 100% register the xpmem methods. You won't have to have > designed the mmu notifier patches to understand how to use it. So, fundamentally, how would they be different? Would we be required to add another notifier list to the mm and have two separate callout points? Reduction would end up with the same half-registered half-not-registered situation you point out above. Then further reduction would lead to the elimination of the callouts you have just proposed and using the _begin/_end callouts and we are back to Christoph's current patch. Robin From andrea at qumranet.com Wed Feb 20 04:32:36 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 13:32:36 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220122424.GE11364@sgi.com> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220113313.GD11364@sgi.com> <20080220120324.GW7128@v2.random> <20080220122424.GE11364@sgi.com> Message-ID: <20080220123235.GX7128@v2.random> On Wed, Feb 20, 2008 at 06:24:24AM -0600, Robin Holt wrote: > We do not need to do any allocation in the messaging layer, all > structures used for messaging are allocated at module load time. 
> The allocation discussions we had early on were about trying to > rearrange you notifiers to allow a seperate worker thread to do the > invalidate and then the main thread would spin waiting for the worker to > complete. That was canned by the moving your notifier to before the > lock was grabbed which led us to the point of needing a _begin and _end. I thought you called some net/* function inside the mmu notifier methods. Those always require several ram allocations internally. > So, fundamentally, how would they be different? Would we be required to > add another notifier list to the mm and have two seperate callout > points? Reduction would end up with the same half-registered > half-not-registered situation you point out above. Then further > reduction would lead to the elimination of the callouts you have just > proposed and using the _begin/_end callouts and we are back to > Christoph's current patch. Did you miss Nick's argument that we'd need to change some VM lock to mutex and solve lock issues first? Are you implying mutex are more efficient for the VM? (you may seek support from preempt-rt folks at least) or are you implying the VM would better run slower with mutex in order to have a single config option? 
From holt at sgi.com Wed Feb 20 05:15:15 2008 From: holt at sgi.com (Robin Holt) Date: Wed, 20 Feb 2008 07:15:15 -0600 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220123235.GX7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220113313.GD11364@sgi.com> <20080220120324.GW7128@v2.random> <20080220122424.GE11364@sgi.com> <20080220123235.GX7128@v2.random> Message-ID: <20080220131515.GF11364@sgi.com> On Wed, Feb 20, 2008 at 01:32:36PM +0100, Andrea Arcangeli wrote: > On Wed, Feb 20, 2008 at 06:24:24AM -0600, Robin Holt wrote: > > We do not need to do any allocation in the messaging layer, all > > structures used for messaging are allocated at module load time. > > The allocation discussions we had early on were about trying to > > rearrange you notifiers to allow a seperate worker thread to do the > > invalidate and then the main thread would spin waiting for the worker to > > complete. That was canned by the moving your notifier to before the > > lock was grabbed which led us to the point of needing a _begin and _end. > > I thought you called some net/* function inside the mmu notifier > methods. Those always require several ram allocations internally. Nope, that was the discussions with the IB folks. We only use XPC and both the messages we send and the XPC internals do not need to allocate. > > So, fundamentally, how would they be different? Would we be required to > > add another notifier list to the mm and have two seperate callout > > points? Reduction would end up with the same half-registered > > half-not-registered situation you point out above. Then further > > reduction would lead to the elimination of the callouts you have just > > proposed and using the _begin/_end callouts and we are back to > > Christoph's current patch. 
> > Did you miss Nick's argument that we'd need to change some VM lock to > mutex and solve lock issues first? Are you implying mutex are more > efficient for the VM? (you may seek support from preempt-rt folks at > least) or are you implying the VM would better run slower with mutex > in order to have a single config option? That would be if we needed to support file backed mappings and hugetlbfs mappings. Currently (and for the last 6 years), XPMEM has not supported either of those. I don't view either as being a realistic possibility, but it is certainly something we would need to address before either could be supported. Robin From ActicProject at OilGasNews.prserv.net Wed Feb 20 06:21:49 2008 From: ActicProject at OilGasNews.prserv.net (Oil & Gas News) Date: Wed, 20 Feb 2008 09:21:49 -0500 Subject: [ofa-general] Arctic Circle's oil-rich seabeds - Trillion Dollar Potential Message-ID: An HTML attachment was scrubbed... URL: From holt at sgi.com Wed Feb 20 06:41:55 2008 From: holt at sgi.com (Robin Holt) Date: Wed, 20 Feb 2008 08:41:55 -0600 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220103942.GU7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> Message-ID: <20080220144155.GI11391@sgi.com> On Wed, Feb 20, 2008 at 11:39:42AM +0100, Andrea Arcangeli wrote: > XPMEM simply can't use RCU for the registration locking if it wants to > schedule inside the mmu notifier calls. So I guess it's better to add Whoa there. In Christoph's patch, we did not use rcu for the list. It was a simple hlist_head. The list manipulations were done under down_write(&current->mm->mmap_sem) and would therefore not be racy. All the callout locations are already acquiring the mmap_sem at least readably, so we should be safe. Maybe I missed a race somewhere.
Thanks, Robin From andrea at qumranet.com Wed Feb 20 07:34:09 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 20 Feb 2008 16:34:09 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220144155.GI11391@sgi.com> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220144155.GI11391@sgi.com> Message-ID: <20080220153409.GA7128@v2.random> On Wed, Feb 20, 2008 at 08:41:55AM -0600, Robin Holt wrote: > On Wed, Feb 20, 2008 at 11:39:42AM +0100, Andrea Arcangeli wrote: > > XPMEM simply can't use RCU for the registration locking if it wants to > > schedule inside the mmu notifier calls. So I guess it's better to add > > Whoa there. In Christoph's patch, we did not use rcu for the list. It > was a simple hlist_head. The list manipulations were done under > down_write(&current->mm->mmap_sem) and would therefore not be racy. All > the callout locations are already acquiring the mmap_sem at least > readably, so we should be safe. Maybe I missed a race somewhere. You missed quite a few, see when atomic=1 and when mmu_rmap_notifier is invoked for example. From sean.hefty at intel.com Wed Feb 20 09:05:34 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 20 Feb 2008 09:05:34 -0800 Subject: [ofa-general] Add BuildRequires for libibverbs 1.1 to RPM spec file In-Reply-To: References: <20082151257.EnpeC3aKZ1UBvISb@cisco.com><20082151257.VlKtl1Q6lnjta4gM@cisco.com><000101c8732e$68c797e0$3c98070a@amr.corp.intel.com> Message-ID: <000001c873e2$cf337bc0$84e1180a@amr.corp.intel.com> Thanks - applied all 6 recent patches from you. From a-allew at 4d-konsult.se Wed Feb 20 10:42:06 2008 From: a-allew at 4d-konsult.se (Michel Hunter) Date: Wed, 20 Feb 2008 20:42:06 +0200 Subject: [ofa-general] Chatting online Message-ID: <993945790.58440206368786@4d-konsult.se> Hello! I am bored today.
I am nice girl that would like to chat with you. Email me at Hannah at TheHealCare.info only, because I am using my friend's email to write this. Don't miss some of my naughty pictures. From mhanafi at csc.com Wed Feb 20 10:11:34 2008 From: mhanafi at csc.com (Mahmoud Hanafi) Date: Wed, 20 Feb 2008 13:11:34 -0500 Subject: [ofa-general] ofed1.1 and EL4 2.6.9-67.0.4 Message-ID: I am trying to build ofed1.1 with RedHat EL4 kernel (2.6.9-67.0.4). I get build lots of build errors. Can ofed1.1 be build with this kernel? if so is there a trick that i am missing. Thanks, Mahmoud Hanafi Sr. System Administrator CSC HPC COE Bld. 676 2435 Fifth Street WPAFB, Ohio 45433 (937) 255-1536 Computer Sciences Corporation Registered Office: 2100 East Grand Avenue, El Segundo California 90245, USA Registered in USA No: C-489-59 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- This is a PRIVATE message. If you are not the intended recipient, please delete without copying and kindly advise us by e-mail of the mistake in delivery. NOTE: Regardless of content, this e-mail shall not operate to bind CSC to any order or other contract unless pursuant to explicit written agreement or government initiative expressly permitting the use of e-mail for such purpose. ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From kliteyn at mellanox.co.il Wed Feb 20 10:51:56 2008 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 20 Feb 2008 20:51:56 +0200 Subject: [ofa-general] nightly osm_sim report 2008-02-20:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2008-02-19 OpenSM git rev = ibutils git rev = Total=400 Pass=0 Fail=400 Pass: Failures: 30 Stability IS1-16.topo 30 Pkey IS1-16.topo 30 OsmTest IS1-16.topo 30 OsmStress IS1-16.topo 30 Multicast IS1-16.topo 30 LidMgr IS1-16.topo 10 Stability IS3-loop.topo 10 Stability IS3-128.topo 10 Pkey IS3-128.topo 10 OsmTest IS3-loop.topo 10 OsmTest IS3-128.topo 10 OsmStress IS3-128.topo 10 Multicast IS3-loop.topo 10 Multicast IS3-128.topo 10 LidMgr IS3-128.topo 10 FatTree merge-roots-4-ary-2-tree.topo 10 FatTree merge-root-4-ary-3-tree.topo 10 FatTree gnu-stallion-64.topo 10 FatTree blend-4-ary-2-tree.topo 10 FatTree RhinoDDR.topo 10 FatTree FullGnu.topo 10 FatTree 4-ary-2-tree.topo 10 FatTree 2-ary-4-tree.topo 10 FatTree 12-node-spaced.topo 10 FTreeFail 4-ary-2-tree-missing-sw-link.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 10 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 10 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo From dwsherwebm at sherweb.com Wed Feb 20 11:05:22 2008 From: dwsherwebm at sherweb.com (Duncan Gilmore) Date: Wed, 20 Feb 2008 21:05:22 +0200 Subject: [ofa-general] Want to be a hero in bed? Message-ID: <01c87404$4e4b7b80$41d2715c@dwsherwebm> Are U Tired with erectile dysfunction? Enhance your sexual life now! Want to be ready for sex in few minutes? Reproductive and ED problems solution http://geocities.com/racheljacobs941/ We are verified by VISA. Confidential purchase. 
From sashak at voltaire.com Wed Feb 20 11:40:27 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 20 Feb 2008 19:40:27 +0000 Subject: [ofa-general] Re: nightly osm_sim report 2008-02-20:normal completion In-Reply-To: References: Message-ID: <20080220194027.GG12954@sashak.voltaire.com> Hi Yevgeny, On 20:51 Wed 20 Feb , kliteyn at mellanox.co.il wrote: > OSM Simulation Regression Summary > > [Generated mail - please do NOT reply] > > > OpenSM binary date = 2008-02-19 > OpenSM git rev = > ibutils git rev = > > > Total=400 Pass=0 Fail=400 Is it an artifact or we have some serious problem? Sasha From kliteyn at mellanox.co.il Wed Feb 20 11:35:25 2008 From: kliteyn at mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 20 Feb 2008 21:35:25 +0200 Subject: [ofa-general] RE: nightly osm_sim report 2008-02-20:normal completion In-Reply-To: <20080220194027.GG12954@sashak.voltaire.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C9035EA86C@mtlexch01.mtl.com> It's a problem in my test base - please ignore Regards, Yevgeny Kliteynik Mellanox Technologies LTD Tel: +972-4-909-7200 ext: 394 Fax: +972-4-959-3245 P.O. Box 586 Yokneam 20692 ISRAEL -----Original Message----- From: Sasha Khapyorsky [mailto:sashak at voltaire.com] Sent: Wednesday, February 20, 2008 9:40 PM To: Yevgeny Kliteynik Cc: Eitan Zahavi; general at lists.openfabrics.org Subject: Re: nightly osm_sim report 2008-02-20:normal completion Hi Yevgeny, On 20:51 Wed 20 Feb , kliteyn at mellanox.co.il wrote: > OSM Simulation Regression Summary > > [Generated mail - please do NOT reply] > > > OpenSM binary date = 2008-02-19 > OpenSM git rev = > ibutils git rev = > > > Total=400 Pass=0 Fail=400 Is it an artifact or we have some serious problem? 
Sasha From roger at terascala.com Wed Feb 20 11:50:26 2008 From: roger at terascala.com (Roger Spellman) Date: Wed, 20 Feb 2008 14:50:26 -0500 Subject: [ofa-general] Recalculated Queue Sizes caused mthca Catastrophic Errors Message-ID: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> Hello, I have a Mellanox MT25204, running the latest firmware. A few days ago, I was getting Catastrophic errors from the firmware. I found the following in the Release Notes for RHEL-5: Hardware testing for the Mellanox MT25204 has revealed that an internal error occurs under certain high-load conditions. When the ib_mthca driver reports a catastrophic error on this hardware, it is usually related to an insufficient completion queue depth relative to the number of outstanding work requests generated by the user application. Increasing my CQ size did indeed solve the problem. So, I wanted to understand why. I think the reason may be a bug in the mthca code that comes with OFED. My code creates a CQ of size 2072, and a SQ of size 2056, and a RQ of size 16. As you can see, CQ = SQ + RQ. So, I should never overflow my CQ. The Driver raises each of these to the next power of two. So, we get a CQ of size 4096, a SQ of size 4096, and an RQ of size 16. As you can see, CQ < SQ + RQ, so it is possible to overflow the CQ. I don't think that this should cause the Firmware to generate a Catastrophic error (sounds like a bug in the firmware, if you ask me). The CQ's size is increased in the function mthca_create_cq() in the file mthca_provider.c. The SQ and RQ sizes are increased in the function mthca_alloc_qp_common() in the file mthca_qp.c if and only if the function mthca_is_memfree() returns TRUE; this function returns TRUE when MTHCA_FLAG_MEMFREE is set in dev->mthca_flags, which it is for the latest firmware release. As I said, doubling the queue size solves the problem. However, it would be better if the mthca driver did not create the problem in the first place. 
If a QP is being created such that CQ >= SQ + RQ, then that relationship should be maintained. Do others agree with me? From louellen at sympatico.ca Wed Feb 20 11:59:34 2008 From: louellen at sympatico.ca (Salvador Price) Date: Wed, 20 Feb 2008 20:59:34 +0100 Subject: [ofa-general] Top Online Software Store, Low Prices Message-ID: <994853228.38477463235172@sympatico.ca> Industry standard software for less than cheapOur main goal is to supply our customers with legal and cheap software for PC and Mac. We can help to find necessary software products or computer solutions whether you are a corporate buyer, small company owner or looking for some software products for your own PC.View all software Most popular products:*Microsoft Office 2007 Enterprise: Retail price for now - $899.00; Our only for today - $79.95 *Microsoft Windows Vista Business: Retail price today - $299.00; Our only today - $79.95 *Microsoft Office XP Professional: Retail price for now - $499.00; Our only for today - $49.95 *Adobe Audition 2.0: Retail price for this time - $349.00; Our just - $49.95 *Corel Designer 10.0: Retail price for now - $310.00; Our only - $39.95 *Adobe Pagemaker V 7.01 PC: Retail price now - $400.00; Our only today - $49.95 *Macromedia Dreamweaver 8: Retail price this day - $399.99; Our just - $49.95 *Autodesk Building Systems 2006: Retail price for now - $5995.00; Our now just - $129.95Check what we have to propose -------------- next part -------------- An HTML attachment was scrubbed... URL: From usby at bodyzen.com Wed Feb 20 12:54:15 2008 From: usby at bodyzen.com (Graciela Velez) Date: Wed, 20 Feb 2008 15:54:15 -0500 Subject: [ofa-general] i know where to buydrugs! Message-ID: <01c873d8$d7c1ed80$9dec4bc8@usby> huh? r u still don't know where to buydrugs? it's easy, m8. just visit http://thewomenter.com/ (just clickhere) -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From steiner at sgi.com Wed Feb 20 13:03:39 2008 From: steiner at sgi.com (Jack Steiner) Date: Wed, 20 Feb 2008 15:03:39 -0600 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220103942.GU7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> Message-ID: <20080220210339.GA25659@sgi.com> On Wed, Feb 20, 2008 at 11:39:42AM +0100, Andrea Arcangeli wrote: > Given Nick's comments I ported my version of the mmu notifiers to > latest mainline. There are no known bugs AFIK and it's obviously safe > (nothing is allowed to schedule inside rcu_read_lock taken by > mmu_notifier() with my patch). > .... I ported the GRU driver to use the latest #v6 patch and ran a series of tests on it using our system simulator. The simulator is slow so true stress or swapping is not possible - at least within a finite amount of time. Functionally, the #v6 patch seems to work for the GRU. However, I did notice two significant differences that make the #v6 performance worse for the GRU than Christoph's patch. I think one difference is easily fixable but the other is more difficult: - the location of the mmu_notifier_release() callout is at a different place in the 2 patches. Christoph has the callout BEFORE the call to unmap_vmas() whereas you have it AFTER. The net result is that the GRU does a LOT of 1-page TLB flushes during process teardown. These flushes are not done with Christops's patch. - the range callouts in Christoph's patch benefit the GRU because multiple TLB entries can be flushed with a single GRU instruction (the GRU hardware supports a range flush using a vaddr & length). The #v6 patch does a TLB flush for each page in the range. Flushing on the GRU is slow so being able to flush multiple pages with a single request is a benefit. 
Seems like the latter difference could be significant for other users of mmu notifiers. --- jack From dwshakeemm at shakeem.com Wed Feb 20 13:27:40 2008 From: dwshakeemm at shakeem.com (Marci Sewell) Date: Wed, 20 Feb 2008 23:27:40 +0200 Subject: [ofa-general] Medications that you need. Message-ID: <01c87418$2f589230$d454ef58@dwshakeemm> Buy Must Have medications at Canada based pharmacy. No prescription at all! Save your money, buy pills immediately. Same quality! http://geocities.com/winfredfitzgerald383/ We provide confidential and secure purchase! From swise at opengridcomputing.com Wed Feb 20 13:37:50 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 20 Feb 2008 15:37:50 -0600 Subject: [ofa-general] Recalculated Queue Sizes caused mthca Catastrophic Errors In-Reply-To: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> References: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> Message-ID: <47BC9DAE.1030308@opengridcomputing.com> Roger Spellman wrote: > Hello, > I have a Mellanox MT25204, running the latest firmware. A few days ago, > I was getting Catastrophic errors from the firmware. I found the > following in the Release Notes for RHEL-5: > > Hardware testing for the Mellanox MT25204 has revealed > that an internal error occurs under certain high-load > conditions. When the ib_mthca driver reports a catastrophic > error on this hardware, it is usually related to an insufficient > completion queue depth relative to the number of outstanding work > > requests generated by the user application. > > Increasing my CQ size did indeed solve the problem. So, I wanted to > understand why. I think the reason may be a bug in the mthca code that > comes with OFED. > > My code creates a CQ of size 2072, and a SQ of size 2056, and a RQ of > size 16. As you can see, CQ = SQ + RQ. So, I should never overflow my > CQ. > > The Driver raises each of these to the next power of two. 
So, we get a > CQ of size 4096, a SQ of size 4096, and an RQ of size 16. > > As you can see, CQ < SQ + RQ, so it is possible to overflow the CQ. > > I don't think that this should cause the Firmware to generate a > Catastrophic error (sounds like a bug in the firmware, if you ask me). > > The CQ's size is increased in the function mthca_create_cq() in the file > mthca_provider.c. The SQ and RQ sizes are increased in the function > mthca_alloc_qp_common() in the file mthca_qp.c if and only if the > function mthca_is_memfree() returns TRUE; this function returns TRUE > when MTHCA_FLAG_MEMFREE is set in dev->mthca_flags, which it is for the > latest firmware release. > > As I said, doubling the queue size solves the problem. However, it > would be better if the mthca driver did not create the problem in the > first place. If a QP is being created such that CQ >= SQ + RQ, then > that relationship should be maintained. Do others agree with me? The driver cannot really ensure this because the CQ might be used for more than one QP. But this issue still raises questions in my mind how an application _should_ handle this condition? IE If the app is required to ensure the CQ is big enough, how does it deal with the case where the driver allocates a bigger QP? Resizing the QP after creating the QP and discovering via a query that the QP is too big for the CQs? Steve. From okir at lst.de Wed Feb 20 13:42:47 2008 From: okir at lst.de (Olaf Kirch) Date: Wed, 20 Feb 2008 22:42:47 +0100 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon Message-ID: <200802202056.32451.okir@lst.de> From 5f56547be86ce91dac0eed0fd6112c2277c92250 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Wed, 20 Feb 2008 20:43:48 +0100 Subject: [PATCH] ib_mthca: avoid recycling old FMR R_Keys too soon When a FMR is unmapped, ib_mthca resets the map count to 0, and clears the upper part of the R_Key which is used as the sequence counter. 
This poses a problem for RDS, which uses ib_fmr_unmap as a fence operation. RDS assumes that after issuing an unmap, the old R_Keys will be invalid for a "reasonable" period of time. For instance, Oracle processes uses shared memory buffers allocated from a pool of buffers. When a process dies, we want to reclaim these buffers - but we must make sure there are no pending RDMA operations to/from those buffers. The only way to achieve that is by using unmap and sync the TPT. However, when the sequence count is reset on unmap, there is a high likelihood that a new mapping will be given the same R_Key that was issued a few milliseconds ago. To prevent this, we suggest to not reset the sequence count when unmapping a FMR. This patch addresses the issue on Arbel HCAs only - someone with a better understanding of OFED driver internals may want to take this patch and do it properly :-) Signed-off-by: Olaf Kirch --- drivers/infiniband/hw/mthca/mthca_mr.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index aa6c70a..09740a1 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -835,10 +835,15 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) if (!fmr->maps) return; + /* Do not clear the sequence count on unmap, so that we cycle + * through all sequence numbers before issuing the same R_Key + * again. 
*/ +#if 0 key = arbel_key_to_hw_index(fmr->ibmr.lkey); key &= dev->limits.num_mpts - 1; key = adjust_key(dev, key); fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); +#endif fmr->maps = 0; -- 1.5.4.rc3 -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax From weiny2 at llnl.gov Wed Feb 20 14:02:44 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 20 Feb 2008 14:02:44 -0800 Subject: [ofa-general] [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Fix print of Transaction ID Message-ID: <20080220140244.3e9f2bdc.weiny2@llnl.gov> >From 55de3f5d7e6f606c4ba98f136362ffa1b5559334 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 20 Feb 2008 11:25:42 -0800 Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Fix print of Transaction ID Signed-off-by: Ira K. Weiny --- opensm/libvendor/osm_vendor_ibumad.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 061e5b8..e848f1a 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -195,7 +195,7 @@ put_madw(osm_vendor_t * p_vend, osm_madw_t * p_madw, ib_net64_t * tid) osm_log(p_vend->p_log, OSM_LOG_ERROR, "put_madw: ERR 5402: " "evicting entry %p (tid was 0x%" PRIx64 ")\n", old_lru, - old_tid); + cl_ntoh64(old_tid)); } static void -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0001-opensm-libvendor-osm_vendor_ibumad.c-Fix-print-of-T.patch Type: application/octet-stream Size: 867 bytes Desc: not available URL: From weiny2 at llnl.gov Wed Feb 20 14:02:45 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 20 Feb 2008 14:02:45 -0800 Subject: [ofa-general] [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL Message-ID: <20080220140245.13d706f2.weiny2@llnl.gov> Sasha, I actually hit these when I was testing with a low OSM_UMAD_MAX_PENDING value. I was a bit unsure of what to do in these cases if p_physp was NULL. What I do in this patch seems reasonable as the fabric does route but perhaps you could double check me? Thanks, Ira >From 310a6cceca946fcc86f7bec28dfdeba77d011fc5 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 20 Feb 2008 13:53:51 -0800 Subject: [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL Signed-off-by: Ira K. Weiny --- opensm/opensm/osm_node.c | 2 ++ opensm/opensm/osm_ucast_updn.c | 4 ++++ 2 files changed, 6 insertions(+), 0 deletions(-) diff --git a/opensm/opensm/osm_node.c b/opensm/opensm/osm_node.c index 85ea3c9..843f7c8 100644 --- a/opensm/opensm/osm_node.c +++ b/opensm/opensm/osm_node.c @@ -285,6 +285,8 @@ osm_node_t *osm_node_get_remote_node(IN osm_node_t * const p_node, osm_physp_t *p_remote_physp; p_physp = osm_node_get_physp_ptr(p_node, port_num); + if (!p_physp) + return (NULL); if (!osm_physp_has_any_link(p_physp)) return (NULL); diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c index 76b94cb..a7d61aa 100644 --- a/opensm/opensm/osm_ucast_updn.c +++ b/opensm/opensm/osm_ucast_updn.c @@ -377,6 +377,10 @@ updn_subn_rank(IN unsigned num_guids, /* Current port fetched in order to get remote side */ p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num); + + if (!p_physp) + continue; + p_remote_physp = p_physp->p_remote_physp; /* -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0002-Fix-2-potential-core-dumps-now-that-osm_node_get_phy.patch Type: application/octet-stream Size: 1324 bytes Desc: not available URL: From weiny2 at llnl.gov Wed Feb 20 14:02:47 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 20 Feb 2008 14:02:47 -0800 Subject: [ofa-general] [PATCH] opensm/libvendor/osm_vendor_ibumad.c: add transaction ID printing to error messages Message-ID: <20080220140247.2324aaba.weiny2@llnl.gov> >From 44eeee0549b6571b6a58cd6b6789aed9c2ab1750 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 20 Feb 2008 13:55:47 -0800 Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: add transaction ID printing to error messages Signed-off-by: Ira K. Weiny --- opensm/libvendor/osm_vendor_ibumad.c | 10 ++++++---- 1 files changed, 6 insertions(+), 4 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index e848f1a..679f06a 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -360,8 +360,9 @@ static void *umad_receiver(void *p_ptr) osm_log(p_vend->p_log, OSM_LOG_ERROR, "umad_receiver: ERR 5412: " "Failed to obtain request madw for timed out MAD" - "(method=0x%X attr=0x%X) -- dropping\n", - mad->method, cl_ntoh16(mad->attr_id)); + "(method=0x%X attr=0x%X tid=0x%"PRIx64") -- dropping\n", + mad->method, cl_ntoh16(mad->attr_id), + cl_ntoh64(mad->trans_id)); } else { p_req_madw->status = IB_TIMEOUT; /* cb frees req_madw */ @@ -384,8 +385,9 @@ static void *umad_receiver(void *p_ptr) osm_log(p_vend->p_log, OSM_LOG_ERROR, "umad_receiver: ERR 5413: " "Failed to obtain request madw for received MAD" - "(method=0x%X attr=0x%X) -- dropping\n", - mad->method, cl_ntoh16((mad)->attr_id)); + "(method=0x%X attr=0x%X tid=0x%"PRIx64") -- dropping\n", + mad->method, cl_ntoh16((mad)->attr_id), + cl_ntoh64(mad->trans_id)); osm_mad_pool_put(p_bind->p_mad_pool, p_madw); continue; } -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0003-opensm-libvendor-osm_vendor_ibumad.c-add-transactio.patch Type: application/octet-stream Size: 1615 bytes Desc: not available URL: From meier3 at llnl.gov Wed Feb 20 14:27:04 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Wed, 20 Feb 2008 14:27:04 -0800 Subject: [ofa-general] [PATCH] opensm: console the new console module with original IO code Message-ID: <47BCA938.60108@llnl.gov> Sasha, Here are the new modules. No new code, just split from the original, plus misc. changes necessary to get it to build. The updated Makefile.am was included in the previous patch, so these patches depend on each other. -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: 0002-opensm-console-the-new-console-module-with-original.patch URL: From meier3 at llnl.gov Wed Feb 20 14:27:01 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Wed, 20 Feb 2008 14:27:01 -0800 Subject: [ofa-general] [PATCH] opensm: console split console into two modules Message-ID: <47BCA935.4030907@llnl.gov> Sasha, This is the first of two patches, which depend on each other. I split the osm_console.c code into two modules, one containing the console commands, and the other (new osm_console_io.c) containing the console setup, tear down and connection specific code. The primary purpose of the separation is to provide an isolated and decoupled place to add the implementation of the new SSL connection. -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... 
Name: 0001-opensm-console-split-console-into-two-modules.patch URL: From sean.hefty at intel.com Wed Feb 20 14:22:53 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 20 Feb 2008 14:22:53 -0800 Subject: [ofa-general] Recalculated Queue Sizes caused mthca CatastrophicErrors In-Reply-To: <47BC9DAE.1030308@opengridcomputing.com> References: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> <47BC9DAE.1030308@opengridcomputing.com> Message-ID: <000301c8740f$226a9180$ff0da8c0@amr.corp.intel.com> >But this issue still raises questions in my mind how an application >_should_ handle this condition? IE If the app is required to ensure the >CQ is big enough, how does it deal with the case where the driver >allocates a bigger QP? Resizing the QP after creating the QP and >discovering via a query that the QP is too big for the CQs? The app doesn't have to post WRs until the QP is full. If it does plan on doing that, then I would say that the app should resize the CQ. - Sean From rdreier at cisco.com Wed Feb 20 15:21:48 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 20 Feb 2008 15:21:48 -0800 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one In-Reply-To: <20080220055744.GC31955@cs181133002.pp.htv.fi> (Adrian Bunk's message of "Wed, 20 Feb 2008 07:57:44 +0200") References: <20080219225900.GR31955@cs181133002.pp.htv.fi> <20080220055744.GC31955@cs181133002.pp.htv.fi> Message-ID: > No, 51af33e8 was for a similar same bug 400 lines below this bug... Heh, sorry. Glenn -- please review Adrian's patches and let me know which ones are good to apply. 
From gstreiff at NetEffect.com Wed Feb 20 15:27:45 2008 From: gstreiff at NetEffect.com (Glenn Streiff) Date: Wed, 20 Feb 2008 17:27:45 -0600 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one In-Reply-To: Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> > -----Original Message----- > From: Roland Dreier [mailto:rdreier at cisco.com] > Sent: Wednesday, February 20, 2008 5:22 PM > To: Adrian Bunk > Cc: Faisal Latif; Glenn Streiff; linux-kernel at vger.kernel.org; > general at lists.openfabrics.org > Subject: Re: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: > fix off-by-one > > > > No, 51af33e8 was for a similar same bug 400 lines below this bug... > > Heh, sorry. > > Glenn -- please review Adrian's patches and let me know which ones are > good to apply. > Sweeping through them right now. Should have something for you tonight. Glenn From swise at opengridcomputing.com Wed Feb 20 15:31:48 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 20 Feb 2008 17:31:48 -0600 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Shift calculation wrong for single sge entries. Message-ID: <20080220233148.25301.54457.stgit@dell3.ogc.int> RDMA/cxgb3: Shift calculation wrong for single sge entries. A single entry (addr 0x10001000, size 0x2000) will get converted to page address 0x10000000 with a page size of 0x4000. The code as it stands doesn't address the single buffer case, but in fact it allows the subsequent single-buffer special case to be eliminated entirely. Because the mask now includes the (page adjusted) starting and ending addresses, the general case works for the single buffer case as well. 
Signed-off-by: Bryan Rosenburg Acked-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch_mem.c | 10 ++-------- 1 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index 73bfd16..b8797c6 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -136,14 +136,8 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list, /* Find largest page shift we can use to cover buffers */ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) - if (num_phys_buf > 1) { - if ((1ULL << *shift) & mask) - break; - } else - if (1ULL << *shift >= - buffer_list[0].size + - (buffer_list[0].addr & ((1ULL << *shift) - 1))) - break; + if ((1ULL << *shift) & mask) + break; buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); buffer_list[0].addr &= ~0ull << *shift; From rdreier at cisco.com Wed Feb 20 15:32:59 2008 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 20 Feb 2008 15:32:59 -0800 Subject: [ofa-general] Recalculated Queue Sizes caused mthca Catastrophic Errors In-Reply-To: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> (Roger Spellman's message of "Wed, 20 Feb 2008 14:50:26 -0500") References: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> Message-ID: > My code creates a CQ of size 2072, and a SQ of size 2056, and a RQ of > size 16. As you can see, CQ = SQ + RQ. So, I should never overflow my > CQ. > > The Driver raises each of these to the next power of two. So, we get a > CQ of size 4096, a SQ of size 4096, and an RQ of size 16. > > As you can see, CQ < SQ + RQ, so it is possible to overflow the CQ. > > I don't think that this should cause the Firmware to generate a > Catastrophic error (sounds like a bug in the firmware, if you ask me). Yes, as the release notes mention, it appears to be a hardware/firmware bug that you get a catastrophic error. 
However, overflowing a CQ will generate a CQ overrun asynchronous error in the best case. > As I said, doubling the queue size solves the problem. However, it > would be better if the mthca driver did not create the problem in the > first place. If a QP is being created such that CQ >= SQ + RQ, then > that relationship should be maintained. Do others agree with me? I don't see any problem in rounding up the queue sizes. Just because you got bigger SQ and RQ sizes than you asked for doesn't mean you have to use them -- it is the applications responsibility to avoid overrunning a CQ. For the HCA in question, all the queues must be a power of 2 in size; the driver can't give you a size smaller than you asked for, so there's not really anything better we could do. - R. From swise at opengridcomputing.com Wed Feb 20 15:36:10 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 20 Feb 2008 17:36:10 -0600 Subject: [ofa-general] Recalculated Queue Sizes caused mthca Catastrophic Errors In-Reply-To: References: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> Message-ID: <47BCB96A.8090407@opengridcomputing.com> Roland Dreier wrote: > > My code creates a CQ of size 2072, and a SQ of size 2056, and a RQ of > > size 16. As you can see, CQ = SQ + RQ. So, I should never overflow my > > CQ. > > > > The Driver raises each of these to the next power of two. So, we get a > > CQ of size 4096, a SQ of size 4096, and an RQ of size 16. > > > > As you can see, CQ < SQ + RQ, so it is possible to overflow the CQ. > > > > I don't think that this should cause the Firmware to generate a > > Catastrophic error (sounds like a bug in the firmware, if you ask me). > > Yes, as the release notes mention, it appears to be a > hardware/firmware bug that you get a catastrophic error. However, > overflowing a CQ will generate a CQ overrun asynchronous error in the > best case. > > > As I said, doubling the queue size solves the problem. 
However, it > > would be better if the mthca driver did not create the problem in the > > first place. If a QP is being created such that CQ >= SQ + RQ, then > > that relationship should be maintained. Do others agree with me? > > I don't see any problem in rounding up the queue sizes. Just because > you got bigger SQ and RQ sizes than you asked for doesn't mean you > have to use them -- it is the applications responsibility to avoid > overrunning a CQ. For the HCA in question, all the queues must be a > power of 2 in size; the driver can't give you a size smaller than you > asked for, so there's not really anything better we could do. > The driver could enforce the app's requested sizes even though the queues are bigger. But I think the correct answer is the app should just use the sizes it requested and flow control on that -or- resize the cq after creating the qp and use "posting until it fails" to flow control. However, for send/recvs, I'm sure the app has to do its own flow control anyway. Steve. From mrlagencym at nrlagency.com Wed Feb 20 16:45:34 2008 From: mrlagencym at nrlagency.com (Maritza Hale) Date: Wed, 20 Feb 2008 19:45:34 -0500 Subject: [ofa-general] Be Happy About Your Size Message-ID: <650102311.14192591664067@nrlagency.com> Dear openib-general at openib.orgSatisfy your lower like never before. Increase your cock. Make it thicker and harder with the product called VPXL. These patches are considered to be the safest enlargement method available. You don’t have to make expensive cock enlargement surgery or use other sometimes ineffective or even dangerous methods. Try our VPXL. It is absolutely the most potent patch you can buy. Order our VPXL and your problems with cock size will become history.http://geocities.com/delbertkane720/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From dwrespmechm at respmech.com Wed Feb 20 17:43:52 2008 From: dwrespmechm at respmech.com (Willa Mobley) Date: Wed, 20 Feb 2008 20:43:52 -0500 Subject: [ofa-general] {CasinoSubject} Message-ID: <01c87401$4d41b400$49c005be@dwrespmechm> Where to gamble online? Check the list of the games in Golden Gate Casino! Just download free software and play from the comfort of your home! Get started and receive $2400 welcome bonus! Among our advantages are: fast payouts, high degree of security, all around the clock customer support. These are few reasons why Golden Gate casino is so popular http://geocities.com/elliottjackson576 Simply try and you'll like it! From nickpiggin at yahoo.com.au Wed Feb 20 20:20:02 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Thu, 21 Feb 2008 15:20:02 +1100 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <20080220090035.GG11391@sgi.com> References: <20080215064859.384203497@sgi.com> <200802201451.46069.nickpiggin@yahoo.com.au> <20080220090035.GG11391@sgi.com> Message-ID: <200802211520.03529.nickpiggin@yahoo.com.au> On Wednesday 20 February 2008 20:00, Robin Holt wrote: > On Wed, Feb 20, 2008 at 02:51:45PM +1100, Nick Piggin wrote: > > On Wednesday 20 February 2008 14:12, Robin Holt wrote: > > > For XPMEM, we do not currently allow file backed > > > mapping pages from being exported so we should never reach this > > > condition. It has been an issue since day 1. We have operated with > > > that assumption for 6 years and have not had issues with that > > > assumption. The user of xpmem is MPT and it controls the communication > > > buffers so it is reasonable to expect this type of behavior. > > > > OK, that makes things simpler. > > > > So why can't you export a device from your xpmem driver, which > > can be mmap()ed to give out "anonymous" memory pages to be used > > for these communication buffers? 
> > Because we need to have heap and stack available as well. MPT does > not control all the communication buffer areas. I haven't checked, but > this is the same problem that IB will have. I believe they are actually > allowing any memory region be accessible, but I am not sure of that. Then you should create a driver that the user program can register and unregister regions of their memory with. The driver can do a get_user_pages to get the pages, and then you'd just need to set up some kind of mapping so that userspace can unmap pages / won't leak memory (and an exit_mm notifier I guess). Because you don't need to swap, you don't need coherency, and you are in control of the areas, then this seems like the best choice. It would allow you to use heap, stack, file-backed, anything. From npiggin at suse.de Wed Feb 20 20:42:56 2008 From: npiggin at suse.de (Nick Piggin) Date: Thu, 21 Feb 2008 05:42:56 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219234049.GA27856@sgi.com> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080219234049.GA27856@sgi.com> Message-ID: <20080221044256.GA15215@wotan.suse.de> On Tue, Feb 19, 2008 at 05:40:50PM -0600, Jack Steiner wrote: > On Wed, Feb 20, 2008 at 12:11:57AM +0100, Nick Piggin wrote: > > On Tue, Feb 19, 2008 at 02:58:51PM +0100, Andrea Arcangeli wrote: > > > On Tue, Feb 19, 2008 at 09:43:57AM +0100, Nick Piggin wrote: > > > > anything when changing the pte to be _more_ permissive, and I don't > > > > > > Note that in my patch the invalidate_pages in mprotect can be > > > trivially switched to a mprotect_pages with proper params. 
This will > > > prevent page faults completely in the secondary MMU (there will only > > > be tlb misses after the tlb flush just like for the core linux pte), > > > and it'll allow all the secondary MMU pte blocks (512/1024 at time > > > with my PT lock design) to be updated to have proper permissions > > > matching the core linux pte. > > > > Sorry, I realise I still didn't get this through my head yet (and also > > have not seen your patch recently). So I don't know exactly what you > > are doing... > > > > But why does _anybody_ (why does Christoph's patches) need to invalidate > > when they are going to be more permissive? This should be done lazily by > > the driver, I would have thought. > > > Agree. Although for most real applications, the performance difference > is probably negligible. But importantly, doing it that way means you share test coverage with the CPU TLB flushing code, and you don't introduce a new concept to the VM. So, it _has_ to be lazy flushing, IMO (as there doesn't seem to be a good reason otherwise). mprotect shouldn't really be a special case, because it still has to flush the CPU tlbs as well when restricting access. From npiggin at suse.de Wed Feb 20 20:47:04 2008 From: npiggin at suse.de (Nick Piggin) Date: Thu, 21 Feb 2008 05:47:04 +0100 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080220010941.GR7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> Message-ID: <20080221044704.GB15215@wotan.suse.de> On Wed, Feb 20, 2008 at 02:09:41AM +0100, Andrea Arcangeli wrote: > On Wed, Feb 20, 2008 at 12:11:57AM +0100, Nick Piggin wrote: > > Sorry, I realise I still didn't get this through my head yet (and also > > have not seen your patch recently). So I don't know exactly what you > > are doing... 
> > The last version was posted here: > > http://marc.info/?l=kvm-devel&m=120321732521533&w=2 > > > But why does _anybody_ (why does Christoph's patches) need to invalidate > > when they are going to be more permissive? This should be done lazily by > > the driver, I would have thought. > > This can be done lazily by the driver yes. The place where I've an > invalidate_pages in mprotect however can also become less permissive. That's OK, because we have to flush tlbs there too. > It's simpler to invalidate always and it's not guaranteed the > secondary mmu page fault is capable of refreshing the spte across a > writeprotect fault. I think we just have to make sure that it _can_ do writeprotect faults. AFAIKS, that will be possible if the driver registers a .page_mkwrite handler (actually not quite -- page_mkwrite is fairly crap, so I have a patch to merge it together with .fault so we get address information as well). Anyway, I really think we should do it that way. > In the future this can be changed to > mprotect_pages though, so no page fault will happen in the secondary > mmu. Possibly, but hopefully not needed for performance. Let's wait and see. From npiggin at suse.de Wed Feb 20 20:54:30 2008 From: npiggin at suse.de (Nick Piggin) Date: Thu, 21 Feb 2008 05:54:30 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220103942.GU7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> Message-ID: <20080221045430.GC15215@wotan.suse.de> On Wed, Feb 20, 2008 at 11:39:42AM +0100, Andrea Arcangeli wrote: > Given Nick's comments I ported my version of the mmu notifiers to > latest mainline. There are no known bugs AFIK and it's obviously safe > (nothing is allowed to schedule inside rcu_read_lock taken by > mmu_notifier() with my patch). Thanks! 
Yes the seqlock you are using now ends up looking similar to what I did and I couldn't find a hole in that either. So I think this is going to work. I do prefer some parts of my patch, however for everyone's sanity, I think you should be the maintainer of the mmu notifiers, and I will send you incremental changes that can be discussed more easily that way (nothing major, mainly style and minor things). > XPMEM simply can't use RCU for the registration locking if it wants to > schedule inside the mmu notifier calls. So I guess it's better to add > the XPMEM invalidate_range_end/begin/external-rmap as a whole > different subsystem that will have to use a mutex (not RCU) to > serialize, and at the same time that CONFIG_XPMEM will also have to > switch the i_mmap_lock to a mutex. I doubt xpmem fits inside a > CONFIG_MMU_NOTIFIER anymore, or we'll all run a bit slower because of > it. It's really a call of how much we want to optimize the MMU > notifier, by keeping things like RCU for the registration. I agree: your coherent, non-sleeping mmu notifiers are pretty simple and unintrusive. The sleeping version is fundamentally going to either need to change VM locks, or be non-coherent, so I don't think there is a question of making one solution fit everybody. So the sleeping / xrmap patch should be kept either completely independent, or as an add-on to this one. I will post some suggestions to you when I get a chance. 
From npiggin at suse.de Wed Feb 20 21:02:23 2008 From: npiggin at suse.de (Nick Piggin) Date: Thu, 21 Feb 2008 06:02:23 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080220120324.GW7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220113313.GD11364@sgi.com> <20080220120324.GW7128@v2.random> Message-ID: <20080221050223.GD15215@wotan.suse.de> On Wed, Feb 20, 2008 at 01:03:24PM +0100, Andrea Arcangeli wrote: > If there's agreement that the VM should alter its locking from > spinlock to mutex for its own good, then Christoph's > one-config-option-fits-all becomes a lot more appealing (replacing RCU > with a mutex in the mmu notifier list registration locking isn't my > main worry and the non-sleeping-users may be ok to live with it). Just from a high level view, in some cases we can just say that no we aren't going to support this. And this may well be one of those cases. The more constraints placed on the VM, the harder it becomes to improve and adapt in future. And this seems like a pretty big restriction. (especially if we can eg. work around it completely by having a special purpose driver to get_user_pages on comm buffers as I suggested in the other mail). At any rate, I believe Andrea's patch really places minimal or no further constraints than a regular CPU TLB (or the hash tables that some archs implement). So we're kind of in 2 different leagues here. From dwparlicomm at parlicom.com Wed Feb 20 21:41:28 2008 From: dwparlicomm at parlicom.com (Burl Newton) Date: Thu, 21 Feb 2008 06:41:28 +0100 Subject: [ofa-general] The Shortest Way to Your Happy Love Life Message-ID: <728861053.41260700078478@parlicom.com> Please read one of the numerous letters we receive from our consumers:"I was skeptical first about this product but my wife had a lot of complaints about my cock size. 
So I decided to try this VPXL. I'm so happy that I found your site. Thanks from me and my wife." Darrian, Winfiel.Order our VPXL! You don't have to worry any more! It's time for pure enjoyment. http://geocities.com/rupertcox134/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From thyxx at blbvisuais.com.br Wed Feb 20 22:04:10 2008 From: thyxx at blbvisuais.com.br (Earl Johnston) Date: Thu, 21 Feb 2008 14:04:10 +0800 Subject: [ofa-general] Purchase software at surprisingly low prices! Message-ID: <01c87492$a148a900$3e15e779@thyxx> Need some software urgently? Purchase, download and install right now! Software in English, German, French, Italian, and Spanish for IBM PC and Macintosh! Cheap prices give you the possibility to save or buy more software than you can afford purchasing software on a CD! We are glad to help you to install your software. Feel free to ask questions and receive highly professional consultations. If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/rodrickcook73 You'll definitely find software you need. From jackm at dev.mellanox.co.il Wed Feb 20 22:34:58 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 21 Feb 2008 08:34:58 +0200 Subject: [ofa-general] Recalculated Queue Sizes caused mthca Catastrophic Errors In-Reply-To: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> References: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> Message-ID: <200802210834.59024.jackm@dev.mellanox.co.il> On Wednesday 20 February 2008 21:50, Roger Spellman wrote: > As I said, doubling the queue size solves the problem.  However, it > would be better if the mthca driver did not create the problem in the > first place.  If a QP is being created such that CQ >= SQ + RQ, then > that relationship should be maintained.  Do others agree with me? > There is no way the driver can do this. A CQ is not necessarily restricted to a single QP. 
Furthermore, there is NO requirement that the same CQ be used for both the SQ and the RQ of a QP. The function ibv/ib_create_cq returns the cqe size actually created; the function ibv/ib_create_qp also returns the number of work requests available in the sq and the rq. It is the responsibility of the application to check these size values returned by create_cq and create_qp to determine if the CQ is large enough. - Jack From sashak at voltaire.com Wed Feb 20 23:01:43 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 21 Feb 2008 07:01:43 +0000 Subject: [ofa-general] Re: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Fix print of Transaction ID In-Reply-To: <20080220140244.3e9f2bdc.weiny2@llnl.gov> References: <20080220140244.3e9f2bdc.weiny2@llnl.gov> Message-ID: <20080221070143.GP17477@sashak.voltaire.com> On 14:02 Wed 20 Feb , Ira Weiny wrote: > From 55de3f5d7e6f606c4ba98f136362ffa1b5559334 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 20 Feb 2008 11:25:42 -0800 > Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Fix print of Transaction ID > > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From sashak at voltaire.com Wed Feb 20 23:05:21 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 21 Feb 2008 07:05:21 +0000 Subject: [ofa-general] Re: [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL In-Reply-To: <20080220140245.13d706f2.weiny2@llnl.gov> References: <20080220140245.13d706f2.weiny2@llnl.gov> Message-ID: <20080221070521.GQ17477@sashak.voltaire.com> Hi Ira, On 14:02 Wed 20 Feb , Ira Weiny wrote: > > I actually hit these when I was testing with a low OSM_UMAD_MAX_PENDING value. > I was a bit unsure of what to do in these cases if p_physp was NULL. What I do > in this patch seems reasonable as the fabric does route but perhaps you could > double check me? The patch looks correct for me. And seems I need to review all osm_node_get_physp_ptr() usages again. 
> From 310a6cceca946fcc86f7bec28dfdeba77d011fc5 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 20 Feb 2008 13:53:51 -0800 > Subject: [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL > > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From sashak at voltaire.com Wed Feb 20 23:10:50 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 21 Feb 2008 07:10:50 +0000 Subject: [ofa-general] Re: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: add transaction ID printing to error messages In-Reply-To: <20080220140247.2324aaba.weiny2@llnl.gov> References: <20080220140247.2324aaba.weiny2@llnl.gov> Message-ID: <20080221071050.GR17477@sashak.voltaire.com> On 14:02 Wed 20 Feb , Ira Weiny wrote: > From 44eeee0549b6571b6a58cd6b6789aed9c2ab1750 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 20 Feb 2008 13:55:47 -0800 > Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: add transaction ID printing to error > messages > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From tziporet at dev.mellanox.co.il Wed Feb 20 23:47:03 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Thu, 21 Feb 2008 09:47:03 +0200 Subject: [ofa-general] Recalculated Queue Sizes caused mthca Catastrophic Errors In-Reply-To: References: <2C7DE72B9BD00F44BAECA5B0CBB87395072451@hermes.terascala.com> Message-ID: <47BD2C77.4020102@mellanox.co.il> Roland Dreier wrote: > > My code creates a CQ of size 2072, and a SQ of size 2056, and a RQ of > > size 16. As you can see, CQ = SQ + RQ. So, I should never overflow my > > CQ. > > > > The Driver raises each of these to the next power of two. So, we get a > > CQ of size 4096, a SQ of size 4096, and an RQ of size 16. > > > > As you can see, CQ < SQ + RQ, so it is possible to overflow the CQ. > > > > I don't think that this should cause the Firmware to generate a > > Catastrophic error (sounds like a bug in the firmware, if you ask me). 
> > Yes, as the release notes mention, it appears to be a > hardware/firmware bug that you get a catastrophic error. However, > overflowing a CQ will generate a CQ overrun asynchronous error in the > best case. > > Can you send the test you used and the FW version? I wish to see if we already fixed this FW bug and if not we will reproduce problem here and fix it. Thanks, Tziporety From vlad at dev.mellanox.co.il Thu Feb 21 00:35:51 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 21 Feb 2008 10:35:51 +0200 Subject: [ofa-general] ofed1.1 and EL4 2.6.9-67.0.4 In-Reply-To: References: Message-ID: <47BD37E7.9040000@dev.mellanox.co.il> Mahmoud Hanafi wrote: > > I am trying to build ofed1.1 with RedHat EL4 kernel (2.6.9-67.0.4). I > get build lots of build errors. Can ofed1.1 be build with this kernel? > if so is there a trick that i am missing. > > Thanks, > Mahmoud Hanafi > Sr. System Administrator > CSC HPC COE > Bld. 676 > 2435 Fifth Street > WPAFB, Ohio 45433 > (937) 255-1536 > > > Computer Sciences Corporation > Registered Office: 2100 East Grand Avenue, El Segundo California 90245, USA > Registered in USA No: C-489-59 > OFED-1.1 does not support this kernel. You can try OFED-1.2.5.5. Regards, Vladimir From kliteyn at dev.mellanox.co.il Thu Feb 21 02:02:49 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 21 Feb 2008 12:02:49 +0200 Subject: [ofa-general] Re: [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL In-Reply-To: <20080221070521.GQ17477@sashak.voltaire.com> References: <20080220140245.13d706f2.weiny2@llnl.gov> <20080221070521.GQ17477@sashak.voltaire.com> Message-ID: <47BD4C49.4000609@dev.mellanox.co.il> Sasha Khapyorsky wrote: > Hi Ira, > > On 14:02 Wed 20 Feb , Ira Weiny wrote: >> I actually hit these when I was testing with a low OSM_UMAD_MAX_PENDING value. >> I was a bit unsure of what to do in these cases if p_physp was NULL. 
What I do >> in this patch seems reasonable as the fabric does route but perhaps you could >> double check me? > > The patch looks correct for me. And seems I need to review all > osm_node_get_physp_ptr() usages again. > >> From 310a6cceca946fcc86f7bec28dfdeba77d011fc5 Mon Sep 17 00:00:00 2001 >> From: Ira K. Weiny >> Date: Wed, 20 Feb 2008 13:53:51 -0800 >> Subject: [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL >> >> >> Signed-off-by: Ira K. Weiny > > Applied. Thanks. Is this applicable to ofed_1_3 as well? -- Yevgeny > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From jackm at dev.mellanox.co.il Thu Feb 21 02:21:53 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 21 Feb 2008 12:21:53 +0200 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <200802202056.32451.okir@lst.de> References: <200802202056.32451.okir@lst.de> Message-ID: <200802211221.53652.jackm@dev.mellanox.co.il> As long as the underlying mpt index is not played with, there is no requirement that the sequence bits start from 0. Its just sufficient to guarantee that the same (full) key not be allocated twice before performing an unmap/SYNC_TPT. Thus, there is no problem with the change that Olaf requests. 
The lines between the #if 0 and #endif can just be entirely deleted: Index: ofed_kernel/drivers/infiniband/hw/mthca/mthca_mr.c =================================================================== --- ofed_kernel.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2008-02-21 10:32:50.000000000 +0200 +++ ofed_kernel/drivers/infiniband/hw/mthca/mthca_mr.c 2008-02-21 12:22:54.393777000 +0200 @@ -839,11 +839,6 @@ void mthca_arbel_fmr_unmap(struct mthca_ if (!fmr->maps) return; - key = arbel_key_to_hw_index(fmr->ibmr.lkey); - key &= dev->limits.num_mpts - 1; - key = adjust_key(dev, key); - fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); - fmr->maps = 0; *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; ============================================================== This can be done for mlx4 (mlx4_fmr_unmap) and tavor (mthca_tavor_fmr_unmap) as well. Jack On Wednesday 20 February 2008 23:42, Olaf Kirch wrote: > > From 5f56547be86ce91dac0eed0fd6112c2277c92250 Mon Sep 17 00:00:00 2001 > From: Olaf Kirch > Date: Wed, 20 Feb 2008 20:43:48 +0100 > Subject: [PATCH] ib_mthca: avoid recycling old FMR R_Keys too soon > > When a FMR is unmapped, ib_mthca resets the map count to 0, and clears > the upper part of the R_Key which is used as the sequence counter. > > This poses a problem for RDS, which uses ib_fmr_unmap as a fence > operation. RDS assumes that after issuing an unmap, the old R_Keys > will be invalid for a "reasonable" period of time. For instance, Oracle > processes uses shared memory buffers allocated from a pool of buffers. > When a process dies, we want to reclaim these buffers - but we must make sure > there are no pending RDMA operations to/from those buffers. > The only way to achieve that is by using unmap and sync the TPT. > > However, when the sequence count is reset on unmap, there is a high > likelihood that a new mapping will be given the same R_Key that was > issued a few milliseconds ago. 
> > To prevent this, we suggest to not reset the sequence count when > unmapping a FMR. > > This patch addresses the issue on Arbel HCAs only - someone with > a better understanding of OFED driver internals may want to take this > patch and do it properly :-) > > Signed-off-by: Olaf Kirch > --- > drivers/infiniband/hw/mthca/mthca_mr.c | 5 +++++ > 1 files changed, 5 insertions(+), 0 deletions(-) > > diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c > index aa6c70a..09740a1 100644 > --- a/drivers/infiniband/hw/mthca/mthca_mr.c > +++ b/drivers/infiniband/hw/mthca/mthca_mr.c > @@ -835,10 +835,15 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) > if (!fmr->maps) > return; > > + /* Do not clear the sequence count on unmap, so that we cycle > + * through all sequence numbers before issuing the same R_Key > + * again. */ > +#if 0 > key = arbel_key_to_hw_index(fmr->ibmr.lkey); > key &= dev->limits.num_mpts - 1; > key = adjust_key(dev, key); > fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); > +#endif > > fmr->maps = 0; > > -- > 1.5.4.rc3 > > -------------- next part -------------- An HTML attachment was scrubbed... URL: From holt at sgi.com Thu Feb 21 02:58:39 2008 From: holt at sgi.com (Robin Holt) Date: Thu, 21 Feb 2008 04:58:39 -0600 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <200802211520.03529.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <200802201451.46069.nickpiggin@yahoo.com.au> <20080220090035.GG11391@sgi.com> <200802211520.03529.nickpiggin@yahoo.com.au> Message-ID: <20080221105838.GJ11391@sgi.com> On Thu, Feb 21, 2008 at 03:20:02PM +1100, Nick Piggin wrote: > > > So why can't you export a device from your xpmem driver, which > > > can be mmap()ed to give out "anonymous" memory pages to be used > > > for these communication buffers? 
> > > > Because we need to have heap and stack available as well. MPT does > > not control all the communication buffer areas. I haven't checked, but > > this is the same problem that IB will have. I believe they are actually > > allowing any memory region be accessible, but I am not sure of that. > > Then you should create a driver that the user program can register > and unregister regions of their memory with. The driver can do a > get_user_pages to get the pages, and then you'd just need to set up > some kind of mapping so that userspace can unmap pages / won't leak > memory (and an exit_mm notifier I guess). OK. You need to explain this better to me. How would this driver supposedly work? What we have is an MPI library. It gets invoked at process load time to establish its rank-to-rank communication regions. It then turns control over to the processes main(). That is allowed to run until it hits the MPI_Init(&argc, &argv); The process is then totally under the users control until: MPI_Send(intmessage, m_size, MPI_INT, my_rank+half, tag, MPI_COMM_WORLD); MPI_Recv(intmessage, m_size, MPI_INT, my_rank+half,tag, MPI_COMM_WORLD, &status); That is it. That is all our allowed interaction with the users process. Are you saying at the time of the MPI_Send, we should: down_write(&current->mm->mmap_sem); Find all the VMAs that describe this region and record their vm_ops structure. Find all currently inserted page table information. Create new VMAs that describe the same regions as before. Insert our special fault handler which merely calls their old fault handler and then exports the page then returns the page to the kernel. Take an extra reference count on the page for each possible remote rank we are exporting this to. That doesn't seem too unreasonable, except when you compare it to how the driver currently works. Remember, this is done from a library which has no insight into what the user has done to its own virtual address space. 
As a result, each MPI_Send() would result in a system call (or we would need to have a set of callouts for changes to a processes VMAs) which would be a significant increase in communication overhead. Maybe I am missing what you intend to do, but what we need is a means of tracking one processes virtual address space changes so other processes can do direct memory accesses without the need for a system call on each communication event. > Because you don't need to swap, you don't need coherency, and you > are in control of the areas, then this seems like the best choice. > It would allow you to use heap, stack, file-backed, anything. You are missing one point here. The MPI specifications that have been out there for decades do not require the process use a library for allocating the buffer. I realize that is a horrible shortcoming, but that is the world we live in. Even if we could change that spec, we would still need to support the existing specs. As a result, the user can change their virtual address space as they need and still expect communications be cheap. 
Thanks, Robin From vlad at lists.openfabrics.org Thu Feb 21 03:17:05 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 21 Feb 2008 03:17:05 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080221-0200 daily build status Message-ID: <20080221111705.77029E60BF5@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with 
linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From ogerlitz at voltaire.com Thu Feb 21 03:42:52 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 21 Feb 2008 13:42:52 +0200 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <200802211221.53652.jackm@dev.mellanox.co.il> References: <200802202056.32451.okir@lst.de> <200802211221.53652.jackm@dev.mellanox.co.il> Message-ID: <47BD63BC.4040207@voltaire.com> Jack Morgenstein wrote: > As long as the underlying mpt index is not played with, > there is no requirement that the sequence bits start from 0. Its just > sufficient to guarantee that the same (full) key not be allocated twice > before performing an unmap/SYNC_TPT. 
> Index: ofed_kernel/drivers/infiniband/hw/mthca/mthca_mr.c > =================================================================== > --- ofed_kernel.orig/drivers/infiniband/hw/mthca/mthca_mr.c 2008-02-21 10:32:50.000000000 +0200 > +++ ofed_kernel/drivers/infiniband/hw/mthca/mthca_mr.c 2008-02-21 12:22:54.393777000 +0200 > @@ -839,11 +839,6 @@ void mthca_arbel_fmr_unmap(struct mthca_ > if (!fmr->maps) > return; > - key = arbel_key_to_hw_index(fmr->ibmr.lkey); > - key &= dev->limits.num_mpts - 1; > - key = adjust_key(dev, key); > - fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); > - > fmr->maps = 0; > *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; > ============================================================== > This can be done for mlx4 (mlx4_fmr_unmap) and tavor (mthca_tavor_fmr_unmap) as well. As far as I understand under Sinai you must issue an adjust_key call when the key is about to wraparound, correct? Or. > commit 608d8268be392444f825b4fc8fc7c8b509627129 > Author: Michael S. Tsirkin > Date: Mon Apr 16 17:04:55 2007 +0300 > > IB/mthca: Fix data corruption after FMR unmap on Sinai > > In mthca_arbel_fmr_unmap(), the high bits of the key are masked off. > This gets rid of the effect of adjust_key(), which makes sure that > bits 3 and 23 of the key are equal when the Sinai throughput > optimization is enabled, and so it may happen that an FMR will end up > with bits 3 and 23 in the key being different. This causes data > corruption, because when enabling the throughput optimization, the > driver promises the HCA firmware that bits 3 and 23 of all memory keys > will always be equal. > > Fix by re-applying adjust_key() after masking the key. > > Thanks to Or Gerlitz for reproducing the problem, and Ariel Shahar for > help in debug. > > Signed-off-by: Michael S. 
Tsirkin > Signed-off-by: Roland Dreier From mov01656.mpg at zippyvideos.com Thu Feb 21 04:33:10 2008 From: mov01656.mpg at zippyvideos.com (Grace James) Date: Thu, 21 Feb 2008 13:33:10 +0100 Subject: [ofa-general] Learn How to Save on Medications Message-ID: <849293419.97504758099547@zippyvideos.com> With this special pharmaceutical bulletin we introduce CanadianPharmacy providing high quality products at low cost.Visit CanadianPharmacy to check out Our Specials now. http://geocities.com/anibal_nicholson/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From gstreiff at NetEffect.com Thu Feb 21 04:39:45 2008 From: gstreiff at NetEffect.com (Glenn Streiff) Date: Thu, 21 Feb 2008 06:39:45 -0600 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one In-Reply-To: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> > > > No, 51af33e8 was for a similar same bug 400 lines below > this bug... > > > > Heh, sorry. > > > > Glenn -- please review Adrian's patches and let me know > which ones are > > good to apply. > > > I went ahead and created a patch series and attributed Adrian for the patches of his I liked. There were a couple that I tweaked. Wasn't sure if all the hunks would apply nicely after that if we mixed and matched his and mine, hence the series. Hope that's okay. Should I have gotten his ack for the ones I rewrote? The fixes were pretty small so I figured they didn't really need more review. The patch series is on the way... 
Glenn From gstreiff at neteffect.com Thu Feb 21 05:51:17 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 07:51:17 -0600 Subject: [ofa-general] [PATCH 2.6 1/8] infiniband/hw/nes/nes_verbs.c: address dead code warning in nes_verbs.c Message-ID: <200802211351.m1LDpH3e004925@velma.neteffect.com> From: Chien Tung Adrian Bunk found some apparently dead code in nes_verbs.c after a coverity review that really shouldn't have been dead. The function nes_create_cq() was missing the following assignment err = 1; just prior to an iteration that conditionally set err = 0 if a PBL was found for a given virtual CQ. Also noticed we should have been returning -EFAULT on a couple related error paths. Signed-off-by: Chien Tung Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_verbs.c | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 4dafbe1..201b95e 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1327,7 +1327,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, (long long unsigned int)req.user_wqe_buffers); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EFAULT); } } @@ -1674,6 +1674,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, } nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n", (unsigned long)req.user_cq_buffer, entries); + err = 1; list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) { if (nespbl->user_base == (unsigned long )req.user_cq_buffer) { list_del(&nespbl->list); @@ -1686,7 +1687,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, if (err) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); - return ERR_PTR(err); + return ERR_PTR(-EFAULT); } 
pbl_entries = nespbl->pbl_size >> 3; From gstreiff at neteffect.com Thu Feb 21 06:01:18 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 08:01:18 -0600 Subject: [ofa-general] [PATCH 2.6 2/8] infiniband/hw/nes/nes_verbs.c: fix off-by-one Message-ID: <200802211401.m1LE1I88004984@velma.neteffect.com> From: Adrian Bunk This patch fixes an off-by-one spotted by the Coverity checker. Signed-off-by: Adrian Bunk Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_verbs.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 201b95e..692f0d8 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -929,7 +929,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db); nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n", nespd->mmap_db_index, nespd->pd_id); - if (nespd->mmap_db_index > NES_MAX_USER_DB_REGIONS) { + if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) { nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n"); nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); kfree(nespd); From gstreiff at neteffect.com Thu Feb 21 06:12:06 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 08:12:06 -0600 Subject: [ofa-general] [PATCH 2.6 3/8] infiniband/hw/nes/nes_cm.c: fix a memory leak Message-ID: <200802211412.m1LEC6Pt005040@velma.neteffect.com> From: Adrian Bunk This patch fixes a memory leak spotted by the Coverity checker. 
Signed-off-by: Adrian Bunk Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_cm.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index bd5cfea..78e845c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -370,11 +370,11 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, int ret = 0; u32 was_timer_set; + if (!cm_node) + return -EINVAL; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) return -1; - if (!cm_node) - return -EINVAL; /* new_send->timetosend = currenttime */ new_send->retrycount = NES_DEFAULT_RETRYS; From gstreiff at neteffect.com Thu Feb 21 06:13:47 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 08:13:47 -0600 Subject: [ofa-general] [PATCH 2.6 4/8] infiniband/hw/nes/nes.c: fix a check-after-use Message-ID: <200802211413.m1LEDlts005057@velma.neteffect.com> From: Adrian Bunk This patch fixes a check-after-use spotted by the Coverity checker. 
Signed-off-by: Adrian Bunk Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 7f8853b..b2112f5 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -567,12 +567,12 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i /* Init the adapter */ nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev); - nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; if (!nesdev->nesadapter) { printk(KERN_ERR PFX "Unable to initialize adapter.\n"); ret = -ENOMEM; goto bail5; } + nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; /* nesdev->base_doorbell_index = nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ From gstreiff at neteffect.com Thu Feb 21 06:17:54 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 08:17:54 -0600 Subject: [ofa-general] [PATCH 2.6 5/8] infiniband/hw/nes/nes_verbs.c: fix use-after-free Message-ID: <200802211417.m1LEHsQu005081@velma.neteffect.com> Adrian Bunk flagged this check-after-use issue spotted by the Coverity checker. 
Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_verbs.c | 3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 692f0d8..a651e9d 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1832,9 +1832,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, spin_unlock_irqrestore(&nesdev->cqp.lock, flags); } } - nes_debug(NES_DBG_CQ, "iWARP CQ%u create timeout expired, major code = 0x%04X," - " minor code = 0x%04X\n", - nescq->hw_cq.cq_number, cqp_request->major_code, cqp_request->minor_code); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); From gstreiff at neteffect.com Thu Feb 21 06:27:32 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 08:27:32 -0600 Subject: [ofa-general] [PATCH 2.6 6/8] infiniband/hw/nes/nes_cm.c: Fix use-after-free Message-ID: <200802211427.m1LERWee005135@velma.neteffect.com> From: Faisal Latif Use-after-free spotted by Coverity checker. Flagged by Adrian Bunk. 
Signed-off-by: Faisal Latif Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_cm.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 78e845c..6c298aa 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -947,6 +947,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); kfree(listener); + listener = NULL; ret = 0; cm_listens_destroyed++; } else { From gstreiff at neteffect.com Thu Feb 21 06:29:41 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 08:29:41 -0600 Subject: [ofa-general] [PATCH 2.6 6/8] RDMA/nes: Fix rdma connection establishment on big-endian platforms Message-ID: <200802211429.m1LETfLj005153@velma.neteffect.com> From: Faisal Latif With commit ef19454bd437b2ba, behavior of crc32c changes on big-endian platforms. Our algorithm expects previous behavior otherwise we have rdma connection establishment failure on big-endian platforms like ppc64. Applying cpu_to_le32() to value returned by crc32c() to get previous behavior. Signed-off-by: Faisal Latif Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes.h | 14 ++++++++++++++ drivers/infiniband/hw/nes/nes_cm.c | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index fd57e8a..b0d3c52 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -285,6 +285,20 @@ struct nes_device { }; +static inline u32 get_crc_value(struct nes_v4_quad* nes_quad) +{ + u32 crc_value; + crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct nes_v4_quad)); + + /* + * With commit ef19454bd437b2ba, behavior of crc32c changes on + * big-endian platforms. 
Our algorithm expects previous behavior + * otherwise we have rdma connection establishment issue on ppc64. + */ + crc_value = cpu_to_le32(crc_value); + return crc_value; +} + static inline void set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) { diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6c298aa..1f042d1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2320,6 +2320,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iw_cm_event cm_event; struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; + u32 crc_value; int ret; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); @@ -2436,8 +2437,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ - nesqp->hte_index = cpu_to_be32( - crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); From gstreiff at neteffect.com Thu Feb 21 06:31:22 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 08:31:22 -0600 Subject: [ofa-general] [PATCH 2.6 7/8] RDMA/nes: Fix rdma connection establishment on big-endian platforms Message-ID: <200802211431.m1LEVMxN005167@velma.neteffect.com> From: Faisal Latif With commit ef19454bd437b2ba, behavior of crc32c changes on big-endian platforms. Our algorithm expects previous behavior otherwise we have rdma connection establishment failure on big-endian platforms like ppc64. Applying cpu_to_le32() to value returned by crc32c() to get previous behavior. 
Signed-off-by: Faisal Latif Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes.h | 14 ++++++++++++++ drivers/infiniband/hw/nes/nes_cm.c | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index fd57e8a..b0d3c52 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -285,6 +285,20 @@ struct nes_device { }; +static inline u32 get_crc_value(struct nes_v4_quad* nes_quad) +{ + u32 crc_value; + crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct nes_v4_quad)); + + /* + * With commit ef19454bd437b2ba, behavior of crc32c changes on + * big-endian platforms. Our algorithm expects previous behavior + * otherwise we have rdma connection establishment issue on ppc64. + */ + crc_value = cpu_to_le32(crc_value); + return crc_value; +} + static inline void set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) { diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6c298aa..1f042d1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2320,6 +2320,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iw_cm_event cm_event; struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; + u32 crc_value; int ret; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); @@ -2436,8 +2437,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ - nesqp->hte_index = cpu_to_be32( - crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); From gstreiff at neteffect.com Thu Feb 21 06:34:58 2008 From: gstreiff at neteffect.com (gstreiff at 
neteffect.com) Date: Thu, 21 Feb 2008 08:34:58 -0600 Subject: [ofa-general] [PATCH 2.6 8/8] RDMA/nes: Fix interrupt moderation low threshold Message-ID: <200802211434.m1LEYwnH005193@velma.neteffect.com> From: John Lacombe Interrupt moderation low threshold value was incorrectly triggering, indicating that the threshold should be lowered. The impact was the timer was likely to become 40usecs and get stuck there. The biggest side effect was too many interrupts and nonoptimal performance. Signed-off-by: John Lacombe Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_hw.c | 12 ++++-------- drivers/infiniband/hw/nes/nes_hw.h | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 7c4c0fb..6b677b5 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -156,15 +156,13 @@ static void nes_nic_tune_timer(struct nes_device *nesdev) spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); - if (shared_timer->cq_count_old < cq_count) { - if (cq_count > shared_timer->threshold_low) - shared_timer->cq_direction_downward=0; - } - if (shared_timer->cq_count_old >= cq_count) + if (shared_timer->cq_count_old <= cq_count) + shared_timer->cq_direction_downward = 0; + else shared_timer->cq_direction_downward++; shared_timer->cq_count_old = cq_count; if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) { - if (cq_count <= shared_timer->threshold_low) { + if (cq_count <= shared_timer->threshold_low && (shared_timer->threshold_low > 4)) { shared_timer->threshold_low = shared_timer->threshold_low/2; shared_timer->cq_direction_downward=0; nesdev->currcq_count = 0; @@ -1728,7 +1726,6 @@ int nes_napi_isr(struct nes_device *nesdev) nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - nesadapter->tune_timer.timer_in_use_old = 0; 
} nesdev->deepcq_count = 0; return 1; @@ -1867,7 +1864,6 @@ void nes_dpc(unsigned long param) nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - nesdev->nesadapter->tune_timer.timer_in_use_old = 0; } else { nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); } diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 1e10df5..b7e2844 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -962,7 +962,7 @@ struct nes_arp_entry { #define DEFAULT_JUMBO_NES_QL_LOW 12 #define DEFAULT_JUMBO_NES_QL_TARGET 40 #define DEFAULT_JUMBO_NES_QL_HIGH 128 -#define NES_NIC_CQ_DOWNWARD_TREND 8 +#define NES_NIC_CQ_DOWNWARD_TREND 16 struct nes_hw_tune_timer { //u16 cq_count; From gstreiff at NetEffect.com Thu Feb 21 05:46:19 2008 From: gstreiff at NetEffect.com (Glenn Streiff) Date: Thu, 21 Feb 2008 07:46:19 -0600 Subject: [ofa-general] [PATCH 2.6 6/8] RDMA/nes: Fix rdma connectionestablishment on big-endian platforms In-Reply-To: <200802211429.m1LETfLj005153@velma.neteffect.com> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC07950006@venom2> You'll notice I have two "6/8" patches. This is the bogus one. No one told me this job required ability to count. Glenn > -----Original Message----- > From: general-bounces at lists.openfabrics.org > [mailto:general-bounces at lists.openfabrics.org]On Behalf Of > gstreiff at neteffect.com > Sent: Thursday, February 21, 2008 8:30 AM > To: rdreier at cisco.com > Cc: linux-kernel at vger.kernel.org; > general at lists.openfabrics.org; Faisal > Latif > Subject: [ofa-general] [PATCH 2.6 6/8] RDMA/nes: Fix rdma > connectionestablishment on big-endian platforms > > > From: Faisal Latif > > With commit ef19454bd437b2ba, behavior of crc32c changes on > big-endian platforms. 
> > Our algorithm expects previous behavior otherwise we have > rdma connection establishment failure on big-endian platforms > like ppc64. Applying cpu_to_le32() to value returned by > crc32c() to get previous behavior. > > Signed-off-by: Faisal Latif > Signed-off-by: Glenn Streiff > > --- > drivers/infiniband/hw/nes/nes.h | 14 ++++++++++++++ > drivers/infiniband/hw/nes/nes_cm.c | 5 +++-- > 2 files changed, 17 insertions(+), 2 deletions(-) > > diff --git a/drivers/infiniband/hw/nes/nes.h > b/drivers/infiniband/hw/nes/nes.h > index fd57e8a..b0d3c52 100644 > --- a/drivers/infiniband/hw/nes/nes.h > +++ b/drivers/infiniband/hw/nes/nes.h > @@ -285,6 +285,20 @@ struct nes_device { > }; > > > +static inline u32 get_crc_value(struct nes_v4_quad* nes_quad) > +{ > + u32 crc_value; > + crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct > nes_v4_quad)); > + > + /* > + * With commit ef19454bd437b2ba, behavior of crc32c changes on > + * big-endian platforms. Our algorithm expects previous behavior > + * otherwise we have rdma connection establishment issue > on ppc64. 
> + */ > + crc_value = cpu_to_le32(crc_value); > + return crc_value; > +} > + > static inline void > set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) > { > diff --git a/drivers/infiniband/hw/nes/nes_cm.c > b/drivers/infiniband/hw/nes/nes_cm.c > index 6c298aa..1f042d1 100644 > --- a/drivers/infiniband/hw/nes/nes_cm.c > +++ b/drivers/infiniband/hw/nes/nes_cm.c > @@ -2320,6 +2320,7 @@ int nes_accept(struct iw_cm_id *cm_id, > struct iw_cm_conn_param *conn_param) > struct iw_cm_event cm_event; > struct nes_hw_qp_wqe *wqe; > struct nes_v4_quad nes_quad; > + u32 crc_value; > int ret; > > ibqp = nes_get_qp(cm_id->device, conn_param->qpn); > @@ -2436,8 +2437,8 @@ int nes_accept(struct iw_cm_id *cm_id, > struct iw_cm_conn_param *conn_param) > nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; > > /* Produce hash key */ > - nesqp->hte_index = cpu_to_be32( > - crc32c(~0, (void *)&nes_quad, > sizeof(nes_quad)) ^ 0xffffffff); > + crc_value = get_crc_value(&nes_quad); > + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); > nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", > nesqp->hte_index, nesqp->hte_index & > adapter->hte_index_mask); > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From dwpmisoftwarem at pmisoftware.com Thu Feb 21 06:30:21 2008 From: dwpmisoftwarem at pmisoftware.com (Jerold Long) Date: Thu, 21 Feb 2008 22:30:21 +0800 Subject: [ofa-general] There is no cheaper source of original and perfectly working software. Message-ID: <01c874d9$57cf7c80$0274bc3d@dwpmisoftwarem> Don't waste time waiting for delivery of your software on a CD. Download and install it immediately. Choose the program you need from more than 270 programs in many languages. Accept this brilliant offer and take the advantage of our free installation consultations. 
Money back guarantee is available. http://geocities.com/stephenmiddleton91 Check our site for discounts! From andrea at qumranet.com Thu Feb 21 06:40:23 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 21 Feb 2008 15:40:23 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080221045430.GC15215@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> Message-ID: <20080221144023.GC9427@v2.random> On Thu, Feb 21, 2008 at 05:54:30AM +0100, Nick Piggin wrote: > will send you incremental changes that can be discussed more easily > that way (nothing major, mainly style and minor things). I don't need to say you're very welcome ;). > I agree: your coherent, non-sleeping mmu notifiers are pretty simple > and unintrusive. The sleeping version is fundamentally going to either > need to change VM locks, or be non-coherent, so I don't think there is > a question of making one solution fit everybody. So the sleeping / > xrmap patch should be kept either completely independent, or as an > add-on to this one. The need to change the VM locks to fit the sleepable "mmu notifier" needs, I think is the major reason why the sleeping patch should be a separate config option unless you think the i_mmap_lock will benefit the VM for its own good regardless of the sleepable mmu notifiers. Otherwise we'll end up merging in mainline an API that can only satisfy the needs of the "sleeping users" that are only interested about anonymous memory. While the basic concept of the mmu notifiers is to cover the whole user visible address space, not just anonymous memory! Furthermore XPMEM users already asked to work on tmpfs/MAP_SHARED too... Originally the trick that I was trying to remove the "atomic" param, was to defer the invalidate_range after dropping the i_mmap_lock. 
But clearly in truncate we'll have no more guarantees that nor the vma nor the MM still exists after spin_unlock(i_mmap_lock) is called... So it's simply impossible to call the mmu notifier out of the i_mmap_lock for truncate, and Christoph's patch looks unfixable without altering the VM core locking. Christoph's API one-config-fits-all can't really fit-all, but only the anonymous memory. However if I wear a KVM hat, I cannot care less what is merged as long as .25 will be able to fully swap reliably a virtualized guest OS ;). This is why I'm totally willing to support any decision in favor of anything (including your own patch that would only work for KVM) that can be merged. > I will post some suggestions to you when I get a chance. I really want suggestions on Jack's concern about issuing an invalidate per pte entry or per-pte instead of per-range. I'll answer that in a separate email. For KVM my patch is already close to optimal because each single spte invalidate requires a fixed amount of work, but for GRU a large invalidate-range would be more efficient. To address the GRU _valid_ concern, I can create a second version of my patch with range_begin/end instead of invalidate_pages, that still won't support sleeping users like XPMEM but only KVM and GRU. Then it's up to Christoph when he comes back to alter the vm locking so that those calls can sleep too... But that will require a much bigger change and then perhaps xpmem can share the same mmu notifiers when the config option to make the mmu notifier sleepable is enabled. But that part would better be incremental as it's not so obviously safe to merge as the mmu notifier themself. From _polack at acp.amdahl.com Thu Feb 21 04:59:11 2008 From: _polack at acp.amdahl.com (xanxa viarga) Date: Thu, 21 Feb 2008 12:59:11 +0000 Subject: [ofa-general] Save 80% on your pills. 
Discount Code #MyZZp Message-ID: <19117.vispi@trieu> Hi openib-general, be wise, purchase your meds from the most well-known online store since 1996. http://www.google.com/pagead/iclk?sa=l&ai=cIFhPw&num=17719&adurl=http://igrementsit.com jedidiah duke From jackm at dev.mellanox.co.il Thu Feb 21 07:12:29 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 21 Feb 2008 17:12:29 +0200 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <47BD63BC.4040207@voltaire.com> References: <200802202056.32451.okir@lst.de> <200802211221.53652.jackm@dev.mellanox.co.il> <47BD63BC.4040207@voltaire.com> Message-ID: <200802211712.29736.jackm@dev.mellanox.co.il> On Thursday 21 February 2008 13:42, Or Gerlitz wrote: > As far as I understand under Sinai you must issue an adjust_key call > when the key is about to wraparound, correct? > > Or. > Actually, its not related to wraparound. The key adjustment is in the mpt-index section only, and does not affect the sequence number section. If we don't re-initialize the key, adjust_key should not be called. - Jack From swise at opengridcomputing.com Thu Feb 21 07:41:27 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 21 Feb 2008 09:41:27 -0600 Subject: [ofa-general] post_recv question Message-ID: <47BD9BA7.7050408@opengridcomputing.com> Hey all: I have a question regarding exactly _when_ a posted recv buffer is available for the HW to use: Consider that the post_recv methods usually just program a hw-specific WR in the RQ, then ring a doorbell, then return. There is a delta period between when the app returns from the post_recv call and when the HW actually DMA's the WR and programs up the HW to enable that buffer. (I'm assumming a specific HW design here, but I _think_ most HW behaves this way?). If this is all true, then from the apps point of view, the buffer isn't really available when it returns from post_recv. 
This can lead to conditions where the app advertises that recv buffer to the peer via some out of band channel, and the peer posts a SEND which arrives _before_ the HW has actually setup the RECV buffer. Granted, this hole is small, but does it exist for nes, mthca, ehca, and ipath libs/drivers? Or do they _not_ have this issue? Does the IBTA spec discuss this at all? Most importantly, does the IBTA spec and/or the iWARP verbs spec _mandate_ that the buffer is actually available when the post_recv() method returns (I didn't find it in the iWARP spec)? If such a mandate exists, then it would force post_recv() methods to stall and/or somehow know when the HW has completed setting up the recv buffer. This would kill performance IMO and I think no such mandate exists, but I wanted to know what others think. Maybe this isn't an issue with mthca/ehca/ipath/nes? Thanks, Steve. From ogerlitz at voltaire.com Thu Feb 21 07:49:08 2008 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 21 Feb 2008 17:49:08 +0200 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <200802211712.29736.jackm@dev.mellanox.co.il> References: <200802202056.32451.okir@lst.de> <200802211221.53652.jackm@dev.mellanox.co.il> <47BD63BC.4040207@voltaire.com> <200802211712.29736.jackm@dev.mellanox.co.il> Message-ID: <47BD9D74.6010007@voltaire.com> Jack Morgenstein wrote: > On Thursday 21 February 2008 13:42, Or Gerlitz wrote: >> As far as I understand under Sinai you must issue an adjust_key call >> when the key is about to wraparound, correct? > Actually, its not related to wraparound. The key adjustment is in the > mpt-index section only, and does not affect the sequence number section. > If we don't re-initialize the key, adjust_key should not be called. Is it possible to never re-initialize the key? if yes, what's the semantics of the M=max_map_per_fmr device attribute? 
I was thinking that after the fmr was mapped M times, something --has-- to be reinitialized, sorry if this is my misunderstanding, can you clarify that? Or > commit d4cb0784fd1ea99ef3d20526811bd5608146fe60 > Author: Or Gerlitz > Date: Sat Jun 17 20:37:37 2006 -0700 > > IB/mthca: Fill in max_map_per_fmr device attribute > > Report the true max_map_per_fmr value from mthca_query_device(), > taking into account the change in FMR remapping introduced by the > Sinai performance optimization. > > Signed-off-by: Or Gerlitz > Signed-off-by: Roland Dreier > > diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c > index a2eae8a..8f89ba7 100644 > --- a/drivers/infiniband/hw/mthca/mthca_provider.c > +++ b/drivers/infiniband/hw/mthca/mthca_provider.c > @@ -115,6 +115,16 @@ static int mthca_query_device(struct ib_device *ibdev, > props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; > props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * > props->max_mcast_grp; > + /* > + * If Sinai memory key optimization is being used, then only > + * the 8-bit key portion will change. For other HCAs, the > + * unused index bits will also be used for FMR remapping. 
> + */ > + if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) > + props->max_map_per_fmr = 255; > + else > + props->max_map_per_fmr = > + (1 << (32 - long_log2(mdev->limits.num_mpts))) - 1; > > err = 0; > out: From bunk at kernel.org Thu Feb 21 07:49:51 2008 From: bunk at kernel.org (Adrian Bunk) Date: Thu, 21 Feb 2008 17:49:51 +0200 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one In-Reply-To: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> Message-ID: <20080221154951.GA28328@cs181133002.pp.htv.fi> On Thu, Feb 21, 2008 at 06:39:45AM -0600, Glenn Streiff wrote: > > > > > No, 51af33e8 was for a similar same bug 400 lines below > > this bug... > > > > > > Heh, sorry. > > > > > > Glenn -- please review Adrian's patches and let me know > > which ones are > > > good to apply. > > > > > > > I went ahead and created a patch series and attributed Adrian > for the patches of his I liked. There were a couple that > I tweaked. Wasn't sure if all the hunks would apply nicely > after that if we mixed and matched his and mine, hence the series. > > Hope that's okay. Should I have gotten his ack for the ones > I rewrote? The fixes were pretty small so I figured they didn't > really need more review. >... Looking at the patches what you did seems OK. But regarding "review" I have a different criticism directed at Roland: This driver should really have gotten some review before being included in the kernel. Even a simple checkpatch run finds more than > 250 stylistic errors (not code bugs but cases where the driver violates the standard code formatting rules of kernel code). And I'm not talking about the > 2000 checkpatch warnings that are mostly about too long lines (which should arguably also be fixed). And many more issues that could have been foung during a review. E.g. 
when you look at 3/8 from this series the code if (!cm_node) return -EINVAL; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) return -1; doesn't look good since the -1 should most likely better be something like -ENOMEM (I haven't checked whether you can immediately change it at this specific place). And these are just comments from someone with zero knowledge about InfiniBand, but I'd expect InfiniBand-specific bugs might be found before they hit users if an InfiniBand maintainer would review the complete driver. Note that this is not meant as a criticism against Glenn - it's normal that submitted code contains bugs, but a code review can help to cope with this. > Glenn cu Adrian -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed From eli at dev.mellanox.co.il Thu Feb 21 07:53:25 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 21 Feb 2008 17:53:25 +0200 Subject: [ofa-general] post_recv question In-Reply-To: <47BD9BA7.7050408@opengridcomputing.com> References: <47BD9BA7.7050408@opengridcomputing.com> Message-ID: <1203609205.5629.53.camel@mtls03> When you post to a receive queue, the buffer does not belong to the application until after it returned to the app. While it belongs to the HW, the hardware may use it to write the contents of send messages targeting the QP - during this time the driver should not use this buffer. When you poll the CQ referenced by the receive queue, you will get a CQE with the work request id of the buffer you posted and then you know the buffer is back in the driver's ownership. On Thu, 2008-02-21 at 09:41 -0600, Steve Wise wrote: > Hey all: > > I have a question regarding exactly _when_ a posted recv buffer is > available for the HW to use: > > Consider that the post_recv methods usually just program a hw-specific > WR in the RQ, then ring a doorbell, then return.
There is a delta > period between when the app returns from the post_recv call and when the > HW actually DMA's the WR and programs up the HW to enable that buffer. > (I'm assumming a specific HW design here, but I _think_ most HW behaves > this way?). > > If this is all true, then from the apps point of view, the buffer isn't > really available when it returns from post_recv. This can lead to > conditions where the app advertises that recv buffer to the peer via > some out of band channel, and the peer posts a SEND which arrives > _before_ the HW has actually setup the RECV buffer. > > Granted, this hole is small, but does it exist for nes, mthca, ehca, and > ipath libs/drivers? Or do they _not_ have this issue? > > Does the IBTA spec discuss this at all? Most importantly, does the IBTA > spec and/or the iWARP verbs spec _mandate_ that the buffer is actually > available when the post_recv() method returns (I didn't find it in the > iWARP spec)? If such a mandate exists, then it would force post_recv() > methods to stall and/or somehow know when the HW has completed setting > up the recv buffer. This would kill performance IMO and I think no such > mandate exists, but I wanted to know what others think. > > Maybe this isn't an issue with mthca/ehca/ipath/nes? > > > Thanks, > > Steve. 
> > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From jackm at dev.mellanox.co.il Thu Feb 21 07:55:40 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 21 Feb 2008 17:55:40 +0200 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <47BD9D74.6010007@voltaire.com> References: <200802202056.32451.okir@lst.de> <200802211712.29736.jackm@dev.mellanox.co.il> <47BD9D74.6010007@voltaire.com> Message-ID: <200802211755.41184.jackm@dev.mellanox.co.il> On Thursday 21 February 2008 17:49, Or Gerlitz wrote: > Is it possible to never re-initialize the key? if yes, what's the > semantics of the M=max_map_per_fmr device attribute? I was thinking that > after the fmr was mapped M times, something --has-- to be reinitialized, > sorry if this is my misunderstanding, can you clarify that? > It does not have to be re-initialized. However, the cache needs to be flushed (SYNC_TPT), so that we do not have the same 32-bit key multiple times in the cache. The "something" which must be done is to flush the cache. Once the cache is flushed, we again have max_map_per_fmr remap possibilities, and we don't care what the initial sequence value is. However, the index value MUST be the same as it was before. 
- Jack From steiner at sgi.com Thu Feb 21 08:10:28 2008 From: steiner at sgi.com (Jack Steiner) Date: Thu, 21 Feb 2008 10:10:28 -0600 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v6 In-Reply-To: <20080221144023.GC9427@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> Message-ID: <20080221161028.GA14220@sgi.com> > I really want suggestions on Jack's concern about issuing an > invalidate per pte entry or per-pte instead of per-range. I'll answer > that in a separate email. For KVM my patch is already close to optimal > because each single spte invalidate requires a fixed amount of work, > but for GRU a large invalidate-range would be more efficient. > > To address the GRU _valid_ concern, I can create a second version of > my patch with range_begin/end instead of invalidate_pages, that still I don't know how much significance to place on this data, but it is a real data point. I ran the GRU regression test suite on kernels with both types of mmu_notifiers. The kernel/driver using Christoph's patch had 1/7 the number of TLB invalidates as Andrea's patch. This reduction is due to both differences I mentioned yesterday: - different location of callout for address space teardown - range callouts Unfortunately, the current driver does not allow me to quantify which of the differences is most significant. Also, I'll try to post the driver within the next few days. It is still in development but it compiles and can successfully run most workloads on a system simulator. 
--- jack From swise at opengridcomputing.com Thu Feb 21 08:14:30 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 21 Feb 2008 10:14:30 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <1203609205.5629.53.camel@mtls03> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> Message-ID: <47BDA366.9090401@opengridcomputing.com> Eli Cohen wrote: > When you post to a receive queue, the buffer does not belong the > application until after it returned to the app. While it belongs to the > HW, the hardware may use it to write the contents of send messages > targeting the QP - during this time the driver should no use this > buffer. When you poll the CQ referenced by the receive queue, you will > get a CQE with the work request id of the buffer you posted and then you > know the buffer is back in the driver ownership. > This has nothing to do with my questions. > On Thu, 2008-02-21 at 09:41 -0600, Steve Wise wrote: >> Hey all: >> >> I have a question regarding exactly _when_ a posted recv buffer is >> available for the HW to use: >> >> Consider that the post_recv methods usually just program a hw-specific >> WR in the RQ, then ring a doorbell, then return. There is a delta >> period between when the app returns from the post_recv call and when the >> HW actually DMA's the WR and programs up the HW to enable that buffer. >> (I'm assumming a specific HW design here, but I _think_ most HW behaves >> this way?). >> >> If this is all true, then from the apps point of view, the buffer isn't >> really available when it returns from post_recv. This can lead to >> conditions where the app advertises that recv buffer to the peer via >> some out of band channel, and the peer posts a SEND which arrives >> _before_ the HW has actually setup the RECV buffer. >> >> Granted, this hole is small, but does it exist for nes, mthca, ehca, and >> ipath libs/drivers? Or do they _not_ have this issue? 
>> >> Does the IBTA spec discuss this at all? Most importantly, does the IBTA >> spec and/or the iWARP verbs spec _mandate_ that the buffer is actually >> available when the post_recv() method returns (I didn't find it in the >> iWARP spec)? If such a mandate exists, then it would force post_recv() >> methods to stall and/or somehow know when the HW has completed setting >> up the recv buffer. This would kill performance IMO and I think no such >> mandate exists, but I wanted to know what others think. >> >> Maybe this isn't an issue with mthca/ehca/ipath/nes? >> >> >> Thanks, >> >> Steve. >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sean.hefty at intel.com Thu Feb 21 08:19:10 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 21 Feb 2008 08:19:10 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <1203609205.5629.53.camel@mtls03> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> Message-ID: <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> >> I have a question regarding exactly _when_ a posted recv buffer is >> available for the HW to use: >> >> Consider that the post_recv methods usually just program a hw-specific >> WR in the RQ, then ring a doorbell, then return. There is a delta >> period between when the app returns from the post_recv call and when the >> HW actually DMA's the WR and programs up the HW to enable that buffer. >> (I'm assumming a specific HW design here, but I _think_ most HW behaves >> this way?). >> >> If this is all true, then from the apps point of view, the buffer isn't >> really available when it returns from post_recv. 
This can lead to >> conditions where the app advertises that recv buffer to the peer via >> some out of band channel, and the peer posts a SEND which arrives >> _before_ the HW has actually setup the RECV buffer. I'm really not following the question here. When you say that the app advertises the buffer, are you saying that it sends some sort of credit that a receive is posted? I would fully expect the receive buffer to be available to receive data before post_recv returns, but I not sure what race you're referring to. Are you suggesting that this isn't the case? - Sean From jlentini at netapp.com Thu Feb 21 08:19:34 2008 From: jlentini at netapp.com (James Lentini) Date: Thu, 21 Feb 2008 11:19:34 -0500 (EST) Subject: [ofa-general] post_recv question In-Reply-To: <47BD9BA7.7050408@opengridcomputing.com> References: <47BD9BA7.7050408@opengridcomputing.com> Message-ID: On Thu, 21 Feb 2008, Steve Wise wrote: > Hey all: > > I have a question regarding exactly _when_ a posted recv buffer is available > for the HW to use: > > Consider that the post_recv methods usually just program a hw-specific WR in > the RQ, then ring a doorbell, then return. There is a delta period between > when the app returns from the post_recv call and when the HW actually DMA's > the WR and programs up the HW to enable that buffer. (I'm assumming a specific > HW design here, but I _think_ most HW behaves this way?). > > If this is all true, then from the apps point of view, the buffer isn't really > available when it returns from post_recv. This can lead to conditions where > the app advertises that recv buffer to the peer via some out of band channel, > and the peer posts a SEND which arrives _before_ the HW has actually setup the > RECV buffer. > > Granted, this hole is small, but does it exist for nes, mthca, ehca, and ipath > libs/drivers? Or do they _not_ have this issue? > > Does the IBTA spec discuss this at all? 
Most importantly, does the IBTA spec > and/or the iWARP verbs spec _mandate_ that the buffer is actually available > when the post_recv() method returns (I didn't find it in the iWARP spec)? If > such a mandate exists, then it would force post_recv() methods to stall and/or > somehow know when the HW has completed setting up the recv buffer. This would > kill performance IMO and I think no such mandate exists, but I wanted to know > what others think. > > Maybe this isn't an issue with mthca/ehca/ipath/nes? > > > Thanks, > > Steve. >From the RDMA application perspective, the application has to assume that when post_recv() returns, the RECV WR is on the QP's recv queue since there are no APIs for the application to query the availability/eligibility of a RECV. From swise at opengridcomputing.com Thu Feb 21 08:34:47 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 21 Feb 2008 10:34:47 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> Message-ID: <47BDA827.90806@opengridcomputing.com> Sean Hefty wrote: >>> I have a question regarding exactly _when_ a posted recv buffer is >>> available for the HW to use: >>> >>> Consider that the post_recv methods usually just program a hw-specific >>> WR in the RQ, then ring a doorbell, then return. There is a delta >>> period between when the app returns from the post_recv call and when the >>> HW actually DMA's the WR and programs up the HW to enable that buffer. >>> (I'm assumming a specific HW design here, but I _think_ most HW behaves >>> this way?). >>> >>> If this is all true, then from the apps point of view, the buffer isn't >>> really available when it returns from post_recv. 
This can lead to >>> conditions where the app advertises that recv buffer to the peer via >>> some out of band channel, and the peer posts a SEND which arrives >>> _before_ the HW has actually setup the RECV buffer. > > I'm really not following the question here. When you say that the app > advertises the buffer, are you saying that it sends some sort of credit that a > receive is posted? Yes. > I would fully expect the receive buffer to be available to > receive data before post_recv returns, but I not sure what race you're referring > to. Are you suggesting that this isn't the case? > That is what I'm suggesting. Here is the timing sequence: t0: app calls post_recv t1: post_recv code builds a hw-specific WR in the hw work queue t2: post_recv code rings a doorbell (write to adapter mem or register) t3: post_recv returns t4: t5: device HW dma engine moves the WR to adapter memory t6: device FW prepares the HW RQ entry making the buffer available. Note at time t4, the application thinks it's ready, but it's really not ready until t6. This clearly is an implementation-specific issue. But I was under the assumption that all the RDMA HW behaves this way. Maybe not? To further complicate things, this race condition is never seen _if_ the application uses the same QP to advertise (send a credit allowing the peer to SEND) the RECV buffer availability. So if the app posts a SEND after the RECV is posted and that SEND allows the peer access to the RECV buffer, then everything is ok. This is due to the fact that the FW/HW will process the SEND only after processing the RECV. If the app uses a different QP to post the SEND advertising the RECV, then the race condition exists allowing the peer to SEND into that RECV buffer before the HW makes it ready. This all assumes a specific design of rdma hw. Maybe nobody else has this issue? Maybe I'm not making sense. :) Steve.
From mdidomenico at gmail.com Thu Feb 21 08:49:48 2008 From: mdidomenico at gmail.com (Michael Di Domenico) Date: Thu, 21 Feb 2008 11:49:48 -0500 Subject: [ofa-general] ipoib connected mode Message-ID: <97a7c7ed0802210849s593c73ccp8a5589c334f9bed0@mail.gmail.com> 5. On RedHat EL 4 up4, the IPOIB implementation is not spec-compliant: - ipoib multicast does not work - ipoib cannot interoperate between RHEL4U4 and other hosts. This is due to missing code in the kernel which was available in U3 and U5 but removed in U4. As a workaround, upgrade to RHEL4U5. I found this blurb in the ipoib_release_notes file inside ofed-1.2.5.1. Does this mean that IPoIB does not work between a RHEL4U4 and RHEL5 x86_64 machine? I ask, because i have one of each one with Ipath and one with Mthca and i can certainly talk between the machines. Is connected mode supported? Do we have any performance benchmark numbers between two hosts running over connected-mode? Oh btw: this is OFED-1.2.5.1 on Mthca and Pathscale 2.2EA on Ipath From dwscrmakerm at scrmaker.com Thu Feb 21 08:49:44 2008 From: dwscrmakerm at scrmaker.com (Tiffany Olsen) Date: Fri, 22 Feb 2008 00:49:44 +0800 Subject: [ofa-general] Show your loved one you care, help them quit smoking Message-ID: <441240286.94892729819073@scrmaker.com> An HTML attachment was scrubbed... URL: From jackm at dev.mellanox.co.il Thu Feb 21 08:53:24 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 21 Feb 2008 18:53:24 +0200 Subject: [ofa-general] post_recv question In-Reply-To: <47BDA827.90806@opengridcomputing.com> References: <47BD9BA7.7050408@opengridcomputing.com> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> Message-ID: <200802211853.25092.jackm@dev.mellanox.co.il> On Thursday 21 February 2008 18:34, Steve Wise wrote: > This clearly is a implementation-specific issue.  But I was under the > assumption that all the RDMA HW behaves this way.  Maybe not? 
> For RDMA operations, NO receive WQE needs to be posted. The rdma target is a memory region, with an rkey. The target advertises the rkey and the address, and the source posts an rdma operation using the target data. No completion is generated on the target (if there is no immediate data in the send). - Jack From swise at opengridcomputing.com Thu Feb 21 08:58:49 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 21 Feb 2008 10:58:49 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <200802211853.25092.jackm@dev.mellanox.co.il> References: <47BD9BA7.7050408@opengridcomputing.com> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <200802211853.25092.jackm@dev.mellanox.co.il> Message-ID: <47BDADC9.7020107@opengridcomputing.com> Jack Morgenstein wrote: > On Thursday 21 February 2008 18:34, Steve Wise wrote: >> This clearly is a implementation-specific issue. But I was under the >> assumption that all the RDMA HW behaves this way. Maybe not? >> > For RDMA operations, NO receive WQE needs to be posted. The rdma target > is a memory region, with an rkey. The target advertises the rkey and the > address, and the source posts an rdma operation using the target data. > > No completion is generated on the target (if there is no immediate data in the > send). > > - Jack I'm not communicating clearly I guess. This issue is only with posting RECVS. Not RDMA read/write. 
From weiny2 at llnl.gov Thu Feb 21 09:12:12 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 21 Feb 2008 09:12:12 -0800 Subject: [ofa-general] Re: [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL In-Reply-To: <47BD4C49.4000609@dev.mellanox.co.il> References: <20080220140245.13d706f2.weiny2@llnl.gov> <20080221070521.GQ17477@sashak.voltaire.com> <47BD4C49.4000609@dev.mellanox.co.il> Message-ID: <20080221091212.3faaa505.weiny2@llnl.gov> On Thu, 21 Feb 2008 12:02:49 +0200 Yevgeny Kliteynik wrote: > Sasha Khapyorsky wrote: > > Hi Ira, > > > > On 14:02 Wed 20 Feb , Ira Weiny wrote: > >> I actually hit these when I was testing with a low OSM_UMAD_MAX_PENDING value. > >> I was a bit unsure of what to do in these cases if p_physp was NULL. What I do > >> in this patch seems reasonable as the fabric does route but perhaps you could > >> double check me? > > > > The patch looks correct for me. And seems I need to review all > > osm_node_get_physp_ptr() usages again. > > > >> From 310a6cceca946fcc86f7bec28dfdeba77d011fc5 Mon Sep 17 00:00:00 2001 > >> From: Ira K. Weiny > >> Date: Wed, 20 Feb 2008 13:53:51 -0800 > >> Subject: [PATCH] Fix 2 potential core dumps now that osm_node_get_physp_ptr can return NULL > >> > >> > >> Signed-off-by: Ira K. Weiny > > > > Applied. Thanks. > > Is this applicable to ofed_1_3 as well? No, osm_node_get_physp_ptr does not return NULL in ofed_1_3. 
Ira > > -- Yevgeny > > > Sasha > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From okir at lst.de Thu Feb 21 09:24:32 2008 From: okir at lst.de (Olaf Kirch) Date: Thu, 21 Feb 2008 18:24:32 +0100 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <47BD63BC.4040207@voltaire.com> References: <200802202056.32451.okir@lst.de> <200802211221.53652.jackm@dev.mellanox.co.il> <47BD63BC.4040207@voltaire.com> Message-ID: <200802211824.34083.okir@lst.de> On Thursday 21 February 2008 12:42, Or Gerlitz wrote: > As far as I understand under Sinai you must issue an adjust_key call > when the key is about to wraparound, correct? I don't think so. On Arbel, ib_mthca uses the entire upper part of the 32bit word as the sequence counter. If SINAI_OPT is set, the sequence counter is in bits 25-31 (17-24 seem to be reserved, and bit 3 is mirrored to bit 23 - this is what adjust_key seems to be doing). If SINAI_OPT is not set, the sequence counter is in bits 17-31, and adjust_key is a no-op. So when the sequence counter overflows, it doesn't spill into any reserved bit. At least that's how I read the code.
Olaf -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax From jlentini at netapp.com Thu Feb 21 09:39:58 2008 From: jlentini at netapp.com (James Lentini) Date: Thu, 21 Feb 2008 12:39:58 -0500 (EST) Subject: [ofa-general] post_recv question In-Reply-To: <47BDA827.90806@opengridcomputing.com> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> Message-ID: On Thu, 21 Feb 2008, Steve Wise wrote: > Sean Hefty wrote: > > > > I have a question regarding exactly _when_ a posted recv buffer is > > > > available for the HW to use: > > > > > > > > Consider that the post_recv methods usually just program a hw-specific > > > > WR in the RQ, then ring a doorbell, then return. There is a delta > > > > period between when the app returns from the post_recv call and when the > > > > HW actually DMA's the WR and programs up the HW to enable that buffer. > > > > (I'm assumming a specific HW design here, but I _think_ most HW behaves > > > > this way?). > > > > > > > > If this is all true, then from the apps point of view, the buffer isn't > > > > really available when it returns from post_recv. This can lead to > > > > conditions where the app advertises that recv buffer to the peer via > > > > some out of band channel, and the peer posts a SEND which arrives > > > > _before_ the HW has actually setup the RECV buffer. > > > > I'm really not following the question here. When you say that the app > > advertises the buffer, are you saying that it sends some sort of credit that > > a > > receive is posted? > > Yes. > > > I would fully expect the receive buffer to be available to > > receive data before post_recv returns, but I not sure what race you're > > referring > > to. Are you suggesting that this isn't the case? > > > > That is what I'm suggesting. 
> > Here is the timing sequence: > > t0: app calls post_recv > t1: post_recv code builds a hw-specific WR in the hw work queue > t2: post_recv code rings a doorbell (write to adapter mem or register) > t3: post_recv returns > t4: > t5: device HW dma engine moves the WR to adapter memory > t6: device FW prepares the HW RQ entry making the buffer available. > > Note at time t4, the application thinks its ready, but its really not ready > until t6. > > This clearly is a implementation-specific issue. But I was under the > assumption that all the RDMA HW behaves this way. Maybe not? > > To further complicate things, this race condition is never seen _if_ the > application uses the same QP to advertise (send a credit allowing the peer to > SEND) the RECV buffer availability. So if the app posts a SEND after the RECV > is posted and that SEND allows the peer access to the RECV buffer, then > everything is ok. This is due to the fact that the FW/HW will process the > SEND only after processing the RECV. If the app uses a different QP to post > the SEND advertising the RECV, then the race condition exists allowing the > peer to SEND into that RECV buffer before the HW makes it ready. > > This all assumes a specific design of rdma hw. Maybe nobody else has this > issue? > > Maybe I'm not making sense. :) > I'm following you. Applications assume that when post_recv() returns the RECV WR is on the queue. There is no API for the RDMA application writer to query the availability/eligibility of the RECV, so this is a reasonable and necessary assumption. From ardavis at ichips.intel.com Thu Feb 21 09:40:38 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 21 Feb 2008 09:40:38 -0800 Subject: [ofa-general] uDAPL libdat2.so version # problem for today's OFED code In-Reply-To: References: <47AB7912.5040700@ichips.intel.com> Message-ID: <47BDB796.7070509@ichips.intel.com> Tang, Changqing wrote: > Arlin: > Here is another question. 
> > The /etc/dat.conf is: > OpenIB-cma u1.2 nonthreadsafe default libdaplcma.so.1 dapl.1.2 "ib0 0" "" > OpenIB-cma-1 u1.2 nonthreadsafe default libdaplcma.so.1 dapl.1.2 "ib1 0" "" > ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "ib0 0" "" > ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "ib1 0" "" > > A simple code just call dat_registry_list_prodivers() to get the list > in /etc/dat.conf, and call dat_ia_openv() in a loop of above list. If I compile > and link this code with /usr/include/dat2 and libdat2.so, dat_ia_openv() return > DAT_SUCCESS for all four entries. You should be using the dat_ia_open and not the dat_ia_openv. You are setting the MAJOR and MINOR versions according to the query and not based on your build so the open always return's SUCCESS. see dat.h for definition: #define dat_ia_open(name, qlen, async_evd, ia) \ dat_ia_openv((name), (qlen), (async_evd), (ia), \ DAT_VERSION_MAJOR, DAT_VERSION_MINOR, \ DAT_THREADSAFE) -arlin From email.oivp at tie.cl Thu Feb 21 10:26:39 2008 From: email.oivp at tie.cl (Simon Ziller) Date: Thu, 21 Feb 2008 13:26:39 -0500 Subject: [ofa-general] Eine Empfehlung von Simon Ziller Message-ID: <0E02BE8F.92BFFEFF@tie.cl> Hallo Daniel! Ich habe eine super Seite entdeckt, wo man ganz einfach einen Seitensprung Partner finden kann. Ich habe mir gerade mein Passwort angefordert und kann die Seite nur weiterempfehlen. Echt eine super Sache! Schau einfach auch mal vorbei: http://www.onlineseitensprung3.tk/ Viele Gr��e Simon Ziller ------------------------------------------------------------------------ Diese ePost wurde versendet von: Simon Ziller (email.oivp at tie.cl) From DenisbibChristian at opensecrets.org Thu Feb 21 14:03:38 2008 From: DenisbibChristian at opensecrets.org (Jamal Wyatt) Date: Thu, 21 Feb 2008 20:03:38 -0200 Subject: [ofa-general] reduce debt with Debt Pros Message-ID: <380601c874b4$332bc160$2101a8c0@your1o6etrt0v8> Get Out of Debt Today. Avoid Bankruptcy. Save Thousands... 
The Professional Way!! http://blongl.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From xma at us.ibm.com Thu Feb 21 10:09:14 2008 From: xma at us.ibm.com (Shirley Ma) Date: Thu, 21 Feb 2008 10:09:14 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <47BDA827.90806@opengridcomputing.com> Message-ID: Hello Steve, > Here is the timing sequence: > > t0: app calls post_recv > t1: post_recv code builds a hw-specific WR in the hw work queue > t2: post_recv code rings a doorbell (write to adapter mem or register) > t3: post_recv returns > t4: > t5: device HW dma engine moves the WR to adapter memory > t6: device FW prepares the HW RQ entry making the buffer available. > > Note at time t4, the application thinks its ready, but its really not > ready until t6. > > This clearly is a implementation-specific issue. But I was under the > assumption that all the RDMA HW behaves this way. Maybe not? > > To further complicate things, this race condition is never seen _if_ the > application uses the same QP to advertise (send a credit allowing the > peer to SEND) the RECV buffer availability. So if the app posts a SEND > after the RECV is posted and that SEND allows the peer access to the > RECV buffer, then everything is ok. This is due to the fact that the > FW/HW will process the SEND only after processing the RECV. If the app > uses a different QP to post the SEND advertising the RECV, then the race > condition exists allowing the peer to SEND into that RECV buffer before > the HW makes it ready. > > This all assumes a specific design of rdma hw. Maybe nobody else has > this issue? > > Maybe I'm not making sense. :) I think your descriptions here match what Ralph found RNR in IPoIB-CM. Ralph, Does this make sense? Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From glebn at voltaire.com Thu Feb 21 10:19:52 2008 From: glebn at voltaire.com (Gleb Natapov) Date: Thu, 21 Feb 2008 20:19:52 +0200 Subject: [ofa-general] post_recv question In-Reply-To: <47BDA827.90806@opengridcomputing.com> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> Message-ID: <20080221181952.GB18720@minantech.com> On Thu, Feb 21, 2008 at 10:34:47AM -0600, Steve Wise wrote: > To further complicate things, this race condition is never seen _if_ the > application uses the same QP to advertise (send a credit allowing the > peer to SEND) the RECV buffer availability. So if the app posts a SEND > after the RECV is posted and that SEND allows the peer access to the > RECV buffer, then everything is ok. This is due to the fact that the > FW/HW will process the SEND only after processing the RECV. If the app > uses a different QP to post the SEND advertising the RECV, then the race > condition exists allowing the peer to SEND into that RECV buffer before > the HW makes it ready. > OpenMPI can be configured to send credit updates over different QP. I'll try to stress it next week to see what happens. -- Gleb. 
From jsquyres at cisco.com Thu Feb 21 10:36:46 2008 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 21 Feb 2008 10:36:46 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <20080221181952.GB18720@minantech.com> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> Message-ID: <5F61BED7-63C3-44DD-9A83-9498B36DE66C@cisco.com> On Feb 21, 2008, at 10:19 AM, Gleb Natapov wrote: >> To further complicate things, this race condition is never seen >> _if_ the >> application uses the same QP to advertise (send a credit allowing the >> peer to SEND) the RECV buffer availability. So if the app posts a >> SEND >> after the RECV is posted and that SEND allows the peer access to the >> RECV buffer, then everything is ok. This is due to the fact that the >> FW/HW will process the SEND only after processing the RECV. If the >> app >> uses a different QP to post the SEND advertising the RECV, then the >> race >> condition exists allowing the peer to SEND into that RECV buffer >> before >> the HW makes it ready. >> > OpenMPI can be configured to send credit updates over different QP. > I'll > try to stress it next week to see what happens. FWIW: this is exactly where the question arose: Steve's working on the iwarp port of OMPI, and since we send the flow control messages for all QP's between a pair of processes over a single QP, this apparent race condition can occur. 
-- Jeff Squyres Cisco Systems From caitlin.bestler at gmail.com Thu Feb 21 10:37:30 2008 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Thu, 21 Feb 2008 10:37:30 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <47BD9BA7.7050408@opengridcomputing.com> References: <47BD9BA7.7050408@opengridcomputing.com> Message-ID: <469958e00802211037i73d447d2y381d773560b17d49@mail.gmail.com> On Thu, Feb 21, 2008 at 7:41 AM, Steve Wise wrote: > Hey all: > > I have a question regarding exactly _when_ a posted recv buffer is > available for the HW to use: > None of the verbs specifications will be explicit about this for the simple reason that none wants to specify exactly what an RQ or SRQ actually is. This is how good specifications are written. But, wherever this queue (RQ) or pool (SRQ) of receive WQEs lives, successfully posting a receive WQE should mean exactly that -- it was successfully posted. And if a receive WQE was successfully posted I cannot see a justification for an RDMA device raising a "no buffer available" subsequent to that point. Now if a buffer was received prior to the recv wqe post completing, the error may have already been raised, and the exception might not be delivered to the host until after the recv wqe call successfully completed. So applications SHOULD NOT be written to assume that they will win a "tie", but rather to ensure that the recv WQE is posted *before* the message arrives. Sending the request after posting the recv WQE for the reply should be more than adequate for that purpose. Specific implementations should take whatever steps are required to ensure that the hardware will not declare "no buffer" after the user has posted a buffer, whether that is through a doorbell or by rechecking the queue after any check of cached contents comes up empty.
From caitlin.bestler at gmail.com Thu Feb 21 10:43:31 2008 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Thu, 21 Feb 2008 10:43:31 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <5F61BED7-63C3-44DD-9A83-9498B36DE66C@cisco.com> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> <5F61BED7-63C3-44DD-9A83-9498B36DE66C@cisco.com> Message-ID: <469958e00802211043od0f7ef9x229d8c0364734d1e@mail.gmail.com> On Thu, Feb 21, 2008 at 10:36 AM, Jeff Squyres wrote: > On Feb 21, 2008, at 10:19 AM, Gleb Natapov wrote: > > >> To further complicate things, this race condition is never seen > >> _if_ the > >> application uses the same QP to advertise (send a credit allowing the > >> peer to SEND) the RECV buffer availability. So if the app posts a > >> SEND > >> after the RECV is posted and that SEND allows the peer access to the > >> RECV buffer, then everything is ok. This is due to the fact that the > >> FW/HW will process the SEND only after processing the RECV. If the > >> app > >> uses a different QP to post the SEND advertising the RECV, then the > >> race > >> condition exists allowing the peer to SEND into that RECV buffer > >> before > >> the HW makes it ready. > >> > > OpenMPI can be configured to send credit updates over different QP. > > I'll > > try to stress it next week to see what happens. > > > FWIW: this is exactly where the question arose: Steve's working on the > iwarp port of OMPI, and since we send the flow control messages for > all QP's between a pair of processes over a single QP, this apparent > race condition can occur. > I believe that the iWARP RFCs are clear that SEND/RECV flow control is the responsibility of the ULP. Because it is the ULP's responsibility, the ULP may use ANY method of communicating that flow control. 
It does not have to be the same connection, it does not even have to be the same network. Aside from the oft-cited carrier pigeon method of delivering credits, there are ULPs that use multiple connections for reliability. In those a credit would typically be implicitly granted to send a reply with each request, but the reply could occur on a different connection within the session. From ralph.campbell at qlogic.com Thu Feb 21 11:10:24 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Thu, 21 Feb 2008 11:10:24 -0800 Subject: [ofa-general] post_recv question In-Reply-To: References: Message-ID: <1203621024.5109.161.camel@brick.pathscale.com> On Thu, 2008-02-21 at 10:09 -0800, Shirley Ma wrote: > Hello Steve, > > > Here is the timing sequence: > > > > t0: app calls post_recv > > t1: post_recv code builds a hw-specific WR in the hw work queue > > t2: post_recv code rings a doorbell (write to adapter mem or > register) > > t3: post_recv returns > > t4: This is wrong. The HCA has control of the receive buffer until poll_cq() returns a CQE saying the posted buffer is completed (either OK or error). Think about it. The application can do a post_recv() and it could be days or nanoseconds before a packet is sent to that buffer. The application can't assume anything about the contents until the HCA says something is there. Oh, I see. You are saying the application thinks the buffer is available for the HCA to use. > > t5: device HW dma engine moves the WR to adapter memory > > t6: device FW prepares the HW RQ entry making the buffer available. > > > > Note at time t4, the application thinks its ready, but its really > not > > ready until t6. > > This clearly is a implementation-specific issue. But I was under > the > > assumption that all the RDMA HW behaves this way. Maybe not? Not all hardware works the same. You can't make assumptions beyond what the library API guarantees without building hardware specific dependencies into your program. 
It can even change between different versions of microcode or kernel software for the same HCA. > > To further complicate things, this race condition is never seen _if_ > the > > application uses the same QP to advertise (send a credit allowing > the > > peer to SEND) the RECV buffer availability. So if the app posts a > SEND > > after the RECV is posted and that SEND allows the peer access to > the > > RECV buffer, then everything is ok. This is due to the fact that > the > > FW/HW will process the SEND only after processing the RECV. If the > app > > uses a different QP to post the SEND advertising the RECV, then the > race > > condition exists allowing the peer to SEND into that RECV buffer > before > > the HW makes it ready. Well, there is no guarantee that the HCA processes the post_recv() before the post_send() even on the same QP. Send and receive are unordered with respect to each other. The fact that it works is an HCA specific implementation artifact. > > This all assumes a specific design of rdma hw. Maybe nobody else > has > > this issue? > > > > Maybe I'm not making sense. :) > > I think your descriptions here match what Ralph found RNR in IPoIB-CM. > > Ralph, > > Does this make sense? > > Thanks > Shirley I think you are making sense. There is an indeterminate race between post_recv() returning to the application and when a packet being received by the HCA might be able to use that buffer. There are no ordering guarantees between messages sent on one QP and another so the application can't easily use a different QP to advertise posted buffers (credits). That is why the IB RC protocol does this for you in band if the RC QP is using a dedicated receive queue but not a shared receive queue. The problem with shared receive queues is that the application would have to pick an endpoint and tell it there is a buffer available for the endpoint to send to. Obviously, if you have two endpoints, they both can't send to the same receive buffer. 
ib_ipoib uses shared receive queues and doesn't try to manage posted buffer credits so the RNR NAK issue isn't the same as what Steve is trying to do. From dwriverreliefm at riverrelief.org Thu Feb 21 11:27:31 2008 From: dwriverreliefm at riverrelief.org (Rosalyn Bryant) Date: Fri, 22 Feb 2008 00:27:31 +0500 Subject: [ofa-general] Get your free 2400$ welcome bonus and win much more! Message-ID: <01c874e9$b6045b80$c481965a@dwriverreliefm> There is no more convenient way to win real money than joining our Golden Gate Casino members. All the most popular casino games! Easy to download, install and use free software! One of the industry's best welcome bonus $2400! Register with Golden Gate Casino and enjoy a great atmosphere of the real casino, friendly customer support, absolute security and safety. http://geocities.com/leilawatts165 Play casino games any time you like. From glebn at voltaire.com Thu Feb 21 11:31:11 2008 From: glebn at voltaire.com (Gleb Natapov) Date: Thu, 21 Feb 2008 21:31:11 +0200 Subject: [ofa-general] post_recv question In-Reply-To: <1203621024.5109.161.camel@brick.pathscale.com> References: <1203621024.5109.161.camel@brick.pathscale.com> Message-ID: <20080221193111.GC18720@minantech.com> On Thu, Feb 21, 2008 at 11:10:24AM -0800, Ralph Campbell wrote: > > > To further complicate things, this race condition is never seen _if_ > > the > > > application uses the same QP to advertise (send a credit allowing > > the > > > peer to SEND) the RECV buffer availability. So if the app posts a > > SEND > > > after the RECV is posted and that SEND allows the peer access to > > the > > > RECV buffer, then everything is ok. This is due to the fact that > > the > > > FW/HW will process the SEND only after processing the RECV. If the > > app > > > uses a different QP to post the SEND advertising the RECV, then the > > race > > > condition exists allowing the peer to SEND into that RECV buffer > > before > > > the HW makes it ready. 
> > Well, there is no guarantee that the HCA processes the post_recv() > before the post_send() even on the same QP. Send and receive are > unordered with respect to each other. The fact that it works is > an HCA specific implementation artifact. So there is no way to implement SW flow control over Infiniband? How is it that the IB spec has SW flow control specification for SDP in it then? > > > > This all assumes a specific design of rdma hw. Maybe nobody else > > has > > > this issue? > > > > > > Maybe I'm not making sense. :) > > > > I think your descriptions here match what Ralph found RNR in IPoIB-CM. > > > > Ralph, > > > > Does this make sense? > > > > Thanks > > Shirley > > I think you are making sense. There is an indeterminate race > between post_recv() returning to the application and when > a packet being received by the HCA might be able to use > that buffer. There are no ordering guarantees > between messages sent on one QP and another so the application > can't easily use a different QP to advertise posted buffers (credits). If after post_recv() returns it is guaranteed that receive buffers are available to HW we don't need ordering guarantees between QPs to successfully implement SW flow control. > That is why the IB RC protocol does this for you in band if the RC QP > is using a dedicated receive queue but not a shared receive queue. What do you mean by that? RNR works for both RC and SRQ QPs. -- Gleb.
From swise at opengridcomputing.com Thu Feb 21 11:32:46 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 21 Feb 2008 13:32:46 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <1203621024.5109.161.camel@brick.pathscale.com> References: <1203621024.5109.161.camel@brick.pathscale.com> Message-ID: <47BDD1DE.4030504@opengridcomputing.com> Ralph Campbell wrote: > On Thu, 2008-02-21 at 10:09 -0800, Shirley Ma wrote: >> Hello Steve, >> >>> Here is the timing sequence: >>> >>> t0: app calls post_recv >>> t1: post_recv code builds a hw-specific WR in the hw work queue >>> t2: post_recv code rings a doorbell (write to adapter mem or >> register) >>> t3: post_recv returns >>> t4: > > This is wrong. The HCA has control of the receive buffer > until poll_cq() returns a CQE saying the posted buffer > is completed (either OK or error). > Think about it. The application can do a post_recv() and > it could be days or nanoseconds before a packet is sent to > that buffer. The application can't assume anything about > the contents until the HCA says something is there. > > Oh, I see. You are saying the application thinks the buffer > is available for the HCA to use. > >>> t5: device HW dma engine moves the WR to adapter memory >>> t6: device FW prepares the HW RQ entry making the buffer available. >>> >>> Note at time t4, the application thinks its ready, but its really >> not >>> ready until t6. >>> This clearly is a implementation-specific issue. But I was under >> the >>> assumption that all the RDMA HW behaves this way. Maybe not? > > Not all hardware works the same. You can't make assumptions > beyond what the library API guarantees without building > hardware specific dependencies into your program. > I'm asking this from a device driver developer's perspective. I'm not writing an application. I'm trying to understand and define exactly what must be guaranteed by the device/driver up returning from post_recv(). 
> It can even change between different versions of microcode or > kernel software for the same HCA. > >>> To further complicate things, this race condition is never seen _if_ >> the >>> application uses the same QP to advertise (send a credit allowing >> the >>> peer to SEND) the RECV buffer availability. So if the app posts a >> SEND >>> after the RECV is posted and that SEND allows the peer access to >> the >>> RECV buffer, then everything is ok. This is due to the fact that >> the >>> FW/HW will process the SEND only after processing the RECV. If the >> app >>> uses a different QP to post the SEND advertising the RECV, then the >> race >>> condition exists allowing the peer to SEND into that RECV buffer >> before >>> the HW makes it ready. > > Well, there is no guarantee that the HCA processes the post_recv() > before the post_send() even on the same QP. Send and receive are > unordered with respect to each other. The fact that it works is > an HCA specific implementation artifact. > >>> This all assumes a specific design of rdma hw. Maybe nobody else >> has >>> this issue? >>> >>> Maybe I'm not making sense. :) >> I think your descriptions here match what Ralph found RNR in IPoIB-CM. >> >> Ralph, >> >> Does this make sense? >> >> Thanks >> Shirley > > I think you are making sense. There is an indeterminate race > between post_recv() returning to the application and when > a packet being received by the HCA might be able to use > that buffer. There are no ordering guarantees > between messages sent on one QP and another so the application > can't easily use a different QP to advertise posted buffers (credits). > That is why the IB RC protocol does this for you in band if the RC QP > is using a dedicated receive queue but not a shared receive queue. > Do you mean the IB RC protocol advertises credits as part of the transport protocol? 
> The problem with shared receive queues is that the application > would have to pick an endpoint and tell it there is a buffer > available for the endpoint to send to. Obviously, if you have > two endpoints, they both can't send to the same receive buffer. > > ib_ipoib uses shared receive queues and doesn't try to manage > posted buffer credits so the RNR NAK issue isn't the same > as what Steve is trying to do. > From sean.hefty at intel.com Thu Feb 21 11:40:10 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 21 Feb 2008 11:40:10 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <47BDD1DE.4030504@opengridcomputing.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <47BDD1DE.4030504@opengridcomputing.com> Message-ID: <000201c874c1$925dcc70$72258686@amr.corp.intel.com> >I'm asking this from a device driver developer's perspective. I'm not >writing an application. I'm trying to understand and define exactly >what must be guaranteed by the device/driver up returning from >post_recv(). At least from IB's view for post receive (from spec): Control returns to the Consumer immediately after the WQEs have been submitted to the Receive Queue or the SRQ and the HCA has been notified that one or more WQEs are ready to process. 
- Sean From ralph.campbell at qlogic.com Thu Feb 21 11:44:07 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Thu, 21 Feb 2008 11:44:07 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <20080221193111.GC18720@minantech.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <20080221193111.GC18720@minantech.com> Message-ID: <1203623047.5109.170.camel@brick.pathscale.com> On Thu, 2008-02-21 at 21:31 +0200, Gleb Natapov wrote: > On Thu, Feb 21, 2008 at 11:10:24AM -0800, Ralph Campbell wrote: > > > > To further complicate things, this race condition is never seen _if_ > > > the > > > > application uses the same QP to advertise (send a credit allowing > > > the > > > > peer to SEND) the RECV buffer availability. So if the app posts a > > > SEND > > > > after the RECV is posted and that SEND allows the peer access to > > > the > > > > RECV buffer, then everything is ok. This is due to the fact that > > > the > > > > FW/HW will process the SEND only after processing the RECV. If the > > > app > > > > uses a different QP to post the SEND advertising the RECV, then the > > > race > > > > condition exists allowing the peer to SEND into that RECV buffer > > > before > > > > the HW makes it ready. > > > > Well, there is no guarantee that the HCA processes the post_recv() > > before the post_send() even on the same QP. Send and receive are > > unordered with respect to each other. The fact that it works is > > an HCA specific implementation artifact. > So there is no way to implement SW flow control over Infiniband? How > is that IB spec has SW flow control specification for SDP in it then? > > > > > > > This all assumes a specific design of rdma hw. Maybe nobody else > > > has > > > > this issue? > > > > > > > > Maybe I'm not making sense. :) > > > > > > I think your descriptions here match what Ralph found RNR in IPoIB-CM. > > > > > > Ralph, > > > > > > Does this make sense? 
> > > > > > Thanks > > > Shirley > > > > I think you are making sense. There is an indeterminate race > > between post_recv() returning to the application and when > > a packet being received by the HCA might be able to use > > that buffer. There are no ordering guarantees > > between messages sent on one QP and another so the application > > can't easily use a different QP to advertise posted buffers (credits). > If after post_recv() returns it is guarantied that receive buffers are > available to HW we don't need ordering guaranties between QPs to > successfully implement SW flow control. Right. I was just pointing out that Steve is correct in his assumption that there might be races between post_recv() returning and the HCA being able to use that buffer to receive a packet that was already in flight before the post_recv(). > > That is why the IB RC protocol does this for you in band if the RC QP > > is using a dedicated receive queue but not a shared receive queue. > What do you mean by that? RNR works for both RC and SRQ QPs. Right. I was referring to the credit returned in the ACK header which allows the remote RC QP endpoint to send a message after a post_recv(). There is no such message level flow control if the RC QP is using a SRQ. > -- > Gleb. From gstreiff at NetEffect.com Thu Feb 21 11:46:14 2008 From: gstreiff at NetEffect.com (Glenn Streiff) Date: Thu, 21 Feb 2008 13:46:14 -0600 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fixoff-by-one In-Reply-To: <20080221154951.GA28328@cs181133002.pp.htv.fi> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC07950007@venom2> > Looking at the patches what you did seems OK. > > > But regarding "review" I have a different criticism directed > at Roland: > > This driver should really have gotten some review before > being included > in the kernel. 
> > Even a simple checkpatch run finds more than > 250 stylistic errors > (not code bugs but cases where the driver violates the standard code > formatting rules of kernel code). > > And I'm not talking about the > 2000 checkpatch warnings that > are mostly > about too long lines (which should arguably also be fixed). > > And many more issues that could have been found during a review. > E.g. when you look at 3/8 from this series the code > if (!cm_node) > return -EINVAL; > new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); > if (!new_send) > return -1; > doesn't look good since the -1 should most likely better be something > like -ENOMEM (I haven't checked whether you can immediately change it > at this specific place). > > And these are just comments from someone with zero knowledge about > InfiniBand, but I'd expect InfiniBand-specific bugs might be found > before they hit users if an InfiniBand maintainer would review the > complete driver. > > Note that this is not meant as a criticism against Glenn - it's > normal that submitted code contains bugs, but a code review > can help to > cope with this. > > > Glenn > > cu > Adrian > Hi, Adrian. Yeah, I agree that the stylistic issues are annoying and I am actually itching to get some of those simple things corrected. Roland has outlined several areas for improvement in the driver (style-wise and substance-wise) and I'm working to address those. I'm learning the ropes here so I expect I'll get faster/better at responding and fixing things like the coverity issues you flagged. I need to pull these tools into my own release process so I'm catching flaws on my side. I want the driver to be worthy.
Regards, Glenn From ralph.campbell at qlogic.com Thu Feb 21 11:49:42 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Thu, 21 Feb 2008 11:49:42 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <47BDD1DE.4030504@opengridcomputing.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <47BDD1DE.4030504@opengridcomputing.com> Message-ID: <1203623382.5109.174.camel@brick.pathscale.com> On Thu, 2008-02-21 at 13:32 -0600, Steve Wise wrote: > > I think you are making sense. There is an indeterminate race > > between post_recv() returning to the application and when > > a packet being received by the HCA might be able to use > > that buffer. There are no ordering guarantees > > between messages sent on one QP and another so the application > > can't easily use a different QP to advertise posted buffers (credits). > > That is why the IB RC protocol does this for you in band if the RC QP > > is using a dedicated receive queue but not a shared receive queue. > > > > Do you mean the IB RC protocol advertises credits as part of the > transport protocol? Yes. See chapter 9.7.7.2 in Rel 1.2 vol. 1. From glebn at voltaire.com Thu Feb 21 11:50:10 2008 From: glebn at voltaire.com (Gleb Natapov) Date: Thu, 21 Feb 2008 21:50:10 +0200 Subject: [ofa-general] post_recv question In-Reply-To: <1203623047.5109.170.camel@brick.pathscale.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <20080221193111.GC18720@minantech.com> <1203623047.5109.170.camel@brick.pathscale.com> Message-ID: <20080221195010.GD18720@minantech.com> On Thu, Feb 21, 2008 at 11:44:07AM -0800, Ralph Campbell wrote: > > > That is why the IB RC protocol does this for you in band if the RC QP > > > is using a dedicated receive queue but not a shared receive queue. > > What do you mean by that? RNR works for both RC and SRQ QPs. > > Right. > I was referring to the credit returned in the ACK header which > allows the remote RC QP endpoint to send a message after a post_recv(). 
> There is no such message level flow control if the RC QP is using a SRQ. Ah, you are talking about that flow control. But the purpose of that flow control is to rate limit a sender in case of receive buffers shortage, not to prevent RNRs completely. -- Gleb. From swise at opengridcomputing.com Thu Feb 21 12:18:20 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 21 Feb 2008 14:18:20 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <000201c874c1$925dcc70$72258686@amr.corp.intel.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <47BDD1DE.4030504@opengridcomputing.com> <000201c874c1$925dcc70$72258686@amr.corp.intel.com> Message-ID: <47BDDC8C.9080202@opengridcomputing.com> Sean Hefty wrote: >> I'm asking this from a device driver developer's perspective. I'm not >> writing an application. I'm trying to understand and define exactly >> what must be guaranteed by the device/driver up returning from >> post_recv(). > > At least from IB's view for post receive (from spec): > > Control returns to the Consumer immediately after the WQEs have been submitted > to the Receive Queue or the SRQ and the HCA has been notified that one or more > WQEs are ready to process. > > - Sean See? This implies that the HCA is _not_ necessarily ready to place incoming SENDS into those posted recv buffers... "the HCA has been notified". From caitlin.bestler at gmail.com Thu Feb 21 12:20:06 2008 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Thu, 21 Feb 2008 12:20:06 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <000201c874c1$925dcc70$72258686@amr.corp.intel.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <47BDD1DE.4030504@opengridcomputing.com> <000201c874c1$925dcc70$72258686@amr.corp.intel.com> Message-ID: <469958e00802211220r2f728ae7s8e8c1bac521caccc@mail.gmail.com> On Thu, Feb 21, 2008 at 11:40 AM, Sean Hefty wrote: > >I'm asking this from a device driver developer's perspective. 
I'm not > >writing an application. I'm trying to understand and define exactly > >what must be guaranteed by the device/driver up returning from > >post_recv(). > > At least from IB's view for post receive (from spec): > > Control returns to the Consumer immediately after the WQEs have been submitted > to the Receive Queue or the SRQ and the HCA has been notified that one or more > WQEs are ready to process. > > - Sean > Would you agree that if the WQEs have already successfully been "submitted to the Receive Queue or the SRQ and the HCA has been notified" that the HCA would be incorrect in subsequently raising an error stating that the buffers were not available? iWARP does not convey send credits in the RDMA protocol, but I believe both iWARP and IB are in agreement that declaring "no buffer available" and causing the reliable connection to be torn down is a serious step. The HCA/RNIC is not free to be sloppy in making this determination. There are other places in both specifications where the RDMA device is given latitude to asynchronously implement a request. For example, it is clear that a window is *not* necessarily bound when the bind call completes. But in all those cases there is an explicit completion to allow the consumer to unambiguously know when it is safe to proceed. the application is never expected to rely on knowledge of specific HCAs or RNICs, or to "guess" what might be good enough. There are only two feedbacks from posting a Receive WQE: the call completion and the CQE being returned by cq_poll(). There are only two states for the Recv WQE between those two events: available for allocation and allocated. And the application does not need to know about the difference between those two states on a per-WQE basis. If there were a third state, then there would have to be a mechanism to make that information available. There is none, so such a third state must not exist (at least in any observable form). 
From changquing.tang at hp.com Thu Feb 21 12:19:36 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Thu, 21 Feb 2008 20:19:36 +0000 Subject: [ofa-general] Can we provide a qp_num when creating a QP ? Message-ID: Hi, Roland or other engineers: I have asked this question before. To wire a QP connection, each side needs to know peer's side port lid and qp_num. For MPI with many ranks, this is an alltoall exchange. If we can create a QP with provided qp_num, then MPI does not need the qp_num exchange, for a QP-pair between two processes, MPI can figure out peer QP's qp_num. So we can eliminate the third-party channel to exchange information, and speed up the startup time. Currently qp_num is always a return value from the driver. If we can suggest a qp_num when creating a QP, and the qp_num is already used, then IBV can either error out, or pick up another number for the app. Thanks. --CQ From rdreier at cisco.com Thu Feb 21 12:22:16 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 21 Feb 2008 12:22:16 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <20080221181952.GB18720@minantech.com> (Gleb Natapov's message of "Thu, 21 Feb 2008 20:19:52 +0200") References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> Message-ID: > OpenMPI can be configured to send credit updates over different QP. I'll > try to stress it next week to see what happens. It seems that it would be pretty hard to hit this race in practice. And I don't think mem-free Mellanox hardware has any race -- not positive about Tavor/non-mem-free Arbel.
(On IB you need to set RNR retries to 0 also for the missing receive to be detectable even if the race exists) From swise at opengridcomputing.com Thu Feb 21 12:24:30 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 21 Feb 2008 14:24:30 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <469958e00802211220r2f728ae7s8e8c1bac521caccc@mail.gmail.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <47BDD1DE.4030504@opengridcomputing.com> <000201c874c1$925dcc70$72258686@amr.corp.intel.com> <469958e00802211220r2f728ae7s8e8c1bac521caccc@mail.gmail.com> Message-ID: <47BDDDFE.5000509@opengridcomputing.com> Caitlin Bestler wrote: > On Thu, Feb 21, 2008 at 11:40 AM, Sean Hefty wrote: >>> I'm asking this from a device driver developer's perspective. I'm not >> >writing an application. I'm trying to understand and define exactly >> >what must be guaranteed by the device/driver up returning from >> >post_recv(). >> >> At least from IB's view for post receive (from spec): >> >> Control returns to the Consumer immediately after the WQEs have been submitted >> to the Receive Queue or the SRQ and the HCA has been notified that one or more >> WQEs are ready to process. >> >> - Sean >> > > Would you agree that if the WQEs have already successfully been > "submitted to the Receive Queue or the SRQ and the HCA has been notified" > that the HCA would be incorrect in subsequently raising an error > stating that the > buffers were not available? > > iWARP does not convey send credits in the RDMA protocol, but I believe both > iWARP and IB are in agreement that declaring "no buffer available" and causing > the reliable connection to be torn down is a serious step. The HCA/RNIC is not > free to be sloppy in making this determination. > > There are other places in both specifications where the RDMA device is given > latitude to asynchronously implement a request. 
For example, it is clear that > a window is *not* necessarily bound when the bind call completes. But in > all those cases there is an explicit completion to allow the consumer to > unambiguously know when it is safe to proceed. > > the application is never expected to rely on knowledge of specific HCAs > or RNICs, or to "guess" what might be good enough. There are only > two feedbacks from posting a Receive WQE: the call completion > and the CQE being returned by cq_poll(). > > There are only two states for the Recv WQE between those two events: > available for allocation and allocated. And the application does not > need to know about the difference between those two states on a > per-WQE basis. > > If there were a third state, then there would have to be a mechanism > to make that information available. There is none, so such a third state > must not exist (at least in any observable form). I agree. It's just that the wording above is pretty loose in my mind. But I'm only seeking clarification. Seems the consensus is that when you return from post_recv() the buffer can be assumed to be available for incoming SEND placement... From rdreier at cisco.com Thu Feb 21 12:28:55 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 21 Feb 2008 12:28:55 -0800 Subject: [ofa-general] [2.6 patch] infiniband/hw/nes/nes_verbs.c: fix off-by-one In-Reply-To: <20080221154951.GA28328@cs181133002.pp.htv.fi> (Adrian Bunk's message of "Thu, 21 Feb 2008 17:49:51 +0200") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> Message-ID: > This driver should really have gotten some review before being included > in the kernel. > Even a simple checkpatch run finds more than > 250 stylistic errors > (not code bugs but cases where the driver violates the standard code > formatting rules of kernel code).
Linus has strongly stated that we should merge hardware drivers early, and I agree: although the nes driver clearly needs more work, there's no advantage to users with the hardware in forcing them to wait for 2.6.26 to merge the driver, since they'll just have to patch the grungy code in themselves anyway. And by merging the driver early, we get fixed up for any tree-wide changes and allow janitors to help with the cleanup. (By the way, the code is not that pretty but it a lot closer to upstream style than most driver submissions) > And these are just comments from someone with zero knowledge about > InfiniBand, but I'd expect InfiniBand-specifig bugs might be found > before they hit users if an InfiniBand maintainer would review the > complete driver. Just for the record, although this driver is under drivers/infiniband, it is actually for a device that does iWARP/10 Gb ethernet. At some point we may want to rename drivers/infiniband to drivers/rdma, but so far the churn hasn't seemed worth it for what is basically a cosmetic issue. - R. From caitlin.bestler at gmail.com Thu Feb 21 12:29:14 2008 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Thu, 21 Feb 2008 12:29:14 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <47BDDDFE.5000509@opengridcomputing.com> References: <1203621024.5109.161.camel@brick.pathscale.com> <47BDD1DE.4030504@opengridcomputing.com> <000201c874c1$925dcc70$72258686@amr.corp.intel.com> <469958e00802211220r2f728ae7s8e8c1bac521caccc@mail.gmail.com> <47BDDDFE.5000509@opengridcomputing.com> Message-ID: <469958e00802211229w127e71e1wd2baa1f320b3c0fe@mail.gmail.com> On Thu, Feb 21, 2008 at 12:24 PM, Steve Wise wrote: > > > Seems the consensus is that when you return from post_recv() the buffer > can be assumed to be available for incoming SEND placement... > > I don't think any other solution could even work for SRQs. Applications cannot be required to advertise the credit on the "same" QP after posting to the SRQ. 
And requiring that credit be advertised on a per-connection basis would be contrary to the goal of allowing SRQs to be used as a common pool for multiple connection sessions. From glebn at voltaire.com Thu Feb 21 12:37:22 2008 From: glebn at voltaire.com (Gleb Natapov) Date: Thu, 21 Feb 2008 22:37:22 +0200 Subject: [ofa-general] post_recv question In-Reply-To: References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> Message-ID: <20080221203721.GE18720@minantech.com> On Thu, Feb 21, 2008 at 12:22:16PM -0800, Roland Dreier wrote: > (On IB you need to set RNR > retries to 0 also for the missing receive to be detectable even if the > race exists) OpenMPI does this for SW flow controlled QPs. -- Gleb. From rpearson at systemfabricworks.com Thu Feb 21 12:43:31 2008 From: rpearson at systemfabricworks.com (Robert Pearson) Date: Thu, 21 Feb 2008 14:43:31 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <47BDDDFE.5000509@opengridcomputing.com> Message-ID: <6gvfio$3monu5@rrcs-agw-01.hrndva.rr.com> Seems the consensus is that when you return from post_recv() the buffer can be assumed to be available for incoming SEND placement... As you pointed out earlier you have done a PIO. Nothing is said about the 'readiness'. A race between a doorbell and an incoming send packet isn't really meaningful if they happen independently at or very nearly the same time. Either it makes it or it doesn't. This is like a two processor load/store race in memory. The order is really arbitrary if the two operations are independent. 
On the other hand if you do the doorbell to post the receive buffer and *then* do another doorbell to post a send operation that causes the existence of the buffer to be made known to the other side and *then* the other side sends a message that has to work so the HCA has to complete the one doorbell before it can handle the second. If you notify out of band then you mileage may vary. From sashak at voltaire.com Thu Feb 21 13:06:37 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 21 Feb 2008 21:06:37 +0000 Subject: [ofa-general] [PATCH] opensm: fix potential core dumps Message-ID: <20080221210637.GU17477@sashak.voltaire.com> When for some reason one or more PortInfo was dropped switch node may have uninitialized elements in physp_table array and osm_node_get_physp_ptr() will return NULL. Signed-off-by: Sasha Khapyorsky --- This patch is not related to OFED. opensm/opensm/osm_perfmgr.c | 2 +- opensm/opensm/osm_state_mgr.c | 2 +- opensm/opensm/osm_ucast_mgr.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index c1c620c..cc95bee 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -629,7 +629,7 @@ static int sweep_hop_1(osm_sm_t * sm) for (port_num = 0; port_num < num_ports; port_num++) { /* go through the port only if the port is not DOWN */ p_ext_physp = osm_node_get_physp_ptr(p_node, port_num); - if (ib_port_info_get_port_state + if (!p_ext_physp || ib_port_info_get_port_state (&p_ext_physp->port_info) <= IB_LINK_DOWN) continue; diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 8f76c00..38b2c4e 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -504,7 +504,7 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) for (port_num = 0; port_num < num_ports; port_num++) { /* go through the port only if the port is not DOWN */ p_ext_physp = osm_node_get_physp_ptr(p_node, 
port_num); - if (ib_port_info_get_port_state + if (p_ext_physp && ib_port_info_get_port_state (&(p_ext_physp->port_info)) > IB_LINK_DOWN) { memset(&context, 0, sizeof(context)); context.ni_context.node_guid = diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c index 27a206f..1aa5ea9 100644 --- a/opensm/opensm/osm_ucast_mgr.c +++ b/opensm/opensm/osm_ucast_mgr.c @@ -561,7 +561,7 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item, /* make sure the link is healthy. If it is not - don't propagate through it. */ p_physp = osm_node_get_physp_ptr(p_node, port_num); - if (!osm_link_is_healthy(p_physp)) + if (!p_physp || !osm_link_is_healthy(p_physp)) continue; __osm_ucast_mgr_process_neighbor(p_mgr, p_sw, -- 1.5.4.1.122.gaa8d From bunk at kernel.org Thu Feb 21 13:01:24 2008 From: bunk at kernel.org (Adrian Bunk) Date: Thu, 21 Feb 2008 23:01:24 +0200 Subject: [ofa-general] Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> Message-ID: <20080221210124.GD28328@cs181133002.pp.htv.fi> [ Linus Added to the To: since I want to hear his opinion on this issue. ] On Thu, Feb 21, 2008 at 12:28:55PM -0800, Roland Dreier wrote: > > This driver should really have gotten some review before being included > > in the kernel. > > > Even a simple checkpatch run finds more than > 250 stylistic errors > > (not code bugs but cases where the driver violates the standard code > > formatting rules of kernel code). > > Linus has strongly stated that we should merge hardware drivers early, > and I agree: although the nes driver clearly needs more work, there's > no advantage to users with the hardware in forcing them to wait for > 2.6.26 to merge the driver, since they'll just have to patch the > grungy code in themselves anyway. 
And by merging the driver early, we > get fixed up for any tree-wide changes and allow janitors to help with > the cleanup. Is it really intended to merge drivers without _any_ kind of review? This driver even lacks a basic "please fix the > 250 checkpatch errors" [1] and similar low hanging fruits that could easily be spotted and then fixed by the submitter within a short amount of time. I see the point that it might make sense to not prevent the merging of drivers indefinitely when they have some hard-to-fix issues, but was this really meant as an excuse for maintainers to no longer do any review of what they merge at all? > (By the way, the code is not that pretty but it a lot closer to > upstream style than most driver submissions) >... There might be worse code being submitted, but when looking at what gets merged into Linus' tree this driver beats all other drivers I remember in both number of stylistic problems and bugs. [2] > - R. cu Adrian BTW: Greg, you are Cc'ed for your joke in [3]... [1] not to mention the > 2000 checkpatch warnings [2] as already said, that's not meant against the driver submitter; I'm complaining about the complete lack of review that would have brought this driver into shape [3] http://lkml.org/lkml/2008/2/12/427 -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S.
Buck - Dragon Seed From rdreier at cisco.com Thu Feb 21 13:09:27 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 21 Feb 2008 13:09:27 -0800 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221210124.GD28328@cs181133002.pp.htv.fi> (Adrian Bunk's message of "Thu, 21 Feb 2008 23:01:24 +0200") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: > Is it really intended to merge drivers without _any_ kind of review? > > This driver even lacks a basic "please fix the > 250 checkpatch errors" [1] > and similar low hanging fruits that could easily be spotted and then > fixed by the submitter within a short amount of time. Just to be clear, this driver was reviewed. Many issues were found, and many were fixed while others are being worked on. It's a judgement call when to merge things, but in this case given the good engagement from the vendor, I didn't see anything to be gained by delaying the merge. - R. From torvalds at linux-foundation.org Thu Feb 21 13:14:55 2008 From: torvalds at linux-foundation.org (Linus Torvalds) Date: Thu, 21 Feb 2008 13:14:55 -0800 (PST) Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221210124.GD28328@cs181133002.pp.htv.fi> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: On Thu, 21 Feb 2008, Adrian Bunk wrote: > > Is it really intended to merge drivers without _any_ kind of review? I'd really rather have the driver merged, and then *other* people can send patches! The thing is, that's what merging really means - people can work on it sanely together. 
Before it's merged, it's a lot harder for people to work on it unless they are really serious about that driver, so before merging, the janitorial kind of things seldom happen. So yes, I really do believe that we should merge drivers in particular a lot more aggressively. I'd like to see *testing* feedback, in order to not merge drivers that simply don't work well enough, but anything else? I suspect other feedback is as likely to cause problems as it is to fix things. > This driver even lacks a basic "please fix the > 250 checkpatch errors" [1] > and similar low hanging fruits that could easily be spotted and then > fixed by the submitter within a short amount of time. Quite frankly, I've several times been *this* close (holds up fingers so you can't even see between them) to just remove checkpatch entirely. I'm personally of the opinion that a lot of checkpatch "fixes" are anything but. That mainly concerns fixing overlong lines (where the "fixed" version is usually worse than the original), but it's been true for some other warnings too. Linus From greg at kroah.com Thu Feb 21 13:30:37 2008 From: greg at kroah.com (Greg KH) Date: Thu, 21 Feb 2008 13:30:37 -0800 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221210124.GD28328@cs181133002.pp.htv.fi> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: <20080221213037.GA24191@kroah.com> On Thu, Feb 21, 2008 at 11:01:24PM +0200, Adrian Bunk wrote: > > BTW: Greg, you are Cc'ed for your joke in [3]... > [3] http://lkml.org/lkml/2008/2/12/427 That was not a joke, I meant it. Do you have proof that the majority of patches going into the kernel tree are not reviewed by at least 2 people? Now they might not be 2 people that you personally like/agree with, but that's a totally different topic...
And I'm with Linus on this one, it's much easier to work on driver fixes together with others, when they are in the kernel tree. Although I do like the checkpatch.pl script, it has helped me in making it easier to clean up some vendor-provided drivers recently, finding some obvious coding style issues that I had missed the first pass through. thanks, greg k-h From David.Cincibus at seznam.cz Thu Feb 21 14:04:38 2008 From: David.Cincibus at seznam.cz (=?us-ascii?Q?David=20Cincibus?=) Date: Thu, 21 Feb 2008 23:04:38 +0100 (CET) Subject: [ofa-general] UK NATIONAL LOTTERY PAYMENT CENTRE:Ref: UKL/491OXI/04 Message-ID: <1062.1663-1431-1055084789-1203631478@seznam.cz> UK Lottery Headquarters: Customer Service 28 TANFIELD ROAD, CROYDON.LONDON Ref: UKL/491OXI/04 Batch: 12/25/0304 Date: 21/20/2008. Attn:Winner, PAYMENT APPROVAL In receipt of your mail, you have being officially cleared for payment by the Verifications Dept. at the headquarters of the UK National Lottery. Please note that All participants for the online version were selected randomly from World Wide Web sites through computer draw system and extracted from over 100,000 unions, associations, and corporate bodies that are listed online. The original copy of your duly approved winning certificate and a covering document from the British Government stating that the amount won was obtained legally through her National Lottery Promotions has been approved and will be sent to you as collacteral of proof. You have to choose your prefferred delivery option and as well meet the stipulated conditions associated with the stipulated delivery options. The parcel sent to us for delivery is a lottery parcel containing the sum of 750,000GBP including a winning National Certificate of Approval. Note:We do have two(2) major options whereby you can have your funds transfered to you. OPTION 1. COURIER DELIVERY.. OPTION 2. BANK TRANSFER.. COURIER DELIVERY. 
To transfer your funds within the Uk using the Courier delivery service,we charge £399 and delivery will be effected within 24hours. INTERNATIONAL DELIVERY We charge a minimum of £329.00 for international delivery.Delivery will be effected to the address stated to circumvent the misappropriation of claims. Please find below the break down of delivery charges using the courier delivery services for international delivery. MAILWAY DELIVERY(2 Working Days) An original certificate of weight.....0.9Gramms Total weight of parcel....................0.9Gramms Colour of parcel.............................Brown Mailing............................................£110.00 Insurance........................................£135.00 Vat (5%)..........................................£200.00 TOTAL............................................£445.00 MAILWAY DELIVERY(4 Working Days) An original certificate of weight.....0.9Gramms Total weight of parcel....................0.9Gramms Colour of parcel.............................Brown Mailing............................................£110.00 Insurance........................................£119.00 Vat (5%)..........................................£100.00 TOTAL............................................£319.00 BANK TRANSFER OPTION: Your prize money is protected by a hard cover insurance policy(GEOTRUST), which makes it impossible to deduct any amount from the prize money with or without the consent of the beneficiary. This is in accordance with section (13) 1n of the national gambling act as adopted in 1993 and amended on 3rd july 1996 by the constitutional assembly, this is to protect winners and to avoid misappropration of funds.Also be reminded that the deadline for the claiming of winnings is exactly one week after the receipt of this email. After this period, your cash prize will be deemed to have been forfeited by you and will be re-used in the drawings of the next edition of the lottery. 
You are to confirm the receipt of this mail by picking either the Bank transfer option or the courier delivery option to enable us furnish you with payment instructions on how to make your payments for your delivery to commence and please note that processing payment for either the bank transfer or courier delivery must be received and confirmed before we begin the processing of funds delivery. We await your urgent response to enable us furnish you with further instruction. Congratulations once again. Regards, Mr.Nicholson .Spiff Dispatch officer UK NATIONAL LOTTERY PAYMENTCENTRE Open 7days-24hrs-365days a year. Email:n_spiff at yahoo.com From arjan at infradead.org Thu Feb 21 14:08:55 2008 From: arjan at infradead.org (Arjan van de Ven) Date: Thu, 21 Feb 2008 14:08:55 -0800 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221210124.GD28328@cs181133002.pp.htv.fi> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: <20080221140855.6aea8cc1@laptopd505.fenrus.org> On Thu, 21 Feb 2008 23:01:24 +0200 Adrian Bunk wrote: > [ Linus Added to the To: since I want to hear his opinion on this > issue. ] > > On Thu, Feb 21, 2008 at 12:28:55PM -0800, Roland Dreier wrote: > > > This driver should really have gotten some review before being > > > included in the kernel. > > > > > Even a simple checkpatch run finds more than > 250 stylistic > > > errors (not code bugs but cases where the driver violates the > > > standard code formatting rules of kernel code). 
> > > > Linus has strongly stated that we should merge hardware drivers > > early, and I agree: although the nes driver clearly needs more > > work, there's no advantage to users with the hardware in forcing > > them to wait for 2.6.26 to merge the driver, since they'll just > > have to patch the grungy code in themselves anyway. And by merging > > the driver early, we get fixed up for any tree-wide changes and > > allow janitors to help with the cleanup. > > Is it really intended to merge drivers without _any_ kind of review? No of course not. I totally agree we should be more agressive in merging drivers earlier. A minimal review needs to happen so for a few things imo 1) That the driver doesn't break the build 2) That the driver has no obvious huge security holes (this is a big deal for unsuspecting users) 3) that there's not an obscene amount of "uses deprecated api" compiler warnings (since those are annoying for everyone else) 4) that people who don't have the hardware are not negatively affected (say crashes without the hw or so) beyond that.. that's what EXPERIMENTAL is for (joking; lets not open that can of fish) From David.Cincibus at seznam.cz Thu Feb 21 14:14:53 2008 From: David.Cincibus at seznam.cz (=?us-ascii?Q?David=20Cincibus?=) Date: Thu, 21 Feb 2008 23:14:53 +0100 (CET) Subject: [ofa-general] UK NATIONAL LOTTERY PAYMENT CENTRE:Ref: UKL/491OXI/04 Message-ID: <1026.1609-1947-484076131-1203632093@seznam.cz> UK Lottery Headquarters: Customer Service 28 TANFIELD ROAD, CROYDON.LONDON Ref: UKL/491OXI/04 Batch: 12/25/0304 Date: 21/20/2008. Attn:Winner, PAYMENT APPROVAL In receipt of your mail, you have being officially cleared for payment by the Verifications Dept. at the headquarters of the UK National Lottery. Please note that All participants for the online version were selected randomly from World Wide Web sites through computer draw system and extracted from over 100,000 unions, associations, and corporate bodies that are listed online. 
The original copy of your duly approved winning certificate and a covering document from the British Government stating that the amount won was obtained legally through her National Lottery Promotions has been approved and will be sent to you as collacteral of proof. You have to choose your prefferred delivery option and as well meet the stipulated conditions associated with the stipulated delivery options. The parcel sent to us for delivery is a lottery parcel containing the sum of 750,000GBP including a winning National Certificate of Approval. Note:We do have two(2) major options whereby you can have your funds transfered to you. OPTION 1. COURIER DELIVERY.. OPTION 2. BANK TRANSFER.. COURIER DELIVERY. To transfer your funds within the Uk using the Courier delivery service,we charge £399 and delivery will be effected within 24hours. INTERNATIONAL DELIVERY We charge a minimum of £329.00 for international delivery.Delivery will be effected to the address stated to circumvent the misappropriation of claims. Please find below the break down of delivery charges using the courier delivery services for international delivery. 
MAILWAY DELIVERY(2 Working Days) An original certificate of weight.....0.9Gramms Total weight of parcel....................0.9Gramms Colour of parcel.............................Brown Mailing............................................£110.00 Insurance........................................£135.00 Vat (5%)..........................................£200.00 TOTAL............................................£445.00 MAILWAY DELIVERY(4 Working Days) An original certificate of weight.....0.9Gramms Total weight of parcel....................0.9Gramms Colour of parcel.............................Brown Mailing............................................£110.00 Insurance........................................£119.00 Vat (5%)..........................................£100.00 TOTAL............................................£319.00 BANK TRANSFER OPTION: Your prize money is protected by a hard cover insurance policy(GEOTRUST), which makes it impossible to deduct any amount from the prize money with or without the consent of the beneficiary. This is in accordance with section (13) 1n of the national gambling act as adopted in 1993 and amended on 3rd july 1996 by the constitutional assembly, this is to protect winners and to avoid misappropration of funds.Also be reminded that the deadline for the claiming of winnings is exactly one week after the receipt of this email. After this period, your cash prize will be deemed to have been forfeited by you and will be re-used in the drawings of the next edition of the lottery. You are to confirm the receipt of this mail by picking either the Bank transfer option or the courier delivery option to enable us furnish you with payment instructions on how to make your payments for your delivery to commence and please note that processing payment for either the bank transfer or courier delivery must be received and confirmed before we begin the processing of funds delivery. 
We await your urgent response to enable us furnish you with further instruction. Congratulations once again. Regards, Mr.Nicholson .Spiff Dispatch officer UK NATIONAL LOTTERY PAYMENTCENTRE Open 7days-24hrs-365days a year. Email:n_spiff at yahoo.com From adobriyan at gmail.com Thu Feb 21 14:33:03 2008 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Fri, 22 Feb 2008 01:33:03 +0300 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: <20080221223303.GD1846@martell.zuzino.mipt.ru> On Thu, Feb 21, 2008 at 01:14:55PM -0800, Linus Torvalds wrote: > On Thu, 21 Feb 2008, Adrian Bunk wrote: > > Is it really intended to merge drivers without _any_ kind of review? > > I'd really rather have the driver merged, and then *other* people can send > patches! > > The thing is, that's what merging really means - people can work on it > sanely together. Before it's merged, it's a lot harder for people to work > on it unless they are really serious about that driver, so before > merging, the janitorial kind of things seldom happen. > > So yes, I really do believe that we should merge drivers in particular a > lot more aggressively. I'd like to see *testing* feedback, in order to not > merge drivers that simply don't work well enough, but anything else? I > suspect other feedback is as likely to cause problems as it is to fix > things. > > > This driver even lacks a basic "please fix the > 250 checkpatch errors" [1] > > and similar low hanging fruits that could easily be spotted and then > > fixed by the submitter within a short amount of time. > > Quite frankly, I've several times been *this* close (holds up fingers so > you can't even see between them) to just remove checkpatch entirely. Agrh! What stopped you?! 
> I'm personally of the opinion that a lot of checkpatch "fixes" are > anything but. That mainly concerns fixing overlong lines (where the > "fixed" version is usually worse than the original), but it's been true > for some other warnings too. Speaking of driver, could authors please comment all those barrier() calls and remove trailing "return;" at the end of void functions. From jeff at garzik.org Thu Feb 21 14:33:10 2008 From: jeff at garzik.org (Jeff Garzik) Date: Thu, 21 Feb 2008 17:33:10 -0500 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221140855.6aea8cc1@laptopd505.fenrus.org> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221140855.6aea8cc1@laptopd505.fenrus.org> Message-ID: <47BDFC26.8000201@garzik.org> Arjan van de Ven wrote: > On Thu, 21 Feb 2008 23:01:24 +0200 > Adrian Bunk wrote: > >> [ Linus Added to the To: since I want to hear his opinion on this >> issue. ] >> >> On Thu, Feb 21, 2008 at 12:28:55PM -0800, Roland Dreier wrote: >>> > This driver should really have gotten some review before being >>> > included in the kernel. >>> >>> > Even a simple checkpatch run finds more than > 250 stylistic >>> > errors (not code bugs but cases where the driver violates the >>> > standard code formatting rules of kernel code). >>> >>> Linus has strongly stated that we should merge hardware drivers >>> early, and I agree: although the nes driver clearly needs more >>> work, there's no advantage to users with the hardware in forcing >>> them to wait for 2.6.26 to merge the driver, since they'll just >>> have to patch the grungy code in themselves anyway. And by merging >>> the driver early, we get fixed up for any tree-wide changes and >>> allow janitors to help with the cleanup. >> Is it really intended to merge drivers without _any_ kind of review? 
> > No of course not. > > I totally agree we should be more agressive in merging drivers earlier. > A minimal review needs to happen so for a few things imo > 1) That the driver doesn't break the build > 2) That the driver has no obvious huge security holes > (this is a big deal for unsuspecting users) > 3) that there's not an obscene amount of "uses deprecated api" compiler warnings > (since those are annoying for everyone else) > 4) that people who don't have the hardware are not negatively affected > (say crashes without the hw or so) FWIW, my general guidelines for merging drivers in my areas are: 1) it's not fugly 2) it has an active maintainer who responds to feedback I tend to think it is NOT in the best interests of Linux users, for us to merge vendor-fugly drivers with many layers of OS wrappers and similar obfuscation. But similarly... I merge drivers long before our SCSI maintainer will, and I value "it works" above stupid checkpatch warnings. Jeff From tom at opengridcomputing.com Thu Feb 21 14:47:59 2008 From: tom at opengridcomputing.com (Tom Tucker) Date: Thu, 21 Feb 2008 16:47:59 -0600 Subject: [ofa-general] post_recv question In-Reply-To: References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> Message-ID: <1203634079.31225.42.camel@trinity.ogc.int> On Thu, 2008-02-21 at 12:22 -0800, Roland Dreier wrote: > > OpenMPI can be configured to send credit updates over different QP. I'll > > try to stress it next week to see what happens. > > It seems that it would be pretty hard to hit this race in practice. > And I don't think mem-free Mellanox hardware has any race -- not > positive about Tavor/non-mem-free Arbel. (On IB you need to set RNR > retries to 0 also for the missing receive to be detectable even if the > race exists) Well....consider the case of two adapters on two different pci busses. 
One is busy, one is not. Specifically, the post_recv QP is on an HCA on a busy bus, the post_send (of the credit) is on a QP on an HCA on a dedicated bus. I think we can assume that the ringing of the doorbell is synchronous, i.e. when the processor completes its write, the card knows there are RQ WQE available in host memory, but whether or not and when the WQE is fetched relative to the processor is asynchronous. The card will have to get on the bus again and read host memory. Meanwhile the processor runs off and posts a send of the credit on the other QP on a different HCA. The peer responds with a send to the "data qp". The receiving adapter knows the WQE is there, but it may not have fetched it yet. The crux of the question is whether or not the adapter MUST fetch the WQE and place the packet, or can it simply drop it. If you say it MUST, then you must have enough buffer to handle worst case delayed placement. If the post guarantee is only within the same QP or affiliated QP (SRQ), then all it must do is ensure that when processing a SQ request AND the associated RQ (SRQ) is empty, that it must fetch outstanding, unread RQ WQE prior to processing the SQ WQE. This allows for the post_recv guarantees without the HCA buffering requirements. I seem to recall that "the specs" say something about ordering and synchronization between unaffiliated QP and/or between adapters, but the specific reference long ago fell off my LRU list.
Tom > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From greg at kroah.com Thu Feb 21 14:43:15 2008 From: greg at kroah.com (Greg KH) Date: Thu, 21 Feb 2008 14:43:15 -0800 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221223303.GD1846@martell.zuzino.mipt.ru> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221223303.GD1846@martell.zuzino.mipt.ru> Message-ID: <20080221224315.GA27640@kroah.com> On Fri, Feb 22, 2008 at 01:33:03AM +0300, Alexey Dobriyan wrote: > On Thu, Feb 21, 2008 at 01:14:55PM -0800, Linus Torvalds wrote: > > Quite frankly, I've several times been *this* close (holds up fingers so > > you can't even see between them) to just remove checkpatch entirely. > > Agrh! What stopped you?! > > > I'm personally of the opinion that a lot of checkpatch "fixes" are > > anything but. That mainly concerns fixing overlong lines (where the > > "fixed" version is usually worse than the original), but it's been true > > for some other warnings too. > > Speaking of driver, could authors please comment all those barrier() > calls and remove trailing "return;" at the end of void functions. Why don't you make a patch to checkpatch.pl for those types of things? 
:) From jeff at garzik.org Thu Feb 21 14:57:43 2008 From: jeff at garzik.org (Jeff Garzik) Date: Thu, 21 Feb 2008 17:57:43 -0500 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221224315.GA27640@kroah.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221223303.GD1846@martell.zuzino.mipt.ru> <20080221224315.GA27640@kroah.com> Message-ID: <47BE01E7.7020508@garzik.org> Greg KH wrote: > On Fri, Feb 22, 2008 at 01:33:03AM +0300, Alexey Dobriyan wrote: >> Speaking of driver, could authors please comment all those barrier() >> calls and remove trailing "return;" at the end of void functions. > > Why don't you make a patch to checkpatch.pl for those types of things? > :) Drat, you beat me to that response..... :) From adobriyan at gmail.com Thu Feb 21 14:58:17 2008 From: adobriyan at gmail.com (Alexey Dobriyan) Date: Fri, 22 Feb 2008 01:58:17 +0300 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221224315.GA27640@kroah.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221223303.GD1846@martell.zuzino.mipt.ru> <20080221224315.GA27640@kroah.com> Message-ID: <20080221225817.GE1846@martell.zuzino.mipt.ru> On Thu, Feb 21, 2008 at 02:43:15PM -0800, Greg KH wrote: > On Fri, Feb 22, 2008 at 01:33:03AM +0300, Alexey Dobriyan wrote: > > On Thu, Feb 21, 2008 at 01:14:55PM -0800, Linus Torvalds wrote: > > > Quite frankly, I've several times been *this* close (holds up fingers so > > > you can't even see between them) to just remove checkpatch entirely. > > > > Agrh! What stopped you?! > > > > > I'm personally of the opinion that a lot of checkpatch "fixes" are > > > anything but. 
That mainly concerns fixing overlong lines (where the > > > "fixed" version is usually worse than the original), but it's been true > > > for some other warnings too. > > > > Speaking of driver, could authors please comment all those barrier() > > calls and remove trailing "return;" at the end of void functions. > > Why don't you make a patch to checkpatch.pl for those types of things? > :) Sorry, I'm not touching it with an eigthy six foot pole. :^) From barristerneils at yahoo.co.uk Thu Feb 21 15:04:18 2008 From: barristerneils at yahoo.co.uk (BARRISTER Neils) Date: Thu, 21 Feb 2008 23:04:18 +0000 (GMT) Subject: [ofa-general] Dear Friend Message-ID: <875228.90535.qm@web28405.mail.ukl.yahoo.com> Dear Friend I did not forgot your past effort and attempts to assist me, now I'm happy to inform you that i have succeeded in getting those funds transferred under the cooperation of a new partner from Paraguay. Now Contact my secretary ask him for ($450.000.) for your compensation his, name is Mr. Morgan Preye E-Mail (morganpry at live.com] HE will send you the money without any delay. Your information needed to enable him sends the cheque to you because I travel for another investment project. .. 1. FULL NAMES:...................... 2. ADDRESS:........................... 3. TELEPHONE NUMBER:................. 4. STATE:........................... 5. COUNTRY:.......................... Take care of yourself I hope to meet you soon Regards Barr.Anthony Neil [Esq]. --------------------------------- Yahoo! Answers - Get better answers from someone who knows. Tryit now. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From jengelh at computergmbh.de Thu Feb 21 15:31:42 2008 From: jengelh at computergmbh.de (Jan Engelhardt) Date: Fri, 22 Feb 2008 00:31:42 +0100 (CET) Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221224315.GA27640@kroah.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221223303.GD1846@martell.zuzino.mipt.ru> <20080221224315.GA27640@kroah.com> Message-ID: On Feb 21 2008 14:43, Greg KH wrote: >On Fri, Feb 22, 2008 at 01:33:03AM +0300, Alexey Dobriyan wrote: >> On Thu, Feb 21, 2008 at 01:14:55PM -0800, Linus Torvalds wrote: >> > Quite frankly, I've several times been *this* close (holds up fingers so >> > you can't even see between them) to just remove checkpatch entirely. >> >> Agrh! What stopped you?! >> >> > I'm personally of the opinion that a lot of checkpatch "fixes" are >> > anything but. That mainly concerns fixing overlong lines (where the >> > "fixed" version is usually worse than the original), but it's been true >> > for some other warnings too. >> >> Speaking of driver, could authors please comment all those barrier() >> calls and remove trailing "return;" at the end of void functions. > >Why don't you make a patch to checkpatch.pl for those types of things? >:) checkpatch would never allow a patch to patch checkpatch. 
From khc at pm.waw.pl Thu Feb 21 15:38:14 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 00:38:14 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: (Linus Torvalds's message of "Thu\, 21 Feb 2008 13\:14\:55 -0800 \(PST\)") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: Linus Torvalds writes: > I'm personally of the opinion that a lot of checkpatch "fixes" are > anything but. That mainly concerns fixing overlong lines Perhaps we should increase line length limit, 132 should be fine. Especially useful with long printk() lines and long arithmetic expressions. -- Krzysztof Halasa From bunk at kernel.org Thu Feb 21 15:40:18 2008 From: bunk at kernel.org (Adrian Bunk) Date: Fri, 22 Feb 2008 01:40:18 +0200 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47BDFC26.8000201@garzik.org> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221140855.6aea8cc1@laptopd505.fenrus.org> <47BDFC26.8000201@garzik.org> Message-ID: <20080221234018.GE28328@cs181133002.pp.htv.fi> On Thu, Feb 21, 2008 at 05:33:10PM -0500, Jeff Garzik wrote: >... > But similarly... I merge drivers long before our SCSI maintainer will, > and I value "it works" above stupid checkpatch warnings. I was not talking about checkpatch warnings. 
I'm talking about checkpatch errors for code like if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) I have to accept that Linus prefers to have the driver merged first and let janitors make the code readable in subsequent patches, but if GNU indent wasn't unable to properly cope with the fact that this driver has over 2000 lines that are over 80 characters long I'd simply run this driver through scripts/Lindent . > Jeff cu Adrian -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed From jeff at garzik.org Thu Feb 21 15:41:06 2008 From: jeff at garzik.org (Jeff Garzik) Date: Thu, 21 Feb 2008 18:41:06 -0500 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: <47BE0C12.604@garzik.org> Krzysztof Halasa wrote: > Linus Torvalds writes: > >> I'm personally of the opinion that a lot of checkpatch "fixes" are >> anything but. That mainly concerns fixing overlong lines > > Perhaps we should increase line length limit, 132 should be fine. I think checkpatch is useful, but I've agreed from the beginning that the line length complaint is completely silly. If a driver is full of lines of length >80, that's a problem. If it's just a few, that's more of a developer decision based on the individual line of code. 
Jeff From alan at lxorguk.ukuu.org.uk Thu Feb 21 15:31:44 2008 From: alan at lxorguk.ukuu.org.uk (Alan Cox) Date: Thu, 21 Feb 2008 23:31:44 +0000 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: <20080221233144.6368712c@core> On Fri, 22 Feb 2008 00:38:14 +0100 Krzysztof Halasa wrote: > Linus Torvalds writes: > > > I'm personally of the opinion that a lot of checkpatch "fixes" are > > anything but. That mainly concerns fixing overlong lines > > Perhaps we should increase line length limit, 132 should be fine. > Especially useful with long printk() lines and long arithmetic > expressions. Agreed. The fact I'm having to fix bugs introduced by incorrect printk wrapping confirms that for printk strings at least it is overzealous. I'm all for it complaining about printk(KERN_FOO "<90 chars>", foo, bar + 37); type bits when the foo, bar should be underneath to be visible but for straight quoted text too long it should not warn and try to get the text folded. Alan From caitlin.bestler at neterion.com Thu Feb 21 15:48:33 2008 From: caitlin.bestler at neterion.com (Caitlin Bestler) Date: Thu, 21 Feb 2008 15:48:33 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <1203634079.31225.42.camel@trinity.ogc.int> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> <1203634079.31225.42.camel@trinity.ogc.int> Message-ID: <469958e00802211548s54e4d157w961c998dbab88557@mail.gmail.com> Good example, more detailed comments in-line. 
On Thu, Feb 21, 2008 at 2:47 PM, Tom Tucker wrote: > > On Thu, 2008-02-21 at 12:22 -0800, Roland Dreier wrote: > > > OpenMPI can be configured to send credit updates over different QP. I'll > > > try to stress it next week to see what happens. > > > > It seems that it would be pretty hard to hit this race in practice. > > > And I don't think mem-free Mellanox hardware has any race -- not > > positive about Tavor/non-mem-free Arbel. (On IB you need to set RNR > > retries to 0 also for the missing receive to be detectable even if the > > race exists) > > Well....consider the case of two adapters on two different pci busses. > One is busy one is not. Specifically, the post_recv QP is on an HCA on a > busy bus, the post_send (of the credit) is on a QP on an HCA on a > dedicated bus. > > I think we can assume that the ringing of the doorbell is synchronous, > i.e. when the processor completes it's write, the card knows there are > RQ WQE available in host memory, but whether or not and when the WQE is > fetched relative to the processor is asynchronous. The card will have to > get on the bus again and read host memory. Meanwhile the processor runs > off and posts a send on the other QP on a different HCA of the credit. > The peer responds, with a send to the "data qp". The receiving adapter > knows the WQE is there, but it may not have fetched it yet. > > The crux of the question is whether or not the adapter MUST fetch the > WQE and place the packet, or can it simply drop it. If you say it MUST, > then you must have enough buffer to handle worst case delayed placement. > If the post guarantee is only within the same QP or affiliated QP (SRQ), > then all it must do is ensure that when processing a SQ request AND the > associated RQ (SRQ) is empty, that it must fetch outstanding, unread RQ > WQE prior to processing the SQ WQE. This allows for the post_recv > guarantees without the HCA buffering requirements. > I disagree. 
What is required is the adapter MUST NOT take an action based on a "buffer not available" diagnosis until it is certain that it has considered all WQEs that have been successfully posted by the consumer. Further, it MUST NOT require a further action by the consumer to guarantee that it notices a posted WQE. Particularly in iWARP the application layer is free to implement Send/Recv credits by *any* mechanism desired (the only requirement is that there is one, you might recall that there were extensive discussions on this point regarding unsolicited messages for iSER). The concept that the application MUST provide SOME form of flow control was accepted only grudgingly. So clearly any more specific mechanisms were not the intent of the drafters. So if there are still 1000 Recv WQEs in the SRQ we can allow the adapter a great deal of flexibility in when the 1001st is linked into the data structures. The only real constraint is that it MUST do 1001 successful allocations *before* it triggers any sort of "buffer not available" error. I'm not recalling the specific language immediately, but I do recall concluding that sub-dividing the SRQ on an RSS-like basis was *not* compliant with the RDMAC specs and that the left-half of the adapter could not declare "buffer not found" while the right-half of the adapter still had a free buffer. This is of course a major pain if you are trying to team two RDMA adapters to form a single virtual adapter, or even two largely independent ports on the same physical adapter. But the intent of the specifications is very clear: if the consumer has posted 1000 recv WQEs and gotten "SUCCESS" to each of them, then the adapter MUST allocate all 1000 recv WQEs *before* it can fail an operation because no buffer was available. So there is a difference between "must be pushed to the adapter now" and "must be pushed to the adapter before it is too late".
From khc at pm.waw.pl Thu Feb 21 16:05:26 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 01:05:26 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47BE0C12.604@garzik.org> (Jeff Garzik's message of "Thu\, 21 Feb 2008 18\:41\:06 -0500") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> Message-ID: Jeff Garzik writes: > If a driver is full of lines of length >80, that's a problem. I'm not sure. We all have more than 80-chars wide displays for years, don't we? The problem is not the number of characters but code which is too complex and which may sometimes have too many levels of indentation. Unfortunately expressing code complexity in terms of line lengths doesn't seem to work at all. The 80-chars limit harms development, it makes the code less readable, sometimes far less readable. I think we should increase length limit to 132 for the whole kernel code. Obviously printk() _output_ etc. should stay at 80. -- Krzysztof Halasa From meier3 at llnl.gov Thu Feb 21 16:27:30 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Thu, 21 Feb 2008 16:27:30 -0800 Subject: [ofa-general] OpenSM Console Ideas? Message-ID: <47BE16F2.8000507@llnl.gov> LLNL uses the remote console feature in OpenSM. We have a need to secure this remote connection with authentication/authorization and encryption (specifically PAM and OpenSSL). I have a working prototype, and would like to formalize it and share/include this with OpenSM. Before I go down this path too far, I would like to solicit ideas from others who use the console. Currently, the console can be used in local, loopback, or remote modes. If security is added, should it replace other modes, or be an additional mode? 
The intention is to use PAM for the AA framework, and OpenSSL for secure sockets. Are there any serious objections to this implementation plan? The console feature has always been a configuration/command line option, but should the secure console be conditionally compiled/linked as well? (eliminate dependency on the PAM and OpenSSL libs, pam, pam_misc, crypto, ssl). The secure console would require a relatively primitive client application, which I will probably package under opensm, just like osmtest. Make sense? Do you have any other ideas or suggestions for the remote console? -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov From bunk at kernel.org Thu Feb 21 16:29:45 2008 From: bunk at kernel.org (Adrian Bunk) Date: Fri, 22 Feb 2008 02:29:45 +0200 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221233144.6368712c@core> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221233144.6368712c@core> Message-ID: <20080222002945.GF28328@cs181133002.pp.htv.fi> On Thu, Feb 21, 2008 at 11:31:44PM +0000, Alan Cox wrote: > On Fri, 22 Feb 2008 00:38:14 +0100 > Krzysztof Halasa wrote: > > > Linus Torvalds writes: > > > > > I'm personally of the opinion that a lot of checkpatch "fixes" are > > > anything but. That mainly concerns fixing overlong lines > > > > Perhaps we should increase line length limit, 132 should be fine. > > Especially useful with long printk() lines and long arithmetic > > expressions. > > Agreed. The fact I'm having to fix bugs introduced by incorrect printk > wrapping confirms that for printk strings at least it is overzealous.
> > I'm all for it complaining about > > printk(KERN_FOO "<90 chars>", foo, bar + 37); > > type bits when the foo, bar should be underneath to be visible but for > straight quoted text too long it should not warn and try to get the text > folded. I think it should warn, but people have to be aware of the following: - checkpatch errors are for stuff that really has to be fixed - checkpatch warnings are for stuff that should be looked at - the goal is not 0 checkpatch warnings but readable and bugfree code A nice property of checkpatch is that it encourages to look closer at code like the following (it warns about the volatile): if (!netif_queue_stopped(netdev)) { netif_stop_queue(netdev); barrier(); if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) { netif_start_queue(netdev); goto sq_no_longer_full; } } > Alan cu Adrian -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed From jeff at garzik.org Thu Feb 21 16:44:08 2008 From: jeff at garzik.org (Jeff Garzik) Date: Thu, 21 Feb 2008 19:44:08 -0500 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> Message-ID: <47BE1AD8.4060600@garzik.org> Krzysztof Halasa wrote: > Jeff Garzik writes: > >> If a driver is full of lines of length >80, that's a problem. > > I'm not sure. > We all have more than 80-chars wide displays for years, don't we? The Every time this discussion comes up, people point out that it remains highly common to open multiple 80-column terminal windows, making the 80-column limit still highly relevant in modern times. > The > problem is [...] 
code which is too > complex and which may sometimes have too many levels of indentation. Quite true. Jeff From weiny2 at llnl.gov Thu Feb 21 16:56:55 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 21 Feb 2008 16:56:55 -0800 Subject: [ofa-general] [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING Message-ID: <20080221165655.0227c88c.weiny2@llnl.gov> >From b8fb2151b92ddd4a7d2a4cc2ab38a6b34fffc7ab Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 21 Feb 2008 09:10:10 -0800 Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING Signed-off-by: Ira K. Weiny --- opensm/include/vendor/osm_vendor_ibumad.h | 4 ++-- opensm/libvendor/osm_vendor_ibumad.c | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/opensm/include/vendor/osm_vendor_ibumad.h b/opensm/include/vendor/osm_vendor_ibumad.h index 84fd21a..3a3f070 100644 --- a/opensm/include/vendor/osm_vendor_ibumad.h +++ b/opensm/include/vendor/osm_vendor_ibumad.h @@ -141,12 +141,12 @@ typedef struct _umad_match { uint32_t version; } umad_match_t; -#define OSM_UMAD_MAX_PENDING 1000 +#define DEFAULT_OSM_UMAD_MAX_PENDING 1000 typedef struct vendor_match_tbl { - umad_match_t tbl[OSM_UMAD_MAX_PENDING]; uint32_t last_version; int max; + umad_match_t *tbl; } vendor_match_tbl_t; typedef struct _osm_vendor { diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 679f06a..f847e61 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -451,6 +451,7 @@ ib_api_status_t osm_vendor_init(IN osm_vendor_t * const p_vend, IN osm_log_t * const p_log, IN const uint32_t timeout) { + char *max = NULL; int r, n_cas; OSM_LOG_ENTER(p_log); @@ -480,7 +481,31 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, } p_vend->ca_count = n_cas; - p_vend->mtbl.max = OSM_UMAD_MAX_PENDING; + p_vend->mtbl.max = 
DEFAULT_OSM_UMAD_MAX_PENDING; + + if ((max = getenv("OSM_UMAD_MAX_PENDING")) != NULL) { + int tmp = strtol(max, NULL, 0); + if (tmp > 0) + p_vend->mtbl.max = tmp; + else + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_init: Error:" + "OSM_UMAD_MAX_PENDING=%d is invalid", + tmp); + } + + osm_log(p_vend->p_log, OSM_LOG_INFO, + "osm_vendor_init: %d pending umads specified\n", + p_vend->mtbl.max); + + p_vend->mtbl.tbl = calloc(p_vend->mtbl.max, sizeof(*(p_vend->mtbl.tbl))); + if (!p_vend->mtbl.tbl) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_init: Error:" + "failed to allocate vendor match table\n"); + r = IB_INSUFFICIENT_MEMORY; + goto Exit; + } Exit: OSM_LOG_EXIT(p_log); -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-opensm-libvendor-osm_vendor_ibumad.c-Add-environmen.patch Type: application/octet-stream Size: 2397 bytes Desc: not available URL: From bunk at kernel.org Thu Feb 21 17:06:59 2008 From: bunk at kernel.org (Adrian Bunk) Date: Fri, 22 Feb 2008 03:06:59 +0200 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221213037.GA24191@kroah.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221213037.GA24191@kroah.com> Message-ID: <20080222010659.GG28328@cs181133002.pp.htv.fi> On Thu, Feb 21, 2008 at 01:30:37PM -0800, Greg KH wrote: > On Thu, Feb 21, 2008 at 11:01:24PM +0200, Adrian Bunk wrote: > > > > BTW: Greg, you are Cc'ed for your joke in [3]... > > > [3] http://lkml.org/lkml/2008/2/12/427 > > That was not a joke, I ment it. Do you have proof that the majority of > patches going into the kernel tree are not reviewed by at least 2 > people? >... 
I don't see any way for getting a proof in any direction, but no matter how many SOB lines a patch has my impression is that usually at a maximum the one person who applies a patch reviews it ("review" as in "understands the code in question well and reviews the patch line for line"). Sometimes there's even simply no one who could review a patch at all, e.g. I'm not sure whether there is anyone at all who would be able to review a patch by Sam fiddling with kbuild internals. How many lines of code get changed in the kernel per day? And we should have for each changed line two people who are both experienced enough in this area of the kernel and who have the time to review this line? Even one of our best maintained subsystems has commits that contain bugs like + if ((!tid_agg_rx->reorder_buf) && net_ratelimit()) { + printk(KERN_ERR "can not allocate reordering buffer " + "to tid %d\n", tid); + goto end; + } > thanks, > > greg k-h cu Adrian -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed From tom at opengridcomputing.com Thu Feb 21 17:17:14 2008 From: tom at opengridcomputing.com (Tom Tucker) Date: Thu, 21 Feb 2008 19:17:14 -0600 Subject: [ofa-general] post_recv question In-Reply-To: <469958e00802211548s54e4d157w961c998dbab88557@mail.gmail.com> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> <1203634079.31225.42.camel@trinity.ogc.int> <469958e00802211548s54e4d157w961c998dbab88557@mail.gmail.com> Message-ID: <1203643034.31225.82.camel@trinity.ogc.int> On Thu, 2008-02-21 at 15:48 -0800, Caitlin Bestler wrote: > Good example, more detailed comments in-line.
> > On Thu, Feb 21, 2008 at 2:47 PM, Tom Tucker wrote: > > > > On Thu, 2008-02-21 at 12:22 -0800, Roland Dreier wrote: > > > > OpenMPI can be configured to send credit updates over different QP. I'll > > > > try to stress it next week to see what happens. > > > > > > It seems that it would be pretty hard to hit this race in practice. > > > > > And I don't think mem-free Mellanox hardware has any race -- not > > > positive about Tavor/non-mem-free Arbel. (On IB you need to set RNR > > > retries to 0 also for the missing receive to be detectable even if the > > > race exists) > > > > Well....consider the case of two adapters on two different pci busses. > > One is busy one is not. Specifically, the post_recv QP is on an HCA on a > > busy bus, the post_send (of the credit) is on a QP on an HCA on a > > dedicated bus. > > > > I think we can assume that the ringing of the doorbell is synchronous, > > i.e. when the processor completes it's write, the card knows there are > > RQ WQE available in host memory, but whether or not and when the WQE is > > fetched relative to the processor is asynchronous. The card will have to > > get on the bus again and read host memory. Meanwhile the processor runs > > off and posts a send on the other QP on a different HCA of the credit. > > The peer responds, with a send to the "data qp". The receiving adapter > > knows the WQE is there, but it may not have fetched it yet. > > > > The crux of the question is whether or not the adapter MUST fetch the > > WQE and place the packet, or can it simply drop it. If you say it MUST, > > then you must have enough buffer to handle worst case delayed placement. > > If the post guarantee is only within the same QP or affiliated QP (SRQ), > > then all it must do is ensure that when processing a SQ request AND the > > associated RQ (SRQ) is empty, that it must fetch outstanding, unread RQ > > WQE prior to processing the SQ WQE. 
This allows for the post_recv > > guarantees without the HCA buffering requirements. > > > > I disagree. What is required is the adapter MUST NOT take an action based > on a "buffer not available" diagnosis until it is certain that it has considered > all WQEs that have been successfully posted by the consumer. > Ok. So what does the HW do with the packet while it's pondering its options? It has to put it somewhere. That's my point. You either guarantee that any advertisement of availability can't be issued prior to the buffer being available, or the buffer is synchronously available prior to the advertisement of the credit. Snooping the [s]RQ while processing SQ is a way of delaying the issuance of a credit until the buffer (spec'd in the WQE) is actually known to the adapter. But this only works in the context of a single HBA. > Further, it MUST NOT require a further action by the consumer to guarantee > that it notices a posted WQE. Agreed. > Particularly in iWARP the application layer > is free to implement Send/Recv credits by *any* mechanism desired (the > only requirement is that there is one, you might recall that there were > extensive discussions on this point regarding unsolicited messages for > iSER). The concept that the application MUST provide SOME form of > flow control was accepted only grudgingly. So clearly any more specific > mechanisms were not the intent of the drafters. Yes, but I'm not sure there's any confusion there -- I think this discussion is about "how credits can be issued". In particular what does it mean to issue a credit for: - this QP, - another QP on the same HCA - another QP on a different HCA So far, it seems the consensus is that "all of the above" should work. I'm just not convinced the current implementations guarantee this. > > So if there are still 1000 Recv WQEs in the SRQ we can allow the adapter > a great deal of flexibility in when the 1001st is linked into the data > structures.
> The only real constraint is that it MUST do 1001 successful allocations > *before* it triggers any sort of "buffer not available" error. > agreed. > I'm not recalling the specific language immediately, but I do recall concluding > that sub-dividing the SRQ on an RSS-like basis was *not* compliant with > the RDMAC specs and that the left-half of the adapter could not declare > "buffer not found" while the right-half of the adapter still had a free buffer. agreed. > This is of course a major pain if you are trying to team two RDMA adapters > to form a single virtual adapter, or even two largely independent ports on > the same physical adapter. But the intent of the specifications is very > clear: if the consumer has posted 1000 recv WQEs and gotten "SUCCESS" > to each of them, then the adapter MUST allocate all 1000 recv WQEs > *before* it can fail an operation because no buffer was available. > agreed. > So there is a difference between "must be pushed to the adapter now" > and "must be pushed to the adapter before it is too late". yes. Tom From gstreiff at neteffect.com Thu Feb 21 18:25:13 2008 From: gstreiff at neteffect.com (gstreiff at neteffect.com) Date: Thu, 21 Feb 2008 20:25:13 -0600 Subject: [ofa-general] [PATCH] RDMA/nes: Fix cm_event_connected() for big-endian platforms Message-ID: <200802220225.m1M2PDW9009391@velma.neteffect.com> We recently added portability/helper function get_crc_value() to nes_accept(). This should also be deployed to cm_event_connected. Otherwise RDMA connection establishment will fail on big-endian platforms such as ppc64. This remediation was triggered by a change near 2.6.23 to lib/crc32.c with commit ef19454bd437b2ba. Prior to the commit we might get the following return value from crc32c() on ppc64: 0xc69c51fd After the commit: 0xfd519cc6 So the helper function does an _le32 on the value so we have good interop between kernels at different rev levels for example.
Signed-off-by: Glenn Streiff --- drivers/infiniband/hw/nes/nes_cm.c | 5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 0c5dd5b..4705dbc 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2752,6 +2752,7 @@ void cm_event_connected(struct nes_cm_event *event) struct iw_cm_event cm_event; struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; + u32 crc_value; int ret; /* get all our handles */ @@ -2829,8 +2830,8 @@ void cm_event_connected(struct nes_cm_event *event) nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ - nesqp->hte_index = cpu_to_be32( - crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); From davidn at davidnewall.com Thu Feb 21 17:46:45 2008 From: davidn at davidnewall.com (David Newall) Date: Fri, 22 Feb 2008 12:16:45 +1030 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> Message-ID: <47BE2985.6020305@davidnewall.com> Krzysztof Halasa wrote: > Linus Torvalds writes: >> I'm personally of the opinion that a lot of checkpatch "fixes" are >> anything but. That mainly concerns fixing overlong lines >> > > Perhaps we should increase line length limit, 132 should be fine. > Especially useful with long printk() lines and long arithmetic > expressions. > Yes; or even longer. 80 characters might have made sense on a screen when the alternative was 80 characters on a punched card, but on a modern computer it's very restrictive. 
That's especially true with the deep indents that you quickly get in C. Even short lines often need to be split when you put a few tabs in front of them, and that makes comprehension that bit harder, not to mention looks ugly. From khc at pm.waw.pl Thu Feb 21 18:02:21 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 03:02:21 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47BE1AD8.4060600@garzik.org> (Jeff Garzik's message of "Thu\, 21 Feb 2008 19\:44\:08 -0500") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <47BE1AD8.4060600@garzik.org> Message-ID: Jeff Garzik writes: > Every time this discussion comes up, people point out that it remains > highly common to open multiple 80-column terminal windows, making the > 80-column limit still highly relevant in modern times. I guess only because of the limit :-) Raise the limit, terminal windows will follow. I'm using 80-column windows, too. -- Krzysztof Halasa From viro at ZenIV.linux.org.uk Thu Feb 21 18:06:15 2008 From: viro at ZenIV.linux.org.uk (Al Viro) Date: Fri, 22 Feb 2008 02:06:15 +0000 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47BE2985.6020305@davidnewall.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> Message-ID: <20080222020615.GE27894@ZenIV.linux.org.uk> On Fri, Feb 22, 2008 at 12:16:45PM +1030, David Newall wrote: > Krzysztof Halasa wrote: > > Linus Torvalds writes: > >> I'm personally of the opinion that a lot of checkpatch "fixes" are > >> anything but. 
That mainly concerns fixing overlong lines > >> > > > > Perhaps we should increase line length limit, 132 should be fine. > > Especially useful with long printk() lines and long arithmetic > > expressions. > > > > > Yes; or even longer. 80 characters might have made sense on a screen > when the alternative was 80 characters on a punched card, but on a > modern computer it's very restrictive. That's especially true with the > deep indents that you quickly get in C ... if your style is lousy. I agree that situation with printks is not normal in that respect and I certainly have no love for the checkpatch nonsense, but pressure to keep the fucking nesting depth low is a Good Thing(tm). From khc at pm.waw.pl Thu Feb 21 18:23:45 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 03:23:45 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080222020615.GE27894@ZenIV.linux.org.uk> (Al Viro's message of "Fri\, 22 Feb 2008 02\:06\:15 +0000") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> Message-ID: Al Viro writes: > ... if your style is lousy. I agree that situation with printks is > not normal in that respect and I certainly have no love for the > checkpatch nonsense, but pressure to keep the fucking nesting depth > low is a Good Thing(tm). Indeed. Unfortunately it is orthogonal to the line length limit. We should limit the nesting level, though I think there is no universally good value. What is good for one case (a function with a short multi-level if/for/etc) is bad for another (a long switch() where any added complexity makes it unparseable). So I think it just have to meet the author's and reviewers' taste. We already depend on this. 
-- Krzysztof Halasa From dwremotelinuxm at remotelinux.com Thu Feb 21 18:57:27 2008 From: dwremotelinuxm at remotelinux.com (James Magee) Date: Fri, 22 Feb 2008 09:57:27 +0700 Subject: [ofa-general] Win real money! Message-ID: <01c87539$546bdd80$83194476@dwremotelinuxm> Now you have a brilliant possibility to feel casino excitement without leaving your house. All your favorite games are available to play in Golden Gate Casino. Just download free software and start playing. We provide 24 hours a day, 7 days a week support and service! Truly fair play guaranteed for players. High level of security! http://geocities.com/andrewkinney610 Don't hesitate, register now! From viro at ZenIV.linux.org.uk Thu Feb 21 19:13:15 2008 From: viro at ZenIV.linux.org.uk (Al Viro) Date: Fri, 22 Feb 2008 03:13:15 +0000 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> Message-ID: <20080222031315.GF27894@ZenIV.linux.org.uk> On Fri, Feb 22, 2008 at 03:23:45AM +0100, Krzysztof Halasa wrote: > Al Viro writes: > > > ... if your style is lousy. I agree that situation with printks is > > not normal in that respect and I certainly have no love for the > > checkpatch nonsense, but pressure to keep the fucking nesting depth > > low is a Good Thing(tm). > > Indeed. Unfortunately it is orthogonal to the line length limit. Not quite. Add such things as choice of sane identifiers. And sane use of local variables, while we are at it - things like twenty lines of foobar[(index + 1) % BLAH]->spork.vomit[12]->field_name = ; with the only difference in the field_name, except for one line where we have a typo and see 11 instead of intended 12, are responsible for quite a few of such overruns. 
IMO the line length overruns make good warnings. Not as in "here's a cheap way to get more changesets", but as in "that code might have other problems nearby" kind of heuristics. From torvalds at linux-foundation.org Thu Feb 21 19:13:19 2008 From: torvalds at linux-foundation.org (Linus Torvalds) Date: Thu, 21 Feb 2008 19:13:19 -0800 (PST) Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080222020615.GE27894@ZenIV.linux.org.uk> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> Message-ID: On Fri, 22 Feb 2008, Al Viro wrote: > > ... if your style is lousy. I agree that situation with printks is > not normal in that respect and I certainly have no love for the > checkpatch nonsense, but pressure to keep the fucking nesting depth > low is a Good Thing(tm). I do agree, but that has little to do with line length *directly*. IOW, I'd personally be happier with a checkpatch that calculated "complexity" and indentation over line length. There is definitely a correlation there: there is no question that complex lines with deep indentation tend to be long. So yes, "long lines are correlated with bad code" is certainly true to some degree. But sometimes lines are long just because it's a function call with multiple parameters, and it's just three levels indented, and it had a string there too. It may be long, but it's not complex, and keeping it on one line actually makes it much easier to visually parse (and grep for, for that matter). So I'd be happier with warnings about deep indentation (but how do you count it? Will people then try to fake things out by using 4-space indents and then "deep" indentations will look like just a couple of tabs?) and against complex expressions (ie "if ((a = xyz()) == NULL) .." 
should just be split up into "a = xyz(); if (!a) ..", but there are sometimes reasons for those things too! Linus From anencephalus at sikici.com Thu Feb 21 19:17:50 2008 From: anencephalus at sikici.com ($1000 Credit Line) Date: Fri, 22 Feb 2008 03:17:50 +0000 Subject: [ofa-general] Gamble for Real $ with No Deposit Required Message-ID: <10db01c87501$2ca199cc$32bc5e47@[71.94.188.50]> No Credit Check! Gamble on Credit http://www.missoulaofficecity.info/ From dwplanetbortzm at planetbortz.com Thu Feb 21 20:58:35 2008 From: dwplanetbortzm at planetbortz.com (Angela Cooley) Date: Fri, 22 Feb 2008 12:58:35 +0800 Subject: [ofa-general] Cheap and excellent software - too good to be true? Read information below! Message-ID: <01c87552$a2410f80$08dbd43a@dwplanetbortzm> Don't waste time waiting for delivery of your software on a CD. Download and install it immediately. Choose the program you need from more than 270 programs in many languages. We provide help in installing software. You can ask any question and get a free of charge consultation. Guaranteed access to all updates! Friendly and professional service! http://geocities.com/b_alton You'll definitely find software you need. From rdreier at cisco.com Thu Feb 21 22:09:11 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 21 Feb 2008 22:09:11 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <1203634079.31225.42.camel@trinity.ogc.int> (Tom Tucker's message of "Thu, 21 Feb 2008 16:47:59 -0600") References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> <1203634079.31225.42.camel@trinity.ogc.int> Message-ID: > I think we can assume that the ringing of the doorbell is synchronous, > i.e. 
when the processor completes its write, the card knows there are > RQ WQE available in host memory, It doesn't affect your larger point, but to be pedantically precise, writes across PCI will be posted, so the CPU may fully retire a write to MMIO long before that write completes at its final destination. - R. From xma at us.ibm.com Thu Feb 21 22:15:03 2008 From: xma at us.ibm.com (Shirley Ma) Date: Thu, 21 Feb 2008 22:15:03 -0800 Subject: [ofa-general] post_recv question In-Reply-To: <1203621024.5109.161.camel@brick.pathscale.com> Message-ID: Hello Ralph, > ib_ipoib uses shared receive queues and doesn't try to manage > posted buffer credits so the RNR NAK issue isn't the same > as what Steve is trying to do. I meant the problem you saw might have the same cause. How many connections did you have when you hit this problem? Probably more than 1? thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From dwsmilesatfrancem at smilesatfrance.com Thu Feb 21 22:24:36 2008 From: dwsmilesatfrancem at smilesatfrance.com (Mandy Thornton) Date: Fri, 22 Feb 2008 14:24:36 +0800 Subject: [ofa-general] Amazing New Products launch! Message-ID: <978128134.64711704195758@smilesatfrance.com> An HTML attachment was scrubbed...
URL: From junio at pobox.com Thu Feb 21 22:29:09 2008 From: junio at pobox.com (Junio C Hamano) Date: Thu, 21 Feb 2008 22:29:09 -0800 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: (Linus Torvalds's message of "Thu, 21 Feb 2008 19:13:19 -0800 (PST)") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> Message-ID: <7vbq69pkqy.fsf@gitster.siamese.dyndns.org> Linus Torvalds writes: > So I'd be happier with warnings about deep indentation (but how do you > count it? Will people then try to fake things out by using 4-space indents > and then "deep" indentations will look like just a couple of tabs?) and > against complex expressions (ie "if ((a = xyz()) == NULL) .." should just > be split up into "a = xyz(); if (!a) ..", but there are sometimes reasons > for those things too! Deep indentation should be fairly easy, given that you already have rules in place that says "Tabs are 8 characters". So if you find a line that begins with more than say 4 SP, you can flag that as already bogus (i.e. "does not indent with HT"), more than 8 SP definitely so. 
I'll leave harder "complex expressions" to sparse experts ;-), From ray-lk at madrabbit.org Thu Feb 21 22:37:00 2008 From: ray-lk at madrabbit.org (Ray Lee) Date: Thu, 21 Feb 2008 22:37:00 -0800 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> Message-ID: <2c0942db0802212237t41da54b4h2aa2b052b7633f53@mail.gmail.com> On Thu, Feb 21, 2008 at 7:13 PM, Linus Torvalds wrote: > So I'd be happier with warnings about deep indentation (but how do you > count it? Will people then try to fake things out by using 4-space indents > and then "deep" indentations will look like just a couple of tabs?) I suspect that 90% of the cases that people really care about would get caught successfully just by counting brace depth. ie, by looking at { { {} {} {{{}{}}} } } I bet you can tell me which section should have been pulled out into a separate routine. From bunk at kernel.org Fri Feb 22 01:02:28 2008 From: bunk at kernel.org (Adrian Bunk) Date: Fri, 22 Feb 2008 11:02:28 +0200 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <7vbq69pkqy.fsf@gitster.siamese.dyndns.org> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> <7vbq69pkqy.fsf@gitster.siamese.dyndns.org> Message-ID: <20080222090228.GH28328@cs181133002.pp.htv.fi> On Thu, Feb 21, 2008 at 10:29:09PM -0800, Junio C Hamano wrote: > Linus Torvalds writes: > > > So I'd be happier with warnings about deep indentation (but how do you > > count it? 
Will people then try to fake things out by using 4-space indents > > and then "deep" indentations will look like just a couple of tabs?) and > > against complex expressions (ie "if ((a = xyz()) == NULL) .." should just > > be split up into "a = xyz(); if (!a) ..", but there are sometimes reasons > > for those things too! > > Deep indentation should be fairly easy, given that you > already have rules in place that says "Tabs are 8 characters". > So if you find a line that begins with more than say 4 SP, you > can flag that as already bogus (i.e. "does not indent with HT"), > more than 8 SP definitely so. >... Checkpatch already has an error "use tabs not spaces". And people should realize that checkpatch is not a tool for janitors but for authors and maintainers to easily spot some of the possible problems in a driver and thereby automate some part of patch review. E.g. in this driver we are talking about checkpatch warns about the > 2000 lines over 80 characters. And that's not a surprise and a symptom when code is 6 tabs indented. If someone said fixing that should not delay the merge of a 16.500 lines driver I would agree with that since fixing that would require a huge amount of work for a not that big gain. But that a merged driver contains > 250 checkpatch errors is really not nice. Most of them are easy to fix stylistic errors that simply make the driver easier to read and whose fixing would only take a few hours altogether. [1] And the 13 checkpatch errors about volatile usage are not stuff for janitors (unless you count our number one cleanup person Al as janitor) but indicate really fishy code. cu Adrian [1] one might argue whether "easier to read" really applies when checkpatch gives errors for e.g. the usage of C99 comments, but different from overly long lines that's at least stuff that can be fixed very quickly and in a quite automatic way -- "Is there not promise of rain?" Ling Tan asked suddenly out of the darkness. 
There had been need of rain for many days. "Only a promise," Lao Er said. Pearl S. Buck - Dragon Seed From tom at opengridcomputing.com Fri Feb 22 01:18:59 2008 From: tom at opengridcomputing.com (Tom Tucker) Date: Fri, 22 Feb 2008 03:18:59 -0600 Subject: [ofa-general] post_recv question In-Reply-To: Message-ID: On 2/22/08 12:09 AM, "Roland Dreier" wrote: >> I think we can assume that the ringing of the doorbell is synchronous, >> i.e. when the processor completes it's write, the card knows there are >> RQ WQE available in host memory, > > It doesn't affect your larger point, but to be pedantically precise, > writes across PCI will be posted, so the CPU may fully retire a write > to MMIO long before that write completes at its final destination. > You're right. In fact, I think up to 4 words for the common implementation. But I think this speaks again to the claim that guarantees between adapters on different busses can't work because posted writes go to different FIFO's. > - R. From dwroeweoutfittersm at roeweoutfitters.com Fri Feb 22 01:45:54 2008 From: dwroeweoutfittersm at roeweoutfitters.com (Miranda Vargas) Date: Fri, 22 Feb 2008 17:45:54 +0800 Subject: [ofa-general] Get your free 2400$ welcome bonus and win much more! Message-ID: <01c8757a$c57fb500$ac5a38da@dwroeweoutfittersm> Visit Golden Gate casino and you won't be disappointed. Huge welcome bonus! Free to download software! Most popular games! Register free account today and take the advantage of playing when and whatever you like. Golden Gate Casino guarantees competent customer support for all players, quick response in case you have question or problem and instant payouts. Fair gaming only! http://geocities.com/lilianabradley864 Choose Golden Gate Casino! From dwradioforlessm at radioforless.net Fri Feb 22 02:04:25 2008 From: dwradioforlessm at radioforless.net (Elma Vela) Date: Fri, 22 Feb 2008 18:04:25 +0800 Subject: [ofa-general] Purchase software at surprisingly low prices! 
Message-ID: <01c8757d$5bb4da80$3206f0de@dwradioforlessm> Get original and perfectly functioning software at low prices. All software can be downloaded immediately after purchase. Impressive selection of programs even for Macintosh! Programs in many languages are available. We provide help in installing software. You can ask any question and get a free of charge consultation. Guaranteed access to all updates! Friendly and professional service! http://geocities.com/austinfinch24 Incredible selection of programs and applications! From alan at lxorguk.ukuu.org.uk Fri Feb 22 02:04:35 2008 From: alan at lxorguk.ukuu.org.uk (Alan Cox) Date: Fri, 22 Feb 2008 10:04:35 +0000 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> Message-ID: <20080222100435.12616d92@core> On Fri, 22 Feb 2008 01:05:26 +0100 Krzysztof Halasa wrote: > Jeff Garzik writes: > > > If a driver is full of lines of length >80, that's a problem. > > I'm not sure. > We all have more than 80-chars wide displays for years, don't we? The Even a vt132 serial terminal or later can do 132. Decades not years. From ruthjohnson43 at gmail.com Fri Feb 22 02:25:55 2008 From: ruthjohnson43 at gmail.com (ruth johnson) Date: Fri, 22 Feb 2008 02:25:55 -0800 Subject: [ofa-general] Hello Message-ID: <25a3be800802220225y301f4a5fk95f3444a09f09302@mail.gmail.com> Hello with love, I am ruth tall slim that love sightseeing, i was going over the computer today and came across your email and got interested in knowing more about you for important discussion reply to me via my mail address if you care ( ruthjohnson42 at yahoo.com) i will send my pics later. Thanks, -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From vlad at lists.openfabrics.org Fri Feb 22 03:05:26 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 22 Feb 2008 03:05:26 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080222-0200 daily build status Message-ID: <20080222110526.6C0D6E60A1B@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 
Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From bart.vanassche at gmail.com Fri Feb 22 04:29:31 2008 From: bart.vanassche at gmail.com (Bart Van Assche) Date: Fri, 22 Feb 2008 13:29:31 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47BE2985.6020305@davidnewall.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> Message-ID: On Fri, Feb 22, 2008 at 2:46 AM, David Newall wrote: > Krzysztof Halasa wrote: > > Perhaps we should increase line length limit, 132 should be fine. > > Especially useful with long printk() lines and long arithmetic > > expressions. > > Yes; or even longer. 80 characters might have made sense on a screen > when the alternative was 80 characters on a punched card, but on a > modern computer it's very restrictive. That's especially true with the > deep indents that you quickly get in C. Even short lines often need to > be split when you put a few tabs in front of them, and that makes > comprehension that bit harder, not to mention looks ugly. 
There is a reason to limit line length: scientific research has shown that readability of regular texts is optimal for a line length between 55 and 65 characters. My experience is that the readability of source code decreases when the lines are very long (more than 160 characters). Bart Van Assche. From davidn at davidnewall.com Fri Feb 22 06:25:03 2008 From: davidn at davidnewall.com (David Newall) Date: Sat, 23 Feb 2008 00:55:03 +1030 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> Message-ID: <47BEDB3F.4090100@davidnewall.com> Bart Van Assche wrote: > There is a reason to limit line length: scientific research has shown > that readability of regular texts is optimal for a line length between > 55 and 65 characters. Putting aside the point that we're talking code, not regular text, I've heard that said before and I don't think it's quite like that. Perhaps the numbers you said might assume various things such as the width of the eye's field of view, the distance to the image and the size of each character? > My experience is that the readability of source > code decreases when the lines are very long (more than 160 > characters). The point is that the width, excluding leading and trailing white space, is what really matters. Even deeply indented code can be a snap to understand if you don't have to fight artificial line breaks. And we've got a much wider -- and taller! -- space available than we had in the old 80x24 (and 80x1) days. From 9z7 at ameritrade.com Fri Feb 22 07:20:11 2008 From: 9z7 at ameritrade.com (Betsy Mcdonough) Date: Fri, 22 Feb 2008 23:20:11 +0800 Subject: [ofa-general] Let's chat Message-ID: <01c875a9$78670780$67f4f13a@9z7> Hello! I am bored today. 
I am nice girl that would like to chat with you. Email me at Sarah at TheHealCare.info only, because I am using my friend's email to write this. Will send some of my pictures From hrosenstock at xsigo.com Fri Feb 22 06:15:08 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 22 Feb 2008 06:15:08 -0800 Subject: [ofa-general] OpenSM Console Ideas? In-Reply-To: <47BE16F2.8000507@llnl.gov> References: <47BE16F2.8000507@llnl.gov> Message-ID: <1203689708.8793.159.camel@hrosenstock-ws.xsigo.com> Hi Tim, On Thu, 2008-02-21 at 16:27 -0800, Timothy A. Meier wrote: > LLNL uses the remote console feature in OpenSM. We have a need to secure > this remote connection with authentication/authorization and encryption > (specifically PAM and OpenSSL). I have a working prototype, and would > like to formalize it and share/include this with OpenSM. > > Before I go down this path too far, I would like to solicit ideas from > others who use the console. > > Currently, the console can be used in local, loopback, or remote modes. > If security is added, should it replace other modes, or be an additional mode? IMO the old modes should be preserved and I would view authentication/authorization and encryption as an orthogonal dimension to be supported with any of those modes. > The intention is to use PAM for the AA framework, and OpenSSL for secure > sockets. Are there any serious objections to this implementation plan? Is the license compatible with OpenFabrics ? > The console feature has always been a configuration/command line option, > but should the secure console be conditionally compiled/linked as well? > (eliminate dependency on the PAM and OpenSSL libs, pam, pam_misc, cryto, ssl). This might depend on the licensing. Also, on one hand, it would be nice to minimize the build options, but for those where space is an issue, the separate configurability of this would be useful. 
(Not knowing the additional size of this but it sounds like it will be large enough to not make this a mandatory requirement of the console). -- Hal > The secure console would require a relatively primitive client application, > which I will probably package under opensm, just like osmtest. Make sense? > > Do you have any other ideas or suggestions for the remote console? > From a.p.zijlstra at chello.nl Fri Feb 22 07:17:17 2008 From: a.p.zijlstra at chello.nl (Peter Zijlstra) Date: Fri, 22 Feb 2008 16:17:17 +0100 Subject: ***SPAM*** Re: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47BEDB3F.4090100@davidnewall.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <47BEDB3F.4090100@davidnewall.com> Message-ID: <1203693437.6242.40.camel@lappy> On Sat, 2008-02-23 at 00:55 +1030, David Newall wrote: > Bart Van Assche wrote: > > There is a reason to limit line length: scientific research has shown > > that readability of regular texts is optimal for a line length between > > 55 and 65 characters. > > Putting aside the point that we're talking code, not regular text, I've > heard that said before and I don't think it's quite like that. Perhaps > the numbers you said might assume various things such as the width of > the eye's field of view, the distance to the image and the size of each > character? Not in my experience. > > My experience is that the readability of source > > code decreases when the lines are very long (more than 160 > > characters). > > The point is that the width, excluding leading and trailing white space, > is what really matters. Even deeply indented code can be a snap to > understand if you don't have to fight artificial line breaks. And we've > got a much wider -- and taller! 
-- space available than we had in the > old 80x24 (and 80x1) days. I have 2 24" screens running at 1920x1200 with X forced to 75dpi and use a 8pt Monospace font. (Yes, I can read that from more than 3ft away) Using a fullscreen gvim (without the icons, but with the menu) with 3 vertical splits gives me 4 columns of 113 rows and 95 chars. So, yes, I have the screen estate for very long lines, but I find that long lines require more effort to read (that very much includes leading whitespace). Also, since long lines are rare (and they should be, if you nest too deep you have other issues) accommodating them would waste a lot of screen estate otherwise useful for another column of text. Even with e-mail, I can easily show over 200 characters wide with a large font (say 11pt) but find it harder to read emails that don't nicely wrap at 78. So much so that I often find myself not reading the mail, or restyling it if I find it important enough to read anyway. Please, lets keep the 80 as a guideline, and not trip over the occasional lines that exceed it in good style (read: wrapping them would indeed give uglier code) From dwshopirishwithmoyturam at shopirishwithmoytura.com Fri Feb 22 07:47:02 2008 From: dwshopirishwithmoyturam at shopirishwithmoytura.com (Toby Warren) Date: Fri, 22 Feb 2008 23:47:02 +0800 Subject: [ofa-general] Cheapest software prices! Message-ID: <01c875ad$38a21f00$ace282de@dwshopirishwithmoyturam> Don't waste time waiting for delivery of your software on a CD. Download and install it immediately. Choose the program you need from more than 270 programs in many languages. We are glad to help you to install your software. Feel free to ask questions and receive highly professional consultations. If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/rogelio_knowles Take this time and money saving offer! From linville at tuxdriver.com Fri Feb 22 07:48:43 2008 From: linville at tuxdriver.com (John W. 
Linville) Date: Fri, 22 Feb 2008 10:48:43 -0500 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47BEDB3F.4090100@davidnewall.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <47BEDB3F.4090100@davidnewall.com> Message-ID: <20080222154843.GC3067@tuxdriver.com> On Sat, Feb 23, 2008 at 12:55:03AM +1030, David Newall wrote: > Bart Van Assche wrote: > > There is a reason to limit line length: scientific research has shown > > that readability of regular texts is optimal for a line length between > > 55 and 65 characters. > > Putting aside the point that we're talking code, not regular text, I've > heard that said before and I don't think it's quite like that. Perhaps > the numbers you said might assume various things such as the width of > the eye's field of view, the distance to the image and the size of each > character? I'm sure all those assumptions are baked-in to the estimate. Yet the fact remains that people's eyes are only so good and most people will be reading at similar distances from the screen. So I don't see any reason to invalidate those assumptions. FWIW, I find reading longer lines to be painful -- it is easier to lose one's place in the text. I would also echo a point Jeff Garzik made elsewhere that it is often beneficial to have multiple windows open side-by-side. Longer lines make it harder to do that in a useful way. Instead the lines either wrap or just trail off the screen. See the output of sdiff for how this limits usefulness. > > My experience is that the readability of source > > code decreases when the lines are very long (more than 160 > > characters). > > The point is that the width, excluding leading and trailing white space, > is what really matters.
Even deeply indented code can be a snap to > understand if you don't have to fight artificial line breaks. And we've > got a much wider -- and taller! -- space available than we had in the > old 80x24 (and 80x1) days. I'm not sure deeply indented code is ever a snap to understand. And FWIW, I'd rather deal with "artificial" line breaks than parameter lists that just stream off the side of the page. The line breaks make long parameters lists easier to digest. I'll sacrifice the occasional odd breakage of a long string. John -- John W. Linville linville at tuxdriver.com From steiner at sgi.com Fri Feb 22 08:31:26 2008 From: steiner at sgi.com (Jack Steiner) Date: Fri, 22 Feb 2008 10:31:26 -0600 Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080221044256.GA15215@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080219234049.GA27856@sgi.com> <20080221044256.GA15215@wotan.suse.de> Message-ID: <20080222163126.GA32146@sgi.com> > Also, I'll try to post the driver within the next few days. It is > still in development but it compiles and can successfully run most > workloads on a system simulator. Here is the source of the GRU driver. It is still in development but it compiles & runs (on IA64) in a system simulator. The GRU is a hardware resource located in the chipset. It is mmaped into the user address space. The GRU contains functions such as load/store, scatter/gather, bcopy, etc. It is directly accessed by user instructions using user virtual addresses. GRU instructions (ex., bcopy) use user virtual addresses for operands. The GRU contains a large TLB that is functionally very similar to processor TLBs. This version uses the V7 mmu notifier patch from Christoph. The changes to switch to Andrea's patch are trivial. (Note, however, that XPMEM still requires Christoph's patch). 
The interesting parts relating to mmu_notifiers are in the following functions: gru_try_dropin() - does TLB dropins gru_flush_tlb_range() - TLB flushing gru_mmuops_...() - all functions starting with "gru_mmuops_" gru_register_mmu_notifier() - registers notifiers I have no doubt that there are bugs in the code. If you find them, please let me know where they are .... :-) Other comments appreciated, too. Portions are rough but this arch/ia64/sn/kernel/sn2/sn2_smp.c | 5 drivers/Makefile | 1 drivers/gru/Makefile | 4 drivers/gru/gru.h | 348 +++++++++++++ drivers/gru/gru_instructions.h | 502 +++++++++++++++++++ drivers/gru/grufault.c | 557 ++++++++++++++++++++++ drivers/gru/grufile.c | 453 +++++++++++++++++ drivers/gru/gruhandles.h | 655 +++++++++++++++++++++++++ drivers/gru/grukservices.c | 129 +++++ drivers/gru/grulib.h | 84 +++ drivers/gru/grumain.c | 958 ++++++++++++++++++++++++++++++++++++++ drivers/gru/grummuops.c | 376 ++++++++++++++ drivers/gru/gruprocfs.c | 309 ++++++++++++ drivers/gru/grutables.h | 517 ++++++++++++++++++++ drivers/sn/Kconfig | 7 15 files changed, 4905 insertions(+) Index: linux/drivers/Makefile =================================================================== --- linux.orig/drivers/Makefile 2008-02-22 09:37:21.759206853 -0600 +++ linux/drivers/Makefile 2008-02-22 09:37:51.722947267 -0600 @@ -5,6 +5,7 @@ # Rewritten to use lists instead of if-statements. 
# +obj-$(CONFIG_GRU) += gru/ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ obj-$(CONFIG_RAPIDIO) += rapidio/ Index: linux/drivers/gru/Makefile =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/Makefile 2008-02-22 09:37:51.742949764 -0600 @@ -0,0 +1,4 @@ +# +EXTRA_CFLAGS += -Werror -Wall +obj-$(CONFIG_GRU) := gru.o +gru-y := grufile.o grumain.o grufault.o grummuops.o gruprocfs.o grukservices.o Index: linux/drivers/sn/Kconfig =================================================================== --- linux.orig/drivers/sn/Kconfig 2008-02-22 09:37:21.803212347 -0600 +++ linux/drivers/sn/Kconfig 2008-02-22 09:37:51.774953759 -0600 @@ -18,4 +18,11 @@ config SGI_IOC3 I/O controller or a PCI IOC3 serial card say Y. Otherwise say N. +config GRU + tristate "SGI GRU driver" + default y + ---help--- + This option enables basic support for the SGI UV GRU driver. + + endmenu Index: linux/arch/ia64/sn/kernel/sn2/sn2_smp.c =================================================================== --- linux.orig/arch/ia64/sn/kernel/sn2/sn2_smp.c 2008-02-22 09:37:21.831215842 -0600 +++ linux/arch/ia64/sn/kernel/sn2/sn2_smp.c 2008-02-22 09:37:51.838961749 -0600 @@ -113,6 +113,11 @@ void sn_migrate(struct task_struct *task pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu); volatile unsigned long *adr = last_pda->pio_write_status_addr; unsigned long val = last_pda->pio_write_status_val; + extern void gru_migrate_task(int, int); + + if (current->mm && hlist_empty(&current->mm->mmu_notifier.head) && + task_thread_info(current)->last_cpu != task_cpu(current)) + gru_migrate_task(task_thread_info(current)->last_cpu, task_cpu(current)); /* Drain PIO writes from old CPU's Shub */ while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) Index: linux/drivers/gru/gru.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++
linux/drivers/gru/gru.h 2008-02-11 11:22:32.000000000 -0600 @@ -0,0 +1,348 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005-2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _GRU_H_ +#define _GRU_H_ + +#ifdef EMUSUPPORT +#define _EMUSUPPORT 1 +#else +#define _EMUSUPPORT 0 +#endif + +#ifndef __KERNEL__ +#include +#else +#include +#endif + +/* + * Maximum number of GRU segments that a user can have open + * ZZZ temp - set higher for testing. Revisit. + */ +#define GRU_MAX_OPEN_CONTEXTS 32 + +/* + * Constants for addressing user Gseg + */ +#define GRU_CB_BASE 0 +#define GRU_DS_BASE 0x20000 +#define GRU_HANDLE_STRIDE 256 +#define GRU_CACHE_LINE_BYTES 64 + + +/* + * GRU Segment limits + */ +#define GRU_MAX_CB (128 - 16) +#define GRU_DS_BYTES (32768 - 1024) + +/* + * Pagesize used to map GRU GSeg + */ +#ifdef __ia64__ +#define GRU_GSEG_PAGESIZE (256 * 1024) +#define GRU_GSEG_PAGESIZE_SHIFT 18 +#else +#define GRU_GSEG_PAGESIZE (2 * 1024 * 1024UL) +#endif + + +/* Basic types - improve type checking */ +typedef struct { void *cookie; } gru_cookie_t; +typedef struct gru_control_segment_s gru_segment_t; +typedef struct gru_control_block_s gru_control_block_t; + +/* Flags for GRU options on the gru_create_context() call */ +/* Select one of the follow 4 options to specify how TLB misses are handled */ +#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ +#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */ +#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrut to cpu to + handle fault */ +#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */ +#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */ + +/* + * Ugly testing hack!! - if set, GRU thinks all pages are 1 TB. 
+ * Works on emulator only + */ +#define GRU_OPT_FAKE_TB_PAGES 0x8000 /* EMU testing only - GRU uses + 1 TB pages */ +/* + * Get exception detail for CB that failed. + */ + +/* + * Structure used to fetch exception detail for CBs that terminate with + * CBS_EXCEPTION + */ +struct control_block_extended_exc_detail { + unsigned long cb; + int opc; + int ecause; + int exopc; + long exceptdet0; + int exceptdet1; +}; + + + +/*---------------------------------------------------------------------------- + * Inline functions for waiting for CB completion & checking CB status + */ + +/* + * Control block status and exception codes + */ +#define CBS_IDLE 0 +#define CBS_EXCEPTION 1 +#define CBS_ACTIVE 2 +#define CBS_CALL_OS 3 + +/* CB substatus bitmasks */ +#define CBSS_MSG_QUEUE_MASK 7 +#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8 + +/* CB substatus message queue values (low 3 bits of substatus) */ +#define CBSS_LB_OVERFLOWED 1 +#define CBSS_QLIMIT_REACHED 2 +#define CBSS_PAGE_OVERFLOW 3 +#define CBSS_AMO_NACKED 4 +#define CBSS_PUT_NACKED 5 + +/* + * Control block definition for checking status + */ +struct gru_control_block_status { + volatile unsigned int icmd :1; + unsigned int unused1 :31; + unsigned int unused2 :24; + volatile unsigned int istatus :2; + volatile unsigned int isubstatus :4; + unsigned int inused3 :2; +}; + +/* Get CB status */ +static inline int gru_get_cb_status(gru_control_block_t *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->istatus; +} + +/* Get CB message queue substatus */ +static inline int gru_get_cb_message_queue_substatus(gru_control_block_t *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus & CBSS_MSG_QUEUE_MASK; +} + +/* Get CB substatus */ +static inline int gru_get_cb_substatus(gru_control_block_t *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus; +} + +extern int gru_check_status_proc(gru_control_block_t *cb); +extern int 
gru_wait_proc(gru_control_block_t *cb); +extern void gru_wait_abort_proc(gru_control_block_t *cb); +extern void gru_abort(int, gru_control_block_t *cb, char *str); + +/* Check the status of a CB. If the CB is in UPM mode, call the + * OS to handle the UPM status. + * Returns the CB status field value (0 for normal completion) + */ +static inline int gru_check_status(gru_control_block_t *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + int ret = cbs->istatus; + + if (_EMUSUPPORT || ret == CBS_CALL_OS) + ret = gru_check_status_proc(cb); + return ret; +} + +/* Wait for CB to complete. + * Returns the CB status field value (0 for normal completion) + */ +static inline int gru_wait(gru_control_block_t *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + if (cbs->istatus != CBS_IDLE) + return gru_wait_proc(cb); + return cbs->istatus; +} + +/* Wait for CB to complete. Aborts program if error. (Note: error does NOT + * mean TLB mis - only fatal errors such as memory parity error or user + * bugs will cause termination. + */ +static inline void gru_wait_abort(gru_control_block_t *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + if (cbs->istatus != CBS_IDLE) + gru_wait_abort_proc(cb); +} + +#ifndef __KERNEL__ +/* Name of DSO library */ +#define LIBGRU_SO "libgru.so" + +/* Environment variables for controlling behavior*/ + +/* + * Override TLBMISS fault map mode + * - "user_polling", "interrupt", "os_polling" + */ +#define GRU_TLBMISS_MODE_ENV "GRU_TLBMISS_MODE" + +/* Set exception retry count for numalink timeout & memory parity */ +#define GRU_EXCEPTION_RETRY_ENV "GRU_EXCEPTION_RETRY" +#define GRU_EXCEPTION_RETRY_DEFAULT 3 + + + +/* + * Create a new GRU context + * cookie - (OUT): magic identifier of the GRU segment + * start - starting address for mmaped segments (NULL means + * OS picks address). 
+ * ctlblks - number of active control blocks + * dataseg_bytes - number of data segment bytes + * max_threads - maximum number of threads that will use the context + * options - specifies various options + * (see constants below) + * + * Returns 0 if successful, else error code returned in errno + */ +extern int gru_create_context(gru_cookie_t *cookie, void *start, + unsigned int ctlblks, unsigned int dataseg_bytes, + unsigned int max_threads, unsigned int options); + + +/* + * Destroy a GRU context + * cookie - cookie returned from gru_create_context() + * + * Returns: + * 0 - success + * -1 - failure. See errno for additional status + */ +extern int gru_destroy_context(gru_cookie_t cookie); + + +/* + * Get the handle to a thread's private GRU context + * cookie - cookie returned from gru_create_context() + * threadnum - thread number (0 .. #threads-1) + * + * Returns pointer to GSeg if successful, else returns NULL. + * Error code returned in errno + */ +gru_segment_t *gru_get_thread_gru_segment(gru_cookie_t cookie, int threadnum); + +/* + * Flush a range of virtual addresses from the GRU TLB (intended for testcases + * only) + */ +int gru_flush_tlb(gru_segment_t *gseg, void *vaddr, size_t len); + +/* + * Unload a GRU context & free GRU resource. Will be reloaded on next + * reference. + */ +int gru_unload_context(void *gseg); + +/* + * Get struct control_block_extended_exc_detail for CB. + */ +extern int gru_get_cb_exception_detail(gru_control_block_t *cb, + struct control_block_extended_exc_detail *excdet); + +/* Get a string that describes the CB exception detail. 
*/ +extern char *gru_get_cb_exception_detail_str(int ret, gru_control_block_t *cb); + + +/* + * Get a pointer to a control block + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired CB + */ +static inline gru_control_block_t *gru_get_cb_pointer(gru_segment_t *gseg, + int index) +{ + return (gru_control_block_t *)((void *)gseg + GRU_CB_BASE + + index * GRU_HANDLE_STRIDE); +} + +/* + * Get a pointer to a cacheline in the data segment portion of a GSeg + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired cache line + */ +static inline void *gru_get_data_pointer(gru_segment_t *gseg, int index) +{ + return (void *)((void *)gseg + GRU_DS_BASE + + index * GRU_CACHE_LINE_BYTES); +} + +/* + * Convert a vaddr into the tri index within the GSEG + * vaddr - virtual address of within gseg + */ +static inline int gru_get_tri(void *vaddr) +{ + return (((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE); +} +#endif /* ! 
__KERNEL__ */ + +#ifdef EMUSUPPORT +/* + * Hooks for instruction emulator + */ +enum {EMU_ID_SIM2_CHET, EMU_ID_SIM2_SIM2, EMU_ID_MEDUSA}; +int gru_emulator_id(void); + +extern void emuloguser(char *fmt, ...); +extern int is_emu(void); +# ifdef __KERNEL__ + extern void emu_writeback_hook(void *p); + extern void emu_kwait_hook(void *p, int wait); +# define gru_flush_cache_hook(p) emu_writeback_hook(p) +# define gru_emulator_wait_hook(p, w) emu_kwait_hook(p, w) +# else + extern void lib_cb_wait_hook(void *p, int wait) __attribute__ ((weak)); + extern void lib_writeback_hook(void *p) __attribute__ ((weak)); + +# define gru_flush_cache_hook(p) \ + do { \ + if (lib_writeback_hook) \ + lib_writeback_hook(p); \ + } while (0) + +# define gru_emulator_wait_hook(p, w) \ + do { \ + if (lib_cb_wait_hook) \ + lib_cb_wait_hook(p, w); \ + } while (0) + +# endif +#else +#define emuloguser printf +#define gru_flush_cache_hook(p) +#define gru_emulator_wait_hook(p, w) +#define is_emu() 0 +#endif + +#endif /* _GRU_H_ */ Index: linux/drivers/gru/gru_instructions.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/gru_instructions.h 2008-01-25 08:13:07.135721041 -0600 @@ -0,0 +1,502 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005-2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _GRU_INSTRUCTIONS_H_ +#define _GRU_INSTRUCTIONS_H_ + +/* + * Instruction formats + */ + +/* + * Generic instruction format. + * This definition has precise bit field definitions. 
+ */ +struct gru_instruction_bits { + /* DW 0 - low */ + unsigned int icmd: 1; + unsigned char ima: 3; /* CB_DelRep, unmapped mode */ + unsigned char reserved0: 4; + unsigned int xtype: 3; + unsigned int iaa0: 2; + unsigned int iaa1: 2; + unsigned char reserved1: 1; + unsigned char opc: 8; /* opcode */ + unsigned char exopc: 8; /* extended opcode */ + /* DW 0 - high */ + unsigned int idef2: 22; /* TRi0 */ + unsigned char reserved2: 2; + unsigned char istatus: 2; + unsigned char isubstatus:4; + unsigned char reserved3: 2; + /* DW 1 */ + unsigned long idef4; /* 42 bits: TRi1, BufSize */ + /* DW 2-6 */ + unsigned long idef1; /* BAddr0 */ + unsigned long idef5; /* Nelem */ + unsigned long idef6; /* Stride, Operand1 */ + unsigned long idef3; /* BAddr1, Value, Operand2 */ + unsigned long reserved4; + /* DW 7 */ + unsigned long avalue; /* AValue */ +}; + +/* + * Generic instruction with friendlier names. This format is used + * for inline instructions. + */ +struct gru_instruction { + /* DW 0 */ + volatile unsigned int op32; /* icmd,xtype,iaa0,ima,opc */ + unsigned int tri0; + /* DW 1-7 */ + unsigned long tri1_bufsize; + unsigned long baddr0; + unsigned long nelem; + unsigned long op1_stride; + unsigned long op2_value_baddr1; + unsigned long reserved0; + unsigned long avalue; +}; + +/* Some shifts and masks for the low 32 bits of a GRU command */ +#define GRU_CB_ICMD_SHFT 0 +#define GRU_CB_ICMD_MASK 0x1 +#define GRU_CB_XTYPE_SHFT 8 +#define GRU_CB_XTYPE_MASK 0x7 +#define GRU_CB_IAA0_SHFT 11 +#define GRU_CB_IAA0_MASK 0x3 +#define GRU_CB_IAA1_SHFT 13 +#define GRU_CB_IAA1_MASK 0x3 +#define GRU_CB_IMA_SHFT 1 +#define GRU_CB_IMA_MASK 0x3 +#define GRU_CB_OPC_SHFT 16 +#define GRU_CB_OPC_MASK 0xff +#define GRU_CB_EXOPC_SHFT 24 +#define GRU_CB_EXOPC_MASK 0xff + +/* GRU instruction opcodes (opc field) */ +#define OP_NOP 0x00 +#define OP_BCOPY 0x01 +#define OP_VLOAD 0x02 +#define OP_IVLOAD 0x03 +#define OP_VSTORE 0x04 +#define OP_IVSTORE 0x05 +#define OP_VSET 0x06 +#define OP_IVSET 
0x07 +#define OP_MESQ 0x08 +#define OP_GAMXR 0x09 +#define OP_GAMIR 0x0a +#define OP_GAMIRR 0x0b +#define OP_GAMER 0x0c +#define OP_GAMERR 0x0d +#define OP_BSTORE 0x0e +#define OP_VFLUSH 0x0f + + +/* Extended opcodes values (exopc field) */ + +/* GAMIR - AMOs with implicit operands */ +#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */ +#define EOP_IR_CLR 0x02 /* Fetch and clear */ +#define EOP_IR_INC 0x05 /* Fetch and increment */ +#define EOP_IR_DEC 0x07 /* Fetch and decrement */ +#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */ +#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */ + +/* GAMIRR - Registered AMOs with implicit operands */ +#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */ +#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */ +#define EOP_IRR_INC 0x05 /* Registered fetch and increment */ +#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */ +#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/ + +/* GAMER - AMOs with explicit operands */ +#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ER_OR 0x01 /* Logical OR with memory */ +#define EOP_ER_AND 0x02 /* Logical AND with memory */ +#define EOP_ER_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ER_ADD 0x04 /* Add value to memory */ +#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */ + +/* GAMERR - Registered AMOs with explicit operands */ +#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ERR_OR 0x01 /* Logical OR with memory */ +#define EOP_ERR_AND 0x02 /* Logical AND with memory */ +#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ERR_ADD 0x04 /* Add value to memory */ +#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ERR_EPOLL 0x09 /* Poll for equality */ +#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */ + +/* GAMXR - SGI 
Arithmetic unit */ + + +/* Transfer types (xtype field) */ +#define XTYPE_B 0x0 /* byte */ +#define XTYPE_S 0x1 /* short (2-byte) */ +#define XTYPE_W 0x2 /* word (4-byte) */ +#define XTYPE_DW 0x3 /* doubleword (8-byte) */ +#define XTYPE_RSVD4 0x4 +#define XTYPE_RSVD5 0x5 +#define XTYPE_CL 0x6 /* cacheline (64-byte) */ +#define XTYPE_RSVD7 0x7 + + +/* Instruction access attributes (iaa0, iaa1 fields) */ +#define IAA_RAM 0x0 /* normal cached RAM access */ +#define IAA_NCRAM 0x2 /* noncoherent RAM access */ +#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */ +#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */ + + +/* Instruction mode attributes (ima field) */ +#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */ +#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */ +#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */ + +/* CBE ecause bits */ +#define CBE_CAUSE_RI_BIT 0 +#define CBE_CAUSE_INVALID_INSTRUCTION_BIT 1 +#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN_BIT 2 +#define CBE_CAUSE_PE_CHECK_DATA_ERROR_BIT 3 +#define CBE_CAUSE_IAA_GAA_MISMATCH_BIT 4 +#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION_BIT 5 +#define CBE_CAUSE_OS_FATAL_TLB_FAULT_BIT 6 +#define CBE_CAUSE_EXECUTION_HW_ERROR_BIT 7 +#define CBE_CAUSE_TLBHW_ERROR_BIT 8 +#define CBE_CAUSE_RA_REQUEST_TIMEOUT_BIT 9 +#define CBE_CAUSE_HA_REQUEST_TIMEOUT_BIT 10 +#define CBE_CAUSE_RA_RESPONSE_FATAL_BIT 11 +#define CBE_CAUSE_RA_RESPONSE_NON_FATAL_BIT 12 +#define CBE_CAUSE_HA_RESPONSE_FATAL_BIT 13 +#define CBE_CAUSE_HA_RESPONSE_NON_FATAL_BIT 14 +#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR_BIT 15 +#define CBE_CAUSE_RESPONSE_DATA_ERROR_BIT 16 +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR_BIT 17 + +#define CBE_CAUSE_RI (1 << CBE_CAUSE_RI_BIT) +#define CBE_CAUSE_INVALID_INSTRUCTION (1 << CBE_CAUSE_INVALID_INSTRUCTION_BIT) +#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN_BIT) +#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 
CBE_CAUSE_PE_CHECK_DATA_ERROR_BIT) +#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << CBE_CAUSE_IAA_GAA_MISMATCH_BIT) +#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION_BIT) +#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << CBE_CAUSE_OS_FATAL_TLB_FAULT_BIT) +#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << CBE_CAUSE_EXECUTION_HW_ERROR_BIT) +#define CBE_CAUSE_TLBHW_ERROR (1 << CBE_CAUSE_TLBHW_ERROR_BIT) +#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << CBE_CAUSE_RA_REQUEST_TIMEOUT_BIT) +#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << CBE_CAUSE_HA_REQUEST_TIMEOUT_BIT) +#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << CBE_CAUSE_RA_RESPONSE_FATAL_BIT) +#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << CBE_CAUSE_RA_RESPONSE_NON_FATAL_BIT) +#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << CBE_CAUSE_HA_RESPONSE_FATAL_BIT) +#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << CBE_CAUSE_HA_RESPONSE_NON_FATAL_BIT) +#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR_BIT) +#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << CBE_CAUSE_RESPONSE_DATA_ERROR_BIT) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR_BIT) + + +/* Message queue head structure */ +union gru_mesqhead { + unsigned long val; + struct { + unsigned int head; + unsigned int limit; + } q; +}; + + +/* Generate the low word of a GRU instruction */ +static inline unsigned int +opword(unsigned char opcode, unsigned char exopc, unsigned char xtype, + unsigned char iaa0, unsigned char iaa1, + unsigned char ima) +{ + return ((1 << GRU_CB_ICMD_SHFT) | + (iaa0 << GRU_CB_IAA0_SHFT) | + (iaa1 << GRU_CB_IAA1_SHFT) | + (xtype << GRU_CB_XTYPE_SHFT) | + (ima << GRU_CB_IMA_SHFT) | + (opcode << GRU_CB_OPC_SHFT) | + (exopc << GRU_CB_EXOPC_SHFT)); +} + +/* + * Prefetch a cacheline + * ??? should I use actual "load" or hardware prefetch??? 
+ */ +static inline void gru_prefetch(void *p) +{ + *(volatile char *)p; +} + + +/* + * Use the "fc" instruction as a hook into the emulator + * ZZZ serialization requirements here??? + */ +static inline void gru_flush_cache(void *p) +{ +#if defined(__ia64__) + asm volatile ("fc %0"::"r" (p):"memory"); +#elif defined(__x86_64__) + asm volatile("clflush %0" :: "m" (p)); +#else +#error "bad arch" +#endif + gru_flush_cache_hook(p); /* No code generated unless -D EMUSUPPORT */ +} + + +/* Values for the "hints" parameter of the GRU instruction functions */ +#define HINT_CB_UNMAPPED IMA_UNMAPPED +#define HINT_CB_DELAY IMA_CB_DELAY + +/* Convert "hints" to IMA */ +#define CB_IMA(h) ((h) & (IMA_UNMAPPED | IMA_CB_DELAY)) + +/* Convert data segment cache line index into TRI0 / TRI1 value */ +#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES) + +/* Inline functions for GRU instructions. + * Note: + * - nelem and stride are in elements + * - tri0/tri1 is in bytes for the beginning of the data segment. 
+ */ +static inline void gru_vload(gru_control_block_t *cb, void *mem_addr, int iaa0, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->op1_stride = stride; + ins->op32 = opword(OP_VLOAD, 0, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_vstore(gru_control_block_t *cb, void *mem_addr, int iaa0, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->op1_stride = stride; + ins->op32 = opword(OP_VSTORE, 0, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_ivload(gru_control_block_t *cb, void *mem_addr, int iaa0, + unsigned int tri0, unsigned int tri1, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->tri1_bufsize = tri1; + ins->op32 = opword(OP_IVLOAD, 0, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_ivstore(gru_control_block_t *cb, void *mem_addr, + int iaa0, unsigned int tri0, unsigned int tri1, + unsigned char xtype, unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->tri1_bufsize = tri1; + ins->op32 = opword(OP_IVSTORE, 0, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_vset(gru_control_block_t *cb, void *mem_addr, int iaa0, + unsigned long value, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void 
*)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->op1_stride = stride; + ins->op32 = opword(OP_VSET, 0, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_ivset(gru_control_block_t *cb, void *mem_addr, int iaa0, + unsigned long value, unsigned int tri1, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + ins->op32 = opword(OP_IVSET, 0, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_vflush(gru_control_block_t *cb, void *mem_addr, int iaa0, + unsigned long nelem, unsigned char xtype, unsigned long stride, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op1_stride = stride; + ins->nelem = nelem; + ins->op32 = opword(OP_VFLUSH, 0, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_nop(gru_control_block_t *cb, int hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->op32 = opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + + +static inline void gru_bcopy(gru_control_block_t *cb, const void *src, + int iaa0, void *dest, int iaa1, + unsigned long nelem, unsigned int xtype, unsigned int tri0, + unsigned int bufsize, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->tri1_bufsize = bufsize; + ins->op1_stride = 1; + ins->op32 = opword(OP_BCOPY, 0, xtype, iaa0, iaa1, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_bstore(gru_control_block_t *cb, const void *src, + void *dest, int iaa0, unsigned long nelem, unsigned int xtype, + unsigned int tri0, unsigned int stride, unsigned long hints) +{ + struct 
gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri0 = tri0; + ins->op1_stride = stride; + ins->op32 = opword(OP_BSTORE, 0, xtype, iaa0, iaa0, CB_IMA(hints)); + /* ZZZ iaa0 or iaa1 */ + gru_flush_cache(ins); +} + +static inline void gru_gamir(gru_control_block_t *cb, int exopc, void *src, + int iaa0, unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op32 = opword(OP_GAMIR, exopc, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_gamirr(gru_control_block_t *cb, int exopc, void *src, + int iaa0, unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op32 = opword(OP_GAMIRR, exopc, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_gamer(gru_control_block_t *cb, int exopc, void *src, + int iaa0, unsigned int xtype, + unsigned long operand1, unsigned long operand2, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = operand1; + ins->op1_stride = operand2; + ins->op32 = opword(OP_GAMER, exopc, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_gamerr(gru_control_block_t *cb, int exopc, void *src, + int iaa0, unsigned int xtype, unsigned long operand1, + unsigned long operand2, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = operand1; + ins->op1_stride = operand2; + ins->op32 = opword(OP_GAMERR, exopc, xtype, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline void gru_mesq(gru_control_block_t *cb, void *queue, int iaa0, + unsigned long msg_bytes, unsigned long tri0, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)queue; + ins->nelem = 
msg_bytes / GRU_CACHE_LINE_BYTES; + ins->tri0 = tri0; + ins->op32 = opword(OP_MESQ, 0, XTYPE_CL, iaa0, 0, CB_IMA(hints)); + gru_flush_cache(ins); +} + +static inline unsigned long gru_get_amo_value(gru_control_block_t *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue; +} + +static inline int gru_get_amo_value_head(gru_control_block_t *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return (ins->avalue & 0xffffffff); +} + +static inline int gru_get_amo_value_limit(gru_control_block_t *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue >> 32; +} + +static inline union gru_mesqhead gru_mesq_head(int head, int limit) +{ + union gru_mesqhead mqh; + + mqh.q.head = head; + mqh.q.limit = limit; + return mqh; +} + + +#endif /* _GRU_INSTRUCTIONS_H_ */ Index: linux/drivers/gru/grufault.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/grufault.c 2008-02-19 10:19:25.876327857 -0600 @@ -0,0 +1,557 @@ +/* + * SN Platform GRU Driver + * + * FAULT HANDLER FOR GRU DETECTED TLB MISSES + * + * This file contains code that handles TLB misses within the GRU. + * These misses are reported either via interrupts or user polling of + * the user CB. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005-2008 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#ifdef EMU +#include "preemu.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include "grulib.h" +#include "gru_instructions.h" +#ifdef EMU +#include "emu.h" +#endif + +/* + * Test if a physical address is a valid GRU GSEG address + */ +static inline int is_gru_paddr(unsigned long paddr) +{ + return (paddr >= gru_start_paddr && paddr < gru_end_paddr); +} + +/* + * Find and lock the gts that contains the specified user vaddr. + * + * Returns: + * - *gts with the mmap_sem locked for read and the GTS locked. + * - NULL if vaddr invalid OR is not a valid GSEG vaddr. + */ + +static struct gru_thread_state *gru_find_and_lock_gts(unsigned long vaddr) +{ + struct vm_area_struct *vma; + struct gru_thread_state *gts; + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, vaddr); + if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops) { + gts = gru_find_thread_state(vma, TSID(vaddr - vma->vm_start)); + if (gts) { + down(>s->ts_ctxsem); + return gts; + } + } + up_read(¤t->mm->mmap_sem); + return NULL; +} + +/* + * Unlock a GTS that was previously locked with gru_find_and_lock_gts(). + */ +static void gru_unlock_gts(struct gru_thread_state *gts) +{ + up(>s->ts_ctxsem); + up_read(¤t->mm->mmap_sem); +} + +/* + * Set a CB.istatus to active using a user virtual address. This must be done + * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY. + * If the line is evicted, the status may be lost. The in-cache update + * is necessary to prevent the user from seeing a stale cb.istatus that will + * change as soon as the TFH restart is complete. Races may cause an + * occasional failure to clear the cb.istatus, but that is ok. 
+ */ +static void gru_cb_set_istatus_active(unsigned long __user *cb) +{ + union { + struct gru_instruction_bits bits; + unsigned long dw; + } u; + + if (cb) { + get_user(u.dw, cb); + u.bits.istatus = CBS_ACTIVE; + put_user(u.dw, cb); + } +} + +/* + * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the + * interrupt. Interrupts are always sent to a cpu on the blade that contains the + * GRU (except for headless blades which are not currently supported). A blade + * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ + * number uniquely identifies the GRU chipleton the local blade that caused the + * interrupt. Always called in interrupt context. + */ +static inline struct gru_state *irq_to_gru(int irq) +{ + return &gru_base[numa_blade_id()]->bs_grus[irq - IRQ_GRU]; +} + +/* + * Read & clear a TFM + * + * The GRU has an array of fault maps. A map is private to a cpu + * Only one cpu will be accessing a cpu's fault map. + * + * This function scans the cpu-private fault map & clears all bits that + * are set. The function returns a bitmap that indicates the bits that + * were cleared. Note that sense the maps may be updated asynchronously by + * the GRU, atomic operations must be used to clear bits. + */ +static void get_clear_fault_map(struct gru_state *gru, + struct gru_tlb_fault_map *map) +{ + unsigned long i, k; + struct gru_tlb_fault_map *tfm; + + tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id()); + prefetchw(tfm); /* Helps on hardware, required for emulator */ + for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) { + k = tfm->fault_bits[i]; + if (k) + k = xchg(&tfm->fault_bits[i], 0UL); + map->fault_bits[i] = k; + } + + /* + * Not functionally required but helps performance. (Required + * on emulator) + */ + gru_flush_cache(tfm); +} + +/* + * Atomic (interrupt context) & non-atomic (user context) functions to + * convert a vaddr into a physical address & pagesize. 
+ * returns: + * 0 - successful + * < 0 - error code + * 1 - (atomic only) try again in non-atomic context + */ +static int non_atomic_pte_lookup(struct vm_area_struct *vma, + unsigned long vaddr, int write, + unsigned long *paddr, int *pagesize) +{ + struct page *page; + + if (get_user_pages + (current, current->mm, vaddr, 1, write, 1, &page, NULL) <= 0) + return -EFAULT; + *paddr = page_to_phys(page); + *pagesize = + is_vm_hugetlb_page(vma) ? GRU_PAGESIZE(HPAGE_SHIFT) : + GRU_PAGESIZE(PAGE_SHIFT); + put_page(page); + return 0; +} + +static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, + int write, unsigned long *paddr, int *pagesize) +{ + struct page *page; + + page = follow_page(vma, vaddr, (write ? FOLL_WRITE : 0)); + if (!page) + return 1; + *paddr = page_to_phys(page); + *pagesize = + is_vm_hugetlb_page(vma) ? GRU_PAGESIZE(HPAGE_SHIFT) : + GRU_PAGESIZE(PAGE_SHIFT); + return 0; +} + +/* + * Drop a TLB entry into the GRU. The fault is described by info in an TFH. + * Input: + * cb Address of user CBR. Null if not running in user context + * Return: + * 0 = dropin, exception, or switch to UPM successful + * 1 = range invalidate active + * 2 = asid == 0 + * < 0 = error code + * + */ +static int gru_try_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + unsigned long __user *cb) +{ + struct mm_struct *mm = gts->ts_mm; + struct vm_area_struct *vma; + int pagesize, asid, write, ret; + unsigned long paddr, vaddr; + + /* + * NOTE: The GRU contains magic hardware that eliminates races between + * TLB invalidates and TLB dropins. If an invalidate occurs + * in the window between reading the TFH and the subsequent TLB dropin, + * the dropin is ignored. This eliminates the need for additional locks. 
+ */ + write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; + vaddr = tfh->missvaddr; + asid = tfh->missasid; + if (asid == 0) + goto failnoasid; + + rmb(); /* TFH must be cache resident before reading ms_range_active */ + + /* + * TFH is cache resident - at least briefly. Fail the dropin + * if a range invalidate is active. + */ + if (atomic_read(>s->ts_ms->ms_range_active)) + goto failactive; + + vma = find_vma(mm, vaddr); + if (!vma) + goto failinval; + + /* + * Atomic lookup is faster & usually works even if called in non-atomic + * context. + */ + ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pagesize); + if (ret) { + if (!cb) + goto failupm; + if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &pagesize)) + goto failinval; + } + if (is_gru_paddr(paddr)) + goto failinval; + gru_cb_set_istatus_active(cb); + tfh_write_restart(tfh, paddr, GAA_RAM, vaddr, asid, write, pagesize); + STAT(tlb_dropin); + gru_dbg(grudev, + "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, paddr 0x%lx\n", + ret ? "non-atomic" : "atomic", tfh, vaddr, asid, pagesize, + paddr); + return 0; + +failnoasid: + /* No asid (delayed unload). */ + STAT(tlb_dropin_fail_no_asid); + gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + if (!cb) + tfh_user_polling_mode(tfh); + return 2; + +failupm: + /* Atomic failure switch CBR to UPM */ + STAT(tlb_dropin_fail_upm); + gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + tfh_user_polling_mode(tfh); + return 1; + +failinval: + /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ + STAT(tlb_dropin_fail_invalid); + gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + tfh_exception(tfh); + return -EFAULT; + +failactive: + /* Range invalidate active. 
Switch to UPM iff atomic */ + STAT(tlb_dropin_fail_range_active); + gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", + tfh, vaddr); + if (!cb) + tfh_user_polling_mode(tfh); + return 1; +} + +/* + * Process an external interrupt from the GRU. This interrupt is + * caused by a TLB miss. + * Note that this is the interrupt handler that is registered with linux + * interrupt handlers. + */ +irqreturn_t gru_intr(int irq, void *dev_id) +{ + struct gru_state *gru; + struct gru_tlb_fault_map map; + struct gru_thread_state *gts; + struct gru_tlb_fault_handle *tfh = NULL; + int cbrnum, ctxnum; + + STAT(intr); + + gru = irq_to_gru(irq); + if (!gru) { + dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n", + raw_smp_processor_id(), irq); + return IRQ_NONE; + } + get_clear_fault_map(gru, &map); + gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid, + map.fault_bits[0]); + + for_each_cbr_in_tfm(cbrnum, map.fault_bits) { + tfh = get_tfh_by_index(gru, cbrnum); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + + /* + * When hardware sets a bit in the faultmap, it implicitly + * locks the GRU context so that it cannot be unloaded. + * gs_gts cannot change until a TFH start/writestart command + * is issued + */ + ctxnum = tfh->ctxnum; + gts = gru->gs_gts[ctxnum]; + if (down_read_trylock(>s->ts_mm->mmap_sem)) { + gru_try_dropin(gts, tfh, NULL); + up_read(>s->ts_mm->mmap_sem); + } else { + tfh_user_polling_mode(tfh); + } + } + return IRQ_HANDLED; +} + +/* + * UPM call but nothing found in TFH. It _could_ be a race that was lost, + * a user bug, or a hardware bug. Try to determine which. 
+ */ +static int gru_check_for_bug(unsigned long arg, + struct gru_tlb_fault_handle *tfh) +{ + struct gru_instruction_bits ins, *cb = (void *)arg; + + STAT(call_os_check_for_bug); + gru_dbg(grudev, "cb %p\n", cb); + if (copy_from_user(&ins, cb, sizeof(ins))) + return -EFAULT; + if (cb->istatus != CBS_CALL_OS) + return 0; + barrier(); + gru_flush_cache(cb); + if (copy_from_user(&ins, cb, sizeof(ins))) + return -EFAULT; + if (cb->istatus != CBS_CALL_OS) { + dev_info(grudev, "cb %p: Possible coherency bug\n", cb); + return 0; + } + + gru_flush_cache(tfh); + barrier(); + + if (tfh->state == TFHSTATE_MISS_UPM) { + dev_info(grudev, "tfh %p: Possible coherency bug\n", cb); + return -EAGAIN; + } + gru_dbg(grudev, "cb %p: CB in UPM state but no TFH fault\n", cb); + return -EIO; + +} + +static int gru_user_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + unsigned long __user *cb) +{ + struct gru_mm_struct *gms = gts->ts_ms; + int ret; + + while (1) { + wait_event(gms->ms_wait_queue, + atomic_read(&gms->ms_range_active) == 0); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + ret = gru_try_dropin(gts, tfh, cb); + if (ret <= 0) + return ret; + STAT(call_os_wait_queue); + } +} + +/* + * This interface is called as a result of a user detecting a "call OS" bit + * in a user CB. Normally means that a TLB fault has occurred. 
+ *	cb - user virtual address of the CB
+ *
+ * Returns -EINVAL if cb is misaligned, out of range, or not inside a GRU
+ * mapping; -EAGAIN if the context was force-unloaded or is not currently
+ * loaded; otherwise the result of gru_user_dropin() or gru_check_for_bug().
+ */
+int gru_handle_user_call_os(unsigned long cb)
+{
+	struct gru_tlb_fault_handle *tfh;
+	struct gru_thread_state *gts;
+	unsigned long __user *cbp;
+	int ucbnum, cbrnum, ret = -EINVAL;
+
+	STAT(call_os);
+	gru_dbg(grudev, "address 0x%lx\n", cb);
+
+	/* sanity check the cb pointer */
+	ucbnum = UCBNUM(cb);
+	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
+		return -EINVAL;
+	cbp = (unsigned long *)cb;
+
+	gts = gru_find_and_lock_gts(cb);
+	if (!gts)
+		return -EINVAL;
+
+	/* The CB must lie within the CBRs allocated to this context */
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	/*
+	 * If force_unload is set, the UPM TLB fault is phony. The task
+	 * has migrated to another node and the GSEG must be moved. Just
+	 * unload the context. The task will page fault and assign a new
+	 * context.
+	 */
+	ret = -EAGAIN;
+	cbrnum = thread_cbr_number(gts, ucbnum);
+	if (gts->ts_force_unload) {
+		gru_unload_context(gts, 1);
+	} else if (gts->ts_gru) {
+		tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
+		prefetchw(tfh);	/* Helps on hdw, required for emulator */
+		if (tfh->state == TFHSTATE_IDLE) {
+			gru_dbg(grudev, "UNEXPECTED: tfh %p idle\n", tfh);
+			/* Flush and fall through so the state is re-read below */
+			gru_flush_cache(tfh);
+			STAT(call_os_tfh_idle);
+		}
+		if (tfh->state == TFHSTATE_MISS_UPM)
+			ret = gru_user_dropin(gts, tfh, cbp);
+		else
+			ret = gru_check_for_bug(cb, tfh);
+	}
+exit:
+	gru_unlock_gts(gts);
+	return ret;
+}
+
+/*
+ * Fetch the exception detail information for a CB that terminated with
+ * an exception.
+ *
+ * arg is the user address of a struct control_block_extended_exc_detail;
+ * its "cb" field selects the CB and the remaining fields are filled in.
+ *
+ * Returns 0 on success, -EFAULT if the request cannot be copied, -EINVAL if
+ * cb is not a CB of a GRU context owned by the caller, or -EAGAIN if the
+ * context is not currently loaded.
+ */
+int gru_get_exception_detail(unsigned long arg)
+{
+	struct control_block_extended_exc_detail excdet;
+	struct gru_control_block_extended *cbe;
+	struct gru_thread_state *gts;
+	int ucbnum, cbrnum, ret;
+
+	STAT(user_exception);
+	if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
+	gts = gru_find_and_lock_gts(excdet.cb);
+	if (!gts)
+		return -EINVAL;
+
+	/*
+	 * Same range check as gru_handle_user_call_os(): the CB must be one
+	 * of the CBRs allocated to this context, otherwise the CBE lookup
+	 * below would read a handle that belongs to another context.
+	 */
+	ucbnum = UCBNUM(excdet.cb);
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+		ret = -EINVAL;
+	} else if (gts->ts_gru) {
+		cbrnum = thread_cbr_number(gts, ucbnum);
+		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+		excdet.opc = cbe->opccpy;
+		excdet.exopc = cbe->exopccpy;
+		excdet.ecause = cbe->ecause;
+		excdet.exceptdet0 = cbe->idef1upd;
+		excdet.exceptdet1 = cbe->idef3upd;
+		ret = 0;
+	} else {
+		ret = -EAGAIN;
+	}
+	gru_unlock_gts(gts);
+
+	gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
+		excdet.ecause);
+	if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
+		ret = -EFAULT;
+	return ret;
+}
+
+/*
+ * User request to unload a context. Content is saved for possible reload.
+ */
+int gru_user_unload_context(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_unload_context_req req;
+
+	STAT(user_unload_context);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "vaddr 0x%lx\n", req.vaddr);
+
+	gts = gru_find_and_lock_gts(req.vaddr);
+	if (!gts)
+		return -EINVAL;
+
+	/* Nothing to do if the context is not currently loaded */
+	if (gts->ts_gru)
+		gru_unload_context(gts, 1);
+	gru_unlock_gts(gts);
+
+	return 0;
+}
+
+/*
+ * User request to flush a range of virtual addresses from the GRU TLB
+ * (Mainly for testing).
+ *
+ * arg is the user address of a struct gru_flush_tlb_req. Returns 0 on
+ * success, -EFAULT if the request cannot be copied, or -EINVAL if req.gseg
+ * is not inside a GRU mapping of the caller.
+ */
+int gru_user_flush_tlb(unsigned long arg)
+{
+	struct gru_thread_state *gts;
+	struct gru_flush_tlb_req req;
+
+	STAT(user_flush_tlb);
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg,
+		req.vaddr, req.len);
+
+	gts = gru_find_and_lock_gts(req.gseg);
+	if (!gts)
+		return -EINVAL;
+
+	/* NOTE(review): req.vaddr + req.len is not checked for wrap-around */
+	gru_flush_tlb_range(gts->ts_ms, req.vaddr, req.vaddr + req.len);
+	gru_unlock_gts(gts);
+
+	return 0;
+}
+
+/*
+ * Register the current task as the user of the GSEG slice.
+ * Needed for TLB fault interrupt targeting.
+ *
+ * Returns 0 on success or -EINVAL if address is not inside a GRU mapping
+ * of the caller.
+ */
+int gru_set_task_slice(long address)
+{
+	struct gru_thread_state *gts;
+
+	STAT(set_task_slice);
+	gru_dbg(grudev, "address 0x%lx\n", address);
+	gts = gru_find_and_lock_gts(address);
+	if (!gts)
+		return -EINVAL;
+
+	gts->ts_tgid_owner = current->tgid;
+	gru_unlock_gts(gts);
+
+	return 0;
+}
Index: linux/drivers/gru/grufile.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/gru/grufile.c 2008-02-19 09:30:53.000000000 -0600
@@ -0,0 +1,453 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * FILE OPERATIONS & DRIVER INITIALIZATION
+ *
+ * This file supports the user system call for file open, close, mmap, etc.
+ * This also includes the driver initialization code.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */ + +#ifdef EMU +#include "preemu.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" +#ifdef __ia64__ +#include +#include +#else +#define cnodeid_to_nasid(n) 0 /* ZZZ fixme */ +#endif +#ifdef EMU +#include "emu.h" +#endif + +#ifndef EMU +struct gru_stats_s gru_stats; +struct gru_blade_state *gru_base[GRU_MAX_BLADES]; +unsigned long gru_start_paddr, gru_end_paddr; +#endif + +static struct file_operations gru_fops; +static struct miscdevice gru_miscdev; + +/* + * gru_vma_open + * + * Called when a device mapping is created by a means other than mmap + * (via fork, etc.). Increments the reference count on the underlying + * gru data so it is not freed prematurely. + */ +STATIC void gru_vma_open(struct vm_area_struct *vma) +{ + struct gru_thread_state *gts; + struct gru_thread_data *gtd; + + if (IS_THREAD_DATA(vma->vm_private_data)) { + gtd = vma->vm_private_data; + } else { + gts = gru_find_thread_state(vma, TSID(0)); + down(>s->ts_ctxsem); + zap_page_range(vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE, NULL); + if (gts->ts_gru) + gru_unload_context(gts, 1); + gtd = gts->ts_td; + up(>s->ts_ctxsem); + } + + atomic_inc(>d->td_refcnt); + vma->vm_private_data = gtd; + gru_dbg(grudev, "vma %p, gtd %p, refcnt %d\n", vma, gtd, + atomic_read(>d->td_refcnt)); +} + +/* + * gru_vma_close + * + * Called when unmapping a device mapping. Frees all gru resources + * and tables belonging to the vma. 
+ */ +STATIC void gru_vma_close(struct vm_area_struct *vma) +{ + struct gru_vma_data *vdata; + struct gru_thread_state *gts; + struct list_head *entry, *next; + + if (IS_THREAD_DATA(vma->vm_private_data)) { + gru_dbg(grudev, "vma %p, td %p\n", vma, vma->vm_private_data); + gtd_drop(vma->vm_private_data); + } else { + vdata = vma->vm_private_data; + vma->vm_private_data = NULL; + gru_dbg(grudev, "vma %p, vdata %p\n", vma, vdata); + list_for_each_safe(entry, next, &vdata->vd_head) { + gts = + list_entry(entry, struct gru_thread_state, ts_next); + list_del(>s->ts_next); + down(>s->ts_ctxsem); + if (gts->ts_gru) + gru_unload_context(gts, 0); + up(>s->ts_ctxsem); + gtd_drop(gts->ts_td); + gts_drop(gts); + } + kfree(vdata); + STAT(vdata_free); + } +} + +/* + * gru_file_open + * + * Called when the GRU is opened. + */ +STATIC int gru_file_open(struct inode *inode, struct file *file) +{ + struct gru_file_data *fdata; + + fdata = kzalloc(sizeof(*fdata), GFP_KERNEL); + if (!fdata) + return -ENOMEM; + + STAT(fdata_alloc); + file->private_data = (void *)fdata; + gru_dbg(grudev, "file %p, fdata %p\n", file, fdata); + return 0; +} + +/* + * gru_file_release + * + * Called when the GRU is released - last "open" has been closed. + */ +STATIC int gru_file_release(struct inode *inode, struct file *file) +{ + gru_dbg(grudev, "file %p, fdata %p\n", file, file->private_data); + kfree(file->private_data); + STAT(fdata_free); + return 0; +} + +/* + * gru_file_mmap + * + * Called when mmaping the device. Initializes the vma with a fault handler + * and private data structure necessary to allocate, track, and free the + * underlying pages. 
+ */ +STATIC int gru_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct gru_file_data *fdata = file->private_data; + + if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) + return -EPERM; + + if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || + CONTEXT_WINDOW_BYTES(fdata->fd_thread_slices) != + vma->vm_end - vma->vm_start) + return -EINVAL; + + vma->vm_flags |= + (VM_IO | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | VM_RESERVED); + vma->vm_page_prot = PAGE_SHARED; + vma->vm_ops = &gru_vm_ops; + + vma->vm_private_data = gru_alloc_vma_data(vma, TSID(0), NULL); + if (!vma->vm_private_data) + return -ENOMEM; + + gru_dbg(grudev, "file %p, fdata %p, vaddr 0x%lx, vma %p, vdata %p\n", + file, file->private_data, vma->vm_start, vma, + vma->vm_private_data); + return 0; +} + +/* + * Create a new GRU context + */ +static int gru_create_new_context(unsigned long arg, + struct gru_file_data *fdata) +{ + struct gru_create_context_req req; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + if (req.data_segment_bytes == 0 + || req.data_segment_bytes > GRU_NUM_USER_DSR_BYTES) + return -EINVAL; + if (req.control_blocks == 0 || req.control_blocks > GRU_NUM_USER_CBR) + return -EINVAL; + if (req.maximum_thread_count == 0 || req.maximum_thread_count > NR_CPUS) + return -EINVAL; + + if (!(req.options & GRU_OPT_MISS_MASK)) + req.options |= GRU_OPT_MISS_USER_POLL; /* ZZZ change default */ + + fdata->fd_dsr_au_count = GRU_DS_BYTES_TO_AU(req.data_segment_bytes); + fdata->fd_user_options = req.options; + fdata->fd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + fdata->fd_thread_slices = req.maximum_thread_count; + + return 0; +} + +/* + * Get GRU configuration info (temp - for emulator testing) + */ +static long gru_get_config_info(unsigned long arg) +{ + struct gru_config_info info; + + info.cpus = num_online_cpus(); + info.nodes = num_online_nodes(); + info.blades = info.nodes / NODESPERBLADE; + info.chiplets = 
GRU_CHIPLETS_PER_BLADE * info.blades; + + if (copy_to_user((void __user *)arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +/* + * gru_file_unlocked_ioctl + * + * Called to update file attributes via IOCTL calls. + */ +STATIC long gru_file_unlocked_ioctl(struct file *file, unsigned int req, + unsigned long arg) +{ + int err = -EBADRQC; + + gru_dbg(grudev, "file %p, fdata %p\n", file, file->private_data); + + switch (req) { + case GRU_CREATE_CONTEXT: + err = gru_create_new_context(arg, file->private_data); + break; + case GRU_SET_TASK_SLICE: + err = gru_set_task_slice(arg); + break; + case GRU_USER_GET_EXCEPTION_DETAIL: + err = gru_get_exception_detail(arg); + break; + case GRU_USER_UNLOAD_CONTEXT: + err = gru_user_unload_context(arg); + break; + case GRU_USER_FLUSH_TLB: + err = gru_user_flush_tlb(arg); + break; + case GRU_USER_CALL_OS: + err = gru_handle_user_call_os(arg); + break; + case GRU_GET_CONFIG_INFO: + err = gru_get_config_info(arg); + break; + } + return err; +} + +/* + * Called at init time to build tables for all GRUs that are present in the + * system. + */ +static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, + void *vaddr, int base_nasid, int nid, int bid, int grunum) +{ + spin_lock_init(&gru->gs_lock); + spin_lock_init(&gru->gs_asid_lock); + gru->gs_gru_base_paddr = paddr; + gru->gs_gru_base_vaddr = vaddr; + gru->gs_gid = bid * GRUS_PER_HUB + grunum; + gru->gs_blade = gru_base[bid]; + gru->gs_present = 1; + gru->gs_blade_id = bid; + gru->gs_cbr_map = (GRU_CBR_AU == 64) ? 
~0 : (1UL << GRU_CBR_AU) - 1; + gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; + gru_tgh_flush_init(gru); + gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n", + bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr, + gru->gs_gru_base_paddr); + gru_kservices_init(gru); +} + +static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr, + int base_nasid) +{ + int nasid, nid, bid, grunum; + int order = get_order(sizeof(struct gru_blade_state)); + struct page *page; + struct gru_state *gru; + unsigned long paddr; + void *vaddr; + + for_each_online_node(nid) { + bid = nid_to_blade(nid); + nasid = cnodeid_to_nasid(nid); + if (gru_base[bid]) + continue; + page = alloc_pages_node(nid, GFP_KERNEL, order); + if (!page) + goto fail; + gru_base[bid] = page_address(page); + memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); + gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; + spin_lock_init(&gru_base[bid]->bs_lock); + + for (gru = gru_base[bid]->bs_grus, grunum = 0; + grunum < GRU_CHIPLETS_PER_BLADE; grunum++, gru++) { + paddr = gru_base_paddr + GRUCHIPOFFSET(nasid, base_nasid, grunum); + vaddr = gru_base_vaddr + GRUCHIPOFFSET(nasid, base_nasid, grunum); + gru_init_chiplet(gru, paddr, vaddr, nasid, bid, nid, grunum); + } + } + + return 0; + +fail: + for (nid--; nid >= 0; nid--) + free_pages((unsigned long)gru_base[nid], order); + return -ENOMEM; +} + +/* + * gru_init + * + * Called at boot or module load time to initialize the GRUs. 
+ */ +STATIC int __init gru_init(void) +{ + int ret, irqno; + char id[10]; + void *gru_start_vaddr; + int base_nasid; + +#ifdef EMU + gru_start_paddr = GRUPSEGBASE; + gru_end_paddr = GRUPSEGBASE + MAX_NUMNODES * GRU_SIZE; + gru_start_vaddr = GRUVSEGBASE; + base_nasid = 0; +#else + /* Need real addresses from ACPI */ + gru_start_paddr = 0xd000000000UL; + gru_end_paddr = 0xd000000000UL + MAX_NUMNODES * GRU_SIZE; + gru_start_vaddr = __va(gru_start_paddr); + base_nasid = 0; +#endif + printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", + gru_start_paddr, gru_end_paddr); + for (irqno = 0; irqno < GRU_CHIPLETS_PER_BLADE; irqno++) { + ret = request_irq(IRQ_GRU + irqno, gru_intr, 0, id, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed\n", + GRU_DRIVER_ID_STR); + goto exit1; + } + } + + ret = misc_register(&gru_miscdev); + if (ret) { + printk(KERN_ERR "%s: misc_register failed\n", + GRU_DRIVER_ID_STR); + goto exit1; + } + + ret = gru_proc_init(); + if (ret) { + printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); + goto exit2; + } + + ret = gru_init_tables(gru_start_paddr, gru_start_vaddr, base_nasid); + if (ret) { + printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); + goto exit3; + } + + printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, REVISION); + return 0; + +exit3: + gru_proc_exit(); +exit2: + misc_deregister(&gru_miscdev); +exit1: + for (--irqno; irqno >= 0; irqno--) + free_irq(IRQ_GRU + irqno, NULL); + return ret; + +} + +static void __exit gru_exit(void) +{ + int i, bid; + int order = get_order(sizeof(struct gru_state) * GRU_CHIPLETS_PER_BLADE); + + for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) + free_irq(IRQ_GRU + i, NULL); + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) + free_pages((unsigned long)gru_base[bid], order); + + misc_deregister(&gru_miscdev); + gru_proc_exit(); +} + +static struct file_operations gru_fops = { + .owner = THIS_MODULE, + .open = gru_file_open, + .release = gru_file_release, + .unlocked_ioctl = 
gru_file_unlocked_ioctl, + .mmap = gru_file_mmap, +}; + +static struct miscdevice gru_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "gru", + .fops = &gru_fops, +}; + +struct vm_operations_struct gru_vm_ops = { + .open = gru_vma_open, + .close = gru_vma_close, + .nopfn = gru_nopfn, +}; + +module_init(gru_init); +module_exit(gru_exit); + +#ifndef MODULE +static int set_debug_options(char *str) +{ + int val; + + get_option(&str, &val); + options = val; + return 1; +} + +__setup("gru_debug=", set_debug_options); +#endif + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Driver for SGI GRU"); +MODULE_LICENSE("GPL"); Index: linux/drivers/gru/gruhandles.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/gruhandles.h 2008-02-19 09:30:53.000000000 -0600 @@ -0,0 +1,655 @@ +/* + * SN Platform GRU Driver + * + * GRU HANDLE DEFINITION + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005-2008 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#ifndef _ASM_IA64_SN_GRUHANDLES_H +#define _ASM_IA64_SN_GRUHANDLES_H + +/* + * Manifest constants for GRU Memory Map + */ +#define GRU_GSEG0_BASE 0 +#define GRU_MCS_BASE (64 * 1024 * 1024) +#define GRU_SIZE (128UL * 1024 * 1024) + +/* Handle & resource counts */ +#define GRU_NUM_CB 128 +#define GRU_NUM_DSR_BYTES (32 * 1024) +#define GRU_NUM_TFM 16 +#define GRU_NUM_TGH 24 +#define GRU_NUM_CBE 128 +#define GRU_NUM_TFH 128 +#define GRU_NUM_CCH 16 +#define GRU_NUM_GSH 1 + +/* Resources PERMANENTLY reserved for kernel use */ +#define GRU_NUM_KERNEL_CBR 16 +#define GRU_NUM_KERNEL_DSR_BYTES 1024 +#define KERNEL_CTXNUM 15 + +/* Maximum resource counts that can be reserved by user programs */ +#define GRU_NUM_USER_CBR (GRU_NUM_CBE - GRU_NUM_KERNEL_CBR) +#define GRU_NUM_USER_DSR_BYTES (GRU_NUM_DSR_BYTES - GRU_NUM_KERNEL_DSR_BYTES) + +/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles + * are the same */ +#define GRU_HANDLE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 + +/* Base addresses of handles */ +#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000) +#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000) +#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) +#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) +#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) +#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000) + +/* User gseg constants */ +#define GRU_GSEG_STRIDE (4 * 1024 * 1024) +#ifdef __ia64__ +#define GRU_GSEG_PAGESIZE (256 * 1024) +#define GRU_GSEG_PAGESIZE_SHIFT 18 +#else +#define GRU_GSEG_PAGESIZE (2 * 1024 * 1024UL) +#endif +#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1)) + +/* Data segment constants */ +#define GRU_DSR_AU_BYTES 1024 +#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES) + +/* Control block constants */ +#define GRU_CBR_AU_SIZE 2 +#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE) + +/* Convert resource counts to the 
number of AU */ +#define GRU_DS_BYTES_TO_AU(n) (((n) + GRU_DSR_AU_BYTES - 1) / \ + GRU_DSR_AU_BYTES) +#define GRU_CB_COUNT_TO_AU(n) (((n) + GRU_CBR_AU_SIZE - 1) / \ + GRU_CBR_AU_SIZE) + +/* UV limits */ +#define GRUS_PER_HUB 2 +#define GRU_HUBS_PER_BLADE 1 +#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRUS_PER_HUB) + +/* User GRU Gseg offsets */ +#define GRU_CB_BASE 0 +#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE) +#define GRU_DS_BASE 0x20000 +#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES) + +/* General addressing macros. b=grubase, c=ctxnum, i=cbnum, cl=cacheline# */ +#define GRU_GSEG(b, c) ((void *)((b) + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * (c))) +#define GRU_GSEG_CB(b, c, i) ((void *)(GRU_GSEG((b), (c)) + GRU_CB_BASE + GRU_HANDLE_STRIDE * (i))) +#define GRU_GSEG_DS(b, c, cl) ((void *)(GRU_GSEG((b), (c)) + GRU_DS_BASE + GRU_CACHE_LINE_BYTES * (cl))) +#define GRU_TFM(b, c) ((struct gru_tlb_fault_map *)((unsigned long)(b) + GRU_TFM_BASE + (c) * GRU_HANDLE_STRIDE)) +#define GRU_TGH(b, c) ((struct gru_tlb_global_handle *)((unsigned long)(b) + GRU_TGH_BASE + (c) * GRU_HANDLE_STRIDE)) +#define GRU_CBE(b, n) ((struct gru_control_block_extended *)((unsigned long)(b) + GRU_CBE_BASE + (n) * GRU_HANDLE_STRIDE)) +#define GRU_TFH(b, n) ((struct gru_tlb_fault_handle *)((unsigned long)(b) + GRU_TFH_BASE + (n) * GRU_HANDLE_STRIDE)) +#define GRU_CCH(b, n) ((struct gru_context_configuration_handle *)((unsigned long)(b) + GRU_CCH_BASE + (n) * GRU_HANDLE_STRIDE)) +#define GRU_GSH(b) ((struct gru_global_status_handle *)((unsigned long)(b) + GRU_GSH_BASE)) + +/* Test if an offset is a valid kernel handle address. 
Ex: TYPE_IS(CBE, chiplet_offset) */ +#define TYPE_IS(hid, h) ((h) >= GRU_##hid##_BASE && (h) < GRU_##hid##_BASE + GRU_NUM_##hid * GRU_HANDLE_STRIDE \ + && (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) + +/* Test a GRU physical address to determine the type of address range (does NOT validate holes) */ +#define IS_MCS_PADDR(h) (((h) & (GRU_SIZE - 1)) >= GRU_MCS_BASE) +#define IS_CBR_PADDR(h) (((h) & (GRU_SIZE - 1)) < GRU_MCS_BASE && (((h) & (GRU_GSEG_STRIDE - 1)) < GRU_DS_BASE)) +#define IS_DSR_PADDR(h) (((h) & (GRU_SIZE - 1)) < GRU_MCS_BASE && (((h) & (GRU_GSEG_STRIDE - 1)) >= GRU_DS_BASE)) + +/* Convert an arbitrary handle address to the beginning of the GRU segment */ +#ifndef __PLUGIN__ +#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) +#else +/* Emulator hack */ +extern void *gmu_grubase(void *h); +#define GRUBASE(h) gmu_grubase(h) +#endif + +/* Convert a GRU physical address to the chiplet offset */ +#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) + +/* Convert a GSEG CB address to the relative CB number within the user gseg context */ +#define UCBNUM(cb) ((((unsigned long)(cb) - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) / GRU_HANDLE_STRIDE) + +/* Convert a TFH address to the relative TFH number within the GRU*/ +#define TFHNUM(tfh) ((((unsigned long)(tfh) - GRU_TFH_BASE) % GRU_SIZE) / GRU_HANDLE_STRIDE) + +/* Convert a CCH address to the relative context number within the GRU*/ +#define CCHNUM(cch) ((((unsigned long)(cch) - GRU_CCH_BASE) % GRU_SIZE) / GRU_HANDLE_STRIDE) + +/* Convert a CBE address to the relative context number within the GRU*/ +#define CBENUM(cbe) ((((unsigned long)(cbe) - GRU_CBE_BASE) % GRU_SIZE) / GRU_HANDLE_STRIDE) + +/* Convert a TFM address to the relative context number within the GRU*/ +#define TFMNUM(tfm) ((((unsigned long)(tfm) - GRU_TFM_BASE) % GRU_SIZE) / GRU_HANDLE_STRIDE) + +/* byte offset to a specific GRU chiplet. 
(n=nasid, bn=base_nasid for first node, c=chiplet (0 or 1)*/ +#define GRUCHIPOFFSET(n, bn, c) (GRU_SIZE * ((n) - (bn) + (c))) + +#ifndef BITS_TO_LONGS +#define BITS_TO_LONGS(bits) (((bits)+64-1)/64) +#endif + +/* + * GSH - GRU Status Handle + * + */ +struct gru_global_status_handle { + unsigned long bits[BITS_TO_LONGS(GRU_NUM_CBE) * 2]; + unsigned long fill[4]; +}; + +enum gru_gsh_status { + GSHSTATUS_INACTIVE, + GSHSTATUS_IDLE, + GSHSTATUS_ACTIVE, + GSHSTATUS_INTERRUPTED +}; + +/* + * Global TLB Fault Map + * + */ +struct gru_tlb_fault_map { + unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill0[2]; + unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill1[2]; +}; + +/* + * TGH - TLB Global Handle + * + */ +struct gru_tlb_global_handle { + unsigned int cmd:1; /* DW 0 */ + unsigned int delresp:1; + unsigned int opc:1; + unsigned int fill1:5; + + unsigned int fill2:8; + + unsigned int status:2; + unsigned long fill3:2; + unsigned int state:3; + unsigned long fill4:1; + + unsigned int cause:3; + unsigned long fill5:37; + + unsigned long vaddr:64; /* DW 1 */ + + unsigned int asid:24; /* DW 2 */ + unsigned int fill6:8; + + unsigned int pagesize:5; + unsigned int fill7:11; + + unsigned int global:1; + unsigned int fill8:15; + + unsigned long vaddrmask:39; /* DW 3 */ + unsigned int fill9:9; + unsigned int n:10; + unsigned int fill10:6; + + unsigned int ctxbitmap:16; /* DW4 */ + unsigned long fill11[3]; +}; + +enum gru_tgh_cmd { + TGHCMD_START +}; + +enum gru_tgh_opc { + TGHOP_TLBNOP, + TGHOP_TLBINV +}; + +enum gru_tgh_status { + TGHSTATUS_IDLE, + TGHSTATUS_EXCEPTION, + TGHSTATUS_ACTIVE +}; + +enum gru_tgh_state { + TGHSTATE_IDLE, + TGHSTATE_PE_INVAL, + TGHSTATE_INTERRUPT_INVAL, + TGHSTATE_WAITDONE, + TGHSTATE_RESTART_CTX, +}; + +/* + * TFH - TLB Fault Handle + * + */ +struct gru_tlb_fault_handle { + unsigned int cmd:1; /* DW 0 - low 32*/ + unsigned int delresp:1; + unsigned int fill0:2; + unsigned int opc:3; + unsigned int
fill1:9; + + unsigned int status:2; + unsigned int fill2:1; + unsigned int color:1; + unsigned int state:3; + unsigned int fill3:1; + + unsigned int cause:7; /* DW 0 - high 32 */ + unsigned int fill4:1; + + unsigned int indexway:12; + unsigned int fill5:4; + + unsigned int ctxnum:4; + unsigned int fill6:12; + + unsigned long missvaddr:64; /* DW 1 */ + + unsigned int missasid:24; /* DW 2 */ + unsigned int fill7:8; + unsigned int fillasid:24; + unsigned int dirty:1; + unsigned int gaa:2; + unsigned long fill8:5; + + unsigned long pfn:41; /* DW 3 */ + unsigned int fill9:7; + unsigned int pagesize:5; + unsigned int fill10:11; + + unsigned long fillvaddr:64; /* DW 4 */ + + unsigned long fill11[3]; +}; + +enum gru_tfh_opc { + TFHOP_NOOP, + TFHOP_RESTART, + TFHOP_WRITE_ONLY, + TFHOP_WRITE_RESTART, + TFHOP_EXCEPTION, + TFHOP_USER_POLLING_MODE = 7, +}; + +enum tfh_status { + TFHSTATUS_IDLE, + TFHSTATUS_EXCEPTION, + TFHSTATUS_ACTIVE, +}; + +enum tfh_state { + TFHSTATE_INACTIVE, + TFHSTATE_IDLE, + TFHSTATE_MISS_UPM, + TFHSTATE_MISS_FMM, + TFHSTATE_HW_ERR, + TFHSTATE_WRITE_TLB, + TFHSTATE_RESTART_CBR, +}; + +/* TFH cause bits */ +enum tfh_cause { + TFHCAUSE_NONE, + TFHCAUSE_TLB_MISS, + TFHCAUSE_TLB_MOD, + TFHCAUSE_HW_ERROR_RR, + TFHCAUSE_HW_ERROR_MAIN_ARRAY, + TFHCAUSE_HW_ERROR_VALID, + TFHCAUSE_HW_ERROR_PAGESIZE, + TFHCAUSE_INSTRUCTION_EXCEPTION, + TFHCAUSE_UNCORRECTIBLE_ERROR, +}; + +/* GAA values */ +#define GAA_RAM 0x0 +#define GAA_NCRAM 0x2 +#define GAA_MMIO 0x1 +#define GAA_REGISTER 0x3 + +/* GRU paddr shift for pfn. 
(NOTE: shift is NOT by actual pagesize) */ +#define GRU_PADDR_SHIFT 12 + +/* + * Context Configuration handle + * + */ +struct gru_context_configuration_handle { + unsigned int cmd:1; /* DW0 */ + unsigned int delresp:1; + unsigned int opc:3; + unsigned int unmap_enable:1; + unsigned int req_slice_set_enable:1; + unsigned int req_slice:2; + unsigned int cb_int_enable:1; + unsigned int tlb_int_enable:1; + unsigned int tfm_fault_bit_enable:1; + unsigned int tlb_int_select:4; + + unsigned int status:2; + unsigned int state:2; + unsigned int reserved2:4; + + unsigned int cause:4; + unsigned int tfm_done_bit_enable:1; + unsigned int unused:3; + + unsigned int dsr_allocation_map; + + unsigned long cbr_allocation_map; /* DW1 */ + + unsigned int asid[8]; /* DW 2 - 5 */ + unsigned short sizeavail[8]; /* DW 6 - 7 */ +} __attribute__ ((packed)); + +enum gru_cch_opc { + CCHOP_START = 1, + CCHOP_ALLOCATE, + CCHOP_INTERRUPT, + CCHOP_DEALLOCATE, + CCHOP_INTERRUPT_SYNC, +}; + +enum gru_cch_status { + CCHSTATUS_IDLE, + CCHSTATUS_EXCEPTION, + CCHSTATUS_ACTIVE, +}; + +enum gru_cch_state { + CCHSTATE_INACTIVE, + CCHSTATE_MAPPED, + CCHSTATE_ACTIVE, + CCHSTATE_INTERRUPTED, +}; + +/* CCH Exception cause */ +enum gru_cch_cause { + CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1, + CCHCAUSE_ILLEGAL_OPCODE = 2, + CCHCAUSE_INVALID_START_REQUEST = 3, + CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4, + CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5, + CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6, + CCHCAUSE_CCH_BUSY = 7, + CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8, + CCHCAUSE_BAD_TFM_CONFIG = 9, + CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10, + CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11, + CCHCAUSE_CBR_DEALLOCATION_ERROR = 12, +}; +/* + * CBE - Control Block Extended + * + */ +struct gru_control_block_extended { + unsigned int reserved0:1; /* DW 0 - low */ + unsigned int imacpy:3; + unsigned int reserved1:4; + unsigned int xtypecpy:3; + unsigned int iaa0cpy:2; + unsigned int iaa1cpy:2; + unsigned int reserved2:1; + unsigned int 
opccpy:8; + unsigned int exopccpy:8; + + unsigned int idef2cpy:22; /* DW 0 - high */ + unsigned int reserved3:10; + + unsigned int idef4cpy:22; /* DW 1 */ + unsigned int reserved4:10; + unsigned int idef4upd:22; + unsigned int reserved5:10; + + unsigned long idef1upd:64; /* DW 2 */ + + unsigned long idef5cpy:64; /* DW 3 */ + + unsigned long idef6cpy:64; /* DW 4 */ + + unsigned long idef3upd:64; /* DW 5 */ + + unsigned long idef5upd:64; /* DW 6 */ + + unsigned int idef2upd:22; /* DW 7 */ + unsigned int reserved6:10; + + unsigned int ecause:20; + unsigned int cbrstate:4; + unsigned int cbrexecstatus:8; +}; + +enum gru_cbr_state { + CBRSTATE_INACTIVE, + CBRSTATE_IDLE, + CBRSTATE_PE_CHECK, + CBRSTATE_QUEUED, + CBRSTATE_WAIT_RESPONSE, + CBRSTATE_INTERRUPTED, + CBRSTATE_INTERRUPTED_MISS_FMM, + CBRSTATE_BUSY_INTERRUPT_MISS_FMM, + CBRSTATE_INTERRUPTED_MISS_UPM, + CBRSTATE_BUSY_INTERRUPTED_MISS_UPM, + CBRSTATE_REQUEST_ISSUE, + CBRSTATE_BUSY_INTERRUPT, +}; + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLBHW_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) + +/* CBE ecause bits - defined in gru_instructions.h */ + +/* + * Convert a processor pagesize into the strange encoded pagesize used by the GRU. + * Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT) + * pagesize log pagesize grupagesize + * 4k 12 0 + * 8k 13 1 + * 16k 14 2 + * 64k 16 3 + * 256k 18 4 + * ... + */ +#define GRU_PAGESIZE(sh) (((sh) <= 14) ? 
(sh) - 12 : ((sh) >> 1) - 5) +#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) + +/* minimum TLB purge count to ensure a full purge */ +#define GRUMAXINVAL 1024UL + +/* convert the weird GRU encoded pagesize to a pageshift or pagesize */ +#define GRUPAGESHIFT(e) (((e) < 2) ? (12UL + (e)) : (14UL + 2UL * ((e) - 2))) +#define GRUPAGESIZE(e) (1UL << GRUPAGESHIFT(e)) + +/*----------------------------------------------------------------------------------------- + * + * Handle operations + */ + +#define cch_to_gsh(c) GRU_GSH(GRUBASE(c)) +#define cch_to_tfh(c, i) GRU_TFH(GRUBASE(c), (i)) +#define cch_to_cbe(c, i) GRU_CBE(GRUBASE(c), (i)) +#define cbe_to_tfh(c) GRU_TFH(GRUBASE(c), CBENUM(c)) +#define cbe_to_cch(c) GRU_CCH(GRUBASE(c), CBENUM(c)) +#define tfh_to_cbe(c) GRU_CBE(GRUBASE(c), TFHNUM(c)) + +#ifdef __KERNEL__ +#include "gru_instructions.h" + +/* Extract the status field from a kernel handle */ +#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long*)(h)) >> 16) & 3) + +static inline void start_instruction(void *h) +{ + unsigned long *w0 = h; + + wmb(); /* setting CMD bit must be last */ + *w0 = *w0 | 1; + gru_flush_cache(h); +} + +static inline int wait_instruction_complete(void *h) +{ + int status; + + do { + gru_emulator_wait_hook(h, 1); /* No code generated unless -D EMUSUPPORT */ + cpu_relax(); + barrier(); + status = GET_MSEG_HANDLE_STATUS(h); + } while (status == CCHSTATUS_ACTIVE); + return status; +} + +static inline int cch_allocate(struct gru_context_configuration_handle *cch, + int asidval, unsigned long cbrmap, + unsigned long dsrmap) +{ + int i; + +#if defined(__ia64__) + for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */ + cch->asid[i] = (asidval++); + if (i == RGN_HPAGE) + cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift); +#ifdef EMU + else if (fake_tb_pages) + cch->sizeavail[i] = GRU_SIZEAVAIL(40); +#endif + else + cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT); + } +#else + for (i = 0; i < 8; i++) { + cch->asid[i] = asidval++; + 
cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT); /* ZZZ hugepages??? */ + } +#endif + + cch->dsr_allocation_map = dsrmap; + cch->cbr_allocation_map = cbrmap; + cch->opc = CCHOP_ALLOCATE; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_start(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_START; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_interrupt(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_INTERRUPT; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_deallocate(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_DEALLOCATE; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int cch_interrupt_sync(struct gru_context_configuration_handle + *cch) +{ + cch->opc = CCHOP_INTERRUPT_SYNC; + start_instruction(cch); + return wait_instruction_complete(cch); +} + +static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh, + unsigned long vaddr, unsigned long vaddrmask, + int asid, int pagesize, int global, int n, + unsigned short ctxbitmap) +{ + tgh->vaddr = vaddr; + tgh->asid = asid; + tgh->pagesize = pagesize; + tgh->n = n; + tgh->global = global; + tgh->vaddrmask = vaddrmask; + tgh->ctxbitmap = ctxbitmap; + tgh->opc = TGHOP_TLBINV; + start_instruction(tgh); + return wait_instruction_complete(tgh); +} + +static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long pfn, unsigned long vaddr, + int asid, int dirty, int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = pfn; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_ONLY; + start_instruction(tfh); +} + +static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = 
vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_RESTART; + start_instruction(tfh); +} + +static inline void tfh_restart(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_RESTART; + start_instruction(tfh); +} + +static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_USER_POLLING_MODE; + start_instruction(tfh); +} + +static inline void tfh_exception(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_EXCEPTION; + start_instruction(tfh); +} +#endif /* __KERNEL__ */ + +#endif /* _ASM_IA64_SN_GRUHANDLES_H */ Index: linux/drivers/gru/grukservices.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/grukservices.c 2008-02-15 13:56:45.652296396 -0600 @@ -0,0 +1,129 @@ +/* + * SN Platform GRU Driver + * + * KERNEL SERVICES THAT USE THE GRU + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2007-2008 Silicon Graphics, Inc. All Rights Reserved. 
+ */ + +#ifdef EMU +#include "preemu.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" +#include "gru_instructions.h" +#ifdef __ia64__ +#include +#include +#endif +#ifdef EMU +#include "emu.h" +#endif + +#ifdef EMU +#define PADDR(v) (emu_vtop((unsigned long)v)) +#elif defined(__ia64__) +#define PADDR(v) ((void *)__pa(ia64_imva(v))) +#else +#define PADDR(v) ((void *)__pa(v)) +#endif + +#define MAGIC 0x1234567887654321UL + +static __cacheline_aligned unsigned long word0; +static __cacheline_aligned unsigned long word1; + +static inline int gruwait(gru_control_block_t *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + while (cbs->istatus >= CBS_ACTIVE) { + gru_emulator_wait_hook(cb, 1); /* No code unless -DEMUSUPPORT */ + cpu_relax(); + barrier(); + } + return cbs->istatus; +} + +static int quicktest(struct gru_state *gru) +{ + void *cb; + + cb = GRU_GSEG(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); + word0 = MAGIC; + + gru_vload(cb, (void *)PADDR(&word0), IAA_RAM, 0, XTYPE_DW, 1, 1, + HINT_CB_UNMAPPED | HINT_CB_DELAY); + if (gruwait(cb) != CBS_IDLE) + BUG(); + + gru_vstore(cb, (void *)PADDR(&word1), IAA_RAM, 0, XTYPE_DW, 1, 1, + HINT_CB_UNMAPPED | HINT_CB_DELAY); + if (gruwait(cb) != CBS_IDLE) + BUG(); + + if (word0 != word1 || word0 != MAGIC) { + printk + ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n", + gru->gs_gid, word1, MAGIC); + BUG(); /* ZZZ should not be fatal */ + } + + return 0; +} + +int gru_kservices_init(struct gru_state *gru) +{ + struct gru_context_configuration_handle *cch; + unsigned long cbr_map, dsr_map; + int err; + + cbr_map = + reserve_gru_cb_resources(gru, + GRU_CB_COUNT_TO_AU(GRU_NUM_KERNEL_CBR), + NULL); + dsr_map = + reserve_gru_ds_resources(gru, + GRU_DS_BYTES_TO_AU + (GRU_NUM_KERNEL_DSR_BYTES), NULL); + __set_bit(KERNEL_CTXNUM, &gru->gs_context_map); + gru->gs_active_contexts++; + cch = 
GRU_CCH(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); + + lock_handle(cch); + cch->tfm_fault_bit_enable = 0; + cch->tlb_int_enable = 0; + cch->tfm_done_bit_enable = 0; + cch->unmap_enable = 1; + err = cch_allocate(cch, 0, cbr_map, dsr_map); + if (err) { + gru_dbg(grudev, + "Unable to allocate kernel CCH: gru %d, err %d\n", + gru->gs_gid, err); + BUG(); + } + if (cch_start(cch)) { + gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n", + gru->gs_gid, err); + BUG(); + } + unlock_handle(cch); + + return quicktest(gru); +} Index: linux/drivers/gru/grulib.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/grulib.h 2008-02-15 13:56:46.440393908 -0600 @@ -0,0 +1,84 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005-2008 Silicon Graphics, Inc. All rights reserved. 
+ */ + +#ifndef _GRULIB_H_ +#define _GRULIB_H_ + +#define GRU_BASENAME "gru" +#define GRU_FULLNAME "/dev/gru" +#define GRU_IOCTL_NUM 'G' +#ifdef __ia64__ +#define GRU_GSEG_PAGESIZE (256 * 1024) +#define GRU_GSEG_PAGESIZE_SHIFT 18 +#else +#define GRU_GSEG_PAGESIZE (2 * 1024 * 1024UL) +#endif + +/* Set Number of Request Blocks */ +#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *) + +/* Register task as using the slice */ +#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *) + +/* Fetch exception detail */ +#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *) + +/* For user call_os handling - normally a TLB fault */ +#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *) + +/* For user unload context */ +#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *) + +/* For user TLB flushing (primarily for tests) */ +#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *) + +/* Get some config options (primarily for tests & emulator) */ +#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *) + +#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th)) +#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) + +/* + * Structure used to pass create context parameters to the driver + */ +struct gru_create_context_req { + unsigned int data_segment_bytes; + unsigned int control_blocks; + unsigned int maximum_thread_count; + unsigned int options; +}; + +/* + * Structure used to pass unload context parameters to the driver + */ +struct gru_unload_context_req { + unsigned long vaddr; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +struct gru_flush_tlb_req { + unsigned long gseg; + unsigned long vaddr; + size_t len; +}; + +/* + * GRU configuration info (temp - for testing) + */ +struct gru_config_info { + int cpus; + int blades; + int nodes; + int chiplets; + int fill[16]; +}; + +#endif /* _GRULIB_H_ */ Index: linux/drivers/gru/grumain.c
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/grumain.c 2008-02-19 09:30:53.000000000 -0600 @@ -0,0 +1,958 @@ +/* + * SN Platform GRU Driver + * + * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#ifdef EMU +#include "preemu.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" +#ifdef EMU +#include "emu.h" +#endif + +unsigned long options; + +static struct device_driver gru_driver = { + .name = "gru" +}; + +static struct device gru_device = { + .bus_id = {0}, + .driver = &gru_driver, +}; + +struct device *grudev = &gru_device; + +/* + * Select a gru fault map to be used by the current cpu. Note that + * multiple cpus may be using the same map. + * ZZZ should "shift" be used?? Depends on HT cpu numbering + * ZZZ should be inline but did not work on emulator + */ +int gru_cpu_fault_map_id(void) +{ + return blade_processor_id() % GRU_NUM_TFM; +} + + +/*--------- ASID Management ------------------------------------------- + * + * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID. + * Once MAX is reached, flush the TLB & start over. However, + * some asids may still be in use. There won't be many (percentage wise) still + * in use. Search active contexts & determine the value of the first + * asid in use ("x"s below). Set "limit" to this value. + * This defines a block of assignable asids. + * + * When "limit" is reached, search forward from limit+1 and determine the + * next block of assignable asids. + * + * Repeat until MAX_ASID is reached, then start over again. 
+ * + * Each time MAX_ASID is reached, increment the asid generation. Since + * the search for in-use asids only checks contexts with GRUs currently + * assigned, asids in some contexts will be missed. Prior to loading + * a context, the asid generation of the GTS asid is rechecked. If it + * doesn't match the current generation, a new asid will be assigned. + * + * 0---------------x------------x---------------------x----| + * ^-next ^-limit ^-MAX_ASID + * + * All asid manipulation & context loading/unloading is protected by the + * gs_lock. + */ + +/* Hit the asid limit. Start over */ +static int gru_wrap_asid(struct gru_state *gru) +{ + gru_dbg(grudev, "gru %p\n", gru); + STAT(asid_wrap); + gru->gs_asid_gen++; + gru_flush_all_tlb(gru); + return MIN_ASID; +} + +/* Find the next chunk of unused asids */ +static int gru_reset_asid_limit(struct gru_state *gru, int asid) +{ + int i, gid, inuse_asid, limit; + + gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); + STAT(asid_next); + limit = MAX_ASID; + if (asid >= limit) + asid = gru_wrap_asid(gru); + gid = gru->gs_gid; +again: + for (i = 0; i < GRU_NUM_CCH; i++) { + if (!gru->gs_gts[i]) + continue; + inuse_asid = gru->gs_gts[i]->ts_ms->ms_asids[gid].mt_asid; + gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n", + gru, inuse_asid, i, gru->gs_gts[i]); + if (inuse_asid == asid) { + asid += ASID_INC; + if (asid >= limit) { + /* + * empty range: reset the range limit and + * start over + */ + limit = MAX_ASID; + if (asid >= MAX_ASID) + asid = gru_wrap_asid(gru); + goto again; + } + } + + if ((inuse_asid > asid) && (inuse_asid < limit)) + limit = inuse_asid; + } + gru->gs_asid_limit = limit; + gru->gs_asid = asid; + gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid, + limit); + return asid; +} + +/* Assign a new ASID to a thread context. 
*/ +static int gru_assign_asid(struct gru_state *gru) +{ + int asid; + + spin_lock(&gru->gs_asid_lock); + gru->gs_asid += ASID_INC; + asid = gru->gs_asid; + if (asid >= gru->gs_asid_limit) + asid = gru_reset_asid_limit(gru, asid); + spin_unlock(&gru->gs_asid_lock); + + gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); + return asid; +} + +/* + * Clear n bits in a word. Return a word indicating the bits that were cleared. + * Optionally, build an array of chars that contain the bit numbers allocated. + */ +static unsigned long reserve_resources(unsigned long *p, int n, int mmax, + char *idx) +{ + unsigned long bits = 0; + int i; + + do { + i = find_first_bit(p, mmax); + if (i == mmax) + BUG(); + __clear_bit(i, p); + __set_bit(i, &bits); + if (idx) + *idx++ = i; + } while (--n); + return bits; +} + +unsigned long reserve_gru_cb_resources(struct gru_state *gru, int cbr_au_count, + char *cbmap) +{ + return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, + cbmap); +} + +unsigned long reserve_gru_ds_resources(struct gru_state *gru, int dsr_au_count, + char *dsmap) +{ + return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, + dsmap); +} + +static void reserve_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts++; + gts->ts_cbr_map = + reserve_gru_cb_resources(gru, gts->ts_cbr_au_count, + gts->ts_cbr_idx); + gts->ts_dsr_map = + reserve_gru_ds_resources(gru, gts->ts_dsr_au_count, NULL); +} + +static void free_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts--; + gru->gs_cbr_map |= gts->ts_cbr_map; + gru->gs_dsr_map |= gts->ts_dsr_map; +} + +/* + * Check if a GRU has sufficient free resources to satisfy an allocation + * request. Note: GRU locks may or may not be held when this is called. If + * not held, recheck after acquiring the appropriate locks. 
+ * + * Returns 1 if sufficient resources, 0 if not + */ +static int check_gru_resources(struct gru_state *gru, int cbr_au_count, + int dsr_au_count, int max_active_contexts) +{ + return (hweight64(gru->gs_cbr_map) >= cbr_au_count + && hweight64(gru->gs_dsr_map) >= dsr_au_count + && gru->gs_active_contexts < max_active_contexts); +} + +/* + * TLB management requires tracking all GRU chiplets that have loaded a GSEG + * context. + */ +static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms, + int ctxnum) +{ + struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid]; + unsigned short ctxbitmap = (1 << ctxnum); + int asid; + + spin_lock(&gms->ms_asid_lock); + asid = asids->mt_asid; + + if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) { + asid = gru_assign_asid(gru); + asids->mt_asid = asid; + asids->mt_asid_gen = gru->gs_asid_gen; + STAT(asid_new); + } else { + STAT(asid_reuse); + } + + BUG_ON(asids->mt_ctxbitmap & ctxbitmap); + asids->mt_ctxbitmap |= ctxbitmap; + if (!test_bit(gru->gs_gid, gms->ms_asidmap)) + __set_bit(gru->gs_gid, gms->ms_asidmap); + spin_unlock(&gms->ms_asid_lock); + + gru_dbg(grudev, + "gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n", + gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]); + return asid; +} + +static void gru_unload_mm_tracker(struct gru_state *gru, + struct gru_mm_struct *gms, int ctxnum) +{ + struct gru_mm_tracker *asids; + unsigned short ctxbitmap; + + asids = &gms->ms_asids[gru->gs_gid]; + ctxbitmap = (1 << ctxnum); + spin_lock(&gms->ms_asid_lock); + BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); + asids->mt_ctxbitmap ^= ctxbitmap; + gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n", + gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]); + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Decrement the reference count on a GTD structure. Free the structure + * if the reference count goes to zero.
+ */ +void gtd_drop(struct gru_thread_data *gtd) +{ + if (gtd && atomic_dec_return(&gtd->td_refcnt) == 0) { + kfree(gtd); + STAT(gtd_free); + } +} + +/* + * Decrement the reference count on a GTS structure. Free the structure + * if the reference count goes to zero. + */ +void gts_drop(struct gru_thread_state *gts) +{ + if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) { + gru_drop_mmu_notifier(gts->ts_ms); + kfree(gts); + STAT(gts_free); + } +} + +/* + * Locate the GTS structure for the current thread. + */ +static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data + *vdata, int tsid) +{ + struct gru_thread_state *gts; + + list_for_each_entry(gts, &vdata->vd_head, ts_next) + if (gts->ts_tsid == tsid) + return gts; + return NULL; +} + +/* + * Break a copy-on-write reference to a gru thread data struct. + */ +static int gru_break_cow(struct vm_area_struct *vma, + struct gru_thread_state *gts) +{ + struct gru_thread_data *gtd; + struct gru_vma_data *vdata = vma->vm_private_data; + + gtd = kmalloc(THREADDATABYTES(vdata), GFP_KERNEL); + if (!gtd) + return 0; + STAT(gtd_alloc); + STAT(break_cow); + memcpy(gtd, gts->ts_td, THREADDATABYTES(vdata)); + atomic_set(&gtd->td_refcnt, 1); + gtd_drop(gts->ts_td); + gts->ts_td = gtd; + gru_dbg(grudev, "alloc gts %p, new gtd %p\n", gts, gtd); + return 1; +} + +/* + * Allocate a thread data structure. + */ +static struct gru_thread_data *gru_alloc_gtd(struct gru_vma_data *vdata, + struct gru_thread_state *gts) +{ + struct gru_thread_data *gtd; + int bytes = THREADDATABYTES(vdata); + + gtd = kzalloc(bytes, GFP_KERNEL); + if (!gtd) + return NULL; + + STAT(gtd_alloc); + atomic_set(&gtd->td_refcnt, 1); + gtd->td_magic = TD_MAGIC; + gru_dbg(grudev, "alloc vdata %p, new gtd %p\n", vdata, gtd); + return gtd; +} + +/* + * Allocate a thread state structure.
+ */ +static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + struct gru_vma_data *vdata, + int tsid, + struct gru_thread_data *gtd) +{ + struct gru_thread_state *gts; + + gts = kzalloc(sizeof(*gts), GFP_KERNEL); + if (!gts) + return NULL; + + STAT(gts_alloc); + atomic_set(&gts->ts_refcnt, 1); + sema_init(&gts->ts_ctxsem, 1); + gts->ts_cbr_au_count = vdata->vd_cbr_au_count; + gts->ts_dsr_au_count = vdata->vd_dsr_au_count; + gts->ts_tsid = tsid; + gts->ts_user_options = vdata->vd_user_options; + gts->ts_ctxnum = NULLCTX; + gts->ts_mm = current->mm; + gts->ts_vma = vma; + gts->ts_tlb_int_select = -1; + gts->ts_ms = gru_register_mmu_notifier(); + if (!gts->ts_ms) + goto err; + + if (!gtd) + gtd = gru_alloc_gtd(vdata, gts); + if (!gtd) + goto err; + + gts->ts_td = gtd; + + gru_dbg(grudev, "alloc vdata %p, new gts %p, new gtd %p\n", vdata, gts, + gtd); + return gts; + +err: + gts_drop(gts); + return NULL; +} + +/* + * Allocate a vma private data structure. + */ +struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid, + void *gtd) +{ + struct gru_file_data *fdata; + struct gru_vma_data *vdata = NULL; + struct gru_thread_state *gts = NULL; + + vdata = kmalloc(sizeof(*vdata), GFP_KERNEL); + if (!vdata) + return NULL; + + INIT_LIST_HEAD(&vdata->vd_head); + spin_lock_init(&vdata->vd_lock); + fdata = vma->vm_file->private_data; + vdata->vd_cbr_au_count = fdata->fd_cbr_au_count; + vdata->vd_dsr_au_count = fdata->fd_dsr_au_count; + vdata->vd_thread_slices = fdata->fd_thread_slices; + vdata->vd_user_options = fdata->fd_user_options; + + gts = gru_alloc_gts(vma, vdata, TSID(0), gtd); + if (!gts) { + kfree(vdata); + return NULL; + } + gru_dbg(grudev, "alloc vdata %p, gts %p, gtd %p\n", vdata, gts, gtd); + list_add(&gts->ts_next, &vdata->vd_head); + + mb(); /* Make sure head is visible */ + if (cmpxchg(&vma->vm_private_data, gtd, vdata) != gtd) { + if (!gtd) + gtd_drop(gts->ts_td); + gts_drop(gts); + kfree(vdata); + STAT(vdata_double_alloc); + }
else { + STAT(vdata_alloc); + } + return vma->vm_private_data; +} + +/* + * Find the thread state structure for the current thread. If none + * exists, allocate one. + * + * Note that the vm_private structure in the vma _may_ be a pointer to + * a COW thread data structure. If so, create a vma structure, etc... + */ +struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata; + struct gru_thread_state *gts, *ngts; + + vdata = vma->vm_private_data; + if (IS_THREAD_DATA(vdata)) { + vdata = gru_alloc_vma_data(vma, tsid, vdata); + if (!vdata) + return NULL; + } + + spin_lock(&vdata->vd_lock); + gts = gru_find_current_gts_nolock(vdata, tsid); + if (gts) { + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p, gtd %p\n", vma, gts, + gts->ts_td); + return gts; + } + spin_unlock(&vdata->vd_lock); + + gts = gru_alloc_gts(vma, vdata, tsid, NULL); + if (!gts) + return NULL; + + spin_lock(&vdata->vd_lock); + ngts = gru_find_current_gts_nolock(vdata, tsid); + if (ngts) { + gts_drop(gts); + gts = ngts; + STAT(gts_double_allocate); + } else { + list_add(>s->ts_next, &vdata->vd_head); + } + spin_unlock(&vdata->vd_lock); + + gru_dbg(grudev, "vma %p, new gts %p, gtd %p\n", vma, gts, gts->ts_td); + return gts; +} + +/* + * Free the GRU context assigned to the thread state. + */ +static void gru_free_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + gru = gts->ts_gru; + gru_dbg(grudev, "gts %p, gru %p\n", gts, gru); + + spin_lock(&gru->gs_lock); + gru->gs_gts[gts->ts_ctxnum] = NULL; + free_gru_resources(gru, gts); + BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0); + __clear_bit(gts->ts_ctxnum, &gru->gs_context_map); + gts->ts_ctxnum = NULLCTX; + gts->ts_gru = NULL; + spin_unlock(&gru->gs_lock); + + gts_drop(gts); + STAT(free_context); +} + +/* + * Prefetching cachelines help hardware performance. 
+ */
+static void prefetch_data(void *p, int num, int stride)
+{
+	/* Issue a write-prefetch for num addresses, stride bytes apart. */
+	while (num-- > 0) {
+		prefetchw(p);
+		p += stride;
+	}
+}
+
+/* Copy one handle (GRU_HANDLE_BYTES) from s to d; returns the byte count. */
+static inline long gru_copy_handle(void *d, void *s)
+{
+	memcpy(d, s, GRU_HANDLE_BYTES);
+	return GRU_HANDLE_BYTES;
+}
+
+/*
+ * Copy a saved context image from memory (save) into the GRU.
+ *
+ * The image layout is: for every CBR in cbrmap, the CB handle followed by
+ * its CBE handle; then the data segment (hweight64(dsrmap) allocation
+ * units). This must stay in step with gru_unload_context_data().
+ */
+/* rewrite in assembly & use lots of prefetch */
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+				  unsigned long cbrmap, unsigned long dsrmap)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
+		      GRU_CACHE_LINE_BYTES);
+
+	/* First pass: prefetch every CB/CBE handle that will be written. */
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
+		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
+			      GRU_CACHE_LINE_BYTES);
+		cb += GRU_HANDLE_STRIDE;
+	}
+
+	/* Second pass: copy the handles, advancing through the image. */
+	cb = gseg + GRU_CB_BASE;
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		save += gru_copy_handle(cb, save);
+		save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
+		cb += GRU_HANDLE_STRIDE;
+	}
+
+	memcpy(gseg + GRU_DS_BASE, save, length);
+}
+
+/*
+ * Copy a context from the GRU into memory (save), using the same layout
+ * that gru_load_context_data() reads: CB/CBE handle pairs, then the data
+ * segment.
+ */
+static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
+				    unsigned long cbrmap, unsigned long dsrmap)
+{
+	void *gseg, *cb, *cbe;
+	unsigned long length;
+	int i, scr;
+
+	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+
+	cb = gseg + GRU_CB_BASE;
+	cbe = grubase + GRU_CBE_BASE;
+	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
+		save += gru_copy_handle(save, cb);
+		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
+		cb += GRU_HANDLE_STRIDE;
+	}
+	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+	memcpy(save, gseg + GRU_DS_BASE, length);
+}
+
+/*
+ * Unload the context of a thread state from its GRU.
+ *
+ * Zaps the user mapping of the GSEG, interrupts and deallocates the CCH
+ * under the handle lock and then frees the GRU context. The context data
+ * is copied to gtd->td_gdata only when savestate is non-zero.
+ */
+void gru_unload_context(struct gru_thread_state *gts, int savestate)
+{
+	struct gru_thread_data *gtd = gts->ts_td;
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int ctxnum = gts->ts_ctxnum;
+
+	zap_page_range(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE, NULL);
+	cch = GRU_CCH(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_handle(cch);
+	if (cch_interrupt_sync(cch))
+		BUG();
+	gru_dbg(grudev, "gts %p, gtd %p\n", gts, gtd);
+
+	gru_unload_mm_tracker(gru, gts->ts_ms, gts->ts_ctxnum);
+	if (savestate)
+		gru_unload_context_data(gtd->td_gdata, gru->gs_gru_base_vaddr,
+					ctxnum, gts->ts_cbr_map,
+					gts->ts_dsr_map);
+
+	if (cch_deallocate(cch))
+		BUG();
+	gts->ts_force_unload = 0;	/* ts_force_unload locked by CCH lock */
+	unlock_handle(cch);
+
+	gru_free_gru_context(gts);
+	STAT(unload_context);
+}
+
+/*
+ * Load a GRU context by copying it from the thread data structure in memory
+ * to the GRU.
+ *
+ * The CCH is configured, allocated, loaded and started while the handle
+ * lock is held; a failure of cch_allocate() or cch_start() is fatal (BUG).
+ */
+static void gru_load_context(struct gru_thread_state *gts)
+{
+	struct gru_thread_data *gtd = gts->ts_td;
+	struct gru_state *gru = gts->ts_gru;
+	struct gru_context_configuration_handle *cch;
+	int err, asid, ctxnum = gts->ts_ctxnum;
+
+	gru_dbg(grudev, "gts %p, gtd %p\n", gts, gtd);
+	cch = GRU_CCH(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_handle(cch);
+	asid = gru_load_mm_tracker(gru, gts->ts_ms, gts->ts_ctxnum);
+	/* Fault bit for both FMM modes; TLB interrupt only for FMM_INTR. */
+	cch->tfm_fault_bit_enable =
+	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
+	if (cch->tlb_int_enable) {
+		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+		cch->tlb_int_select = gts->ts_tlb_int_select;
+	}
+	cch->tfm_done_bit_enable = 0;
+	err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
+	if (err) {
+		gru_dbg(grudev,
+			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
+			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
+		BUG();
+	}
+
+	gru_load_context_data(gtd->td_gdata, gru->gs_gru_base_vaddr, ctxnum,
+			      gts->ts_cbr_map, gts->ts_dsr_map);
+
+	if (cch_start(cch))
+		BUG();
+	unlock_handle(cch);
+
+	STAT(load_context);
+}
+
+/*
+ * Update fields in an active CCH:
+ *	- retarget interrupts on
local blade
+ *	- force a delayed context unload by clearing the CCH asids. This
+ *	  forces TLB misses for new GRU instructions. The context is unloaded
+ *	  when the next TLB miss occurs.
+ *
+ * int_select >= 0 selects the retarget case, a negative value the delayed
+ * unload case. Returns 1 if the CCH was active, still owned by gts and was
+ * updated; 0 otherwise.
+ */
+static int gru_update_cch(struct gru_thread_state *gts, int int_select)
+{
+	struct gru_context_configuration_handle *cch;
+	struct gru_state *gru = gts->ts_gru;
+	int i, ctxnum = gts->ts_ctxnum, ret = 0;
+
+	cch = GRU_CCH(gru->gs_gru_base_vaddr, ctxnum);
+
+	lock_handle(cch);
+	if (cch->state == CCHSTATE_ACTIVE) {
+		/* The context slot may have been given to another gts. */
+		if (gru->gs_gts[gts->ts_ctxnum] != gts)
+			goto exit;
+		/* The CCH is interrupted before being modified, then restarted. */
+		if (cch_interrupt(cch))
+			BUG();
+		if (int_select >= 0) {
+			gts->ts_tlb_int_select = int_select;
+			cch->tlb_int_select = int_select;
+		} else {
+			for (i = 0; i < 8; i++)
+				cch->asid[i] = 0;
+			cch->tfm_fault_bit_enable = 0;
+			cch->tlb_int_enable = 0;
+			gts->ts_force_unload = 1;
+		}
+		if (cch_start(cch))
+			BUG();
+		ret = 1;
+	}
+exit:
+	unlock_handle(cch);
+	return ret;
+}
+
+/*
+ * Update CCH tlb interrupt select. Required when all the following is true:
+ * 	- task's GRU context is loaded into a GRU
+ * 	- task is using interrupt notification for TLB faults
+ * 	- task has migrated to a different cpu on the same blade where
+ * 	  it was previously running.
+ *
+ * Returns 0 when no interrupt target is set (ts_tlb_int_select < 0) or it
+ * already matches this cpu; otherwise the result of gru_update_cch().
+ */
+static int gru_retarget_intr(struct gru_thread_state *gts)
+{
+	if (gts->ts_tlb_int_select < 0
+	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
+		return 0;
+
+	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
+		gru_cpu_fault_map_id());
+	return gru_update_cch(gts, gru_cpu_fault_map_id());
+}
+
+/*
+ * Try to unload the GRU context. Task has migrated to a different blade.
+ * Called on migration when locks could not be obtained to immediately unload
+ * the context.
+ *
+ * Returns the result of gru_update_cch() for the delayed unload case.
+ */
+static int gru_delayed_unload_context(struct gru_thread_state *gts)
+{
+	gru_dbg(grudev, "migration unload context gts %p\n", gts);
+	return gru_update_cch(gts, -1);
+}
+
+/*
+ * All GRU contexts on the local blade are busy. Steal one from another process.
+ * This is a hack until a _real_ resource scheduler is written....
+ *
+ * The scan starts just after the blade's last victim (bs_lru_gru and
+ * bs_lru_ctxnum) and stops when it finds a GRU with enough free resources,
+ * a context whose semaphore can be taken without blocking, or when it
+ * arrives back at the starting point.
+ */
+#define next_ctxnum(n)	((n) <  GRU_NUM_CCH - 2 ? (n) + 1 : 0)
+#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
+				 ((g)+1) : &(b)->bs_grus[0])
+
+static void gru_steal_context(struct gru_thread_state *gts)
+{
+	struct gru_blade_state *blade;
+	struct gru_state *gru = NULL;
+	struct gru_thread_state *ngts = NULL;
+	int ctxnum, cbr, dsr, ok = 0;
+
+	cbr = gts->ts_cbr_au_count;
+	dsr = gts->ts_dsr_au_count;
+
+	preempt_disable();
+	blade = gru_base[numa_blade_id()];
+	spin_lock(&blade->bs_lock);
+
+	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
+	gru = blade->bs_lru_gru;
+	if (ctxnum == 0)
+		gru = next_gru(blade, gru);
+	while (1) {
+		spin_lock(&gru->gs_lock);
+		for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+			/* Back at the starting point: nothing to steal. */
+			if (gru == blade->bs_lru_gru
+			    && ctxnum == blade->bs_lru_ctxnum)
+				break;
+			ok = check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH);
+			if (ok)
+				break;
+			/* Candidate victim; its semaphore stays held on success. */
+			ngts = gru->gs_gts[ctxnum];
+			if (ngts && down_trylock(&ngts->ts_ctxsem) == 0)
+				break;
+			ngts = NULL;
+		}
+		spin_unlock(&gru->gs_lock);
+		if (ok || ngts
+		    || (gru == blade->bs_lru_gru
+			&& ctxnum == blade->bs_lru_ctxnum))
+			break;
+		ctxnum = 0;
+		gru = next_gru(blade, gru);
+	}
+	blade->bs_lru_gru = gru;
+	blade->bs_lru_ctxnum = ctxnum;
+	spin_unlock(&blade->bs_lock);
+	preempt_enable();
+
+	if (ngts) {
+		STAT(steal_context);
+		ngts->ts_steal_jiffies = jiffies;
+		gru_unload_context(ngts, 1);
+		up(&ngts->ts_ctxsem);
+	} else {
+		STAT(steal_context_failed);
+	}
+	/* NOTE(review): this message is also printed when nothing was stolen. */
+	gru_dbg(grudev,
+		"stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
+		" avail cb %ld, ds %ld\n",
+		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
+		hweight64(gru->gs_dsr_map));
+}
+
+/*
+ * Scan the GRUs on the local blade & assign a GRU context & ASID.
+ */
+static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+{
+	struct gru_state *gru, *grux;
+	int i, max_active_contexts;
+
+	preempt_disable();
+
+again:
+	gru = NULL;
+	max_active_contexts = GRU_NUM_CCH;
+	/* Pick the GRU with enough resources and the fewest active contexts. */
+	for_each_gru_on_blade(grux, numa_blade_id(), i) {
+		if (check_gru_resources(grux, gts->ts_cbr_au_count,
+					gts->ts_dsr_au_count,
+					max_active_contexts)) {
+			gru = grux;
+			max_active_contexts = grux->gs_active_contexts;
+			if (max_active_contexts == 0)
+				break;
+		}
+	}
+
+	if (gru) {
+		spin_lock(&gru->gs_lock);
+		/* The scan ran unlocked; recheck now that gs_lock is held. */
+		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
+					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
+			spin_unlock(&gru->gs_lock);
+			goto again;
+		}
+		reserve_gru_resources(gru, gts);
+		gts->ts_gru = gru;
+		gts->ts_ctxnum =
+		    find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+		BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
+		/* The gs_gts[] slot holds a reference on the thread state. */
+		atomic_inc(&gts->ts_refcnt);
+		gru->gs_gts[gts->ts_ctxnum] = gts;
+		__set_bit(gts->ts_ctxnum, &gru->gs_context_map);
+		spin_unlock(&gru->gs_lock);
+
+		STAT(assign_context);
+		gru_dbg(grudev,
+			"gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
+			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
+			gts->ts_gru->gs_gid, gts->ts_ctxnum,
+			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
+	} else {
+		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
+		STAT(assign_context_failed);
+	}
+
+	preempt_enable();
+	return gru;
+}
+
+/*
+ * gru_nopage
+ *
+ * Map the user's GRU segment
+ *
+ * Finds (or creates) the thread state for the faulting address, makes sure
+ * a GRU context on the local blade is assigned and loaded, and maps the
+ * GSEG into the vma. Returns NOPFN_REFAULT on success and VM_FAULT_SIGBUS
+ * if no thread state could be obtained or breaking COW failed.
+ *
+ * Every exit taken after preempt_disable() must re-enable preemption.
+ */
+unsigned long gru_nopfn(struct vm_area_struct *vma, unsigned long address)
+{
+	struct gru_thread_state *gts;
+	unsigned long paddr;
+
+	gru_dbg(grudev, "vma %p, address 0x%lx (0x%lx)\n",
+		vma, address, GSEG_BASE(address));
+	STAT(nopfn);
+
+	gts = gru_find_thread_state(vma, TSID(address - vma->vm_start));
+	if (!gts)
+		return VM_FAULT_SIGBUS;
+
+again:
+	preempt_disable();
+	down(&gts->ts_ctxsem);
+	if (gts->ts_gru) {
+		if (gts->ts_gru->gs_blade_id != numa_blade_id()) {
+			STAT(migrated_nopfn_unload);
+			gru_unload_context(gts, 1);
+		} else {
+			if (gru_retarget_intr(gts))
+				STAT(migrated_nopfn_retarget);
+		}
+	}
+
+	if (!gts->ts_gru) {
+		/*
+		 * No context available: drop the semaphore, wait, possibly
+		 * steal a context and start over. This was written as a
+		 * while loop, but its body always ends in goto again.
+		 */
+		if (!gru_assign_gru_context(gts)) {
+			up(&gts->ts_ctxsem);
+			preempt_enable();
+			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
+			/* NOTE(review): comparison is not jiffies-wrap safe. */
+			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
+				gru_steal_context(gts);
+			goto again;
+		}
+		if (atomic_read(&gts->ts_td->td_refcnt) > 1)
+			if (!gru_break_cow(vma, gts)) {
+				up(&gts->ts_ctxsem);
+				/* was missing: balance preempt_disable() */
+				preempt_enable();
+				return VM_FAULT_SIGBUS;
+			}
+		gru_load_context(gts);
+		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
+		remap_pfn_range(vma, address & ~(GRU_GSEG_PAGESIZE - 1),
+				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
+				vma->vm_page_prot);
+	}
+
+	up(&gts->ts_ctxsem);
+	preempt_enable();
+
+	return NOPFN_REFAULT;
+}
+
+/*
+ * gru_migrate_task
+ *
+ * Task has migrated to a different blade or a different cpu on the same blade
+ *
+ * do_migrate_gts() handles one thread state and is called with gru->gs_lock
+ * held. It returns 1 only after it has dropped gs_lock and unloaded the
+ * context, in which case the caller must restart its scan.
+ */
+static int do_migrate_gts(struct gru_state *gru, struct gru_thread_state *gts,
+			  int locked, int pbid, int bid)
+{
+	int again = 0;
+
+	if (pbid == bid) {
+		if (gru_retarget_intr(gts))
+			STAT(migrated_retarget);
+	} else if (locked && down_trylock(&gts->ts_ctxsem) == 0) {
+		spin_unlock(&gru->gs_lock);
+		gru_unload_context(gts, 1);
+		up(&gts->ts_ctxsem);
+		STAT(migrated_unload);
+		again = 1;
+	} else if (gru_delayed_unload_context(gts)) {
+		STAT(migrated_unload_delay);
+	}
+	return again;
+}
+
+void gru_migrate_task(int pcpu, int cpu)
+{
+	struct gru_state *gru;
+	struct gru_thread_state *gts;
+	struct gru_blade_state *blade;
+	struct mm_struct *mm = current->mm;
+	int pbid = cpu_to_blade(pcpu), bid = cpu_to_blade(cpu);
+	int locked = 0, ctxnum, scr;
+
+	STAT(migrate_check);
+	blade = gru_base[bid];
+	if (!blade || !mm)
+		return;
+
+again:
+	/* mmap_sem is only tried, never waited for. */
+	if (!locked)
+		locked = down_read_trylock(&mm->mmap_sem);
+	for_each_gru_on_blade(gru, pbid, scr) {
+		spin_lock(&gru->gs_lock);
+		for_each_gts_on_gru(gts, gru, ctxnum)
+			if (gts->ts_tgid_owner == current->tgid && gts->ts_gru)
+				if (do_migrate_gts(gru, gts, locked, pbid, bid))
+					goto again;
+		spin_unlock(&gru->gs_lock);
+	}
+
+	if (locked)
+		up_read(&mm->mmap_sem);
+}
Index: linux/drivers/gru/grummuops.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/gru/grummuops.c	2008-02-19 09:30:53.000000000 -0600
@@ -0,0 +1,376 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *            MMUOPS callbacks  + TLB flushing
+ *
+ * This file handles mmuops callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005-2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ */
+
+#ifdef EMU
+#include "preemu.h"
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "gru.h"
+#include "grutables.h"
+#ifdef EMU
+#include "emu.h"
+#endif
+
+#define gru_random()	get_cycles()
+
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+/* Pick a random TGH index from the range used for remote (off-blade) cpus. */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+	int span = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+	int pick = gru_random() % span;
+
+	return pick + gru->gs_tgh_first_remote;
+}
+
+/* TGH index for a cpu on the GRU's own blade: local cpu number >> shift. */
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+	int local_cpu = blade_processor_id();
+
+	return local_cpu >> gru->gs_tgh_local_shift;
+}
+
+/*
+ * Select and lock a TGH for this cpu. Preemption stays disabled until the
+ * handle is released again with get_unlock_tgh_handle().
+ */
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+							 *gru)
+{
+	struct gru_tlb_global_handle *handle;
+	int index;
+
+	preempt_disable();
+	index = (numa_blade_id() == gru->gs_blade_id) ?
+		get_on_blade_tgh(gru) : get_off_blade_tgh(gru);
+	handle = get_tgh_by_index(gru, index);
+	lock_handle(handle);
+
+	return handle;
+}
+
+
+/* Unlock a TGH obtained from get_lock_tgh_handle() and re-enable preemption. */
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+	unlock_handle(tgh);
+	preempt_enable();
+}
+
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ * 	- GRU contexts are not loaded into a GRU unless a reference is made to
+ * 	  the data segment or control block (this is true, not an assumption).
+ * 	  If a DS/CB is referenced, the user will also issue instructions that
+ * 	  cause TLBmisses. It is not necessary to optimize for the case where
+ * 	  contexts are loaded but no instructions cause TLB misses. (I know
+ * 	  this will happen but I'm not optimizing for it).
+ * 	- GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ * 	  a few usec but in unusual cases, it could be longer. Avoid if
+ * 	  possible.
+ * 	- intrablade process migration between cpus is not frequent but is
+ * 	  common.
+ * 	- a GRU context is not typically migrated to a different GRU on the
+ * 	  blade because of intrablade migration
+ * 	- interblade migration is rare. Processes migrate their GRU context to
+ * 	  the new blade.
+ * 	- if interblade migration occurs, migration back to the original blade
+ * 	  is very very rare (i.e., no optimization for this case)
+ * 	- most GRU instructions operate on a subset of the user REGIONS. Code
+ * 	  & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmuops callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmuops "register" function. The mmuops interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ * 	- asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ * 	  loaded into the GRU.
+ * 	- asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ * 	  the above array
+ *	- ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ *	  in the GRU for the address space. This bitmap must be passed to the
+ *	  GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ * 	- scan the asidmap for GRUs where the context has been loaded, i.e.,
+ * 	  asid is non-zero.
+ * 	- for each gru found:
+ * 		- if the ctxbitmap is non-zero, there are active contexts in the
+ * 		  GRU. TLB invalidate instructions must be issued to the GRU.
+ * 		- if the ctxbitmap is zero, no context is active. Set the ASID to
+ * 		  zero to force a full TLB invalidation.
This is fast but will
+ * 		  cause a lot of TLB misses if the context is reloaded onto the
+ * 		  GRU
+ *
+ */
+
+void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+			 unsigned long len)
+{
+	struct gru_state *gru;
+	struct gru_mm_tracker *asids;
+	struct gru_tlb_global_handle *tgh;
+	unsigned long num;
+	int grupagesize, pagesize, pageshift, gid, asid;
+
+	pageshift = (is_hugepage(NULL, start) ? HPAGE_SHIFT : PAGE_SHIFT);
+	pagesize = (1UL << pageshift);
+	grupagesize = GRU_PAGESIZE(pageshift);
+	/* Number of pages covered by len, capped at GRUMAXINVAL. */
+	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
+
+	STAT(flush_tlb);
+	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
+		start, len, gms->ms_asidmap[0]);
+
+	spin_lock(&gms->ms_asid_lock);
+	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
+		STAT(flush_tlb_gru);
+		gru = GID_TO_GRU(gid);
+		asids = gms->ms_asids + gid;
+		asid = asids->mt_asid;
+		if (asids->mt_ctxbitmap && asid) {
+			/* Active contexts: issue an invalidate to the GRU. */
+			STAT(flush_tlb_gru_tgh);
+			asid = GRUASID(asid, start);
+			gru_dbg(grudev,
+	" FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
+				gid, asid, num, asids->mt_ctxbitmap);
+			tgh = get_lock_tgh_handle(gru);
+			tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
+				       num - 1, asids->mt_ctxbitmap);
+			get_unlock_tgh_handle(tgh);
+		} else {
+			/* No active context: just forget the ASID. */
+			STAT(flush_tlb_gru_zero_asid);
+			asids->mt_asid = 0;
+			__clear_bit(gru->gs_gid, gms->ms_asidmap);
+			gru_dbg(grudev,
+	" CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
+				gid, asid, asids->mt_ctxbitmap,
+				gms->ms_asidmap[0]);
+		}
+	}
+	spin_unlock(&gms->ms_asid_lock);
+}
+
+/*
+ * Flush the entire TLB on a chiplet.
+ *
+ * get_lock_tgh_handle() disables preemption and get_unlock_tgh_handle()
+ * re-enables it, so no additional preempt_enable() belongs here; the extra
+ * call that used to follow left the preempt count unbalanced.
+ */
+void gru_flush_all_tlb(struct gru_state *gru)
+{
+	struct gru_tlb_global_handle *tgh;
+
+	gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
+	tgh = get_lock_tgh_handle(gru);
+	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
+	get_unlock_tgh_handle(tgh);
+}
+
+/*
+ * Called from a mmuops callback to unmap a range of PTEs.
+ *
+ * Called holding the mmap_sem for write.
+ */
+static void gru_mmuops_invalidate_range_begin(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long start, unsigned long end,
+				       int atomic)
+{
+	struct gru_mm_struct *gms =
+		container_of(mn, struct gru_mm_struct, ms_notifier);
+	unsigned long len = end - start;
+
+	STAT(mmuops_invalidate_range);
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, atomic %d\n", gms,
+		start, end, atomic);
+	/* Mark an invalidate as in progress before flushing the range. */
+	atomic_inc(&gms->ms_range_active);
+	gru_flush_tlb_range(gms, start, len);
+}
+
+/*
+ * End of a range invalidate: drop the in-progress count taken by the
+ * matching _begin callback and wake everyone sleeping on ms_wait_queue.
+ */
+static void gru_mmuops_invalidate_range_end(struct mmu_notifier *mn,
+				       struct mm_struct *mm, unsigned long start,
+				       unsigned long end, int atomic)
+{
+	struct gru_mm_struct *gms =
+		container_of(mn, struct gru_mm_struct, ms_notifier);
+
+	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, atomic %d\n", gms,
+		start, end, atomic);
+	atomic_dec(&gms->ms_range_active);
+	wake_up_all(&gms->ms_wait_queue);
+}
+
+/*
+ * Called from a mmuops callback whenever a valid PTE is unloaded ex. when a
+ * page is paged out by the kernel.
+ *
+ * Called holding the mm->page_table_lock
+ */
+static void gru_mmuops_invalidate_page(struct mmu_notifier *mn,
+				       struct mm_struct *mm,
+				       unsigned long vaddr)
+{
+	struct gru_mm_struct *gms =
+		container_of(mn, struct gru_mm_struct, ms_notifier);
+
+	STAT(mmuops_invalidate_page);
+	gru_dbg(grudev, "gms %p, vaddr 0x%lx\n", gms, vaddr);
+	/* A length of 1 byte rounds up to a single page in the flush. */
+	gru_flush_tlb_range(gms, vaddr, 1);
+}
+
+/*
+ * Called at start of address space teardown. GTS's still
+ * hold a reference count on the GMS. Structure is not freed
+ * until the reference count goes to zero.
+ */
+static void gru_mmuops_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct gru_mm_struct *gms =
+		container_of(mn, struct gru_mm_struct, ms_notifier);
+
+	STAT(mmuops_release);
+	gru_dbg(grudev, "gms %p\n", gms);
+	/* Tells gru_drop_mmu_notifier() not to unregister the notifier. */
+	gms->ms_released = 1;
+}
+
+static const struct mmu_notifier_ops gru_mmuops = {
+	.release		= gru_mmuops_release,
+	.invalidate_range_begin	= gru_mmuops_invalidate_range_begin,
+	.invalidate_range_end	= gru_mmuops_invalidate_range_end,
+	.invalidate_page	= gru_mmuops_invalidate_page,
+};
+
+/*
+ * Look for a GRU notifier on the mm that still has a non-zero reference
+ * count. Returns NULL if there is none.
+ */
+/* Move this to the basic mmuops file. But for now... */
+static struct mmu_notifier *mmuops_find_ops(struct mm_struct *mm)
+{
+	struct mmu_notifier *mn;
+	struct hlist_node *n, *t;
+	struct gru_mm_struct *gms;
+
+	hlist_for_each_entry_safe_rcu(mn, n, t, &mm->mmu_notifier.head, hlist) {
+		if (mn->ops != &gru_mmuops)
+			continue;
+		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+		if (atomic_read(&gms->ms_refcnt) > 0)
+			return mn;
+	}
+	return NULL;
+}
+
+/*
+ * Return the GMS for current->mm with an additional reference, creating
+ * and registering a new one when none exists. Returns NULL if the
+ * allocation fails.
+ */
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+	struct gru_mm_struct *gms;
+	struct mmu_notifier *mn;
+
+	mn = mmuops_find_ops(current->mm);
+	if (mn) {
+		/* Share the existing structure. */
+		gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+		atomic_inc(&gms->ms_refcnt);
+		return gms;
+	}
+
+	gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+	if (!gms)
+		return NULL;
+
+	spin_lock_init(&gms->ms_asid_lock);
+	gms->ms_notifier.ops = &gru_mmuops;
+	atomic_set(&gms->ms_refcnt, 1);
+	init_waitqueue_head(&gms->ms_wait_queue);
+	INIT_HLIST_NODE(&gms->ms_notifier.hlist);
+	mmu_notifier_register(&gms->ms_notifier, current->mm);
+	synchronize_rcu();
+	return gms;
+}
+
+/*
+ * Drop one reference on the GMS. The last reference unregisters the
+ * notifier (unless the release callback already ran) and frees the
+ * structure after synchronize_rcu().
+ */
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+	if (atomic_dec_return(&gms->ms_refcnt) != 0)
+		return;
+
+	if (!gms->ms_released)
+		mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+	synchronize_rcu();
+	kfree(gms);
+}
+
+/*
+ * Setup TGH parameters.
There are:
+ * 	- 24 TGH handles per GRU chiplet
+ * 	- a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ * 	  use by blade-local cpus
+ * 	- the rest are used by off-blade cpus. This usage is
+ * 	  less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less than or equal to 16 cpus, each cpu has a unique handle that it can
+ * use.
+ */
+#define MAX_LOCAL_TGH	16
+
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+	int cpus = nr_cpus_blade(gru->gs_blade_id);
+	int shift = 0;
+
+	if (cpus) {
+		/* cpus rounded up to the next power of 2 */
+		int pow2 = 1 << fls(cpus - 1);
+		/*
+		 * shift count for converting local cpu# to TGH index
+		 *      0 if cpus <= MAX_LOCAL_TGH,
+		 *      1 if cpus <= 2*MAX_LOCAL_TGH,
+		 *      etc
+		 */
+		int excess = fls(pow2 - 1) - fls(MAX_LOCAL_TGH - 1);
+
+		if (excess > 0)
+			shift = excess;
+	}
+	gru->gs_tgh_local_shift = shift;
+
+	/* first starting TGH index to use for remote purges */
+	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+
+}
Index: linux/drivers/gru/gruprocfs.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/gru/gruprocfs.c	2008-02-15 13:56:46.200364209 -0600
@@ -0,0 +1,309 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              PROC INTERFACES
+ *
+ * This file supports the /proc interfaces for the GRU driver
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005-2008 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+#ifdef EMU
+#include "preemu.h"
+#endif
+#include
+#include
+#include
+#include
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+#ifdef EMU
+#include "emu.h"
+#endif
+
+/* Print one counter per line as "<value>: <description>". */
+#define print_stat(s, f, id)						\
+	seq_printf(s, "%lu: " id "\n", atomic_long_read(&gru_stats.f))
+
+/* seq_file show routine for the "statistics" file: dumps every counter. */
+static int statistics_show(struct seq_file *s, void *p)
+{
+	print_stat(s, fdata_alloc, "allocate fdata");
+	print_stat(s, fdata_free, "free fdata");
+	print_stat(s, vdata_alloc, "allocate vdata");
+	print_stat(s, vdata_free, "free vdata");
+	print_stat(s, gts_alloc, "thread state allocate");
+	print_stat(s, gts_free, "thread state free");
+	print_stat(s, gtd_alloc, "thread data allocate");
+	print_stat(s, gtd_free, "thread data free");
+	print_stat(s, vdata_double_alloc, "race in cow vdata alloc");
+	print_stat(s, gts_double_allocate, "race in cow gts alloc");
+
+	print_stat(s, assign_context, "allocate context");
+	print_stat(s, assign_context_failed, "allocate context failed");
+	print_stat(s, free_context, "free GRU context");
+	print_stat(s, load_context, "load GRU context");
+	print_stat(s, unload_context, "unload GRU context");
+	print_stat(s, steal_context, "steal context");
+	print_stat(s, steal_context_failed, "steal context failed");
+	print_stat(s, nopfn, "nopfn");
+	print_stat(s, break_cow, "break cow data fault");
+
+	print_stat(s, asid_new, "asid new");
+	print_stat(s, asid_next, "asid next");
+	print_stat(s, asid_wrap, "asid wrap");
+	print_stat(s, asid_reuse, "asid reuse");
+
+	print_stat(s, intr, "interrupt");
+	print_stat(s, call_os, "user call os");
+	print_stat(s, call_os_tfh_idle, "call_os_tfh_idle");
+	print_stat(s, call_os_check_for_bug, "call_os_check_for_bug");
+	print_stat(s, call_os_wait_queue, "call_os_wait_queue");
+	print_stat(s, user_flush_tlb, "user flush tlb");
+	print_stat(s, user_unload_context, "user unload context");
+	print_stat(s, user_exception, "user exception");
+	print_stat(s, set_task_slice, "set task slice");
+	print_stat(s, migrate_check, "migrate task check");
+	print_stat(s, migrated_retarget, "migrate retarget");
+	print_stat(s, migrated_unload, "migrate unload");
+	print_stat(s, migrated_unload_delay, "migrate unload delay");
+	print_stat(s, migrated_nopfn_retarget, "migrate nopfn retarget");
+	print_stat(s, migrated_nopfn_unload, "migrate nopfn unload");
+	print_stat(s, tlb_dropin, "tlb dropin");
+	print_stat(s, tlb_dropin_fail_no_asid, "tlb_dropin_fail_no_asid");
+	print_stat(s, tlb_dropin_fail_upm, "tlb_dropin_fail_upm");
+	print_stat(s, tlb_dropin_fail_invalid, "tlb_dropin_fail_invalid");
+	print_stat(s, tlb_dropin_fail_range_active, "tlb_dropin_fail_range_active");
+	print_stat(s, mmuops_invalidate_range, "mmuops invalidate range");
+	print_stat(s, mmuops_invalidate_page, "mmuops update page");
+	print_stat(s, mmuops_age_page, "mmuops age page");
+	print_stat(s, mmuops_release, "mmuops release");
+
+	print_stat(s, flush_tlb, "flush tlb");
+	print_stat(s, flush_tlb_gru, "flush tlb gru");
+	print_stat(s, flush_tlb_gru_tgh, "flush tlb tgh");
+	print_stat(s, flush_tlb_gru_zero_asid, "flush tlb zero asid");
+	return 0;
+}
+
+/* Any write to the "statistics" file resets all counters to zero. */
+static ssize_t statistics_write(struct file *file, const char __user *userbuf,
+				size_t count, loff_t *data)
+{
+	memset(&gru_stats, 0, sizeof(gru_stats));
+	return count;
+}
+
+static int options_show(struct seq_file *s, void *p)
+{
+	seq_printf(s, "0x%lx\n", options);
+	return 0;
+}
+
+/*
+ * Parse a number written to the "debug_options" file into options.
+ *
+ * At most sizeof(buf) - 1 bytes are copied so that the buffer can always
+ * be NUL-terminated before it is handed to simple_strtoul(); previously
+ * the parser could run past the copied data. The whole write is reported
+ * as consumed.
+ */
+static ssize_t options_write(struct file *file, const char __user *userbuf,
+			     size_t count, loff_t *data)
+{
+	char buf[80];
+	size_t len = count < sizeof(buf) - 1 ? count : sizeof(buf) - 1;
+
+	if (copy_from_user(buf, userbuf, len))
+		return -EFAULT;
+	buf[len] = '\0';
+	options = simple_strtoul(buf, NULL, 0);
+
+	return count;
+}
+
+/*
+ * seq_file show routine for "cch_status": one line per context loaded on
+ * the GRU selected by the iterator position, with a header before gid 0.
+ */
+static int cch_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data;
+	int i;
+	struct gru_state *gru = GID_TO_GRU(gid);
+	struct gru_thread_state *ts;
+	const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
+
+	if (gid == 0)
+		seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid",
+			   "ctx#", "pid", "cbrs", "dsbytes", "mode");
+	if (gru)
+		for (i = 0; i < GRU_NUM_CCH; i++) {
+			ts = gru->gs_gts[i];
+			if (!ts)
+				continue;
+			/* dsbytes comes from the DSR count, not the CBR count */
+			seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n",
+				   gru->gs_gid, gru->gs_blade_id, i,
+				   ts->ts_tgid_owner,
+				   ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
+				   ts->ts_dsr_au_count * GRU_DSR_AU_BYTES,
+				   mode[ts->ts_user_options &
+					GRU_OPT_MISS_MASK]);
+		}
+
+	return 0;
+}
+
+/*
+ * seq_file show routine for "gru_status": busy and free context, CBR and
+ * DSR figures for the GRU selected by the iterator position.
+ */
+static int gru_seq_show(struct seq_file *file, void *data)
+{
+	long gid = *(long *)data, ctxfree, cbrfree, dsrfree;
+	struct gru_state *gru = GID_TO_GRU(gid);
+
+	if (gid == 0) {
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
+			   "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
+		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
+			   "busy", "busy", "free", "free", "free");
+	}
+	if (gru) {
+		ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
+		cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
+		dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
+		seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n",
+			   gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree,
+			   GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree,
+			   ctxfree, cbrfree, dsrfree);
+	}
+
+	return 0;
+}
+
+static void seq_stop(struct seq_file *file, void *data)
+{
+}
+
+/* The iterator position is the gid; iteration ends at GRU_MAX_GRUS. */
+static void *seq_start(struct seq_file *file, loff_t *gid)
+{
+	if (*gid < GRU_MAX_GRUS)
+		return gid;
+	return NULL;
+}
+
+static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
+{
+	(*gid)++;
+	if (*gid < GRU_MAX_GRUS)
+		return gid;
+	return NULL;
+}
+
+static struct seq_operations cch_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= cch_seq_show
+};
+
+static struct seq_operations gru_seq_ops = {
+	.start	= seq_start,
+	.next	= seq_next,
+	.stop	= seq_stop,
+	.show	= gru_seq_show
+};
+
+static int statistics_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, statistics_show, NULL);
+}
+
+static int options_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, options_show, NULL);
+}
+
+static int cch_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &cch_seq_ops);
+}
+
+static int gru_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &gru_seq_ops);
+}
+
+/*
+ * Files opened with single_open() must be released with single_release(),
+ * which also frees what single_open() allocated; seq_release() is only
+ * correct for the files opened with seq_open().
+ */
+/* *INDENT-OFF* */
+static struct file_operations statistics_fops = {
+	.open 		= statistics_open,
+	.read 		= seq_read,
+	.write 		= statistics_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static struct file_operations options_fops = {
+	.open 		= options_open,
+	.read 		= seq_read,
+	.write 		= options_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
+static struct file_operations cch_fops = {
+	.open 		= cch_open,
+	.read 		= seq_read,
+	.llseek 	= seq_lseek,
+	.release 	= seq_release,
+};
+static struct file_operations gru_fops = {
+	.open 		= gru_open,
+	.read 		= seq_read,
+	.llseek 	= seq_lseek,
+	.release 	= seq_release,
+};
+
+static struct proc_entry {
+	char *name;
+	int mode;
+	struct file_operations *fops;
+	struct proc_dir_entry *entry;
+} proc_files[] = {
+	{"statistics", 0644, &statistics_fops},
+	{"debug_options", 0644, &options_fops},
+	{"cch_status", 0444, &cch_fops},
+	{"gru_status", 0444, &gru_fops},
+	{NULL}
+};
+/* *INDENT-ON* */
+
+static struct proc_dir_entry *proc_gru;
+
+/* Create one entry under proc_gru; returns 0 on success, -1 on failure. */
+static int create_proc_file(struct proc_entry *p)
+{
+	p->entry = create_proc_entry(p->name, p->mode, proc_gru);
+	if (!p->entry)
+		return -1;
+	p->entry->proc_fops = p->fops;
+	return 0;
+}
+
+static void delete_proc_files(void)
+{
+	struct proc_entry *p;
+
+	if (proc_gru) {
+		for
(p = proc_files; p->name; p++) + if (p->entry) + remove_proc_entry(p->name, proc_gru); + remove_proc_entry("gru", NULL); + } +} + +int gru_proc_init(void) +{ + struct proc_entry *p; + + proc_gru = proc_mkdir("gru", NULL); + + for (p = proc_files; p->name; p++) + if (create_proc_file(p)) + goto err; + return 0; + +err: + delete_proc_files(); + return -1; +} + +void gru_proc_exit(void) +{ + delete_proc_files(); +} Index: linux/drivers/gru/grutables.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux/drivers/gru/grutables.h 2008-02-22 09:36:17.000000000 -0600 @@ -0,0 +1,517 @@ +/* + * SN Platform GRU Driver + * + * GRU DRIVER TABLES, MACROS, externs, etc + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2005-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +#ifndef _ASM_IA64_SN_GRUTABLES_H +#define _ASM_IA64_SN_GRUTABLES_H + +/* + * Tables: + * + * GFD - GRU File Data - Holds GSEG options. Used to communicate with + * user using ioctls. + * VDATA-VMA Data - Holds a few parameters. Head of linked list of + * GTS tables for threads using the GSEG + * GTS - Gru Thread State - contains info for managing a GSEG context. A + * GTS is allocated for each thread accessing a + * GSEG. + * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is + * not loaded into a GRU + * GMS - GRU Memory Struct - Used to manage TLB shotdowns. Tracks GRUs + * where a GSEG has been loaded. Similar to + * an mm_struct but for GRU. + * + * GS - GRU State - Used to manage the state of a GRU chiplet + * BS - Blade State - Used to manage state of all GRU chiplets + * on a blade + * + * + * Normal task tables for task using GRU. 
+ * - 2 threads in process + * - 2 GSEGs open in process + * - GSEG1 is being used by both tthreads + * - GSEG2 is used only by thread 2 + * + * task -->| + * task ---+---> mm ->-- (mmuops) -------------+-> gms + * | | + * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) + * | | gtd | + * | | | + * | +-> gts--->| GSEG1 (thread2) + * | gtd | + * | | + * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) + * | gtd + * . + * . + * + * GSEGs are logically copy-on-write at fork time. + * + * At open + * file.private_data -> gfd + * + * At mmap, + * vma -> vdata -> gts -> gtd + * + * After fork + * parent + * vma -> vdata -> gts -> gtd # normal case + * child / + * vma -> ------------/ # gtd shared with parent + * + * Parent page fault for GSEG + * before + * vma -> vdata -> gts -> gtd + * after + * vma -> vdata -> gts -> gtd # allocate a new gtd. Old gtd + * if left with child + * + * Child page fault before + * vma -> gtd + * after + * vma -> vdata -> gts -> gtd # Allocate GTS. Move old gtd + * to new gts + * + */ + +#include +#include +#include +#include "gru.h" +#include "gruhandles.h" + + + /* Some hacks for running on the hardware simulator */ +#ifdef EMU +#undef local_irq_disable +#undef local_irq_enable +#define local_irq_disable() emu_local_irq_disable() +#define local_irq_enable() emu_local_irq_enable() +void emu_local_irq_disable(void); +void emu_local_irq_enable(void); +#define gru_stats gruhdr->egru_stats +#define gru_base gruhdr->egru_base +#define cpu_trinfo gruhdr->cpu_trinfo +#define gru_start_paddr gruhdr->egru_start_paddr +#define gru_end_paddr gruhdr->egru_end_paddr +#define STATIC +#else +extern struct gru_stats_s gru_stats; +extern struct gru_blade_state *gru_base[]; +extern unsigned long gru_start_paddr, gru_end_paddr; +#define STATIC static +#endif + +#define GRU_MAX_BLADES MAX_NUMNODES +#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) + +#define GRU_DRIVER_ID_STR "SGI GRU Device Driver" +#define REVISION "0.01" + +/* + * GRU statistics. 
+ */ +struct gru_stats_s { + atomic_long_t fdata_alloc; + atomic_long_t fdata_free; + atomic_long_t vdata_alloc; + atomic_long_t vdata_free; + atomic_long_t gts_alloc; + atomic_long_t gts_free; + atomic_long_t gtd_alloc; + atomic_long_t gtd_free; + atomic_long_t vdata_double_alloc; + atomic_long_t gts_double_allocate; + atomic_long_t assign_context; + atomic_long_t assign_context_failed; + atomic_long_t free_context; + atomic_long_t load_context; + atomic_long_t unload_context; + atomic_long_t steal_context; + atomic_long_t steal_context_failed; + atomic_long_t nopfn; + atomic_long_t break_cow; + atomic_long_t asid_new; + atomic_long_t asid_next; + atomic_long_t asid_wrap; + atomic_long_t asid_reuse; + atomic_long_t intr; + atomic_long_t call_os; + atomic_long_t call_os_tfh_idle; + atomic_long_t call_os_check_for_bug; + atomic_long_t call_os_wait_queue; + atomic_long_t user_flush_tlb; + atomic_long_t user_unload_context; + atomic_long_t user_exception; + atomic_long_t set_task_slice; + atomic_long_t migrate_check; + atomic_long_t migrated_retarget; + atomic_long_t migrated_unload; + atomic_long_t migrated_unload_delay; + atomic_long_t migrated_nopfn_retarget; + atomic_long_t migrated_nopfn_unload; + atomic_long_t tlb_dropin; + atomic_long_t tlb_dropin_fail_no_asid; + atomic_long_t tlb_dropin_fail_upm; + atomic_long_t tlb_dropin_fail_invalid; + atomic_long_t tlb_dropin_fail_range_active; + atomic_long_t mmuops_invalidate_range; + atomic_long_t mmuops_invalidate_page; + atomic_long_t mmuops_age_page; + atomic_long_t mmuops_release; + atomic_long_t flush_tlb; + atomic_long_t flush_tlb_gru; + atomic_long_t flush_tlb_gru_tgh; + atomic_long_t flush_tlb_gru_zero_asid; +}; + +#define GRU_DEBUG 1 + +#define OPT_DPRINT 1 +#define OPT_STATS 0x2 + +#ifdef EMU +# undef dev_printk +# define dev_printk(level, dev, s, x...) 
\ + EMULOG(TR_GRU_DEBUG, "DRV", s, x) +#endif + +#define IRQ_GRU 110 /* Starting IRQ number for interrupts */ + +/* Delay in jiffies between attempts to assign a GRU context */ +#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) + +/* If a process has it's context stolen, min delay in jiffies before trying to + * steal a context from another process */ +#define GRU_STEAL_DELAY ((HZ * 200) / 1000) + +#ifdef GRU_DEBUG +#define STAT(id) do { \ + if (options & OPT_STATS) \ + atomic_long_inc(&gru_stats.id); \ + } while (0) + +#define gru_dbg(dev, fmt, x...) do { \ + if (options & OPT_DPRINT) dev_dbg(dev, "%s: " fmt, __FUNCTION__, x); \ + } while (0) +#else +#define STAT(id) +#define gru_dbg(x...) +#endif + +/*----------------------------------------------------------------------------- + * ASID management + */ +//#define MAX_ASID 0xfffff0 +#define MAX_ASID 0x1f0 +#define MIN_ASID 8 +#define ASID_INC 8 /* number of regions */ + +/* Generate a GRU asid value from a GRU base asid & a virtual address. */ +#ifdef __ia64__ +#define VADDR_HI_BIT 64 +#elif __x86_64 +#define VADDR_HI_BIT 48 +#else +#error "bad arch" +#endif +#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) +#define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) + +/*------------------------------------------------------------------------------ + * File & VMS Tables + */ + +struct gru_state; + +/* + * This is the file_private data structure + * Note: values are used only when GRU is mmaped. At that + * time the current values are copied to the GTS. + */ +struct gru_file_data { + long fd_user_options; /* misc user option flags */ + int fd_cbr_au_count; /* number control blocks AU */ + int fd_dsr_au_count; /* data segment size AU */ + int fd_thread_slices; /* max threads that will access + the context */ +}; + +/* + * This structure is pointed to from the mmstruct via the mmuops pointer. There + * is one of these per address space. 
+ */ +struct gru_mm_tracker { + unsigned int mt_asid_gen; /* ASID wrap count */ + int mt_asid; /* current base ASID for gru */ + unsigned short mt_ctxbitmap; /* bitmap of contexts using + asid */ +}; + +struct gru_mm_struct { + struct mmu_notifier ms_notifier; + atomic_t ms_refcnt; + char ms_released; + spinlock_t ms_asid_lock; + atomic_t ms_range_active; /* number of range_invals active */ + wait_queue_head_t ms_wait_queue; + DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); + struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; +}; + +/* + * One of these structures is allocated when a GSEG is mmaped. The + * structure is pointed to by the vma->vm_private_data field in the vma struct. + * Note: after a fork, the CHILD's vm_private_data field points to a + * "struct gru_thread_data" (the VM open callout can't allocate memory). + * The normal vdata/gts/gtd structures are allocated on first fault. + */ +struct gru_vma_data { + spinlock_t vd_lock; /* Serialize access to vma */ + struct list_head vd_head; /* head of linked list of gts */ + long vd_user_options;/* misc user option flags */ + int vd_cbr_au_count; + int vd_dsr_au_count; + int vd_thread_slices; +}; + +/* + * One of these is allocated for each thread accessing a mmaped GRU. A linked + * list of these structure is hung off the struct gru_vma_data in the mm_struct. 
+ */ +struct gru_thread_data { + long td_magic; /* magic ID for IS_THREAD_DATA */ + atomic_t td_refcnt; /* number of GTS structs sharing data */ + unsigned long td_gdata[0]; /* save area for GRU data (CB, DS, CBE) */ +}; +#define TD_MAGIC 0xabcd1235 +#define IS_THREAD_DATA(p) (*((long *)(p)) == TD_MAGIC) + +struct gru_thread_state { + struct list_head ts_next; /* list - head at vma-private */ + struct semaphore ts_ctxsem; /* load/unload CTX lock */ + struct mm_struct *ts_mm; /* mm currently mapped to context */ + struct vm_area_struct *ts_vma; /* vma of GRU context */ + struct gru_state *ts_gru; /* GRU where the context is loaded */ + struct gru_mm_struct *ts_ms; /* asid & ioproc struct */ + struct gru_thread_data *ts_td; /* gru thread data */ + unsigned long ts_steal_jiffies;/* jiffies when context last stolen */ + pid_t ts_tgid_owner; /* task that is using the context - for migration */ + int ts_tsid; /* thread that owns the structure */ + int ts_tlb_int_select;/* target cpu if interrupts enabled */ + int ts_ctxnum; /* context number where the context is loaded */ + atomic_t ts_refcnt; /* reference count GTS */ + long ts_user_options;/* misc user option flags */ + unsigned long ts_cbr_map; /* map of allocated CBRs */ + unsigned long ts_dsr_map; /* map of allocated DATA resources */ + unsigned char ts_dsr_au_count;/* Number of DSR resources requied for contest */ + unsigned char ts_cbr_au_count;/* Number of CBR resources requied for contest */ + char ts_force_unload;/* force context to be unloaded after migration */ + char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each allocated CB */ +}; + +/* + * Threaded programs actually allocate an array of GSEGs when a context is created. Each + * thread uses a separate GSEG. TSID is the index into the GSEG array. 
+ */ +#define TSID(off) ((off) / GRU_GSEG_PAGESIZE) +#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + (gts)->ts_tsid * GRU_GSEG_PAGESIZE) + +#define NULLCTX -1 /* if context not loaded into GRU */ + +/*----------------------------------------------------------------------------- + * GRU State Tables + */ + +/* + * One of these exists for each GRU chiplet. + */ +struct gru_state { + struct gru_blade_state *gs_blade; /* GRU state for entire blade */ + unsigned long gs_gru_base_paddr; /* Physical address of gru segments (64) */ + void *gs_gru_base_vaddr; /* Virtual address of gru segments (64) */ + char gs_present; /* 0=GRU not present */ + unsigned char gs_gid; /* unique GRU number */ + char gs_tgh_local_shift; /* used to pick TGH for local flush */ + char gs_tgh_first_remote; /* starting TGH# for remote flush */ + short gs_blade_id; /* blade of GRU */ + spinlock_t gs_asid_lock; /* lock used for assigning asids */ + spinlock_t gs_lock; /* lock used for assigning contexts */ + + /* ---- the following fields are protected by the gs_asid_lock spinlock ---- */ + int gs_asid; /* Next available ASID */ + int gs_asid_limit; /* Limit of available ASIDs */ + unsigned int gs_asid_gen; /* asid generation. Inc on wrap */ + + /* ---- the following fields are protected by the gs_lock spinlock ---- */ + short gs_active_contexts; /* number of contexts in use */ + unsigned long gs_context_map; /* bitmap used to manage contexts in use */ + unsigned long gs_cbr_map; /* bitmap used to manage CB resources */ + unsigned long gs_dsr_map; /* bitmap used to manage DATA resources */ + struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using the context */ +}; + +/* + * This structure contains the GRU state for all the GRUs on a blade. 
+ */ +struct gru_blade_state { + /* ---- the following fields are protected by the blade bs_lock spinlock ---- */ + spinlock_t bs_lock; /* lock used for stealing contexts */ + int bs_lru_ctxnum; /* STEAL - last context stolen */ + struct gru_state *bs_lru_gru; /* STEAL - last gru stolen */ + + struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; +}; + +/*----------------------------------------------------------------------------- + * Address Primitives + */ +#define get_tfm_for_cpu(g, c) ((struct gru_tlb_fault_map *)GRU_TFM((g)->gs_gru_base_vaddr, (c))) +#define get_tfh_by_index(g, i) ((struct gru_tlb_fault_handle *)GRU_TFH((g)->gs_gru_base_vaddr, (i))) +#define get_tgh_by_index(g, i) ((struct gru_tlb_global_handle *)GRU_TGH((g)->gs_gru_base_vaddr, (i))) +#define get_cbe_by_index(g, i) ((struct gru_control_block_extended *)GRU_CBE((g)->gs_gru_base_vaddr, (i))) + +/*----------------------------------------------------------------------------- + * Useful Macros + */ + +/* Number of bytes to save/restore when unloading/loading GRU contexts */ +#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES) +#define CB_CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2) +#define THREADDATABYTES(v) (sizeof(struct gru_thread_data) + \ + DSR_BYTES((v)->vd_dsr_au_count) + \ + CB_CBR_BYTES((v)->vd_cbr_au_count)) + +/* Convert a user CB number to the actual CBRNUM */ +#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \ + * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE) + +/* Test if a vaddr is a hugepage */ +#define is_hugepage(m, v) is_hugepage_only_range(m, (v), PAGE_SIZE) + +/* Convert a gid to a pointer to the GRU */ +#define GID_TO_GRU(gid) (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? 
\ + (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \ + bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : NULL) + +/* Scan all active GRUs in a GRU bitmap */ +#define for_each_gru_in_bitmap(gid, map) \ + for (gid = find_first_bit(map, GRU_MAX_GRUS); gid < GRU_MAX_GRUS; \ + gid++, gid = find_next_bit(map, GRU_MAX_GRUS, gid)) + +/* Scan all active GRUs on a specific blade */ +#define for_each_gru_on_blade(gru, nid, i) \ + for (gru = gru_base[nid]->bs_grus, i = 0; i < GRU_CHIPLETS_PER_BLADE; i++, gru++) \ + if (gru->gs_present) + +/* Scan all active GTSs on a gru. Note: must hold ss_lock to use thsi macro. */ +#define for_each_gts_on_gru(gts, gru, ctxnum) \ + if (gru->gs_present) \ + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) \ + if ((gts = gru->gs_gts[ctxnum])) + +/* Scan each CBR whose bit is set in a TFM (or copy of) */ +#define for_each_cbr_in_tfm(i, map) \ + for (i = find_first_bit(map, GRU_NUM_CBE); i < GRU_NUM_CBE; \ + i++, i = find_next_bit(map, GRU_NUM_CBE, i)) + +/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ +#define for_each_cbr_in_allocation_map(i, map, k) \ + for (k = find_first_bit(map, GRU_CBR_AU); k < GRU_CBR_AU; \ + k = find_next_bit(map, GRU_CBR_AU, k + 1)) \ + for (i = k*GRU_CBR_AU_SIZE; i < (k + 1) * GRU_CBR_AU_SIZE; i++) + +/* Scan each DSR in a DSR bitmap. 
Note: multiple DSRs in an allocation unit */ +#define for_each_dsr_in_allocation_map(i, map, k) \ + for (k = find_first_bit((const unsigned long *)map, GRU_DSR_AU); \ + k < GRU_DSR_AU; \ + k = find_next_bit((const unsigned long *)map, GRU_DSR_AU, k + 1))\ + for (i = k*GRU_DSR_AU_CL; i < (k + 1) * GRU_DSR_AU_CL; i++) + +#define gseg_physical_address(gru, ctxnum) \ + (gru->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) +#define gseg_virtual_address(gru, ctxnum) \ + (gru->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) + +/* ZZZ Hacks until we hook up to the rest of the UV infrastructure */ +#define NODESPERBLADE 1 +#define CPUSPERSOCKET 8 +#define SOCKETSPERBLADE 2 +#define CPUSPERBLADE (CPUSPERSOCKET * SOCKETSPERBLADE) +#define CPUSPERNODE (CPUSPERBLADE / NODESPERBLADE) + +#define blade_processor_id() (smp_processor_id() % CPUSPERBLADE) +#define numa_blade_id() (numa_node_id() / NODESPERBLADE) +#define nid_to_blade(nid) ((nid) / NODESPERBLADE) +#define nr_cpus_blade(nid) (CPUSPERSOCKET * SOCKETSPERBLADE) +#define cpu_to_blade(cpu) ((cpu) / CPUSPERBLADE) + +/*----------------------------------------------------------------------------- + * Lock / Unlock GRU handles + * Use the "delresp" bit in the handle as a "lock" bit. 
+ */ + +static inline void lock_handle(void *h) +{ + while (test_and_set_bit(1, h)) { + cpu_relax(); +#ifdef EMU + my_usleep(100); +#endif + } +} + +static inline void unlock_handle(void *h) +{ + clear_bit(1, h); +} + +/*----------------------------------------------------------------------------- + * Function prototypes & externs + */ +extern struct vm_operations_struct gru_vm_ops; +extern struct device *grudev; +struct gru_unload_context_req; +struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid, + void *gtd); +struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, + int tsid); +void gru_unload_context(struct gru_thread_state *gts, int savestate); +void gtd_drop(struct gru_thread_data *gtd); +void gts_drop(struct gru_thread_state *gts); +void gru_tgh_flush_init(struct gru_state *gru); +int gru_kservices_init(struct gru_state *gru); +irqreturn_t gru_intr(int irq, void *dev_id); +int gru_handle_user_call_os(unsigned long address); +int gru_user_flush_tlb(unsigned long arg); +int gru_user_unload_context(unsigned long arg); +int gru_get_exception_detail(unsigned long arg); +int gru_set_task_slice(long address); +int gru_cpu_fault_map_id(void); +void gru_flush_all_tlb(struct gru_state *gru); +void gru_migrate_task(int pcpu, int cpu); +int gru_proc_init(void); +void gru_proc_exit(void); +unsigned long reserve_gru_cb_resources(struct gru_state *gru, int cbr_au_count, + char *cbmap); +unsigned long reserve_gru_ds_resources(struct gru_state *gru, int dsr_au_count, + char *dsmap); +extern unsigned long gru_nopfn(struct vm_area_struct *, unsigned long); +extern struct gru_mm_struct *gru_register_mmu_notifier(void); +extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); + +void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len); + +extern unsigned long options; + +#endif /* _ASM_IA64_SN_GRUTABLES_H */ From w47153 at gmail.com Fri Feb 22 08:32:26 2008 From: w47153 at gmail.com (w47153 at 
gmail.com) Date: Fri, 22 Feb 2008 10:32:26 -0600 Subject: [ofa-general] scsi transport layer for srp Message-ID: <43a7cec60802220832s1bffb7ebg4b90542a9f9655dd@mail.gmail.com> When I worked on OFED-1.3-rc2, I happened to notice that even though ib_rcp.c includes the scsi_transport_srp.h header and calls into functions in scsi_transport_srp.c such as srp_remove_host, scsi_transport_srp.ko has never been loaded on my system. However, it seems to be working fine. Would it mean that that piece of source has been patched out? I'm now in the process of debugging an issue on srp and would like to trace into the stack. I'm really confused about the situation. Could anyone familiar with this topic give me some guidance? Thanks. Stoney -------------- next part -------------- An HTML attachment was scrubbed... URL: From Caitlin.Bestler at neterion.com Fri Feb 22 09:33:08 2008 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Fri, 22 Feb 2008 12:33:08 -0500 Subject: [ofa-general] post_recv question In-Reply-To: <1203643034.31225.82.camel@trinity.ogc.int> References: <47BD9BA7.7050408@opengridcomputing.com> <1203609205.5629.53.camel@mtls03> <000101c874a5$7df4cc00$72258686@amr.corp.intel.com> <47BDA827.90806@opengridcomputing.com> <20080221181952.GB18720@minantech.com> <1203634079.31225.42.camel@trinity.ogc.int> <469958e00802211548s54e4d157w961c998dbab88557@mail.gmail.com> <1203643034.31225.82.camel@trinity.ogc.int> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD7703183ADB@nekter> Tom Tucker wrote: > > Ok. So what does the HW do with the packet while it's pondering its > options? It has to put it somewhere. At the point where the RQ/SRQ would be checked the HW should not have to "put" the packet anywhere. At least not until it can allocate a WQE or declare a no-buffer-available error. RDMA is incompatible with cut-through placement directly into a user buffer. 
The hardware has to capture the entire packet before it can legitimately allocate the correct receive WQE. So it is distinctly feasible for a HW implementation to simply leave the packet where it is while it completes a more extended check to ensure that there really is no available recv WQE. Or an implementation could adopt the strategy of ensuring that all RQ/SRQ WQEs must be very easy for the Hardware to find, and the posting routine must do whatever is necessary to ensure that this has been done. Which of many solutions is deployed is up to the implementation. The semantics are that after the recv wqe is posted that it *is* in the RQ/SRQ. Wherever the RQ/SRQ is represented in host and/or adapter memory, a QP is responsible for guaranteeing that it has searched the entire "there" before it declares that the recv wqe is "not there". That involves work on the part of posting recv wqes and/or on allocating them. But it is up to those entities to divide the work. They cannot decide that the application should solve the problem for them. From ralph.campbell at qlogic.com Fri Feb 22 09:47:13 2008 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Fri, 22 Feb 2008 09:47:13 -0800 Subject: [ofa-general] post_recv question In-Reply-To: References: Message-ID: <1203702433.5109.210.camel@brick.pathscale.com> On Thu, 2008-02-21 at 22:15 -0800, Shirley Ma wrote: > Hello Ralph, > > > ib_ipoib uses shared receive queues and doesn't try to manage > > posted buffer credits so the RNR NAK issue isn't the same > > as what Steve is trying to do. > I meant the problem you saw might be the same reason. How many > connections did you have when you hit this problem? Probably more than > 1? > > thanks > Shirley Possibly. I haven't had RNR NAK problems running the RC5 bits with netperf between two systems. I'll try iperf and see what happens. 
From sashak at voltaire.com Fri Feb 22 10:45:21 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 22 Feb 2008 18:45:21 +0000 Subject: [ofa-general] [PATCH] opensm/libvendor: use CL_HTON64() macro for constant conversion Message-ID: <20080222184521.GE29692@sashak.voltaire.com> Use CL_HTON64() macro for constant conversion instead of cl_ntoh64() function. Also it changes conversion "direction" since this value used in network byte order. Signed-off-by: Sasha Khapyorsky --- opensm/libvendor/osm_vendor_ibumad.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 679f06a..39c5fb3 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -128,7 +128,7 @@ Exit: static osm_madw_t *get_madw(osm_vendor_t * p_vend, ib_net64_t * tid) { umad_match_t *m, *e; - ib_net64_t mtid = (*tid & cl_ntoh64(0x00000000ffffffffllu)); + ib_net64_t mtid = (*tid & CL_HTON64(0x00000000ffffffffllu)); osm_madw_t *res; /* -- 1.5.4.1.122.gaa8d From sashak at voltaire.com Fri Feb 22 10:51:24 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 22 Feb 2008 18:51:24 +0000 Subject: [ofa-general] [PATCH] opensm/osm_vendor_ibumad: simplify put_madw() prototype Message-ID: <20080222185124.GF29692@sashak.voltaire.com> In put_madw() pass transaction id by value as it used there and not by refernce. 
Signed-off-by: Sasha Khapyorsky --- opensm/libvendor/osm_vendor_ibumad.c | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 39c5fb3..d51bd6d 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -154,7 +154,7 @@ static osm_madw_t *get_madw(osm_vendor_t * p_vend, ib_net64_t * tid) } static void -put_madw(osm_vendor_t * p_vend, osm_madw_t * p_madw, ib_net64_t * tid) +put_madw(osm_vendor_t * p_vend, osm_madw_t * p_madw, ib_net64_t tid) { umad_match_t *m, *e, *old_lru, *lru = 0; osm_madw_t *p_req_madw; @@ -165,7 +165,7 @@ put_madw(osm_vendor_t * p_vend, osm_madw_t * p_madw, ib_net64_t * tid) pthread_mutex_lock(&p_vend->match_tbl_mutex); for (m = p_vend->mtbl.tbl, e = m + p_vend->mtbl.max; m < e; m++) { if (m->tid == 0) { - m->tid = *tid; + m->tid = tid; m->v = p_madw; m->version = cl_atomic_inc((atomic32_t *) & p_vend->mtbl. @@ -185,9 +185,9 @@ put_madw(osm_vendor_t * p_vend, osm_madw_t * p_madw, ib_net64_t * tid) p_bind = p_req_madw->h_bind; p_req_madw->status = IB_CANCELED; pthread_mutex_lock(&p_vend->cb_mutex); - (*p_bind->send_err_callback) (p_bind->client_context, old_lru->v); + (*p_bind->send_err_callback) (p_bind->client_context, p_req_madw); pthread_mutex_unlock(&p_vend->cb_mutex); - lru->tid = *tid; + lru->tid = tid; lru->v = p_madw; lru->version = cl_atomic_inc((atomic32_t *) & p_vend->mtbl.last_version); @@ -1081,7 +1081,7 @@ osm_vendor_send(IN osm_bind_handle_t h_bind, Resp: if (resp_expected) - put_madw(p_vend, p_madw, &p_mad->trans_id); + put_madw(p_vend, p_madw, p_mad->trans_id); #ifdef VENDOR_RMPP_SUPPORT sent_mad_size = p_madw->mad_size; -- 1.5.4.1.122.gaa8d From sean.hefty at intel.com Fri Feb 22 10:40:45 2008 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 22 Feb 2008 10:40:45 -0800 Subject: [ofa-general] [PATCH] for-2.6.25: ib/cm: flush workqueue when removing device Message-ID: 
<000101c87582$6f5ea110$9c98070a@amr.corp.intel.com> When a cm mad is received, it is queued to a cm workqueue for processing. The queued work item references the port and device on which the mad was received. If that device is removed from the system before the work item can execute, the work item will reference freed memory. To fix this, flush the workqueue after unregistering to receive mads, and before the device can be freed. Signed-off-by: Sean Hefty --- Bug found with SDP testing on OFED 1.3. The patch is also available at: git://git.openfabrics.org/~shefty/rdma-dev.git for-roland drivers/infiniband/core/cm.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b10ade9..4df4051 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3759,6 +3759,7 @@ static void cm_remove_one(struct ib_device *device) port = cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + flush_workqueue(cm.wq); cm_remove_port_fs(port); } kobject_put(&cm_dev->dev_obj); @@ -3813,6 +3814,7 @@ static void __exit ib_cm_cleanup(void) cancel_delayed_work(&timewait_info->work.work); spin_unlock_irq(&cm.lock); + ib_unregister_client(&cm_client); destroy_workqueue(cm.wq); list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { @@ -3820,7 +3822,6 @@ static void __exit ib_cm_cleanup(void) kfree(timewait_info); } - ib_unregister_client(&cm_client); class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } From mingo at elte.hu Fri Feb 22 10:54:00 2008 From: mingo at elte.hu (Ingo Molnar) Date: Fri, 22 Feb 2008 19:54:00 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> 
Message-ID: <20080222185359.GA29945@elte.hu> * Linus Torvalds wrote: > I'm personally of the opinion that a lot of checkpatch "fixes" are > anything but. That mainly concerns fixing overlong lines (where the > "fixed" version is usually worse than the original), but it's been > true for some other warnings too. that was certainly the case for the earlier checkpatch releases which treated overlong lines as an error. So here's a quick list of negative and positive aspects of current versions of checkpatch, as i see them. But let me first declare it that when scripts/checkpatch.pl was initially merged last year i immediately ran it over my own files and became a deep sceptic of it. (check the lkml archives, i complained a lot about it) Now i've got more than half a year of experience with using checkpatch as an integral part of scheduler maintenance, and we've now got 4 months of experience with using checkpatch in arch/x86 maintenance. Based on this first hand experience, my opinion about checkpatch has changed, rather radically: i now believe that checkpatch is almost as important to the long term health of our kernel development process as BitKeeper/Git turned out to be. If i had to stop using it today, it would be almost as bad of a step backwards to me as if we had to migrate the kernel source code control to CVS. Let's see the Bad Side of checkpatch: 1) checkpatch "errors" shouldn't be taken too seriously for newly introduced "leaf" driver code, which code we don't at all know whether we'll be maintaining in any serious manner in the future. Slowing down a submission by requiring it to pass checkpatch is not as clear-cut as it is for core infrastructure and architecture code. It's far more important to get _any_ code to users (as long as it's not outright harmful) than to nitpick about style details. 2) it still has some false positives. (They are quite rare in the latest versions, about 1 out of 100 for code that is already "clean". 
I send them over to Andy whenever i see them, and they get fixed quickly. The false positives were a big annoyance in early checkpatch.pl versions, these days they are not - to me at least.) 3) it's _really_ annoying when sometimes i stumble over some old, crufty piece of code that according to checkpatch is in high need of some good, thorough cleanup - and when i take a look at the code it turns out that the original author of that crap piece of code turns out to be ... me. Those moments can be pretty embarrassing and sobering ;-) The Good Side of checkpatch (and here i'll only list the non-obvious advantages): 1) 90% of the scheduler related checkpatch fixes today you'll never recognize in a commit! The fixes all happen before code is submitted, and the fixes are seamlessly embedded in nice looking patches. (in that sense checkpatch is a bit like lockdep: 90% of the errors they detect won't hit lkml, ever.) 2) you might know that Deja-Vu moment when you look at a new patch that has been submitted to lkml and you have a strange, weird "feeling" that there's something wrong about the patch. It's totally subconscious, and you take a closer look and a few seconds later you find a real bug in the code. That "feeling" i believe comes from a fundamental property of how human vision is connected to the human brain: pattern matching. Really good programmers have built a "library" of patterns of "good" and "bad" looking coding practices. If a patch or if a file has a clean _style_, bugs and deeper structural problems often stand out like a sore thumb. But if the code is peppered with random style noise, it's a lot harder (for me at least) to notice real bugs. I can notice bugs in a squeaky clean code base about 5 times easier than in a noisy codebase. This effect alone makes checkpatch indispensable for the scheduler and for arch/x86. 
Sidenote: i dont really need fancy metrics trying to tell me how good an algorithm _truly_ is (although it certainly would be interesting to have). I can _see_ that at a glance - provided the code follows common kernel practices and a common, consistent style. Checkpatch makes visual code patterns universal and eases the human maintainance work enormously, for a 150+ KLOC subsystem like arch/x86. I'm not distracted (visually and mentally) by the thick fog of small silly details and quirks in coding style. Others might have radar eyes and radar brains, i dont :-) 3) checkpatch also keeps _my_ bugs out of the kernel in an interesting way. I'm sure many of you are like me: i've got "weaker" moments when i write rather crappy code, and i've got "stronger" moments when i'm in the flow and can write a few thousand lines of code with nary a hickup. What makes things worse is it's really hard to tell the two apart. It turns out - and this surprised me a lot - that when i write new code that is "weaker", i tend to make more "style mistakes", without noticing them. Later on, when i do a checkpatch run, i see some weird looking code and find that it's also buggy! This concept also works with code written by others: when i get a careless patch written in a hurry, it is much more likely to have style errors in it, and as a maintainer i'm warned about that fact. The best programmers are the ones who have a good eye for details - and that subconsciously extends to "style details" too. I've yet to see a _single_ example of a good, experienced kernel programmer who writes code that looks absolutely careless and sloppy, but which is top-notch otherwise. (Newbies will make style mistakes a lot more often - and for them checkpatch is a nice and easy experience at reading other people's code and trying to learn the style of the kernel.) 4) there's a psychological effect as well: clean _looking_ code is more attractive to coders to improve upon. 
Once the code _looks_ clean (mechanically), the people with the real structural cleanups are not far away either. Code that just looks nice is simply more of a pleasure to work with and to improve, so there's a strong psychological relationship between the "small, seemingly unimportant details" cleanups and the real, structural cleanups. On the other hand, bad looking, unaesthetic code is avoided by kernel developers like the pest. That is a constant skewing force that is very harmful to Linux, because the "current style" of subsystems is a pretty random property at the moment, and there are _many_ important codebases in the kernel that are avoided by most of us purely just because they look so awful. 5) cleanups were rather hard to get upstream before, because there was never any true "objective basis" for the cleanups, giving an easy excuse for flames over stupid taste differences, and making it easy for maintainers to reject 90%-good cleanups just based on taste differences. Checkpatch gives the right tool to people to write consistently clean code and makes it harder for maintainers to find the arguments to keep keep code unclean. After this list of rather subjective impressions, i've also got some historic raw data as well about how arch/x86 cleanups progressed over the past 4 months. ( NOTE: the "errors" count below does _not_ include "lines longer than 80 chars" warnings nor any of the other checkpatch warnings - only checkpatch "errors" which are real bona fide style errors in 99%+ of the cases. ) errors lines of code errors/KLOC ........................................................................ 
v2.6.24-rc1 arch/x86/ [23 Oct 2007] 8695 117423 74.0 v2.6.24-x86.git arch/x86/ [21 Nov 2007] 5190 117156 44.2 v2.6.24-x86.git arch/x86/ [18 Dec 2007] 4057 117213 34.6 v2.6.24-x86.git arch/x86/ [ 8 Jan 2008] 3650 117987 30.9 v2.6.24-x86.git arch/x86/ [ 4 Feb 2008] 3334 133542 24.9 v2.6.25-x86.git arch/x86/ [21 Feb 2008] 2724 136963 19.8 [ See: http://redhat.com/~mingo/x86.git/code-quality - although i guess i should rename it to "style-quality" - because there is no direct mapping of style quality to real code quality. NOTE: some of the reductions in the error count above are mechanic from things like really long arrays or the math-emu changes - but most of the real reductions are genuine. ] v2.6.24-rc1 was the raw arch/x86 code how we inherited it after we did the mechanic unification without changing any of the files. After that point you can see a marked reduction in the total count of style errors. While many of the fixes are just small details and may all seem insignificant in isolation, IMO the sum of those small details matters _a lot_: in the past 4 months the code has become a lot more hackable to us and that process was driven in large part by checkpatch. Ingo From linville at tuxdriver.com Fri Feb 22 08:48:19 2008 From: linville at tuxdriver.com (John W. Linville) Date: Fri, 22 Feb 2008 11:48:19 -0500 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <1203693437.6242.40.camel@lappy> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <47BEDB3F.4090100@davidnewall.com> <1203693437.6242.40.camel@lappy> Message-ID: <20080222164819.GF3067@tuxdriver.com> On Fri, Feb 22, 2008 at 04:17:17PM +0100, Peter Zijlstra wrote: > Even with e-mail, I can easily show over 200 characters wide with a > large font (say 11pt) but find it harder to read emails that don't > nicely wrap at 78. 
So much so that I often find myself not reading the > mail, or restyling it if I find it important enough to read anyway. Yes, ditto. And since most of my patch review is done inside mutt... -- John W. Linville linville at tuxdriver.com From bart.vanassche at gmail.com Fri Feb 22 11:11:09 2008 From: bart.vanassche at gmail.com (Bart Van Assche) Date: Fri, 22 Feb 2008 20:11:09 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080222185359.GA29945@elte.hu> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080222185359.GA29945@elte.hu> Message-ID: On Fri, Feb 22, 2008 at 7:54 PM, Ingo Molnar wrote: > If a patch or if a file has a clean _style_, bugs and deeper > structural problems often stand out like a sore thumb. But if the > code is peppered with random style noise, it's a lot harder (for me > at least) to notice real bugs. I can notice bugs in a squeeky clean > code base about 5 times easier than in a noisy codebase. This effect > alone makes checkpatch indispensible for the scheduler and for > arch/x86. I also appreciate style uniformity in kernel code. My (limited) experience with checkpatch is that most checkpatch complaints are easy to resolve. Bart Van Assche. 
From hjgkghkldsdfjs at dsfh.com Fri Feb 22 11:11:33 2008 From: hjgkghkldsdfjs at dsfh.com (=?GB2312?B?0MLE6rrD?=) Date: Sat, 23 Feb 2008 03:11:33 +0800 Subject: [ofa-general] =?gb2312?b?t6IgIMax?= Message-ID: <20080222191159.8F413E60CC9@openfabrics.org> 您好!贵公司经理/财务: 首先,对于我的冒昧来函向您致歉,但愿这函对贵公司有所帮助。 我司享有国家优惠政策;常年主要以生产和销售为一体的定额纳税企业,现公司有余额的发.票向外优惠代.开。 如贵司有下列情况: 1、公司做进帐,出项有差额。2、客户压底货价,利润微薄。3、采购时需要正式票据报销。 我司为您提供如下票据: 1、国税(商品销售发.票),地税(运输票,广告票,服务票,建筑票等) 2、海关代征进口增值税专用缴款书 本公司承诺所开票据均可在网上查询和验证;如有需者敬请来电洽商合作,此信息长期有效敬请保留以备后用。 负责人:刘经理 手机:13662698132 From jeff at garzik.org Fri Feb 22 11:20:12 2008 From: jeff at garzik.org (Jeff Garzik) Date: Fri, 22 Feb 2008 14:20:12 -0500 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080222185359.GA29945@elte.hu> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080222185359.GA29945@elte.hu> Message-ID: <47BF206C.8040001@garzik.org> Ingo Molnar wrote: > 2) you might know that Deja-Vu moment when you look at a new patch that > has been submitted to lkml and you have a strange, weird "feeling" > that there's something wrong about the patch. > > It's totally subconscious, and you take a closer look and a few > seconds later you find a real bug in the code. > > That "feeling" i believe comes from a fundamental property of how > human vision is connected to the human brain: pattern matching. > Really good programmers have built a "library" of patterns of "good" > and "bad" looking coding practices. > > If a patch or if a file has a clean _style_, bugs and deeper > structural problems often stand out like a sore thumb. But if the [...] > The best programmers are the ones who have a good eye for details - > and that subconsciously extends to "style details" too. 
I've yet to > see a _single_ example of a good, experienced kernel programmer who > writes code that looks absolutely careless and sloppy, but which is > top-notch otherwise. (Newbies will make style mistakes a lot more > often - and for them checkpatch is a nice and easy experience at > reading other people's code and trying to learn the style of the > kernel.) [...] > 4) there's a psychological effect as well: clean _looking_ code is > more attractive to coders to improve upon. Once the code _looks_ > clean (mechanically), the people with the real structural cleanups > are not far away either. Code that just looks nice is simply more of > a pleasure to work with and to improve, so there's a strong > psychological relationship between the "small, seemingly unimportant > details" cleanups and the real, structural cleanups. The above deserved to be quoted... just because I agree with all of it so strongly :) Bugs really do "hide" in ugly code, in part because my brain has been optimized to review clean code. Like everything else in life, one must strike a balance between picking style nits with someone's patch, and making honest criticisms of a patch because said patch is too "unclean" to be reviewed by anyone. Jeff From pavel at ucw.cz Fri Feb 22 10:45:18 2008 From: pavel at ucw.cz (Pavel Machek) Date: Fri, 22 Feb 2008 19:45:18 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> Message-ID: <20080222184518.GB6060@ucw.cz> On Fri 2008-02-22 01:05:26, Krzysztof Halasa wrote: > Jeff Garzik writes: > > > If a driver is full of lines of length >80, that's a problem. > > I'm not sure. > We all have more than 80-chars wide displays for years, don't we? The No. 
Zaurus is one example, second is small screen where you need big font to keep it readable (x60 on desk). Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html From pavel at ucw.cz Fri Feb 22 10:40:37 2008 From: pavel at ucw.cz (Pavel Machek) Date: Fri, 22 Feb 2008 19:40:37 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080221140855.6aea8cc1@laptopd505.fenrus.org> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <20080221140855.6aea8cc1@laptopd505.fenrus.org> Message-ID: <20080222184036.GA6060@ucw.cz> On Thu 2008-02-21 14:08:55, Arjan van de Ven wrote: > On Thu, 21 Feb 2008 23:01:24 +0200 > Adrian Bunk wrote: > > > [ Linus Added to the To: since I want to hear his opinion on this > > issue. ] > > > > On Thu, Feb 21, 2008 at 12:28:55PM -0800, Roland Dreier wrote: > > > > This driver should really have gotten some review before being > > > > included in the kernel. > > > > > > > Even a simple checkpatch run finds more than > 250 stylistic > > > > errors (not code bugs but cases where the driver violates the > > > > standard code formatting rules of kernel code). > > > > > > Linus has strongly stated that we should merge hardware drivers > > > early, and I agree: although the nes driver clearly needs more > > > work, there's no advantage to users with the hardware in forcing > > > them to wait for 2.6.26 to merge the driver, since they'll just > > > have to patch the grungy code in themselves anyway. And by merging > > > the driver early, we get fixed up for any tree-wide changes and > > > allow janitors to help with the cleanup. > > > > Is it really intended to merge drivers without _any_ kind of review? > > No of course not. 
> > I totally agree we should be more aggressive in merging drivers earlier.
>> If a patch or if a file has a clean _style_, bugs and deeper >> structural problems often stand out like a sore thumb. But if the > [...] > >> The best programmers are the ones who have a good eye for details - >> and that subconsciously extends to "style details" too. I've yet to >> see a _single_ example of a good, experienced kernel programmer who >> writes code that looks absolutely careless and sloppy, but which is >> top-notch otherwise. (Newbies will make style mistakes a lot more >> often - and for them checkpatch is a nice and easy experience at >> reading other people's code and trying to learn the style of the >> kernel.) > [...] > >> 4) there's a psychological effect as well: clean _looking_ code is >> more attractive to coders to improve upon. Once the code _looks_ clean >> (mechanically), the people with the real structural cleanups are not >> far away either. Code that just looks nice is simply more of a >> pleasure to work with and to improve, so there's a strong >> psychological relationship between the "small, seemingly unimportant >> details" cleanups and the real, structural cleanups. > > The above deserved to be quoted... just because I agree with all of it so > strongly :) > > Bugs really do "hide" in ugly code, in part because my brain has been > optimized to review clean code. > > Like everything else in life, one must strike a balance between picking > style nits with someone's patch, and making honest criticisms of a patch > because said patch is too "unclean" to be reviewed by anyone. I totally agree with all of this. checkpatch.pl is a useful tool to use, and is quite handy for helping the kernel code for all of the above reasons. 
thanks, greg k-h From divisas1234 at gmail.com Fri Feb 22 08:48:35 2008 From: divisas1234 at gmail.com (divisas1234) Date: Fri, 22 Feb 2008 11:48:35 -0500 Subject: [ofa-general] divisas ganadoras Message-ID: <2311850-220082522164835531@MAGO> -------------- next part -------------- An HTML attachment was scrubbed... URL: From khc at pm.waw.pl Fri Feb 22 14:28:58 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 23:28:58 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080222031315.GF27894@ZenIV.linux.org.uk> (Al Viro's message of "Fri\, 22 Feb 2008 03\:13\:15 +0000") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> <20080222031315.GF27894@ZenIV.linux.org.uk> Message-ID: Al Viro writes: > IMO the line length overruns make good warnings. Not as in "here's a cheap > way to get more changesets", but as in "that code might have other problems > nearby" kind of heuristics. Sure, it does. However the human looking at the code is far better at spotting such problems. Machine-generated warnings are great when the machine is actually better than human. Anyway, warnings are one thing and line limit is another. We may raise the limit leaving the 80-chars warning in place. Unless there are too many false positives, of course. 
-- Krzysztof Halasa From khc at pm.waw.pl Fri Feb 22 14:37:11 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 23:37:11 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: (Linus Torvalds's message of "Thu\, 21 Feb 2008 19\:13\:19 -0800 \(PST\)") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> Message-ID: Linus Torvalds writes: > Will people then try to fake things out by using 4-space indents > and then "deep" indentations will look like just a couple of tabs?) There is no point in faking it as it's only advisory, it's to help the author who should be free to ignore the advice. People upstream won't be fooled by some cheap tab tricks I guess. -- Krzysztof Halasa From khc at pm.waw.pl Fri Feb 22 14:44:09 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 23:44:09 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080222184518.GB6060@ucw.cz> (Pavel Machek's message of "Fri\, 22 Feb 2008 19\:45\:18 +0100") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> Message-ID: Pavel Machek writes: > Zaurus is one example, second is small screen where you need big font > to keep it readable (x60 on desk). Come on, are you doing Linux kernel development on PDA? 
-- Krzysztof Halasa From khc at pm.waw.pl Fri Feb 22 14:59:35 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Fri, 22 Feb 2008 23:59:35 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <1203693437.6242.40.camel@lappy> (Peter Zijlstra's message of "Fri\, 22 Feb 2008 16\:17\:17 +0100") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <47BEDB3F.4090100@davidnewall.com> <1203693437.6242.40.camel@lappy> Message-ID: Peter Zijlstra writes: > So, yes, I have the screen estate for very long lines, but I find that > long lines require more effort to read (that very much includes leading > whitespace). Also, since long lines are rare (and they should be, if you > nest too deep you have other issues) accommodating them would waste a > lot of screen estate otherwise useful for another column of text. Either they are rare and you can wrap them and still use 80 columns, or it turns out they are not so rare and you may want to use wider windows (not necessarily 132 but perhaps 100). I think the question isn't if they are rare or not, or if people have 3 * 1920 pixels/line or just 1280. The question is: is the code more readable with hard limit equal to 80 characters, or maybe is it better to limit code block complexity instead, and let the maximum number of those small pictures in a line alone? (Limiting at 132 would have technical sense IMHO). Better code readability = less bugs without any additional effort. > Even with e-mail, I can easily show over 200 characters wide with a > large font (say 11pt) but find it harder to read emails that don't > nicely wrap at 78. Sure - because email is not C code. Actually you don't "read" C code, word by word, as you read books - do you? 
-- Krzysztof Halasa From viro at ZenIV.linux.org.uk Fri Feb 22 15:14:36 2008 From: viro at ZenIV.linux.org.uk (Al Viro) Date: Fri, 22 Feb 2008 23:14:36 +0000 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <47BEDB3F.4090100@davidnewall.com> <1203693437.6242.40.camel@lappy> Message-ID: <20080222231436.GH27894@ZenIV.linux.org.uk> On Fri, Feb 22, 2008 at 11:59:35PM +0100, Krzysztof Halasa wrote: > Sure - because email is not C code. > > Actually you don't "read" C code, word by word, as you read books - do > you? If it's decently written - sure, why not? Unfortunately, more common case is somewhere between the writing on the lavatory wall and appartment lease agreement, with several high school essays mixed in... From eventuatelo31 at taosystems.com Fri Feb 22 17:30:59 2008 From: eventuatelo31 at taosystems.com (Jodi Crump) Date: Sat, 23 Feb 2008 04:30:59 +0300 Subject: [ofa-general] TakeALookAddtoCartSoftTabs Message-ID: <01c875d4$e37a0380$78706bd4@eventuatelo31> ForValuedCustomerSpecialPricesCertified http://joycearcostu.blogspot.com From pavel at ucw.cz Sat Feb 23 01:43:34 2008 From: pavel at ucw.cz (Pavel Machek) Date: Sat, 23 Feb 2008 10:43:34 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> Message-ID: <20080223094334.GA3081@elf.ucw.cz> On Fri 2008-02-22 23:44:09, Krzysztof Halasa wrote: > Pavel Machek writes: > > > Zaurus is one example, second is small screen where you need big font > > to keep it readable (x60 on desk). > > Come on, are you doing Linux kernel development on PDA? I review patches on it, sometimes, yes. 
Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html From dwrainbowpcmm at rainbowpcm.com Sat Feb 23 01:55:35 2008 From: dwrainbowpcmm at rainbowpcm.com (Gertrude Maddox) Date: Sat, 23 Feb 2008 12:55:35 +0300 Subject: [ofa-general] Enjoy gambling in the most reliable online casino! Message-ID: <01c8761b$6160fd80$a864ac55@dwrainbowpcmm> Golden Gate Casino is one of the best known internet casinos on the web. Just download free software, install it and start playing! Real gaming variety offers you Black Jack, Slots, Roulette Poker and more. We provide 24 hours a day, 7 days a week support and service! Truly fair play guaranteed for players. High level of security! http://geocities.com/norbertosharp647 Simply try and you'll like it! From vlad at lists.openfabrics.org Sat Feb 23 03:05:14 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 23 Feb 2008 03:05:14 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080223-0200 daily build status Message-ID: <20080223110514.EBD17E60A15@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on 
x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.24 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From dwskoleportenm at skoleporten.net Sat Feb 23 04:29:07 2008 From: dwskoleportenm at skoleporten.net (Lynnette Leonard) Date: Sat, 23 Feb 2008 15:29:07 +0300 Subject: [ofa-general] you have nothing to lose, just a lot to gain! 
Message-ID: <01c87630$d428a380$34448d55@dwskoleportenm> Over 700,000 Men around the world are already satisfied with the Quality and Effectiveness of VPXL and you could be also. A new and more sexually powerful man is only a few months away . Your online shopping is safe & secure with us... also very discreet and private with no indication of penis enlargement on the bottle, package or billing receipt. We offer a FULL MONEY BACK GUARANTEE if you are not completely satisfied with the results of VPXL , you have nothing to lose, just a lot to gain ! From davidn at davidnewall.com Sat Feb 23 04:38:58 2008 From: davidn at davidnewall.com (David Newall) Date: Sat, 23 Feb 2008 23:08:58 +1030 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080223094334.GA3081@elf.ucw.cz> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> <20080223094334.GA3081@elf.ucw.cz> Message-ID: <47C013E2.6040201@davidnewall.com> Pavel Machek wrote: > On Fri 2008-02-22 23:44:09, Krzysztof Halasa wrote: > >> Pavel Machek writes: >> >> >>> Zaurus is one example, second is small screen where you need big font >>> to keep it readable (x60 on desk). >>> >> Come on, are you doing Linux kernel development on PDA? >> > > I review patches on it, sometimes, yes. > Do you actually get 80 columns wide on it? From a-annes at a1vacations.com Sat Feb 23 05:52:04 2008 From: a-annes at a1vacations.com (Shanna Donovan) Date: Sat, 23 Feb 2008 21:52:04 +0800 Subject: [ofa-general] I saw your picture Message-ID: <01c87666$53849a00$5200f5de@a-annes> Hello! I am tired tonight. I am nice girl that would like to chat with you. Email me at Malin at TheDoorwayBeyond.info only, because I am using my friend's email to write this. 
Will send some of my pictures From service at hellerbass.com Sat Feb 23 05:52:53 2008 From: service at hellerbass.com (Stewart Driscoll) Date: Sat, 23 Feb 2008 22:52:53 +0900 Subject: [ofa-general] Jetzt bestellen und ein blaues Wunder erleben Message-ID: <01c8766e$d27dd080$3753eb3a@service> Sie leben nur einmal - warum dann nicht was neues ausprobieren? Pr. .. Eise die keine Konk... ..Urrenz kennen - Kein peinlicher Arz t besuch erforderlich - Kein langes Warten - Auslieferung innerhalb von 2-3 Tagen - Kos... Tenlose, arztliche Telefon-Beratung - Bequem und dis kret 0... . N-line! be... .Stellen. - keine versteckte Kos// Ten - Disk rete Verpackung und Zahlung Originalme/ dikamente Ciii .. .aaaa/\aaalis... 10 Pack. 21,00 Euro Viiiiaaaaa. /\aaaaaaagra... 10 Pack. 11,00 Euro Nur fur kurze Zeit - vier Pil. .. len umsonst erhalten http://speechlong.com (bitte warten Sie einen Moment bis die Seite vollstandig geladen ist) -------------- next part -------------- An HTML attachment was scrubbed... URL: From khc at pm.waw.pl Sat Feb 23 05:58:10 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Sat, 23 Feb 2008 14:58:10 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080223094334.GA3081@elf.ucw.cz> (Pavel Machek's message of "Sat\, 23 Feb 2008 10\:43\:34 +0100") References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> <20080223094334.GA3081@elf.ucw.cz> Message-ID: Pavel Machek writes: >> Come on, are you doing Linux kernel development on PDA? > > I review patches on it, sometimes, yes. 
I take it the "sometimes" is the key word :-) -- Krzysztof Halasa From pavel at ucw.cz Sat Feb 23 07:25:22 2008 From: pavel at ucw.cz (Pavel Machek) Date: Sat, 23 Feb 2008 16:25:22 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47C013E2.6040201@davidnewall.com> References: <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> <20080223094334.GA3081@elf.ucw.cz> <47C013E2.6040201@davidnewall.com> Message-ID: <20080223152522.GA4287@ucw.cz> On Sat 2008-02-23 23:08:58, David Newall wrote: > Pavel Machek wrote: > > On Fri 2008-02-22 23:44:09, Krzysztof Halasa wrote: > > > >> Pavel Machek writes: > >> > >> > >>> Zaurus is one example, second is small screen where you need big font > >>> to keep it readable (x60 on desk). > >>> > >> Come on, are you doing Linux kernel development on PDA? Actually, I'd like to. There's a lot to fix on zaurus. Bit corruption while sleeping is high on list, but I guess I should move out of 2.6.16, first. > > I review patches on it, sometimes, yes. > > > > Do you actually get 80 columns wide on it? No, something like 62... but it is usually enough. x60 is about 100 columns wide (big font needed). 
Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html From jengelh at computergmbh.de Sat Feb 23 07:31:35 2008 From: jengelh at computergmbh.de (Jan Engelhardt) Date: Sat, 23 Feb 2008 16:31:35 +0100 (CET) Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <2c0942db0802212237t41da54b4h2aa2b052b7633f53@mail.gmail.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> <2c0942db0802212237t41da54b4h2aa2b052b7633f53@mail.gmail.com> Message-ID: On Feb 21 2008 22:37, Ray Lee wrote: >On Thu, Feb 21, 2008 at 7:13 PM, Linus Torvalds > wrote: >> So I'd be happier with warnings about deep indentation (but how do you >> count it? Will people then try to fake things out by using 4-space indents >> and then "deep" indentations will look like just a couple of tabs?) > >I suspect that 90% of the cases that people really care about would >get caught successfully just by counting brace depth. > >ie, by looking at { { {} {} {{{}{}}} } } I bet you can tell me which >section should have been pulled out into a separate routine. Not only that. By clever branch factoring, you can possibly get yourself rid of lots of deep levels. As in: static void blah(void) { if (foo) { bar; bar2; } else { if (this) { that; that2; } else { bad day; bad day2; } } } xfrmd: static void blah(void) { if (foo) { bar; bar2; return; } if (this) { that; that2; return; } /* yay, got rid of two levels of indent! 
*/ good day; good day2; } From torvalds at linux-foundation.org Sat Feb 23 09:33:16 2008 From: torvalds at linux-foundation.org (Linus Torvalds) Date: Sat, 23 Feb 2008 09:33:16 -0800 (PST) Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47C013E2.6040201@davidnewall.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> <20080223094334.GA3081@elf.ucw.cz> <47C013E2.6040201@davidnewall.com> Message-ID: On Sat, 23 Feb 2008, David Newall wrote: > > Do you actually get 80 columns wide on it? Do people really care that deeply? I still sometimes use small terminal windows - for a while I had my default terminal come up as 100x40, but I'm back to the standard 80x24, and while I often resize them, I certainly don't always. And do I find lines longer than 80 characters unreadable? Hell no. Quite frankly, on a 80x24 display, I'll take long lines over split-up ones *any* day. For things like doing "git grep xyzzy", I'd *much* rather get the occasional long line that wraps (or, if I'm in "less -S", that I have to press right-arrow to see), than see just a meaningless fragment because somebody decided that they should always fit in 80 characters. So *consistently* long lines are the problem, not the occasional one. The occasional one is likely more readable as it is, than split up. Here's an example from code that actually looks pretty good in general: static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, struct load_weight *lw) and look around that function in general: it doesn't match the coding standard, but also compare the output of git grep calc_delta_mine with the output of something like git grep update_load_sub which actually shows you what the calling convention is.
So putting that long function definition on one line would make it a 108-character line or something like that, but it would have advantages too. It would have advantages for anything that is line-based (I use grep for *everything*, but maybe I'm just odd), but it would also actually be more readable for the people who have bigger windows. But my point is, some of those advantages remain even with small terminals, and quite often the downsides aren't even all that huge. Most editors wrap or chop the line according to your preferences (mine are personally to chop), and if it's a fairly uncommon thing, those downsides shrink further. Is 108 characters perhaps *too* long? In the above case it probably is, since the downside of splitting the patch is pretty small (it's a static function, only used in that file, the "grep" argument is weak, yadda yadda). But I'm just saying that it's not 100% obvious *even*if* you're on a 80x24 terminal, and in some other cases the downside of splitting the line can be much bigger (strings or more spread-out function calls and declarations etc). The line length problem would probably be better attacked as something more akin to the rule - do a rolling window of last <n> non-empty lines (n ~ 15 or so) - if more than <m> of those lines were longer than 72 characters, something is wrong (m ~ 5 or so). which talks more about what matters - too deep indentation. And also attacks the problem that is really relevant: it's that kind of code that ends up being unreadable because so *much* of it is cut off or wrapped. Linus From i-peekaboo.com at klimawechsel.com Sat Feb 23 16:33:15 2008 From: i-peekaboo.com at klimawechsel.com (Parker Harris) Date: Sun, 24 Feb 2008 09:33:15 +0900 Subject: [ofa-general] Ssoftware At Loww Prrice Message-ID: <000a01c8767c$7e79e880$0100007f@qpkqmq> Hej, Are you a freequent visitor of rettail softtware sttores?
We know what you're overppaying for: - box manufacturinng - CD/DVD - salespperson salarry - RRent of shopp sspace - Year-to--year increasingg taxes in your counttry Well, what for ?! You're able to downlload everytthing legally NOW! Fabulouus range of softtware and LOW costt will make you smile and ssave your monney! Welcome to http://haleyfioritard.blogspot.com Tis the truth, I am, he agreed Lord, she was going to miss th If you want me to come up to y She was deliberately trying to Iain? Frances Catherine called He nodded. Judith saw her frie From davidn at davidnewall.com Sat Feb 23 19:18:47 2008 From: davidn at davidnewall.com (David Newall) Date: Sun, 24 Feb 2008 13:48:47 +1030 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080223152522.GA4287@ucw.cz> References: <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> <20080223094334.GA3081@elf.ucw.cz> <47C013E2.6040201@davidnewall.com> <20080223152522.GA4287@ucw.cz> Message-ID: <47C0E217.8070801@davidnewall.com> Pavel Machek wrote: > On Sat 2008-02-23 23:08:58, David Newall wrote: > >> Pavel Machek wrote: >> >>> On Fri 2008-02-22 23:44:09, Krzysztof Halasa wrote: >>> >>> >>>> Pavel Machek writes: >>>> >>>> >>>> >>>>> Zaurus is one example, second is small screen where you need big font >>>>> to keep it readable (x60 on desk). >>>>> >>>>> >>>> Come on, are you doing Linux kernel development on PDA? >>>> > > Actually, I'd like to. There's a lot to fix on zaurus. Bit corruption > while sleeping is high on list, but I guess I should move out of > 2.6.16, first. > > >>> I review patches on it, sometimes, yes. >>> >>> >> Do you actually get 80 columns wide on it? >> > > No, something like 62... but it is usually enough. x60 is about 100 > columns wide (big font needed). Then it's a silly example to raise in a serious discussion of this type. 
From davidn at davidnewall.com Sat Feb 23 19:22:36 2008 From: davidn at davidnewall.com (David Newall) Date: Sun, 24 Feb 2008 13:52:36 +1030 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFF1@venom2> <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> <2c0942db0802212237t41da54b4h2aa2b052b7633f53@mail.gmail.com> Message-ID: <47C0E2FC.6040809@davidnewall.com> Jan Engelhardt wrote: > static void blah(void) > { > if (foo) { > bar; > bar2; > return; > } > if (this) { > that; > that2; > return; > } > /* yay, got rid of two levels of indent! */ > good day; > good day2; > } I like this style. It's more readable than the alternative that you showed. If you hate returns mid-procedure, as some purists do, the following is also good: static void blah(void) { if (foo) { bar; bar2; } else if (this) { that; that2; } else { good day; good day2; } } From davidn at davidnewall.com Sat Feb 23 19:26:15 2008 From: davidn at davidnewall.com (David Newall) Date: Sun, 24 Feb 2008 13:56:15 +1030 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> <20080223094334.GA3081@elf.ucw.cz> <47C013E2.6040201@davidnewall.com> Message-ID: <47C0E3D7.50901@davidnewall.com> Linus Torvalds wrote: > On Sat, 23 Feb 2008, David Newall wrote: > >> Do you actually get 80 columns wide on it? >> > > Do people really care that deeply? > ... > And do I find lines longer than 80 charactes unreadable? Hell no. > I care, yes. 
I've found my code looks much prettier, with attendant improvement in ease of understanding, since I stopped being so anal about 80 columns. The width of the code, that is first to last non-blank on each line, is about the same, not because I work to keep it narrow, but because most statements just are narrow. Sometimes I do get really wide statements, for example when using deep data structures, especially as parameters in procedure calls, and this is easier to read than having to break the line. I honestly think the reason we used to insist on lines less than 80 characters was because on an 80 character screen, you get slightly better readability by choosing where to break each line than simply letting the hardware do it. We don't have the physical limit any more, so we don't need to impose it structurally. It's about readability, and with due respect, people who've never tried it aren't qualified to comment. > which talks more about what matters - too deep indentation. What's too deep? Is the following too deep? It's common enough, other than my refusal to relax consistent indenting style for switch bodies. The code is readable, and breaking it into multiple procedures just to de-indent is often impossible, and rarely readable. With a strict 80 character limit, the meat in the sandwich is left with only 20 or so characters in which to fit. Add a nested switch, and there's virtually no space left for code. 123456789012345678901234567890123456789012345678901234567890123456 (70) int procedure(param list) { switch (condition) { case value: if (another_condition) { if (variant) meat_in_sandwich; } else { code; } case value2: switch (sub_condition) { case sub_value: if (final_test) { something( NULL, 1, "two"); } } } } (Yes, I know, "we don't indent 'case' because it consumes too much room." That's inconsistent with the rest of normal indenting style, and a poor excuse to keep within an obsolete and unnecessary restriction.) 
From torvalds at linux-foundation.org Sat Feb 23 20:47:44 2008 From: torvalds at linux-foundation.org (Linus Torvalds) Date: Sat, 23 Feb 2008 20:47:44 -0800 (PST) Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <47C0E3D7.50901@davidnewall.com> References: <5E701717F2B2ED4EA60F87C8AA57B7CC0794FFFF@venom2> <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE0C12.604@garzik.org> <20080222184518.GB6060@ucw.cz> <20080223094334.GA3081@elf.ucw.cz> <47C013E2.6040201@davidnewall.com> <47C0E3D7.50901@davidnewall.com> Message-ID: On Sun, 24 Feb 2008, David Newall wrote: > > > which talks more about what matters - too deep indentation. > > What's too deep? Is the following too deep? It would be, if it weren't artificially so, for violates several kernel coding standards, one being that the "case" labels indent with the switch, not under it (the other being the placement of braces). > (Yes, I know, "we don't indent 'case' because it consumes too much > room." No, that's not it at all. We don't indent 'case' because it matches with the 'switch', not because of any room issues. > That's inconsistent with the rest of normal indenting style, and > a poor excuse to keep within an obsolete and unnecessary restriction.) It's not at all inconsistent. It's just making clear how the parts of the function group together. Indenting a case-statement an extra level is as stupid as indenting "else" one extra level from the "if ()" it goes together with. Do you think that would be sane? The fact that the 'case' thing is technically parsed as a separate statement in C doesn't change anything. 
Linus From patrick.latifi at qlogic.com Sat Feb 23 21:02:05 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Sat, 23 Feb 2008 21:02:05 -0800 Subject: [ofa-general] [PATCH] [DAPL v1] fix reuse of va_list in debugging mode Message-ID: <20080224050204.23945.75341.stgit@b64-10.internal.keyresearch.com> Make sure we reinitialize the va_list since va_list is undefined if a function traverses the va_list with va_arg. This patch fixes the debugging case when both stdout and syslog output is wanted. Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_debug.c | 7 ++++--- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dapl/common/dapl_debug.c b/dapl/common/dapl_debug.c index db1f583..df6c18e 100644 --- a/dapl/common/dapl_debug.c +++ b/dapl/common/dapl_debug.c @@ -42,18 +42,19 @@ void dapl_internal_dbg_log ( DAPL_DBG_TYPE type, const char *fmt, ...) if ( type & g_dapl_dbg_type ) { - va_start (args, fmt); - if ( DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest ) { + va_start (args, fmt); dapl_os_vprintf (fmt, args); + va_end (args); } if ( DAPL_DBG_DEST_SYSLOG & g_dapl_dbg_dest ) { + va_start (args, fmt); dapl_os_syslog(fmt, args); + va_end (args); } - va_end (args); } } From patrick.latifi at qlogic.com Sat Feb 23 21:03:21 2008 From: patrick.latifi at qlogic.com (Patrick Marchand Latifi) Date: Sat, 23 Feb 2008 21:03:21 -0800 Subject: [ofa-general] [PATCH] [DAPL v2] fix reuse of va_list in debugging mode Message-ID: <20080224050321.24057.37757.stgit@b64-10.internal.keyresearch.com> Make sure we reinitialize the va_list since va_list is undefined if a function traverses the va_list with va_arg. This patch fixes the debugging case when both stdout and syslog output is wanted. 
Signed-off-by: Patrick Marchand Latifi --- dapl/common/dapl_debug.c | 7 ++++--- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dapl/common/dapl_debug.c b/dapl/common/dapl_debug.c index f0959c2..9f4fa04 100644 --- a/dapl/common/dapl_debug.c +++ b/dapl/common/dapl_debug.c @@ -42,18 +42,19 @@ void dapl_internal_dbg_log ( DAPL_DBG_TYPE type, const char *fmt, ...) if ( type & g_dapl_dbg_type ) { - va_start (args, fmt); - if ( DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest ) { + va_start (args, fmt); dapl_os_vprintf (fmt, args); + va_end (args); } if ( DAPL_DBG_DEST_SYSLOG & g_dapl_dbg_dest ) { + va_start (args, fmt); dapl_os_syslog(fmt, args); + va_end (args); } - va_end (args); } } From joern at logfs.org Sat Feb 23 23:47:31 2008 From: joern at logfs.org (=?utf-8?B?SsO2cm4=?= Engel) Date: Sun, 24 Feb 2008 08:47:31 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: References: <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> <20080222031315.GF27894@ZenIV.linux.org.uk> Message-ID: <20080224074730.GB31293@lazybastard.org> On Fri, 22 February 2008 23:28:58 +0100, Krzysztof Halasa wrote: > Al Viro writes: > > > IMO the line length overruns make good warnings. Not as in "here's a cheap > > way to get more changesets", but as in "that code might have other problems > > nearby" kind of heuristics. > > Sure, it does. However the human looking at the code is far better at > spotting such problems. Machine-generated warnings are great when the > machine is actually better than human. I strongly disagree. Machine-generated warnings are a great way of quickly locating a large amount of questionable code in an otherwise overwhelming haystack. It doesn't even matter much, which warnings you look for. Almost all code checkers find the same hotspots. But there is a catch. 
If you have an over-eager warning police that "fixes all the warnings", the warnings may be gone, but the very real problems in near vicinity are not. Not to mention new problems introduced by those claimed "fixes". One fun hobby in my last job was to write a new code checker and locate those problem areas hidden behind warning-free code. I had to write a new checker so I would see below the polish of "fixes". The actual problems found by the checker were often trivial and near-meaningless. But those warnings are not the point at all, quite the contrary. The only important thing was "that code might have other problems nearby". Note one scary consequence: code checkers in the wrong hands are actively harmful. Jörn -- One of my most productive days was throwing away 1000 lines of code. -- Ken Thompson. From akstcamericanpmimnsdgs at americanpmi.com Sun Feb 24 02:27:39 2008 From: akstcamericanpmimnsdgs at americanpmi.com (Amalia Estrada) Date: , 24 Feb 2008 11:27:39 +0100 Subject: [ofa-general] happylife Message-ID: <01c876d8$433724c0$c4cd0b53@akstcamericanpmimnsdgs> We carry all kind of meds http://jacquelineseabornqm.blogspot.com From akstcangimnsdgs at angi.de Sun Feb 24 02:27:49 2008 From: akstcangimnsdgs at angi.de (Franklin Blankenship) Date: , 24 Feb 2008 11:27:49 +0100 Subject: [ofa-general] Live Message-ID: <01c876d8$49298350$c4cd0b53@akstcangimnsdgs> we got all kinfd of medicines http://emiliamadsongs.blogspot.com From vlad at lists.openfabrics.org Sun Feb 24 03:05:38 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 24 Feb 2008 03:05:38 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080224-0200 daily build status Message-ID: <20080224110538.CB1F0E6084A@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod 
--with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.24 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on 
ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From a-anthony at a9.com Sun Feb 24 04:21:16 2008 From: a-anthony at a9.com (Maxine Kruse) Date: , 24 Feb 2008 14:21:16 +0200 Subject: [ofa-general] I hope you will reply Message-ID: <01c876f0$84111600$b8cbed58@a-anthony> Hello! I am tired this evening. I am nice girl that would like to chat with you. Email me at Agneta at BestDoorway.info only, because I am using my friend's email to write this. You will see some of my private pics. From sashak at voltaire.com Sun Feb 24 04:14:05 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 24 Feb 2008 12:14:05 +0000 Subject: [ofa-general] Re: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING In-Reply-To: <20080221165655.0227c88c.weiny2@llnl.gov> References: <20080221165655.0227c88c.weiny2@llnl.gov> Message-ID: <20080224121405.GD3116@sashak.voltaire.com> Hi Ira, On 16:56 Thu 21 Feb , Ira Weiny wrote: > From b8fb2151b92ddd4a7d2a4cc2ab38a6b34fffc7ab Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 21 Feb 2008 09:10:10 -0800 > Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING > > > Signed-off-by: Ira K. 
Weiny > --- > opensm/include/vendor/osm_vendor_ibumad.h | 4 ++-- > opensm/libvendor/osm_vendor_ibumad.c | 27 ++++++++++++++++++++++++++- > 2 files changed, 28 insertions(+), 3 deletions(-) > > diff --git a/opensm/include/vendor/osm_vendor_ibumad.h b/opensm/include/vendor/osm_vendor_ibumad.h > index 84fd21a..3a3f070 100644 > --- a/opensm/include/vendor/osm_vendor_ibumad.h > +++ b/opensm/include/vendor/osm_vendor_ibumad.h > @@ -141,12 +141,12 @@ typedef struct _umad_match { > uint32_t version; > } umad_match_t; > > -#define OSM_UMAD_MAX_PENDING 1000 > +#define DEFAULT_OSM_UMAD_MAX_PENDING 1000 > > typedef struct vendor_match_tbl { > - umad_match_t tbl[OSM_UMAD_MAX_PENDING]; > uint32_t last_version; > int max; > + umad_match_t *tbl; > } vendor_match_tbl_t; > > typedef struct _osm_vendor { > diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c > index 679f06a..f847e61 100644 > --- a/opensm/libvendor/osm_vendor_ibumad.c > +++ b/opensm/libvendor/osm_vendor_ibumad.c > @@ -451,6 +451,7 @@ ib_api_status_t > osm_vendor_init(IN osm_vendor_t * const p_vend, > IN osm_log_t * const p_log, IN const uint32_t timeout) > { > + char *max = NULL; > int r, n_cas; > > OSM_LOG_ENTER(p_log); > @@ -480,7 +481,31 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, > } > > p_vend->ca_count = n_cas; > - p_vend->mtbl.max = OSM_UMAD_MAX_PENDING; > + p_vend->mtbl.max = DEFAULT_OSM_UMAD_MAX_PENDING; > + > + if ((max = getenv("OSM_UMAD_MAX_PENDING")) != NULL) { > + int tmp = strtol(max, NULL, 0); > + if (tmp > 0) > + p_vend->mtbl.max = tmp; > + else > + osm_log(p_vend->p_log, OSM_LOG_ERROR, > + "osm_vendor_init: Error:" > + "OSM_UMAD_MAX_PENDING=%d is invalid", > + tmp); > + } > + > + osm_log(p_vend->p_log, OSM_LOG_INFO, > + "osm_vendor_init: %d pending umads specified\n", > + p_vend->mtbl.max); > + > + p_vend->mtbl.tbl = calloc(p_vend->mtbl.max, sizeof(*(p_vend->mtbl.tbl))); There is calloc(), I guess we also need free() somewhere? 
Sasha From francisc at der.com Sun Feb 24 02:25:20 2008 From: francisc at der.com (cordell mildred) Date: Sun, 24 Feb 2008 10:25:20 +0000 Subject: [ofa-general] Re: Message-ID: <000501c876de$01303114$cc7c35a3@axywq> Good News! Interested to obtain Bachelors', Masters', MBA's, Doctorate & Ph.D. degrees available in your field in 2 weeks time? It's available now... Call Us and get yours today +1-786-206-0956 Our Education office has someone available 24 hours a day, 7 Days a week Why waiting? +1-786-206-0956 From sashak at voltaire.com Sun Feb 24 06:15:37 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 24 Feb 2008 14:15:37 +0000 Subject: [ofa-general] [PATCH] opensm: rename OpenSM startup script to opensmd Message-ID: <20080224141537.GI3116@sashak.voltaire.com> Rename OpenSM startup script /etc/init.d/opensm -> /etc/init.d/opensmd. Signed-off-by: Sasha Khapyorsky --- opensm/Makefile.am | 4 ++-- opensm/opensm.spec.in | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/opensm/Makefile.am b/opensm/Makefile.am index 4474493..7e1d95d 100644 --- a/opensm/Makefile.am +++ b/opensm/Makefile.am @@ -17,8 +17,8 @@ else echo "define osm_build_type \"free\"" > $(DESTDIR)/$(includedir)/infiniband/opensm/osm_build_id.h endif $(top_srcdir)/config/install-sh -m 755 -d $(DESTDIR)/$(sysconfdir)/init.d - cp $(top_srcdir)/scripts/opensm.init $(DESTDIR)/$(sysconfdir)/init.d/opensm - chmod 755 $(DESTDIR)/$(sysconfdir)/init.d/opensm + cp $(top_srcdir)/scripts/opensm.init $(DESTDIR)/$(sysconfdir)/init.d/opensmd + chmod 755 $(DESTDIR)/$(sysconfdir)/init.d/opensmd man_MANS = man/opensm.8 man/osmtest.8 diff --git a/opensm/opensm.spec.in b/opensm/opensm.spec.in index ec8bf58..6de6333 100644 --- a/opensm/opensm.spec.in +++ b/opensm/opensm.spec.in @@ -95,7 +95,7 @@ else REDHAT="" fi mkdir -p $etc/{init.d,ofa,logrotate.d} -install -m 755 scripts/${REDHAT}opensm.init $etc/init.d/opensm +install -m 755 scripts/${REDHAT}opensm.init $etc/init.d/opensmd install 
-m 644 scripts/opensm.conf $etc/ofa/opensm.conf install -m 644 scripts/opensm.logrotate $etc/logrotate.d/opensm install -m 755 scripts/sldd.sh $RPM_BUILD_ROOT%{_sbindir}/sldd.sh @@ -105,15 +105,15 @@ rm -rf $RPM_BUILD_ROOT %post if [ $1 = 1 ]; then - /sbin/chkconfig --add opensm + /sbin/chkconfig --add opensmd else - /sbin/service opensm condrestart + /sbin/service opensmd condrestart fi %preun if [ $1 = 0 ]; then - /sbin/service opensm stop - /sbin/chkconfig --del opensm + /sbin/service opensmd stop + /sbin/chkconfig --del opensmd rm -f /var/cache/opensm/* fi @@ -126,7 +126,7 @@ fi %{_sbindir}/osmtest %{_mandir}/man8/* %doc AUTHORS COPYING README -%{_sysconfdir}/init.d/opensm +%{_sysconfdir}/init.d/opensmd %{_sbindir}/sldd.sh %config(noreplace) %{_sysconfdir}/ofa/opensm.conf %config(noreplace) %{_sysconfdir}/logrotate.d/opensm -- 1.5.4.rc2.60.gb2e62 From jinnslbt at vnbride.com Sun Feb 24 06:15:01 2008 From: jinnslbt at vnbride.com (Morgan Powell) Date: , 24 Feb 2008 06:15:01 -0800 Subject: [ofa-general] Office Enterprise 2007 Message-ID: <01c876ac$96696080$88d38c5b@jinnslbt> Microsoft Office Enterprise 2007 includes: • Access 2007 • Communicator 2007 • Excel 2007 • Groove 2007 • InfoPath 2007 • OneNote 2007 • Outlook 2007 • PowerPoint 2007 • Publisher 2007 • Word 2007 http://orafarquhargt.blogspot.com System Requirements • Intel® Pentium® or AMD® 500 MHz processor • Microsoft Windows® XP Professional or Home Edition with Service Pack 2, Windows Server® 2003 with SP1 , Microsoft Windows Vista. • 256 Mb of RAM • 2GB of available hard-disk space. 
• 1024x768 or higher resolution monitor • Some features require Microsoft Windows Desktop Search 3.0, Microsoft Windows Media Player 9.0, Microsoft DirectX 9.0b, Microsoft Active Sync 4.1 From kliteyn at mellanox.co.il Sun Feb 24 06:21:31 2008 From: kliteyn at mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 24 Feb 2008 16:21:31 +0200 Subject: [ofa-general] RE: [PATCH] opensm: rename OpenSM startup script to opensmd In-Reply-To: <20080224141537.GI3116@sashak.voltaire.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C903640F22@mtlexch01.mtl.com> Looks good, thanks. Regards, Yevgeny Kliteynik Mellanox Technologies LTD Tel: +972-4-909-7200 ext: 394 Fax: +972-4-959-3245 P.O. Box 586 Yokneam 20692 ISRAEL -----Original Message----- From: Sasha Khapyorsky [mailto:sashak at voltaire.com] Sent: Sunday, February 24, 2008 4:16 PM To: OpenIB Cc: Hal Rosenstock; Yevgeny Kliteynik; Ira Weiny; Vladimir Sokolovsky; Tziporet Koren Subject: [PATCH] opensm: rename OpenSM startup script to opensmd Rename OpenSM startup script /etc/init.d/opensm -> /etc/init.d/opensmd. 
Signed-off-by: Sasha Khapyorsky --- opensm/Makefile.am | 4 ++-- opensm/opensm.spec.in | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/opensm/Makefile.am b/opensm/Makefile.am index 4474493..7e1d95d 100644 --- a/opensm/Makefile.am +++ b/opensm/Makefile.am @@ -17,8 +17,8 @@ else echo "define osm_build_type \"free\"" > $(DESTDIR)/$(includedir)/infiniband/opensm/osm_build_id.h endif $(top_srcdir)/config/install-sh -m 755 -d $(DESTDIR)/$(sysconfdir)/init.d - cp $(top_srcdir)/scripts/opensm.init $(DESTDIR)/$(sysconfdir)/init.d/opensm - chmod 755 $(DESTDIR)/$(sysconfdir)/init.d/opensm + cp $(top_srcdir)/scripts/opensm.init $(DESTDIR)/$(sysconfdir)/init.d/opensmd + chmod 755 $(DESTDIR)/$(sysconfdir)/init.d/opensmd man_MANS = man/opensm.8 man/osmtest.8 diff --git a/opensm/opensm.spec.in b/opensm/opensm.spec.in index ec8bf58..6de6333 100644 --- a/opensm/opensm.spec.in +++ b/opensm/opensm.spec.in @@ -95,7 +95,7 @@ else REDHAT="" fi mkdir -p $etc/{init.d,ofa,logrotate.d} -install -m 755 scripts/${REDHAT}opensm.init $etc/init.d/opensm +install -m 755 scripts/${REDHAT}opensm.init $etc/init.d/opensmd install -m 644 scripts/opensm.conf $etc/ofa/opensm.conf install -m 644 scripts/opensm.logrotate $etc/logrotate.d/opensm install -m 755 scripts/sldd.sh $RPM_BUILD_ROOT%{_sbindir}/sldd.sh @@ -105,15 +105,15 @@ rm -rf $RPM_BUILD_ROOT %post if [ $1 = 1 ]; then - /sbin/chkconfig --add opensm + /sbin/chkconfig --add opensmd else - /sbin/service opensm condrestart + /sbin/service opensmd condrestart fi %preun if [ $1 = 0 ]; then - /sbin/service opensm stop - /sbin/chkconfig --del opensm + /sbin/service opensmd stop + /sbin/chkconfig --del opensmd rm -f /var/cache/opensm/* fi @@ -126,7 +126,7 @@ fi %{_sbindir}/osmtest %{_mandir}/man8/* %doc AUTHORS COPYING README -%{_sysconfdir}/init.d/opensm +%{_sysconfdir}/init.d/opensmd %{_sbindir}/sldd.sh %config(noreplace) %{_sysconfdir}/ofa/opensm.conf %config(noreplace) %{_sysconfdir}/logrotate.d/opensm -- 
1.5.4.rc2.60.gb2e62 From khc at pm.waw.pl Sun Feb 24 06:47:12 2008 From: khc at pm.waw.pl (Krzysztof Halasa) Date: Sun, 24 Feb 2008 15:47:12 +0100 Subject: [ofa-general] Re: Merging of completely unreviewed drivers In-Reply-To: <20080224074730.GB31293@lazybastard.org> (=?iso-8859-2?Q?=22J?= =?iso-8859-2?Q?=F6rn?= Engel"'s message of "Sun\, 24 Feb 2008 08\:47\:31 +0100") References: <20080221154951.GA28328@cs181133002.pp.htv.fi> <20080221210124.GD28328@cs181133002.pp.htv.fi> <47BE2985.6020305@davidnewall.com> <20080222020615.GE27894@ZenIV.linux.org.uk> <20080222031315.GF27894@ZenIV.linux.org.uk> <20080224074730.GB31293@lazybastard.org> Message-ID: Jörn Engel writes: > I strongly disagree. Machine-generated warnings are a great way of > quickly locating a large amount of questionable code in an otherwise > overwhelming haystack. It doesn't even matter much, which warnings you > look for. Almost all code checkers find the same hotspots. I think you misunderstood. Of course I'm not against warnings in general. I'm rather talking about _authority_ of human vs machine, in this specific ("measuring" code complexity) case. -- Krzysztof Halasa From madridg5344 at ecuatours.com Sun Feb 24 07:30:55 2008 From: madridg5344 at ecuatours.com (Stacy Riddle) Date: , 24 Feb 2008 23:30:55 +0800 Subject: [ofa-general] Microsoft Office 2007 ready to download Message-ID: <01c8773d$4d152180$bbd6a17c@madridg5344> Microsoft Office Enterprise 2007 includes: � Access 2007 � Communicator 2007 � Excel 2007 � Groove 2007 � InfoPath 2007 � OneNote 2007 � Outlook 2007 � PowerPoint 2007 � Publisher 2007 � Word 2007 http://isabellatancredinm.blogspot.com System Requirements � Intel� Pentium� or AMD� 500 MHz processor � Microsoft Windows� XP Professional or Home Edition with Service Pack 2, Windows Server� 2003 with SP1 , Microsoft Windows Vista. � 256 Mb of RAM � 2GB of available hard-disk space. 
• 1024x768 or higher resolution monitor • Some features require Microsoft Windows Desktop Search 3.0, Microsoft Windows Media Player 9.0, Microsoft DirectX 9.0b, Microsoft Active Sync 4.1 From sashak at voltaire.com Sun Feb 24 10:24:34 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 24 Feb 2008 18:24:34 +0000 Subject: [ofa-general] [PATCH] opensm/scripts: rename all opensm scripts as *.in In-Reply-To: <20080224141537.GI3116@sashak.voltaire.com> References: <20080224141537.GI3116@sashak.voltaire.com> Message-ID: <20080224182434.GL3116@sashak.voltaire.com> Rename all OpenSM scripts as *.in, add this to configure's AC_CONFIG_FILES. Signed-off-by: Sasha Khapyorsky --- opensm/configure.in | 2 +- opensm/scripts/opensm.init | 121 --------- opensm/scripts/opensm.init.in | 121 +++++++++ opensm/scripts/opensmd | 466 ---------------------------------- opensm/scripts/opensmd.in | 466 ++++++++++++++++++++++++++++++++++ opensm/scripts/redhat-opensm.init | 382 ---------------------------- opensm/scripts/redhat-opensm.init.in | 382 ++++++++++++++++++++++++++++ opensm/scripts/sldd.sh | 249 ------------------ opensm/scripts/sldd.sh.in | 249 ++++++++++++++++++ 9 files changed, 1219 insertions(+), 1219 deletions(-) delete mode 100644 opensm/scripts/opensm.init create mode 100644 opensm/scripts/opensm.init.in delete mode 100755 opensm/scripts/opensmd create mode 100755 opensm/scripts/opensmd.in delete mode 100755 opensm/scripts/redhat-opensm.init create mode 100755 opensm/scripts/redhat-opensm.init.in delete mode 100755 opensm/scripts/sldd.sh create mode 100755 opensm/scripts/sldd.sh.in diff --git a/opensm/configure.in b/opensm/configure.in index 3663d8d..5d2c267 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -176,7 +176,7 @@ OPENIB_APP_OSMV_CHECK_LIB # overrides. 
CFLAGS=$ac_env_CFLAGS_value -AC_CONFIG_FILES([man/opensm.8]) +AC_CONFIG_FILES([man/opensm.8 scripts/opensm.init scripts/redhat-opensm.init scripts/opensmd scripts/sldd.sh]) dnl Create the following Makefiles AC_OUTPUT([include/opensm/osm_version.h Makefile include/Makefile complib/Makefile libvendor/Makefile opensm/Makefile osmeventplugin/Makefile osmtest/Makefile opensm.spec]) diff --git a/opensm/scripts/opensm.init b/opensm/scripts/opensm.init deleted file mode 100644 index d717279..0000000 --- a/opensm/scripts/opensm.init +++ /dev/null @@ -1,121 +0,0 @@ -#!/bin/bash -# -# opensm: Manage OpenSM -# -# chkconfig: - 09 91 -# description: Manage OpenSM -# -### BEGIN INIT INFO -# Provides: opensm -# Required-Start: $syslog -# Default-Start: 2 3 5 -# Default-Stop: 0 1 6 -# Description: Manage OpenSM -### END INIT INFO -# -# Copyright 2006 PathScale, Inc. All Rights Reserved. -# -# This Software is licensed under one of the following licenses: -# -# 1) under the terms of the "Common Public License 1.0" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/cpl.php. -# -# 2) under the terms of the "The BSD License" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/bsd-license.php. -# -# 3) under the terms of the "GNU General Public License (GPL) Version 2" a -# copy of which is available from the Open Source Initiative, see -# http://www.opensource.org/licenses/gpl-license.php. -# -# Licensee has the right to choose one of the above licenses. -# -# Redistributions of source code must retain the above copyright -# notice and one of the license notices. -# -# Redistributions in binary form must reproduce both the above copyright -# notice, one of the license notices in the documentation -# and/or other materials provided with the distribution. - -# Source function library. -if [[ -s /etc/init.d/functions ]]; then - . 
/etc/init.d/functions - rc_status() { :; } - rc_exit() { exit $RETVAL; } -fi -if [[ -s /etc/rc.status ]]; then - . /etc/rc.status - failure() { rc_status -v; } - success() { rc_status -v; } -fi -if [[ -s /etc/sysconfig/opensm ]]; then - . /etc/sysconfig/opensm -fi - -start () { - echo -n "Starting opensm: " - /usr/sbin/opensm -B $OPTIONS > /dev/null - if [[ $RETVAL -eq 0 ]]; then - touch /var/lock/subsys/opensm - success - else - failure - fi - echo -} - -stop () { - echo -n "Shutting down opensm: " - killproc opensm - if [[ $RETVAL -eq 0 ]]; then - rm -f /var/lock/subsys/opensm - success - else - failure - fi - echo -} - -Xstatus () { - status opensm -} - -restart() { - stop - start -} - -# See how we were called. -case "$1" in - start) - start - ;; - stop) - stop - ;; - status) - Xstatus - ;; - restart | force-reload | reload) - restart - ;; - try-restart | condrestart) - [ -e /var/lock/subsys/opensm ] && restart - ;; - resweep) - killall -HUP opensm - RETVAL=$? - ;; - rotatelog) - killall -USR1 opensm - RETVAL=$? - ;; - *) - echo $"Usage: $0 {start|stop|status|restart|reload|condrestart|resweep|rotatelog}" - RETVAL=1 - ;; -esac - -_rc_status_all=$RETVAL -rc_exit diff --git a/opensm/scripts/opensm.init.in b/opensm/scripts/opensm.init.in new file mode 100644 index 0000000..d717279 --- /dev/null +++ b/opensm/scripts/opensm.init.in @@ -0,0 +1,121 @@ +#!/bin/bash +# +# opensm: Manage OpenSM +# +# chkconfig: - 09 91 +# description: Manage OpenSM +# +### BEGIN INIT INFO +# Provides: opensm +# Required-Start: $syslog +# Default-Start: 2 3 5 +# Default-Stop: 0 1 6 +# Description: Manage OpenSM +### END INIT INFO +# +# Copyright 2006 PathScale, Inc. All Rights Reserved. +# +# This Software is licensed under one of the following licenses: +# +# 1) under the terms of the "Common Public License 1.0" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/cpl.php. 
+# +# 2) under the terms of the "The BSD License" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/bsd-license.php. +# +# 3) under the terms of the "GNU General Public License (GPL) Version 2" a +# copy of which is available from the Open Source Initiative, see +# http://www.opensource.org/licenses/gpl-license.php. +# +# Licensee has the right to choose one of the above licenses. +# +# Redistributions of source code must retain the above copyright +# notice and one of the license notices. +# +# Redistributions in binary form must reproduce both the above copyright +# notice, one of the license notices in the documentation +# and/or other materials provided with the distribution. + +# Source function library. +if [[ -s /etc/init.d/functions ]]; then + . /etc/init.d/functions + rc_status() { :; } + rc_exit() { exit $RETVAL; } +fi +if [[ -s /etc/rc.status ]]; then + . /etc/rc.status + failure() { rc_status -v; } + success() { rc_status -v; } +fi +if [[ -s /etc/sysconfig/opensm ]]; then + . /etc/sysconfig/opensm +fi + +start () { + echo -n "Starting opensm: " + /usr/sbin/opensm -B $OPTIONS > /dev/null + if [[ $RETVAL -eq 0 ]]; then + touch /var/lock/subsys/opensm + success + else + failure + fi + echo +} + +stop () { + echo -n "Shutting down opensm: " + killproc opensm + if [[ $RETVAL -eq 0 ]]; then + rm -f /var/lock/subsys/opensm + success + else + failure + fi + echo +} + +Xstatus () { + status opensm +} + +restart() { + stop + start +} + +# See how we were called. +case "$1" in + start) + start + ;; + stop) + stop + ;; + status) + Xstatus + ;; + restart | force-reload | reload) + restart + ;; + try-restart | condrestart) + [ -e /var/lock/subsys/opensm ] && restart + ;; + resweep) + killall -HUP opensm + RETVAL=$? + ;; + rotatelog) + killall -USR1 opensm + RETVAL=$? 
+ ;; + *) + echo $"Usage: $0 {start|stop|status|restart|reload|condrestart|resweep|rotatelog}" + RETVAL=1 + ;; +esac + +_rc_status_all=$RETVAL +rc_exit diff --git a/opensm/scripts/opensmd b/opensm/scripts/opensmd deleted file mode 100755 index 848d1cd..0000000 --- a/opensm/scripts/opensmd +++ /dev/null @@ -1,466 +0,0 @@ -#!/bin/bash - -# -# Copyright (c) 2006 Mellanox Technologies. All rights reserved. -# -# This Software is licensed under one of the following licenses: -# -# 1) under the terms of the "Common Public License 1.0" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/cpl.php. -# -# 2) under the terms of the "The BSD License" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/bsd-license.php. -# -# 3) under the terms of the "GNU General Public License (GPL) Version 2" a -# copy of which is available from the Open Source Initiative, see -# http://www.opensource.org/licenses/gpl-license.php. -# -# Licensee has the right to choose one of the above licenses. -# -# Redistributions of source code must retain the above copyright -# notice and one of the license notices. -# -# Redistributions in binary form must reproduce both the above copyright -# notice, one of the license notices in the documentation -# and/or other materials provided with the distribution. -# -# -# processname: /usr/sbin/opensm -# config: /etc/opensm.conf -# pidfile: /var/run/opensm.pid - -if [ ! -f /etc/opensm.conf ]; then - exit 0 -fi - -. /etc/opensm.conf - -CONFIG=/etc/opensm.conf - -prog=/usr/sbin/opensm -bin=${prog##*/} - -# Handover daemon for updating guid2lid cache file -sldd_prog=/usr/sbin/sldd.sh -sldd_bin=${sldd_prog##*/} -sldd_pid_file=/var/run/sldd.pid - -# Only use ONBOOT option if called by a runlevel directory. -# Therefore determine the base, follow a runlevel link name ... -base=${0##*/} -link=${base#*[SK][0-9][0-9]} -# ... 
and compare them -if [ $link == $base ] ; then - ONBOOT=yes -fi - -ACTION=$1 -shift - -if [ ! -x $prog ]; then - echo "OpenSM not installed" - exit 1 -fi - -# Check if OpenSM configured to start automatically -if [[ -z $ONBOOT || "$ONBOOT" != "yes" ]]; then - exit 0 -fi - -if ( grep -i 'SuSE Linux' /etc/issue >/dev/null 2>&1 ); then - if [ -n "$INIT_VERSION" ] ; then - # MODE=onboot - if LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" ${CONFIG} > /dev/null ; then - exit 0 - fi - fi -fi - -if [ -f /etc/init.d/functions ]; then - . /etc/init.d/functions -fi - -# Setting OpenSM start parameters -PID_FILE=/var/run/${bin}.pid -touch $PID_FILE - -if [[ -z $DEBUG || "$DEBUG" == "none" ]]; then - DEBUG_FLAG="" -else - DEBUG_FLAG="-d ${DEBUG}" -fi - -if [[ -z $LMC || "$LMC" == "0" ]]; then - LMC_FLAG="" -else - LMC_FLAG="-l ${LMC}" -fi - -if [[ -z $MAXSMPS || "$MAXSMPS" == "4" ]]; then - MAXSMPS_FLAG="" -else - MAXSMPS_FLAG="-maxsmps ${MAXSMPS}" -fi - -if [[ -z $REASSIGN_LIDS || "$REASSIGN_LIDS" == "no" ]]; then - REASSIGN_LIDS_FLAG="" -else - REASSIGN_LIDS_FLAG="-r" -fi - -if [[ -z $SWEEP || "$SWEEP" == "10" ]]; then - SWEEP_FLAG="" -else - SWEEP_FLAG="-s ${SWEEP}" -fi - -if [[ -z $TIMEOUT || "$TIMEOUT" == "100" ]]; then - TIMEOUT_FLAG="" -else - TIMEOUT_FLAG="-t ${TIMEOUT}" -fi - -if [[ -z $OSM_LOG || "$OSM_LOG" == "/var/log/osm.log" ]]; then - OSM_LOG_FLAG="" -else - OSM_LOG_FLAG="-f ${OSM_LOG}" -fi - -if [[ -z $VERBOSE || "$VERBOSE" == "none" ]]; then - VERBOSE_FLAG="" -else - VERBOSE_FLAG="${VERBOSE}" -fi - -if [[ -z $UPDN || "$UPDN" == "off" ]]; then - UPDN_FLAG="" -else - UPDN_FLAG="-u" -fi - -if [[ -z $GUID_FILE || "$GUID_FILE" == "none" ]]; then - GUID_FILE_FLAG="" -else - GUID_FILE_FLAG="-a ${GUID_FILE}" -fi - -if [[ -z $GUID || "$GUID" == "none" ]]; then - GUID_FLAG="" -else - GUID_FLAG="-g ${GUID}" -fi - -if [[ -z $HONORE_GUID2LID || "$HONORE_GUID2LID" == "none" ]]; then - HONORE_GUID2LID_FLAG="" -else - HONORE_GUID2LID_FLAG="--honor_guid2lid" -fi - -if [[ -n 
"${OSM_HOSTS}" && $(echo -n ${OSM_HOSTS} | wc -w | tr -d '[:space:]') -gt 1 ]]; then - HONORE_GUID2LID_FLAG="--honor_guid2lid" -fi - - -if [[ -z $CACHE_OPTIONS || "$CACHE_OPTIONS" == "none" ]]; then - CACHE_OPTIONS_FLAG="" -else - CACHE_OPTIONS_FLAG="--cache-options" -fi - - -if [ -z $PORT_NUM ]; then - PORT_FLAG=1 -else - PORT_FLAG="${PORT_NUM}" -fi - - -######################################################################### -# Get a sane screen width -[ -z "${COLUMNS:-}" ] && COLUMNS=80 - -[ -z "${CONSOLETYPE:-}" ] && [ -x /sbin/consoletype ] && CONSOLETYPE="`/sbin/consoletype`" - -if [ -f /etc/sysconfig/i18n -a -z "${NOLOCALE:-}" ] ; then - . /etc/sysconfig/i18n - if [ "$CONSOLETYPE" != "pty" ]; then - case "${LANG:-}" in - ja_JP*|ko_KR*|zh_CN*|zh_TW*) - export LC_MESSAGES=en_US - ;; - *) - export LANG - ;; - esac - else - export LANG - fi -fi - -# Read in our configuration -if [ -z "${BOOTUP:-}" ]; then - if [ -f /etc/sysconfig/init ]; then - . /etc/sysconfig/init - else - # This all seem confusing? 
Look in /etc/sysconfig/init, - # or in /usr/doc/initscripts-*/sysconfig.txt - BOOTUP=color - RES_COL=60 - MOVE_TO_COL="echo -en \\033[${RES_COL}G" - SETCOLOR_SUCCESS="echo -en \\033[1;32m" - SETCOLOR_FAILURE="echo -en \\033[1;31m" - SETCOLOR_WARNING="echo -en \\033[1;33m" - SETCOLOR_NORMAL="echo -en \\033[0;39m" - LOGLEVEL=1 - fi - if [ "$CONSOLETYPE" = "serial" ]; then - BOOTUP=serial - MOVE_TO_COL= - SETCOLOR_SUCCESS= - SETCOLOR_FAILURE= - SETCOLOR_WARNING= - SETCOLOR_NORMAL= - fi -fi - -if [ "${BOOTUP:-}" != "verbose" ]; then - INITLOG_ARGS="-q" -else - INITLOG_ARGS= -fi - -echo_success() { - echo -n $@ - [ "$BOOTUP" = "color" ] && $MOVE_TO_COL - echo -n "[ " - [ "$BOOTUP" = "color" ] && $SETCOLOR_SUCCESS - echo -n $"OK" - [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL - echo -n " ]" - echo -e "\r" - return 0 -} - -echo_failure() { - echo -n $@ - [ "$BOOTUP" = "color" ] && $MOVE_TO_COL - echo -n "[" - [ "$BOOTUP" = "color" ] && $SETCOLOR_FAILURE - echo -n $"FAILED" - [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL - echo -n "]" - echo -e "\r" - return 1 -} - - -######################################################################### - -# Check if $pid (could be plural) are running -checkpid() { - local i - - for i in $* ; do - [ -d "/proc/$i" ] || return 1 - done - return 0 -} - -start_sldd() -{ - if [ -f $sldd_pid_file ]; then - local line p - read line < $sldd_pid_file - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" - done - fi - - if [ -z "$sldd_pid" ]; then - sldd_pid=`pidof -x $sldd_bin` - fi - - if [ -n "${sldd_pid:-}" ] ; then - kill -9 ${sldd_pid} > /dev/null 2>&1 - fi - - $sldd_prog > /dev/null 2>&1 & - sldd_pid=$! 
- - echo ${sldd_pid} > $sldd_pid_file - # Sleep is needed in order to update local gid2lid cache file before running opensm - sleep 3 -} - -stop_sldd() -{ - if [ -f $sldd_pid_file ]; then - local line p - read line < $sldd_pid_file - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" - done - fi - - if [ -z "$sldd_pid" ]; then - sldd_pid=`pidof -x $sldd_bin` - fi - - if [ -n "${sldd_pid:-}" ] ; then - kill -15 ${sldd_pid} > /dev/null 2>&1 - fi - -} - -start() -{ - if [ ! -d /sys/class/infiniband ]; then - echo - echo "Please load Infiniband driver first" - echo - return 2 - fi - - local OSM_PID= - - if [ -f $PID_FILE ]; then - local line p - read line < $PID_FILE - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid="$pid $p" - done - fi - - if [ -z "$pid" ]; then - pid=`pidof -o $$ -o $PPID -o %PPID -x $bin` - fi - - if [ -n "${pid:-}" ] ; then - echo $"${bin} (pid $pid) is already running..." - else - - if [ -n "${HONORE_GUID2LID_FLAG}" ]; then - # Run sldd daemod - start_sldd - fi - - # Start opensm - local START_FLAGS="" - for flag in "$DEBUG_FLAG" "$LMC_FLAG" "$MAXSMPS_FLAG" "$REASSIGN_LIDS_FLAG" "$SWEEP_FLAG" "$TIMEOUT_FLAG" "$OSM_LOG_FLAG" "$VERBOSE_FLAG" "$UPDN_FLAG" "$GUID_FILE_FLAG" "$GUID_FLAG" "$HONORE_GUID2LID_FLAG" "$CACHE_OPTIONS_FLAG" - do - [ ! -z "$flag" ] && START_FLAGS="$START_FLAGS $flag" - done - - echo $PORT_FLAG | $prog $START_FLAGS > /dev/null 2>&1 & - OSM_PID=$! - echo $OSM_PID > $PID_FILE - sleep 1 - checkpid $OSM_PID - RC=$? 
- [ $RC -eq 0 ] && echo_success "$bin start" || echo_failure "$bin start" - - fi -return $RC -} - -stop() -{ - local pid= - local pid1= - local pid2= - - # Stop sldd daemon - stop_sldd - - if [ -f $PID_FILE ]; then - local line p - read line < $PID_FILE - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid1="$pid1 $p" - done - fi - - pid2=`pidof -o $$ -o $PPID -o %PPID -x $bin` - - pid=`echo "$pid1 $pid2" | sed -e 's/\ /\n/g' | sort -n | uniq | sed -e 's/\n/\ /g'` - - if [ -n "${pid:-}" ] ; then - # Kill opensm - kill -15 $pid > /dev/null 2>&1 - cnt=0 - while [ $cnt -lt 6 ]; do echo -n "."; sleep 1; let cnt++;done - - for p in $pid - do - while checkpid $p ; do - kill -KILL $p > /dev/null 2>&1 - echo -n "." - sleep 1 - done - done - echo - checkpid $pid - RC=$? - [ $RC -eq 0 ] && echo_failure "$bin shutdown" || echo_success "$bin shutdown" - RC=$((! $RC)) - else - echo_failure "$bin shutdown" - RC=1 - fi - - # Remove pid file if any. - rm -f $PID_FILE -return $RC -} - -status() -{ - local pid - - # First try "pidof" - pid=`pidof -o $$ -o $PPID -o %PPID -x ${bin}` - if [ -n "$pid" ]; then - echo $"${bin} (pid $pid) is running..." - return 0 - fi - - # Next try "/var/run/opensm.pid" files - if [ -f $PID_FILE ] ; then - read pid < $PID_FILE - if [ -n "$pid" ]; then - echo $"${bin} dead but pid file $PID_FILE exists" - return 1 - fi - fi - echo $"${bin} is stopped" - return 3 -} - - - -case $ACTION in - start) - start - ;; - stop) - stop - ;; - restart) - stop - start - ;; - status) - status - ;; - *) - echo - echo "Usage: `basename $0` {start|stop|restart|status}" - echo - exit 1 - ;; -esac - -RC=$? -exit $RC diff --git a/opensm/scripts/opensmd.in b/opensm/scripts/opensmd.in new file mode 100755 index 0000000..848d1cd --- /dev/null +++ b/opensm/scripts/opensmd.in @@ -0,0 +1,466 @@ +#!/bin/bash + +# +# Copyright (c) 2006 Mellanox Technologies. All rights reserved. 
+# +# This Software is licensed under one of the following licenses: +# +# 1) under the terms of the "Common Public License 1.0" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/cpl.php. +# +# 2) under the terms of the "The BSD License" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/bsd-license.php. +# +# 3) under the terms of the "GNU General Public License (GPL) Version 2" a +# copy of which is available from the Open Source Initiative, see +# http://www.opensource.org/licenses/gpl-license.php. +# +# Licensee has the right to choose one of the above licenses. +# +# Redistributions of source code must retain the above copyright +# notice and one of the license notices. +# +# Redistributions in binary form must reproduce both the above copyright +# notice, one of the license notices in the documentation +# and/or other materials provided with the distribution. +# +# +# processname: /usr/sbin/opensm +# config: /etc/opensm.conf +# pidfile: /var/run/opensm.pid + +if [ ! -f /etc/opensm.conf ]; then + exit 0 +fi + +. /etc/opensm.conf + +CONFIG=/etc/opensm.conf + +prog=/usr/sbin/opensm +bin=${prog##*/} + +# Handover daemon for updating guid2lid cache file +sldd_prog=/usr/sbin/sldd.sh +sldd_bin=${sldd_prog##*/} +sldd_pid_file=/var/run/sldd.pid + +# Only use ONBOOT option if called by a runlevel directory. +# Therefore determine the base, follow a runlevel link name ... +base=${0##*/} +link=${base#*[SK][0-9][0-9]} +# ... and compare them +if [ $link == $base ] ; then + ONBOOT=yes +fi + +ACTION=$1 +shift + +if [ ! -x $prog ]; then + echo "OpenSM not installed" + exit 1 +fi + +# Check if OpenSM configured to start automatically +if [[ -z $ONBOOT || "$ONBOOT" != "yes" ]]; then + exit 0 +fi + +if ( grep -i 'SuSE Linux' /etc/issue >/dev/null 2>&1 ); then + if [ -n "$INIT_VERSION" ] ; then + # MODE=onboot + if LANG=C egrep -L "^ONBOOT=['\"]?[Nn][Oo]['\"]?" 
${CONFIG} > /dev/null ; then + exit 0 + fi + fi +fi + +if [ -f /etc/init.d/functions ]; then + . /etc/init.d/functions +fi + +# Setting OpenSM start parameters +PID_FILE=/var/run/${bin}.pid +touch $PID_FILE + +if [[ -z $DEBUG || "$DEBUG" == "none" ]]; then + DEBUG_FLAG="" +else + DEBUG_FLAG="-d ${DEBUG}" +fi + +if [[ -z $LMC || "$LMC" == "0" ]]; then + LMC_FLAG="" +else + LMC_FLAG="-l ${LMC}" +fi + +if [[ -z $MAXSMPS || "$MAXSMPS" == "4" ]]; then + MAXSMPS_FLAG="" +else + MAXSMPS_FLAG="-maxsmps ${MAXSMPS}" +fi + +if [[ -z $REASSIGN_LIDS || "$REASSIGN_LIDS" == "no" ]]; then + REASSIGN_LIDS_FLAG="" +else + REASSIGN_LIDS_FLAG="-r" +fi + +if [[ -z $SWEEP || "$SWEEP" == "10" ]]; then + SWEEP_FLAG="" +else + SWEEP_FLAG="-s ${SWEEP}" +fi + +if [[ -z $TIMEOUT || "$TIMEOUT" == "100" ]]; then + TIMEOUT_FLAG="" +else + TIMEOUT_FLAG="-t ${TIMEOUT}" +fi + +if [[ -z $OSM_LOG || "$OSM_LOG" == "/var/log/osm.log" ]]; then + OSM_LOG_FLAG="" +else + OSM_LOG_FLAG="-f ${OSM_LOG}" +fi + +if [[ -z $VERBOSE || "$VERBOSE" == "none" ]]; then + VERBOSE_FLAG="" +else + VERBOSE_FLAG="${VERBOSE}" +fi + +if [[ -z $UPDN || "$UPDN" == "off" ]]; then + UPDN_FLAG="" +else + UPDN_FLAG="-u" +fi + +if [[ -z $GUID_FILE || "$GUID_FILE" == "none" ]]; then + GUID_FILE_FLAG="" +else + GUID_FILE_FLAG="-a ${GUID_FILE}" +fi + +if [[ -z $GUID || "$GUID" == "none" ]]; then + GUID_FLAG="" +else + GUID_FLAG="-g ${GUID}" +fi + +if [[ -z $HONORE_GUID2LID || "$HONORE_GUID2LID" == "none" ]]; then + HONORE_GUID2LID_FLAG="" +else + HONORE_GUID2LID_FLAG="--honor_guid2lid" +fi + +if [[ -n "${OSM_HOSTS}" && $(echo -n ${OSM_HOSTS} | wc -w | tr -d '[:space:]') -gt 1 ]]; then + HONORE_GUID2LID_FLAG="--honor_guid2lid" +fi + + +if [[ -z $CACHE_OPTIONS || "$CACHE_OPTIONS" == "none" ]]; then + CACHE_OPTIONS_FLAG="" +else + CACHE_OPTIONS_FLAG="--cache-options" +fi + + +if [ -z $PORT_NUM ]; then + PORT_FLAG=1 +else + PORT_FLAG="${PORT_NUM}" +fi + + +######################################################################### +# Get a 
sane screen width +[ -z "${COLUMNS:-}" ] && COLUMNS=80 + +[ -z "${CONSOLETYPE:-}" ] && [ -x /sbin/consoletype ] && CONSOLETYPE="`/sbin/consoletype`" + +if [ -f /etc/sysconfig/i18n -a -z "${NOLOCALE:-}" ] ; then + . /etc/sysconfig/i18n + if [ "$CONSOLETYPE" != "pty" ]; then + case "${LANG:-}" in + ja_JP*|ko_KR*|zh_CN*|zh_TW*) + export LC_MESSAGES=en_US + ;; + *) + export LANG + ;; + esac + else + export LANG + fi +fi + +# Read in our configuration +if [ -z "${BOOTUP:-}" ]; then + if [ -f /etc/sysconfig/init ]; then + . /etc/sysconfig/init + else + # This all seem confusing? Look in /etc/sysconfig/init, + # or in /usr/doc/initscripts-*/sysconfig.txt + BOOTUP=color + RES_COL=60 + MOVE_TO_COL="echo -en \\033[${RES_COL}G" + SETCOLOR_SUCCESS="echo -en \\033[1;32m" + SETCOLOR_FAILURE="echo -en \\033[1;31m" + SETCOLOR_WARNING="echo -en \\033[1;33m" + SETCOLOR_NORMAL="echo -en \\033[0;39m" + LOGLEVEL=1 + fi + if [ "$CONSOLETYPE" = "serial" ]; then + BOOTUP=serial + MOVE_TO_COL= + SETCOLOR_SUCCESS= + SETCOLOR_FAILURE= + SETCOLOR_WARNING= + SETCOLOR_NORMAL= + fi +fi + +if [ "${BOOTUP:-}" != "verbose" ]; then + INITLOG_ARGS="-q" +else + INITLOG_ARGS= +fi + +echo_success() { + echo -n $@ + [ "$BOOTUP" = "color" ] && $MOVE_TO_COL + echo -n "[ " + [ "$BOOTUP" = "color" ] && $SETCOLOR_SUCCESS + echo -n $"OK" + [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL + echo -n " ]" + echo -e "\r" + return 0 +} + +echo_failure() { + echo -n $@ + [ "$BOOTUP" = "color" ] && $MOVE_TO_COL + echo -n "[" + [ "$BOOTUP" = "color" ] && $SETCOLOR_FAILURE + echo -n $"FAILED" + [ "$BOOTUP" = "color" ] && $SETCOLOR_NORMAL + echo -n "]" + echo -e "\r" + return 1 +} + + +######################################################################### + +# Check if $pid (could be plural) are running +checkpid() { + local i + + for i in $* ; do + [ -d "/proc/$i" ] || return 1 + done + return 0 +} + +start_sldd() +{ + if [ -f $sldd_pid_file ]; then + local line p + read line < $sldd_pid_file + for p in $line ; do + [ -z 
"${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" + done + fi + + if [ -z "$sldd_pid" ]; then + sldd_pid=`pidof -x $sldd_bin` + fi + + if [ -n "${sldd_pid:-}" ] ; then + kill -9 ${sldd_pid} > /dev/null 2>&1 + fi + + $sldd_prog > /dev/null 2>&1 & + sldd_pid=$! + + echo ${sldd_pid} > $sldd_pid_file + # Sleep is needed in order to update local gid2lid cache file before running opensm + sleep 3 +} + +stop_sldd() +{ + if [ -f $sldd_pid_file ]; then + local line p + read line < $sldd_pid_file + for p in $line ; do + [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" + done + fi + + if [ -z "$sldd_pid" ]; then + sldd_pid=`pidof -x $sldd_bin` + fi + + if [ -n "${sldd_pid:-}" ] ; then + kill -15 ${sldd_pid} > /dev/null 2>&1 + fi + +} + +start() +{ + if [ ! -d /sys/class/infiniband ]; then + echo + echo "Please load Infiniband driver first" + echo + return 2 + fi + + local OSM_PID= + + if [ -f $PID_FILE ]; then + local line p + read line < $PID_FILE + for p in $line ; do + [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid="$pid $p" + done + fi + + if [ -z "$pid" ]; then + pid=`pidof -o $$ -o $PPID -o %PPID -x $bin` + fi + + if [ -n "${pid:-}" ] ; then + echo $"${bin} (pid $pid) is already running..." + else + + if [ -n "${HONORE_GUID2LID_FLAG}" ]; then + # Run sldd daemod + start_sldd + fi + + # Start opensm + local START_FLAGS="" + for flag in "$DEBUG_FLAG" "$LMC_FLAG" "$MAXSMPS_FLAG" "$REASSIGN_LIDS_FLAG" "$SWEEP_FLAG" "$TIMEOUT_FLAG" "$OSM_LOG_FLAG" "$VERBOSE_FLAG" "$UPDN_FLAG" "$GUID_FILE_FLAG" "$GUID_FLAG" "$HONORE_GUID2LID_FLAG" "$CACHE_OPTIONS_FLAG" + do + [ ! -z "$flag" ] && START_FLAGS="$START_FLAGS $flag" + done + + echo $PORT_FLAG | $prog $START_FLAGS > /dev/null 2>&1 & + OSM_PID=$! + echo $OSM_PID > $PID_FILE + sleep 1 + checkpid $OSM_PID + RC=$? 
+ [ $RC -eq 0 ] && echo_success "$bin start" || echo_failure "$bin start" + + fi +return $RC +} + +stop() +{ + local pid= + local pid1= + local pid2= + + # Stop sldd daemon + stop_sldd + + if [ -f $PID_FILE ]; then + local line p + read line < $PID_FILE + for p in $line ; do + [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid1="$pid1 $p" + done + fi + + pid2=`pidof -o $$ -o $PPID -o %PPID -x $bin` + + pid=`echo "$pid1 $pid2" | sed -e 's/\ /\n/g' | sort -n | uniq | sed -e 's/\n/\ /g'` + + if [ -n "${pid:-}" ] ; then + # Kill opensm + kill -15 $pid > /dev/null 2>&1 + cnt=0 + while [ $cnt -lt 6 ]; do echo -n "."; sleep 1; let cnt++;done + + for p in $pid + do + while checkpid $p ; do + kill -KILL $p > /dev/null 2>&1 + echo -n "." + sleep 1 + done + done + echo + checkpid $pid + RC=$? + [ $RC -eq 0 ] && echo_failure "$bin shutdown" || echo_success "$bin shutdown" + RC=$((! $RC)) + else + echo_failure "$bin shutdown" + RC=1 + fi + + # Remove pid file if any. + rm -f $PID_FILE +return $RC +} + +status() +{ + local pid + + # First try "pidof" + pid=`pidof -o $$ -o $PPID -o %PPID -x ${bin}` + if [ -n "$pid" ]; then + echo $"${bin} (pid $pid) is running..." + return 0 + fi + + # Next try "/var/run/opensm.pid" files + if [ -f $PID_FILE ] ; then + read pid < $PID_FILE + if [ -n "$pid" ]; then + echo $"${bin} dead but pid file $PID_FILE exists" + return 1 + fi + fi + echo $"${bin} is stopped" + return 3 +} + + + +case $ACTION in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + status) + status + ;; + *) + echo + echo "Usage: `basename $0` {start|stop|restart|status}" + echo + exit 1 + ;; +esac + +RC=$? 
+exit $RC diff --git a/opensm/scripts/redhat-opensm.init b/opensm/scripts/redhat-opensm.init deleted file mode 100755 index 3e00403..0000000 --- a/opensm/scripts/redhat-opensm.init +++ /dev/null @@ -1,382 +0,0 @@ -#!/bin/bash -# -# Bring up/down opensm -# -# chkconfig: - 15 85 -# description: Activates/Deactivates InfiniBand Subnet Manager -# -### BEGIN INIT INFO -# Provides: opensm -### END INIT INFO -# -# Copyright (c) 2006 Mellanox Technologies. All rights reserved. -# -# This Software is licensed under one of the following licenses: -# -# 1) under the terms of the "Common Public License 1.0" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/cpl.php. -# -# 2) under the terms of the "The BSD License" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/bsd-license.php. -# -# 3) under the terms of the "GNU General Public License (GPL) Version 2" a -# copy of which is available from the Open Source Initiative, see -# http://www.opensource.org/licenses/gpl-license.php. -# -# Licensee has the right to choose one of the above licenses. -# -# Redistributions of source code must retain the above copyright -# notice and one of the license notices. -# -# Redistributions in binary form must reproduce both the above copyright -# notice, one of the license notices in the documentation -# and/or other materials provided with the distribution. -# -# -# $Id: openib-1.0-opensm.init,v 1.5 2006/08/02 18:18:23 dledford Exp $ -# -# processname: /usr/sbin/opensm -# config: /etc/ofa/opensm.conf -# pidfile: /var/run/opensm.pid -. /etc/rc.d/init.d/functions - -CONFIG=/etc/ofa/opensm.conf -if [ ! -f $CONFIG ]; then - exit 0 -fi - -. 
$CONFIG - -prog=/usr/sbin/opensm -bin=${prog##*/} - -# Handover daemon for updating guid2lid cache file -sldd_prog=/usr/sbin/sldd.sh -sldd_bin=${sldd_prog##*/} -sldd_pid_file=/var/run/sldd.pid - -ACTION=$1 - -# Setting OpenSM start parameters -PID_FILE=/var/run/${bin}.pid -touch $PID_FILE - -if [[ -z $DEBUG || "$DEBUG" == "none" ]]; then - DEBUG_FLAG="" -else - DEBUG_FLAG="-d ${DEBUG}" -fi - -if [[ -z $LMC || "$LMC" == "0" ]]; then - LMC_FLAG="" -else - LMC_FLAG="-l ${LMC}" -fi - -if [[ -z $MAXSMPS || "$MAXSMPS" == "4" ]]; then - MAXSMPS_FLAG="" -else - MAXSMPS_FLAG="-maxsmps ${MAXSMPS}" -fi - -if [[ -z $REASSIGN_LIDS || "$REASSIGN_LIDS" == "no" ]]; then - REASSIGN_LIDS_FLAG="" -else - REASSIGN_LIDS_FLAG="-r" -fi - -if [[ -z $SWEEP || "$SWEEP" == "10" ]]; then - SWEEP_FLAG="" -else - SWEEP_FLAG="-s ${SWEEP}" -fi - -if [[ -z $TIMEOUT || "$TIMEOUT" == "100" ]]; then - TIMEOUT_FLAG="" -else - TIMEOUT_FLAG="-t ${TIMEOUT}" -fi - -if [[ -z $OSM_LOG || "$OSM_LOG" == "/tmp/osm.log" ]]; then - OSM_LOG_FLAG="" -else - OSM_LOG_FLAG="-f ${OSM_LOG}" -fi - -if [[ -z $VERBOSE || "$VERBOSE" == "none" ]]; then - VERBOSE_FLAG="" -else - VERBOSE_FLAG="${VERBOSE}" -fi - -if [[ -z $UPDN || "$UPDN" == "off" ]]; then - UPDN_FLAG="" -else - UPDN_FLAG="-u" -fi - -if [[ -z $GUID_FILE || "$GUID_FILE" == "none" ]]; then - GUID_FILE_FLAG="" -else - GUID_FILE_FLAG="-a ${GUID_FILE}" -fi - -if [[ -z $GUID || "$GUID" == "none" ]]; then - GUID_FLAG="" -else - GUID_FLAG="-g ${GUID}" -fi - -if [[ -z $HONORE_GUID2LID || "$HONORE_GUID2LID" == "none" ]]; then - HONORE_GUID2LID_FLAG="" -else - HONORE_GUID2LID_FLAG="--honor_guid2lid" -fi - -if [[ -n "${OSM_HOSTS}" && $(echo -n ${OSM_HOSTS} | wc -w | tr -d '[:space:]') -gt 1 ]]; then - HONORE_GUID2LID_FLAG="--honor_guid2lid" -fi - - -if [[ -z $CACHE_OPTIONS || "$CACHE_OPTIONS" == "none" ]]; then - CACHE_OPTIONS_FLAG="" -else - CACHE_OPTIONS_FLAG="--cache-options" -fi - - -if [ -z $PORT_NUM ]; then - PORT_FLAG=1 -else - PORT_FLAG="${PORT_NUM}" -fi - - 
-######################################################################### - -start_sldd() -{ - if [ -f $sldd_pid_file ]; then - local line p - read line < $sldd_pid_file - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" - done - fi - - if [ -z "$sldd_pid" ]; then - sldd_pid=`pidof -x $sldd_bin` - fi - - if [ -n "${sldd_pid:-}" ] ; then - kill -9 ${sldd_pid} > /dev/null 2>&1 - fi - - $sldd_prog > /dev/null 2>&1 & - sldd_pid=$! - - echo ${sldd_pid} > $sldd_pid_file - # Sleep is needed in order to update local gid2lid cache file before running opensm - sleep 3 -} - -stop_sldd() -{ - if [ -f $sldd_pid_file ]; then - local line p - read line < $sldd_pid_file - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" - done - fi - - if [ -z "$sldd_pid" ]; then - sldd_pid=`pidof -x $sldd_bin` - fi - - if [ -n "${sldd_pid:-}" ] ; then - kill -15 ${sldd_pid} > /dev/null 2>&1 - fi - -} - -start() -{ - local OSM_PID= - - pid="" - - if [ -f $PID_FILE ]; then - local line p - read line < $PID_FILE - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid="$pid $p" - done - fi - - if [ -z "$pid" ]; then - pid=`pidof -o $$ -o $PPID -o %PPID -x $bin` - fi - - if [ -n "${pid:-}" ] ; then - echo $"${bin} (pid $pid) is already running..." - else - - if [ -n "${HONORE_GUID2LID_FLAG}" ]; then - # Run sldd daemod - start_sldd - fi - - # Start opensm - local START_FLAGS="" - for flag in "$DEBUG_FLAG" "$LMC_FLAG" "$MAXSMPS_FLAG" "$REASSIGN_LIDS_FLAG" "$SWEEP_FLAG" "$TIMEOUT_FLAG" "$OSM_LOG_FLAG" "$VERBOSE_FLAG" "$UPDN_FLAG" "$GUID_FILE_FLAG" "$GUID_FLAG" "$HONORE_GUID2LID_FLAG" "$CACHE_OPTIONS_FLAG" - do - [ ! 
-z "$flag" ] && START_FLAGS="$START_FLAGS $flag" - done - - echo -n "Starting IB Subnet Manager" - echo $PORT_FLAG | $prog $START_FLAGS > /dev/null 2>&1 & - cnt=0; alive=0 - while [ $cnt -lt 6 -a $alive -ne 1 ]; do - echo -n "."; - sleep 1 - alive=0 - OSM_PID=`pidof $prog` - if [ "$OSM_PID" != "" ]; then - alive=1 - fi - let cnt++; - done - - echo $OSM_PID > $PID_FILE - checkpid $OSM_PID - RC=$? - [ $RC -eq 0 ] && echo_success || echo_failure - [ $RC -eq 0 ] && touch /var/lock/subsys/opensm - echo - - fi -return $RC -} - -stop() -{ - local pid= - local pid1= - local pid2= - - # Stop sldd daemon - stop_sldd - - if [ -f $PID_FILE ]; then - local line p - read line < $PID_FILE - for p in $line ; do - [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid1="$pid1 $p" - done - fi - - pid2=`pidof -o $$ -o $PPID -o %PPID -x $bin` - - pid=`echo "$pid1 $pid2" | sed -e 's/\ /\n/g' | sort -n | uniq | sed -e 's/\n/\ /g'` - - if [ -n "${pid:-}" ] ; then - # Kill opensm - echo -n "Stopping IB Subnet Manager." - kill -15 $pid > /dev/null 2>&1 - cnt=0; alive=1 - while [ $cnt -lt 6 -a $alive -ne 0 ]; do - echo -n "."; - alive=0 - for p in $pid; do - if checkpid $p ; then alive=1; echo -n "-"; fi - done - let cnt++; - sleep $alive - done - - for p in $pid - do - while checkpid $p ; do - kill -KILL $p > /dev/null 2>&1 - echo -n "+" - sleep 1 - done - done - checkpid $pid - RC=$? - [ $RC -eq 0 ] && echo_failure || echo_success - echo - RC=$((! $RC)) - else - echo -n "Stopping IB Subnet Manager." - echo_failure - echo - RC=1 - fi - - # Remove pid file if any. - rm -f $PID_FILE - rm -f /var/lock/subsys/opensm - return $RC -} - -status() -{ - local pid - - # First try "pidof" - pid=`pidof -o $$ -o $PPID -o %PPID -x ${bin}` - if [ -n "$pid" ]; then - echo $"${bin} (pid $pid) is running..." 
- return 0 - fi - - # Next try "/var/run/opensm.pid" files - if [ -f $PID_FILE ] ; then - read pid < $PID_FILE - if [ -n "$pid" ]; then - echo $"${bin} dead but pid file $PID_FILE exists" - return 1 - fi - fi - echo $"${bin} is stopped" - return 3 -} - - - -case $ACTION in - start) - start - ;; - stop) - stop - ;; - restart) - stop - start - ;; - status) - status - ;; - condrestart) - pid=`pidof -o $$ -o $PPID -o %PPID -x $bin` - if [ -n "$pid" ]; then - stop - sleep 1 - start - fi - ;; - *) - echo - echo "Usage: `basename $0` {start|stop|restart|status}" - echo - exit 1 - ;; -esac - -RC=$? -exit $RC diff --git a/opensm/scripts/redhat-opensm.init.in b/opensm/scripts/redhat-opensm.init.in new file mode 100755 index 0000000..3e00403 --- /dev/null +++ b/opensm/scripts/redhat-opensm.init.in @@ -0,0 +1,382 @@ +#!/bin/bash +# +# Bring up/down opensm +# +# chkconfig: - 15 85 +# description: Activates/Deactivates InfiniBand Subnet Manager +# +### BEGIN INIT INFO +# Provides: opensm +### END INIT INFO +# +# Copyright (c) 2006 Mellanox Technologies. All rights reserved. +# +# This Software is licensed under one of the following licenses: +# +# 1) under the terms of the "Common Public License 1.0" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/cpl.php. +# +# 2) under the terms of the "The BSD License" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/bsd-license.php. +# +# 3) under the terms of the "GNU General Public License (GPL) Version 2" a +# copy of which is available from the Open Source Initiative, see +# http://www.opensource.org/licenses/gpl-license.php. +# +# Licensee has the right to choose one of the above licenses. +# +# Redistributions of source code must retain the above copyright +# notice and one of the license notices. 
+# +# Redistributions in binary form must reproduce both the above copyright +# notice, one of the license notices in the documentation +# and/or other materials provided with the distribution. +# +# +# $Id: openib-1.0-opensm.init,v 1.5 2006/08/02 18:18:23 dledford Exp $ +# +# processname: /usr/sbin/opensm +# config: /etc/ofa/opensm.conf +# pidfile: /var/run/opensm.pid +. /etc/rc.d/init.d/functions + +CONFIG=/etc/ofa/opensm.conf +if [ ! -f $CONFIG ]; then + exit 0 +fi + +. $CONFIG + +prog=/usr/sbin/opensm +bin=${prog##*/} + +# Handover daemon for updating guid2lid cache file +sldd_prog=/usr/sbin/sldd.sh +sldd_bin=${sldd_prog##*/} +sldd_pid_file=/var/run/sldd.pid + +ACTION=$1 + +# Setting OpenSM start parameters +PID_FILE=/var/run/${bin}.pid +touch $PID_FILE + +if [[ -z $DEBUG || "$DEBUG" == "none" ]]; then + DEBUG_FLAG="" +else + DEBUG_FLAG="-d ${DEBUG}" +fi + +if [[ -z $LMC || "$LMC" == "0" ]]; then + LMC_FLAG="" +else + LMC_FLAG="-l ${LMC}" +fi + +if [[ -z $MAXSMPS || "$MAXSMPS" == "4" ]]; then + MAXSMPS_FLAG="" +else + MAXSMPS_FLAG="-maxsmps ${MAXSMPS}" +fi + +if [[ -z $REASSIGN_LIDS || "$REASSIGN_LIDS" == "no" ]]; then + REASSIGN_LIDS_FLAG="" +else + REASSIGN_LIDS_FLAG="-r" +fi + +if [[ -z $SWEEP || "$SWEEP" == "10" ]]; then + SWEEP_FLAG="" +else + SWEEP_FLAG="-s ${SWEEP}" +fi + +if [[ -z $TIMEOUT || "$TIMEOUT" == "100" ]]; then + TIMEOUT_FLAG="" +else + TIMEOUT_FLAG="-t ${TIMEOUT}" +fi + +if [[ -z $OSM_LOG || "$OSM_LOG" == "/tmp/osm.log" ]]; then + OSM_LOG_FLAG="" +else + OSM_LOG_FLAG="-f ${OSM_LOG}" +fi + +if [[ -z $VERBOSE || "$VERBOSE" == "none" ]]; then + VERBOSE_FLAG="" +else + VERBOSE_FLAG="${VERBOSE}" +fi + +if [[ -z $UPDN || "$UPDN" == "off" ]]; then + UPDN_FLAG="" +else + UPDN_FLAG="-u" +fi + +if [[ -z $GUID_FILE || "$GUID_FILE" == "none" ]]; then + GUID_FILE_FLAG="" +else + GUID_FILE_FLAG="-a ${GUID_FILE}" +fi + +if [[ -z $GUID || "$GUID" == "none" ]]; then + GUID_FLAG="" +else + GUID_FLAG="-g ${GUID}" +fi + +if [[ -z $HONORE_GUID2LID || 
"$HONORE_GUID2LID" == "none" ]]; then + HONORE_GUID2LID_FLAG="" +else + HONORE_GUID2LID_FLAG="--honor_guid2lid" +fi + +if [[ -n "${OSM_HOSTS}" && $(echo -n ${OSM_HOSTS} | wc -w | tr -d '[:space:]') -gt 1 ]]; then + HONORE_GUID2LID_FLAG="--honor_guid2lid" +fi + + +if [[ -z $CACHE_OPTIONS || "$CACHE_OPTIONS" == "none" ]]; then + CACHE_OPTIONS_FLAG="" +else + CACHE_OPTIONS_FLAG="--cache-options" +fi + + +if [ -z $PORT_NUM ]; then + PORT_FLAG=1 +else + PORT_FLAG="${PORT_NUM}" +fi + + +######################################################################### + +start_sldd() +{ + if [ -f $sldd_pid_file ]; then + local line p + read line < $sldd_pid_file + for p in $line ; do + [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" + done + fi + + if [ -z "$sldd_pid" ]; then + sldd_pid=`pidof -x $sldd_bin` + fi + + if [ -n "${sldd_pid:-}" ] ; then + kill -9 ${sldd_pid} > /dev/null 2>&1 + fi + + $sldd_prog > /dev/null 2>&1 & + sldd_pid=$! + + echo ${sldd_pid} > $sldd_pid_file + # Sleep is needed in order to update local gid2lid cache file before running opensm + sleep 3 +} + +stop_sldd() +{ + if [ -f $sldd_pid_file ]; then + local line p + read line < $sldd_pid_file + for p in $line ; do + [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p" + done + fi + + if [ -z "$sldd_pid" ]; then + sldd_pid=`pidof -x $sldd_bin` + fi + + if [ -n "${sldd_pid:-}" ] ; then + kill -15 ${sldd_pid} > /dev/null 2>&1 + fi + +} + +start() +{ + local OSM_PID= + + pid="" + + if [ -f $PID_FILE ]; then + local line p + read line < $PID_FILE + for p in $line ; do + [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid="$pid $p" + done + fi + + if [ -z "$pid" ]; then + pid=`pidof -o $$ -o $PPID -o %PPID -x $bin` + fi + + if [ -n "${pid:-}" ] ; then + echo $"${bin} (pid $pid) is already running..." 
+ else + + if [ -n "${HONORE_GUID2LID_FLAG}" ]; then + # Run sldd daemod + start_sldd + fi + + # Start opensm + local START_FLAGS="" + for flag in "$DEBUG_FLAG" "$LMC_FLAG" "$MAXSMPS_FLAG" "$REASSIGN_LIDS_FLAG" "$SWEEP_FLAG" "$TIMEOUT_FLAG" "$OSM_LOG_FLAG" "$VERBOSE_FLAG" "$UPDN_FLAG" "$GUID_FILE_FLAG" "$GUID_FLAG" "$HONORE_GUID2LID_FLAG" "$CACHE_OPTIONS_FLAG" + do + [ ! -z "$flag" ] && START_FLAGS="$START_FLAGS $flag" + done + + echo -n "Starting IB Subnet Manager" + echo $PORT_FLAG | $prog $START_FLAGS > /dev/null 2>&1 & + cnt=0; alive=0 + while [ $cnt -lt 6 -a $alive -ne 1 ]; do + echo -n "."; + sleep 1 + alive=0 + OSM_PID=`pidof $prog` + if [ "$OSM_PID" != "" ]; then + alive=1 + fi + let cnt++; + done + + echo $OSM_PID > $PID_FILE + checkpid $OSM_PID + RC=$? + [ $RC -eq 0 ] && echo_success || echo_failure + [ $RC -eq 0 ] && touch /var/lock/subsys/opensm + echo + + fi +return $RC +} + +stop() +{ + local pid= + local pid1= + local pid2= + + # Stop sldd daemon + stop_sldd + + if [ -f $PID_FILE ]; then + local line p + read line < $PID_FILE + for p in $line ; do + [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid1="$pid1 $p" + done + fi + + pid2=`pidof -o $$ -o $PPID -o %PPID -x $bin` + + pid=`echo "$pid1 $pid2" | sed -e 's/\ /\n/g' | sort -n | uniq | sed -e 's/\n/\ /g'` + + if [ -n "${pid:-}" ] ; then + # Kill opensm + echo -n "Stopping IB Subnet Manager." + kill -15 $pid > /dev/null 2>&1 + cnt=0; alive=1 + while [ $cnt -lt 6 -a $alive -ne 0 ]; do + echo -n "."; + alive=0 + for p in $pid; do + if checkpid $p ; then alive=1; echo -n "-"; fi + done + let cnt++; + sleep $alive + done + + for p in $pid + do + while checkpid $p ; do + kill -KILL $p > /dev/null 2>&1 + echo -n "+" + sleep 1 + done + done + checkpid $pid + RC=$? + [ $RC -eq 0 ] && echo_failure || echo_success + echo + RC=$((! $RC)) + else + echo -n "Stopping IB Subnet Manager." + echo_failure + echo + RC=1 + fi + + # Remove pid file if any. 
+ rm -f $PID_FILE + rm -f /var/lock/subsys/opensm + return $RC +} + +status() +{ + local pid + + # First try "pidof" + pid=`pidof -o $$ -o $PPID -o %PPID -x ${bin}` + if [ -n "$pid" ]; then + echo $"${bin} (pid $pid) is running..." + return 0 + fi + + # Next try "/var/run/opensm.pid" files + if [ -f $PID_FILE ] ; then + read pid < $PID_FILE + if [ -n "$pid" ]; then + echo $"${bin} dead but pid file $PID_FILE exists" + return 1 + fi + fi + echo $"${bin} is stopped" + return 3 +} + + + +case $ACTION in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + status) + status + ;; + condrestart) + pid=`pidof -o $$ -o $PPID -o %PPID -x $bin` + if [ -n "$pid" ]; then + stop + sleep 1 + start + fi + ;; + *) + echo + echo "Usage: `basename $0` {start|stop|restart|status}" + echo + exit 1 + ;; +esac + +RC=$? +exit $RC diff --git a/opensm/scripts/sldd.sh b/opensm/scripts/sldd.sh deleted file mode 100755 index 21f6126..0000000 --- a/opensm/scripts/sldd.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 2006 Mellanox Technologies. All rights reserved. -# -# This Software is licensed under one of the following licenses: -# -# 1) under the terms of the "Common Public License 1.0" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/cpl.php. -# -# 2) under the terms of the "The BSD License" a copy of which is -# available from the Open Source Initiative, see -# http://www.opensource.org/licenses/bsd-license.php. -# -# 3) under the terms of the "GNU General Public License (GPL) Version 2" a -# copy of which is available from the Open Source Initiative, see -# http://www.opensource.org/licenses/gpl-license.php. -# -# Licensee has the right to choose one of the above licenses. -# -# Redistributions of source code must retain the above copyright -# notice and one of the license notices. 
-# -# Redistributions in binary form must reproduce both the above copyright -# notice, one of the license notices in the documentation -# and/or other materials provided with the distribution. -# -# - -# OpenSM found to have the following problem -# when handover is performed: -# If some of the cluster nodes are rebooted during the handover they loose their LID assignment. -# The reason for it is that the standby SM does not obey its own Guid to LID table -# and simply uses the discovered LIDs. If some nodes are not available for it -# their previous LID assignment is lost forever. - -# The idea is to use an external daemon that will distribute -# the semi-static LID assignment table from the master SM to all standby SMs. -# A standby SM, becoming a master . needs to obey the copied semi static LID assignment table. - -# config: /etc/opensm.conf - -[ -f /etc/sysconfig/opensm.conf ] && CONFIG=/etc/sysconfig/opensm.conf -[ -f /etc/ofa/opensm.conf ] && CONFIG=/etc/ofa/opensm.conf - -SLDD_DEBUG=${SLDD_DEBUG:-0} - -if [ -z "$CONFIG" ]; then - [ $SLDD_DEBUG -eq 1 ] && echo "Config file not found." - exit 0 -fi - -. ${CONFIG} - -CACHE_FILE=${CACHE_FILE:-/var/cache/opensm/guid2lid} -CACHE_DIR=$(dirname ${CACHE_FILE}) -tmp_cache=${CACHE_FILE}.tmp - -PING='ping -w 1 -c 1' - -RCP=${RCP:-/usr/bin/scp} -RSH=${RSH:-/usr/bin/ssh} -IFCONFIG=${IFCONFIG:-'/sbin/ifconfig -a'} - -declare -i SLDD_DEBUG -RESCAN_TIME=${RESCAN_TIME:-60} - -if [ -z "${OSM_HOSTS}" ]; then - [ $SLDD_DEBUG -eq 1 ] && - echo "No OpenSM servers (OSM_HOSTS) configured for the IB subnet." - exit 0 -fi - - -declare -a arr_OSM_HOSTS -arr_OSM_HOSTS=(${OSM_HOSTS}) - -num_of_osm_hosts=${#arr_OSM_HOSTS[@]} - -if [ ${num_of_osm_hosts} -eq 1 ]; then - [ $SLDD_DEBUG -eq 1 ] && - echo "One OpenSM server configured in the IB subnet." && - echo "Nothing to be done for SLDD" - - exit 0 -fi - -trap 'trap_handler' 15 - -trap_handler() -{ - logger -i "SLDD: Exiting." 
- exit 0 -} - -is_alive() -{ - $PING $1 > /dev/null 2>&1 - return $? -} - -is_local() -{ - $IFCONFIG | grep -w "$1" > /dev/null 2>&1 - return $? -} - -update_remote_cache() -{ - /bin/rm -f ${CACHE_FILE}.upd - /bin/cp -a ${CACHE_FILE} ${CACHE_FILE}.upd - - [ $SLDD_DEBUG -eq 1 ] && - echo "Updating remote cache file" - - for host in ${OSM_HOSTS} - do - # Skip local host update - if [ "${host}" == "${local_host}" ]; then - continue - fi - - if is_alive $host; then - stat=$($RSH $host "/bin/mkdir -p ${CACHE_DIR} > /dev/null 2>&1; /bin/rm -f ${CACHE_FILE}.${local_host} > /dev/null 2>&1; echo \$?" | tr -d '[:space:]') - if [ "X${stat}" == "X0" ]; then - [ $SLDD_DEBUG -eq 1 ] && - echo "Updating $host" - logger -i "SLDD: updating $host with ${CACHE_FILE}" - $RCP ${CACHE_FILE}.upd ${host}:${CACHE_FILE}.${local_host} - /bin/cp ${CACHE_FILE}.upd ${CACHE_FILE}.${host} - else - [ $SLDD_DEBUG -eq 1 ] && - echo "$RSH to $host failed." - logger -i "SLDD: Failed to update $host with ${CACHE_FILE}. $RSH without password should be enabled" - exit 5 - fi - else - [ $SLDD_DEBUG -eq 1 ] && - echo "$host is down." 
- continue - fi - done -} - -get_latest_remote_cache() -{ - # Find most updated remote cache file (the suffix should be like ip address: *.*.*.*) - echo -n "$(/bin/ls -1t ${CACHE_FILE}.*.* 2> /dev/null | head -1)" -} - -get_largest_remote_cache() -{ - # Find largest (size) remote cache file (the suffix should be like ip address: *.*.*.*) - echo -n "$(/bin/ls -1S ${CACHE_FILE}.*.* 2> /dev/null | head -1)" -} - -swap_cache_files() -{ - /bin/rm -f ${CACHE_FILE}.old - /bin/mv ${CACHE_FILE} ${CACHE_FILE}.old - /bin/cp ${largest_remote_cache} ${CACHE_FILE} - touch ${CACHE_FILE}.tmp -} - -# Find local host in the osm hosts list -local_host="" -for host in ${OSM_HOSTS} -do - if is_local $host; then - local_host=${host} - fi -done - -# Get cache file info -declare -i new_size=0 -declare -i last_size=0 -declare -i largest_remote_cache_size=0 - -if [ -e ${CACHE_FILE} ]; then - last_size=$(du -b ${CACHE_FILE} | awk '{print$1}' | tr -d '[:space:]') -else - touch ${CACHE_FILE} ${CACHE_FILE}.tmp -fi - -# if [ ${last_size} -gt 0 ]; then -# # First time update -# update_remote_cache -# fi - -while true -do - if [ -s "${CACHE_FILE}" ]; then - new_size=$(du -b ${CACHE_FILE} | awk '{print$1}' | tr -d '[:space:]') - # Check if local cache file grew from its last version or the time stamp changed - if [ ${new_size} -gt ${last_size} ] - [ "$(/bin/ls -1t ${CACHE_FILE} ${CACHE_FILE}.tmp 2> /dev/null | head -1)" != "${CACHE_FILE}.tmp" ]; then - largest_remote_cache=$(get_largest_remote_cache) - if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then - largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]') - else - largest_remote_cache_size=0 - fi - - # Check if local cache file larger than remote chache file - if [ ${new_size} -gt ${largest_remote_cache_size} ]; then - [ $SLDD_DEBUG -eq 1 ] && - echo "Local cache file larger then remote. 
Update remote cache files" - last_size=${new_size} - update_remote_cache - continue - fi - fi - - largest_remote_cache=$(get_largest_remote_cache) - if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then - largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]') - else - largest_remote_cache_size=0 - fi - - # Update local cache file from remote - if [ ${largest_remote_cache_size} -gt ${new_size} ]; then - [ $SLDD_DEBUG -eq 1 ] && - echo "Local cache file shorter then remote. Use ${largest_remote_cache}" - logger -i "SLDD: updating local cache file with ${largest_remote_cache}" - swap_cache_files - last_size=${largest_remote_cache_size} - fi - - else # The local cache file is empty - [ $SLDD_DEBUG -eq 1 ] && - echo "${CACHE_FILE} is empty" - - largest_remote_cache=$(get_largest_remote_cache) - if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then - # Copy it to the current cache - [ $SLDD_DEBUG -eq 1 ] && - echo "Local cache file is empty. Use ${largest_remote_cache}" - logger -i "SLDD: updating local cache file with ${largest_remote_cache}" - swap_cache_files - fi - - fi - - [ $SLDD_DEBUG -eq 1 ] && - echo "Sleeping ${RESCAN_TIME} seconds." - sleep ${RESCAN_TIME} - -done diff --git a/opensm/scripts/sldd.sh.in b/opensm/scripts/sldd.sh.in new file mode 100755 index 0000000..21f6126 --- /dev/null +++ b/opensm/scripts/sldd.sh.in @@ -0,0 +1,249 @@ +#!/bin/bash +# +# Copyright (c) 2006 Mellanox Technologies. All rights reserved. +# +# This Software is licensed under one of the following licenses: +# +# 1) under the terms of the "Common Public License 1.0" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/cpl.php. +# +# 2) under the terms of the "The BSD License" a copy of which is +# available from the Open Source Initiative, see +# http://www.opensource.org/licenses/bsd-license.php. 
+# +# 3) under the terms of the "GNU General Public License (GPL) Version 2" a +# copy of which is available from the Open Source Initiative, see +# http://www.opensource.org/licenses/gpl-license.php. +# +# Licensee has the right to choose one of the above licenses. +# +# Redistributions of source code must retain the above copyright +# notice and one of the license notices. +# +# Redistributions in binary form must reproduce both the above copyright +# notice, one of the license notices in the documentation +# and/or other materials provided with the distribution. +# +# + +# OpenSM found to have the following problem +# when handover is performed: +# If some of the cluster nodes are rebooted during the handover they loose their LID assignment. +# The reason for it is that the standby SM does not obey its own Guid to LID table +# and simply uses the discovered LIDs. If some nodes are not available for it +# their previous LID assignment is lost forever. + +# The idea is to use an external daemon that will distribute +# the semi-static LID assignment table from the master SM to all standby SMs. +# A standby SM, becoming a master . needs to obey the copied semi static LID assignment table. + +# config: /etc/opensm.conf + +[ -f /etc/sysconfig/opensm.conf ] && CONFIG=/etc/sysconfig/opensm.conf +[ -f /etc/ofa/opensm.conf ] && CONFIG=/etc/ofa/opensm.conf + +SLDD_DEBUG=${SLDD_DEBUG:-0} + +if [ -z "$CONFIG" ]; then + [ $SLDD_DEBUG -eq 1 ] && echo "Config file not found." + exit 0 +fi + +. ${CONFIG} + +CACHE_FILE=${CACHE_FILE:-/var/cache/opensm/guid2lid} +CACHE_DIR=$(dirname ${CACHE_FILE}) +tmp_cache=${CACHE_FILE}.tmp + +PING='ping -w 1 -c 1' + +RCP=${RCP:-/usr/bin/scp} +RSH=${RSH:-/usr/bin/ssh} +IFCONFIG=${IFCONFIG:-'/sbin/ifconfig -a'} + +declare -i SLDD_DEBUG +RESCAN_TIME=${RESCAN_TIME:-60} + +if [ -z "${OSM_HOSTS}" ]; then + [ $SLDD_DEBUG -eq 1 ] && + echo "No OpenSM servers (OSM_HOSTS) configured for the IB subnet." 
+ exit 0 +fi + + +declare -a arr_OSM_HOSTS +arr_OSM_HOSTS=(${OSM_HOSTS}) + +num_of_osm_hosts=${#arr_OSM_HOSTS[@]} + +if [ ${num_of_osm_hosts} -eq 1 ]; then + [ $SLDD_DEBUG -eq 1 ] && + echo "One OpenSM server configured in the IB subnet." && + echo "Nothing to be done for SLDD" + + exit 0 +fi + +trap 'trap_handler' 15 + +trap_handler() +{ + logger -i "SLDD: Exiting." + exit 0 +} + +is_alive() +{ + $PING $1 > /dev/null 2>&1 + return $? +} + +is_local() +{ + $IFCONFIG | grep -w "$1" > /dev/null 2>&1 + return $? +} + +update_remote_cache() +{ + /bin/rm -f ${CACHE_FILE}.upd + /bin/cp -a ${CACHE_FILE} ${CACHE_FILE}.upd + + [ $SLDD_DEBUG -eq 1 ] && + echo "Updating remote cache file" + + for host in ${OSM_HOSTS} + do + # Skip local host update + if [ "${host}" == "${local_host}" ]; then + continue + fi + + if is_alive $host; then + stat=$($RSH $host "/bin/mkdir -p ${CACHE_DIR} > /dev/null 2>&1; /bin/rm -f ${CACHE_FILE}.${local_host} > /dev/null 2>&1; echo \$?" | tr -d '[:space:]') + if [ "X${stat}" == "X0" ]; then + [ $SLDD_DEBUG -eq 1 ] && + echo "Updating $host" + logger -i "SLDD: updating $host with ${CACHE_FILE}" + $RCP ${CACHE_FILE}.upd ${host}:${CACHE_FILE}.${local_host} + /bin/cp ${CACHE_FILE}.upd ${CACHE_FILE}.${host} + else + [ $SLDD_DEBUG -eq 1 ] && + echo "$RSH to $host failed." + logger -i "SLDD: Failed to update $host with ${CACHE_FILE}. $RSH without password should be enabled" + exit 5 + fi + else + [ $SLDD_DEBUG -eq 1 ] && + echo "$host is down." 
+ continue + fi + done +} + +get_latest_remote_cache() +{ + # Find most updated remote cache file (the suffix should be like ip address: *.*.*.*) + echo -n "$(/bin/ls -1t ${CACHE_FILE}.*.* 2> /dev/null | head -1)" +} + +get_largest_remote_cache() +{ + # Find largest (size) remote cache file (the suffix should be like ip address: *.*.*.*) + echo -n "$(/bin/ls -1S ${CACHE_FILE}.*.* 2> /dev/null | head -1)" +} + +swap_cache_files() +{ + /bin/rm -f ${CACHE_FILE}.old + /bin/mv ${CACHE_FILE} ${CACHE_FILE}.old + /bin/cp ${largest_remote_cache} ${CACHE_FILE} + touch ${CACHE_FILE}.tmp +} + +# Find local host in the osm hosts list +local_host="" +for host in ${OSM_HOSTS} +do + if is_local $host; then + local_host=${host} + fi +done + +# Get cache file info +declare -i new_size=0 +declare -i last_size=0 +declare -i largest_remote_cache_size=0 + +if [ -e ${CACHE_FILE} ]; then + last_size=$(du -b ${CACHE_FILE} | awk '{print$1}' | tr -d '[:space:]') +else + touch ${CACHE_FILE} ${CACHE_FILE}.tmp +fi + +# if [ ${last_size} -gt 0 ]; then +# # First time update +# update_remote_cache +# fi + +while true +do + if [ -s "${CACHE_FILE}" ]; then + new_size=$(du -b ${CACHE_FILE} | awk '{print$1}' | tr -d '[:space:]') + # Check if local cache file grew from its last version or the time stamp changed + if [ ${new_size} -gt ${last_size} ] + [ "$(/bin/ls -1t ${CACHE_FILE} ${CACHE_FILE}.tmp 2> /dev/null | head -1)" != "${CACHE_FILE}.tmp" ]; then + largest_remote_cache=$(get_largest_remote_cache) + if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then + largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]') + else + largest_remote_cache_size=0 + fi + + # Check if local cache file larger than remote chache file + if [ ${new_size} -gt ${largest_remote_cache_size} ]; then + [ $SLDD_DEBUG -eq 1 ] && + echo "Local cache file larger then remote. 
Update remote cache files" + last_size=${new_size} + update_remote_cache + continue + fi + fi + + largest_remote_cache=$(get_largest_remote_cache) + if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then + largest_remote_cache_size=$(du -b ${largest_remote_cache} 2> /dev/null | awk '{print$1}' | tr -d '[:space:]') + else + largest_remote_cache_size=0 + fi + + # Update local cache file from remote + if [ ${largest_remote_cache_size} -gt ${new_size} ]; then + [ $SLDD_DEBUG -eq 1 ] && + echo "Local cache file shorter then remote. Use ${largest_remote_cache}" + logger -i "SLDD: updating local cache file with ${largest_remote_cache}" + swap_cache_files + last_size=${largest_remote_cache_size} + fi + + else # The local cache file is empty + [ $SLDD_DEBUG -eq 1 ] && + echo "${CACHE_FILE} is empty" + + largest_remote_cache=$(get_largest_remote_cache) + if [[ -n "${largest_remote_cache}" && -s "${largest_remote_cache}" ]]; then + # Copy it to the current cache + [ $SLDD_DEBUG -eq 1 ] && + echo "Local cache file is empty. Use ${largest_remote_cache}" + logger -i "SLDD: updating local cache file with ${largest_remote_cache}" + swap_cache_files + fi + + fi + + [ $SLDD_DEBUG -eq 1 ] && + echo "Sleeping ${RESCAN_TIME} seconds." + sleep ${RESCAN_TIME} + +done -- 1.5.4.1.122.gaa8d From sashak at voltaire.com Sun Feb 24 10:25:20 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 24 Feb 2008 18:25:20 +0000 Subject: [ofa-general] [PATCH] opensm/scripts: make configurable scripts In-Reply-To: <20080224182434.GL3116@sashak.voltaire.com> References: <20080224141537.GI3116@sashak.voltaire.com> <20080224182434.GL3116@sashak.voltaire.com> Message-ID: <20080224182520.GM3116@sashak.voltaire.com> Make configurable opensm.init, redhat-opensm.init, opensmd and sldd.sh scripts. 
Signed-off-by: Sasha Khapyorsky --- opensm/scripts/opensm.init.in | 5 ++++- opensm/scripts/opensmd.in | 21 ++++++++++++--------- opensm/scripts/redhat-opensm.init.in | 14 +++++++++----- opensm/scripts/sldd.sh.in | 9 ++++++--- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/opensm/scripts/opensm.init.in b/opensm/scripts/opensm.init.in index d717279..da23b36 100644 --- a/opensm/scripts/opensm.init.in +++ b/opensm/scripts/opensm.init.in @@ -38,6 +38,9 @@ # notice, one of the license notices in the documentation # and/or other materials provided with the distribution. +prefix=@prefix@ +exec_prefix=@exec_prefix@ + # Source function library. if [[ -s /etc/init.d/functions ]]; then . /etc/init.d/functions @@ -55,7 +58,7 @@ fi start () { echo -n "Starting opensm: " - /usr/sbin/opensm -B $OPTIONS > /dev/null + @sbindir@/opensm -B $OPTIONS > /dev/null if [[ $RETVAL -eq 0 ]]; then touch /var/lock/subsys/opensm success diff --git a/opensm/scripts/opensmd.in b/opensm/scripts/opensmd.in index 848d1cd..0b150f7 100755 --- a/opensm/scripts/opensmd.in +++ b/opensm/scripts/opensmd.in @@ -27,23 +27,26 @@ # and/or other materials provided with the distribution. # # -# processname: /usr/sbin/opensm -# config: /etc/opensm.conf +# processname: @sbindir@/opensm +# config: @sysconfdir@/ofa/opensm.conf # pidfile: /var/run/opensm.pid -if [ ! -f /etc/opensm.conf ]; then +prefix=@prefix@ +exec_prefix=@exec_prefix@ + +CONFIG=@sysconfdir@/ofa/opensm.conf + +if [ ! -f $CONFIG ]; then exit 0 fi -. /etc/opensm.conf - -CONFIG=/etc/opensm.conf +. 
@COFNIG -prog=/usr/sbin/opensm +prog=@sbindir@/opensm bin=${prog##*/} # Handover daemon for updating guid2lid cache file -sldd_prog=/usr/sbin/sldd.sh +sldd_prog=@sbindir@/sldd.sh sldd_bin=${sldd_prog##*/} sldd_pid_file=/var/run/sldd.pid @@ -122,7 +125,7 @@ else TIMEOUT_FLAG="-t ${TIMEOUT}" fi -if [[ -z $OSM_LOG || "$OSM_LOG" == "/var/log/osm.log" ]]; then +if [[ -z $OSM_LOG || "$OSM_LOG" == "/var/log/opensm.log" ]]; then OSM_LOG_FLAG="" else OSM_LOG_FLAG="-f ${OSM_LOG}" diff --git a/opensm/scripts/redhat-opensm.init.in b/opensm/scripts/redhat-opensm.init.in index 3e00403..4ce6605 100755 --- a/opensm/scripts/redhat-opensm.init.in +++ b/opensm/scripts/redhat-opensm.init.in @@ -37,23 +37,27 @@ # # $Id: openib-1.0-opensm.init,v 1.5 2006/08/02 18:18:23 dledford Exp $ # -# processname: /usr/sbin/opensm -# config: /etc/ofa/opensm.conf +# processname: @sbindir@/opensm +# config: @sysconfdir@/ofa/opensm.conf # pidfile: /var/run/opensm.pid + +prefix=@prefix@ +exec_prefix=@exec_prefix@ + . /etc/rc.d/init.d/functions -CONFIG=/etc/ofa/opensm.conf +CONFIG=@sysconfdir@/ofa/opensm.conf if [ ! -f $CONFIG ]; then exit 0 fi . $CONFIG -prog=/usr/sbin/opensm +prog=@sbindir@/opensm bin=${prog##*/} # Handover daemon for updating guid2lid cache file -sldd_prog=/usr/sbin/sldd.sh +sldd_prog=@sbindir@/sldd.sh sldd_bin=${sldd_prog##*/} sldd_pid_file=/var/run/sldd.pid diff --git a/opensm/scripts/sldd.sh.in b/opensm/scripts/sldd.sh.in index 21f6126..a6f660f 100755 --- a/opensm/scripts/sldd.sh.in +++ b/opensm/scripts/sldd.sh.in @@ -38,10 +38,13 @@ # the semi-static LID assignment table from the master SM to all standby SMs. # A standby SM, becoming a master . needs to obey the copied semi static LID assignment table. 
-# config: /etc/opensm.conf +prefix=@prefix@ +exec_prefix=@exec_prefix@ -[ -f /etc/sysconfig/opensm.conf ] && CONFIG=/etc/sysconfig/opensm.conf -[ -f /etc/ofa/opensm.conf ] && CONFIG=/etc/ofa/opensm.conf +# config: @sysconfdir@/ofa/opensm.conf + +[ -f @sysconfdir@/sysconfig/opensm.conf ] && CONFIG=@sysconfdir@/sysconfig/opensm.conf +[ -f @sysconfdir@/ofa/opensm.conf ] && CONFIG=@sysconfdir@/ofa/opensm.conf SLDD_DEBUG=${SLDD_DEBUG:-0} -- 1.5.4.1.122.gaa8d From Ecards at ns1.greetingsnecards.com Sun Feb 24 06:50:56 2008 From: Ecards at ns1.greetingsnecards.com (Electronic Card's) Date: 24 Feb 2008 16:50:56 +0200 Subject: [ofa-general] You just recieved an electronic card! Thanks! Message-ID: <20080224145056.18090.qmail@maritza.info> An HTML attachment was scrubbed... URL: From unutilizable at bankruptmiami.com Sun Feb 24 13:16:50 2008 From: unutilizable at bankruptmiami.com ($1000 is Yours to Gamble) Date: Sun, 24 Feb 2008 21:16:50 +0000 Subject: [ofa-general] With No Deposit Required Message-ID: <35d101c8772a$04c056c0$40e01b59@64-224-dsl.kielnet.net> No Credit Check! Bet on Credit http://www.kooriyama.info/ From changquing.tang at hp.com Sun Feb 24 13:51:14 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Sun, 24 Feb 2008 21:51:14 +0000 Subject: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel In-Reply-To: <200802191440.33559.jackm@dev.mellanox.co.il> References: <200802191440.33559.jackm@dev.mellanox.co.il> Message-ID: Jack: Mellanox installed RC5 on helios.mellanox.com for me, this is a 2.6.9-42 kernel system. But I still see that when errno is not zero, and I call ibv_modify_xrc_rcv_qp(), this function fails. If I clear errno to zero before I call ibv_modify_xrc_rcv_qp(), everything is fine. Can you take a look ? 
--CQ > -----Original Message----- > From: Jack Morgenstein [mailto:jackm at dev.mellanox.co.il] > Sent: Tuesday, February 19, 2008 6:41 AM > To: general at lists.openfabrics.org > Cc: Tang, Changqing > Subject: Re: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel > > On Tuesday 19 February 2008 02:35, Tang, Changqing wrote: > > > > I have taken sometime to trace down this bug. > > > > When running OFED 1.3 on 2.6.9-42.ELsmp kernel, > > putenv("IBV_FORK_SAFE=1"); causes ibv_get_device_list() to > print out a Warning and set errno = 22 : > > > > A:errno=0 > > libibverbs: Warning: fork()-safety requested but init failed > > B:errno=22 > > > > errno keeps value 22 and causes ibv_modify_xrc_rcv_qp() to fail. > > > > Another way to make ibv_modify_xrc_rcv_qp() to fail is to > set errno = > > 22 just before calling this function. However, this only happens on > > 2.6.9-42.ELsmp kernel, on 2.6.18-8.e15 kernel, it succeeds. > > > > 2.6.9-42.ELsmp is the kernel in Mellanox testing cluster > > helios.mellanox.com/ibd001-0032 > > > > Thanks for Mellanox guys to have a look > > > > > > --CQ > > I fixed a bug just like this in OFED 1.3 on Jan 30. The fix > is in OFED 1.3 RC4 -- are you using that version? If not, > please install RC4 and re-test. > > (The bug was in kernel space: > > =========== > IB/core: fixed thinko in return values for > ib_uverbs_xxxx_xrc_rcv_qp() procs. > Wed, 30 Jan 2008 15:11:08 +0000 (17:11 +0200) commit > 78273e00083543535edd4c9db830b4ac45eb556a > IB/core: fixed thinko in return values for > ib_uverbs_xxxx_xrc_rcv_qp() procs. > > Incorrectly returned 0 instead of in_len in several procedures. > ================= > > This bug caused userspace to return the "errno" value even > when the kernel operation completed successfully, which is > what you seem to be seeing. 
> > - Jack > > > > > > > From 9knbkzu7n6s11q at rocketmail.com Sun Feb 24 15:57:39 2008 From: 9knbkzu7n6s11q at rocketmail.com (Jonathan Ruffin) Date: Mon, 25 Feb 2008 07:57:39 +0800 Subject: [ofa-general] You told me that you will reply back Message-ID: <01c87784$17475b80$a21501da@9knbkzu7n6s11q> Hello! I am bored today. I am nice girl that would like to chat with you. Email me at Berit at DigitalDoorwayDesign.info only, because I am using my friend's email to write this. I will show you some great pictures of me. From dwraylenekm at raylenek.com Sun Feb 24 19:49:53 2008 From: dwraylenekm at raylenek.com (Darren Mccoy) Date: Mon, 25 Feb 2008 11:49:53 +0800 Subject: [ofa-general] There is no cheaper source of original and perfectly working software. Message-ID: <01c877a4$88970680$8cd9dadd@dwraylenekm> Don't waste time waiting for delivery of your software on a CD. Download and install it immediately. Choose the program you need from more than 270 programs in many languages. We are glad to help you to install your software. Feel free to ask questions and receive highly professional consultations. If you failed to find software you need in our list, we can try to find it for you. http://geocities.com/geoffreykey61 Original software only! From phillipwils at gmail.com Sun Feb 24 21:44:01 2008 From: phillipwils at gmail.com (Phillip Wilson) Date: Sun, 24 Feb 2008 21:44:01 -0800 Subject: [ofa-general] Internal loopback for HCA (with no external cable required) Message-ID: <6e4f44220802242144p3a712e54x4915f46fadcdf0be@mail.gmail.com> Internal loopback for HCA (with no external cable required) <> I would like to modify the existing "ibv_rc_pingpong or (ivb_xx_pingpong)" program to perform internal loopback on a HCA with no external cable required. My intention is to support internal DMA loopback for packets transmitted and received between a QP that is assigned to the same HCA port; the program will connect the QP send queue to its receive queue. 
I am new to Infiniband and I would like some advice on how to approach this task. So far, I have been going through the management, verb code, and the Infiniband Architecture specification volume 1 release 1.2. I have installed the OFED version 1.2.5 on Redhat (see uname -a below for details). I have two Mellanox dual port (VID=15B3: DID=5A46, VID=15B3: DID=5A44, tavor) HCAs in the system. I have configured one HCA (mthca0) for external loopback by connecting the HCA's ports together with a cable. The other HCA has no cables connected to its ports. After running the opensm program, the HCA (mthca0) cabled for external loopback had a LID value assigned to each of its ports. The other HCA (mthca1) ports were not assigned LIDs as expected. I was then able to run the ibv_xx_pingpong programs as shown below. Based on my limited understanding of the opensm program, I am not sure if it is possible to use this program to set up the HCA ports (mthca1) when the port state is "PORT_DOWN". Can the guid2lid file be used to force opensm to assign LID values to the ports of an HCA that does not have an active link? If not, I believe that I will need to programmatically use subnet management packets to configure the HCA for internal loopback. According to the Infiniband specification, loopback can be done by Directed Routing. That is by setting the HopCount=0, DrSLID=DrDLID and PacketLifeTime=0. <> 1.) Is it possible to modify the ibv_rc_pingpong program for internal loopback? 2.) If each mthca1 port is assigned a LID, can the ibv_rc_pingpong program be used as is? The ibv_rc_pingpong program uses the LID to route packets. Is it necessary to assign the sm_lid and port_lid values greater than zero once the HCA is set up for internal loopback using Directed Routing as shown in figure 186 on page 799 within the Infiniband specification? If so, what other port parameters should be set? Is the port logical (i.e.
PORT_ACTIVE) and physical states (LINK_UP) important for internal loopback? <> [root at hpdst165 src]# uname -a Linux hpdst165 2.6.18-53.el5 #1 SMP Wed Oct 10 16:34:55 EDT 2007 ia64 ia64 ia64 GNU/Linux External Loopback [root at hpdst165 src]# ibv_rc_pingpong & [1] 4359 [root at hpdst165 src]# local address: LID 0x0001, QPN 0x0a0406, PSN 0x46d737 [root at hpdst165 src]# ibv_rc_pingpong hpdst165 & [2] 4361 [root at hpdst165 src]# local address: LID 0x0001, QPN 0x0a0407, PSN 0xb0a59e remote address: LID 0x0001, QPN 0x0a0406, PSN 0x46d737 remote address: LID 0x0001, QPN 0x0a0407, PSN 0xb0a59e 8192000 bytes in 0.05 seconds = 1337.50 Mbit/sec 1000 iters in 0.05 seconds = 49.00 usec/iter 8192000 bytes in 0.05 seconds = 1330.63 Mbit/sec 1000 iters in 0.05 seconds = 49.25 usec/iter [1]- Done ibv_rc_pingpong [2]+ Done ibv_rc_pingpong hpdst165 [root at hpdst165 bin]# ibv_devinfo -v hca_id: mthca0 fw_ver: 3.3.12 node_guid: 0016:35ff:ffbf:0944 sys_image_guid: 0016:35ff:ffbf:0947 vendor_id: 0x02c9 vendor_part_id: 23108 hw_ver: 0xA1 board_id: HP_0030000001 phys_port_cnt: 2 max_mr_size: 0xffffffffffffffff page_size_cap: 0xfffff000 max_qp: 64512 max_qp_wr: 65535 device_cap_flags: 0x00001c76 max_sge: 28 max_sge_rd: 0 max_cq: 65408 max_cqe: 131071 max_mr: 131056 max_pd: 32768 max_qp_rd_atom: 4 max_ee_rd_atom: 0 max_res_rd_atom: 258048 max_qp_init_rd_atom: 128 max_ee_init_rd_atom: 0 atomic_cap: ATOMIC_HCA (1) max_ee: 0 max_rdd: 0 max_mw: 0 max_raw_ipv6_qp: 0 max_raw_ethy_qp: 0 max_mcast_grp: 8192 max_mcast_qp_attach: 8 max_total_mcast_qp_attach: 65536 max_ah: 0 max_fmr: 0 max_srq: 1008 max_srq_wr: 65535 max_srq_sge: 28 max_pkeys: 64 local_ca_ack_delay: 15 port: 1 state: PORT_ACTIVE (4) max_mtu: 2048 (4) active_mtu: 2048 (4) sm_lid: 1 port_lid: 1 port_lmc: 0x00 max_msg_sz: 0x80000000 port_cap_flags: 0x02510a68 max_vl_num: 8 (4) bad_pkey_cntr: 0x0 qkey_viol_cntr: 0x0 sm_sl: 0 pkey_tbl_len: 64 gid_tbl_len: 32 subnet_timeout: 18 init_type_reply: 0 active_width: 4X (2) active_speed: 2.5 
Gbps (1) phys_state: LINK_UP (5) GID[ 0]: fe80:0000:0000:0000:0016:35ff:ffbf:0945 port: 2 state: PORT_ACTIVE (4) max_mtu: 2048 (4) active_mtu: 2048 (4) sm_lid: 1 port_lid: 2 port_lmc: 0x00 max_msg_sz: 0x80000000 port_cap_flags: 0x02510a68 max_vl_num: 8 (4) bad_pkey_cntr: 0x0 qkey_viol_cntr: 0x0 sm_sl: 0 pkey_tbl_len: 64 gid_tbl_len: 32 subnet_timeout: 18 init_type_reply: 0 active_width: 4X (2) active_speed: 2.5 Gbps (1) phys_state: LINK_UP (5) GID[ 0]: fe80:0000:0000:0000:0016:35ff:ffbf:0946 hca_id: mthca1 fw_ver: 3.3.12 node_guid: 0016:35ff:ffbf:1954 sys_image_guid: 0016:35ff:ffbf:1957 vendor_id: 0x02c9 vendor_part_id: 23108 hw_ver: 0xA1 board_id: HP_0030000001 phys_port_cnt: 2 max_mr_size: 0xffffffffffffffff page_size_cap: 0xfffff000 max_qp: 64512 max_qp_wr: 65535 device_cap_flags: 0x00001c76 max_sge: 28 max_sge_rd: 0 max_cq: 65408 max_cqe: 131071 max_mr: 131056 max_pd: 32768 max_qp_rd_atom: 4 max_ee_rd_atom: 0 max_res_rd_atom: 258048 max_qp_init_rd_atom: 128 max_ee_init_rd_atom: 0 atomic_cap: ATOMIC_HCA (1) max_ee: 0 max_rdd: 0 max_mw: 0 max_raw_ipv6_qp: 0 max_raw_ethy_qp: 0 max_mcast_grp: 8192 max_mcast_qp_attach: 8 max_total_mcast_qp_attach: 65536 max_ah: 0 max_fmr: 0 max_srq: 1008 max_srq_wr: 65535 max_srq_sge: 28 max_pkeys: 64 local_ca_ack_delay: 15 port: 1 state: PORT_DOWN (1) max_mtu: 2048 (4) active_mtu: 512 (2) sm_lid: 0 port_lid: 0 port_lmc: 0x00 max_msg_sz: 0x80000000 port_cap_flags: 0x02510a68 max_vl_num: 8 (4) bad_pkey_cntr: 0x0 qkey_viol_cntr: 0x0 sm_sl: 0 pkey_tbl_len: 64 gid_tbl_len: 32 subnet_timeout: 0 init_type_reply: 0 active_width: 1X (1) active_speed: 2.5 Gbps (1) phys_state: POLLING (2) GID[ 0]: fe80:0000:0000:0000:0016:35ff:ffbf:1955 port: 2 state: PORT_DOWN (1) max_mtu: 2048 (4) active_mtu: 512 (2) sm_lid: 0 port_lid: 0 port_lmc: 0x00 max_msg_sz: 0x80000000 port_cap_flags: 0x02510a68 max_vl_num: 8 (4) bad_pkey_cntr: 0x0 qkey_viol_cntr: 0x0 sm_sl: 0 pkey_tbl_len: 64 gid_tbl_len: 32 subnet_timeout: 0 init_type_reply: 0 active_width: 1X (1) 
active_speed: 2.5 Gbps (1) phys_state: POLLING (2) GID[ 0]: fe80:0000:0000:0000:0016:35ff:ffbf:1956 From dwrhodestrailersm at rhodestrailers.com Sun Feb 24 22:48:52 2008 From: dwrhodestrailersm at rhodestrailers.com (Elmo Dow) Date: Mon, 25 Feb 2008 12:18:52 +0530 Subject: [ofa-general] Get any soft you need without delays. Message-ID: <01c877a8$95513f40$d2775f3b@dwrhodestrailersm> The quickest and most convenient way to get software is to download it from our site. Low prices, fully functional and original programs only. Localized versions in all European languages! We provide help in installing software. You can ask any question and get a free of charge consultation. Guaranteed access to all updates! Friendly and professional service! http://geocities.com/norris.beverly Get software you need right now! From mail at eurobusinessguide.net Sun Feb 24 22:50:51 2008 From: mail at eurobusinessguide.net (Euro Company Registration) Date: Mon, 25 Feb 2008 08:50:51 +0200 (EET) Subject: [ofa-general] European company registration 2008 Message-ID: Euro Company Registration Dear Sirs, If you like to have your company registered in the registry of European companies for 2008; Please print out the enclosed form (PDF file), fill it and send it back to: Euro Business Guide P.O. Box 2021 3500 GA UTRECHT The Netherlands Updating is free of charge!! If you want to unsubscribe send an email to unsubscribe at eurobusinessguide.net. -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: eucom-form.pdf Type: application/pdf Size: 82012 bytes Desc: not available URL: From kliteyn at dev.mellanox.co.il Mon Feb 25 00:38:57 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 25 Feb 2008 10:38:57 +0200 Subject: [ofa-general] [PATCH] opensm/scripts: Fixing location of generated opensm.init script Message-ID: <47C27EA1.4070302@dev.mellanox.co.il> Signed-off-by: Yevgeny Kliteynik --- opensm/Makefile.am | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/Makefile.am b/opensm/Makefile.am index 7e1d95d..3811963 100644 --- a/opensm/Makefile.am +++ b/opensm/Makefile.am @@ -17,7 +17,7 @@ else echo "define osm_build_type \"free\"" > $(DESTDIR)/$(includedir)/infiniband/opensm/osm_build_id.h endif $(top_srcdir)/config/install-sh -m 755 -d $(DESTDIR)/$(sysconfdir)/init.d - cp $(top_srcdir)/scripts/opensm.init $(DESTDIR)/$(sysconfdir)/init.d/opensmd + cp $(top_builddir)/scripts/opensm.init $(DESTDIR)/$(sysconfdir)/init.d/opensmd chmod 755 $(DESTDIR)/$(sysconfdir)/init.d/opensmd -- 1.5.1.4 From dotanb at dev.mellanox.co.il Mon Feb 25 00:57:30 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 25 Feb 2008 10:57:30 +0200 Subject: [ofa-general] Internal loopback for HCA (with no external cable required) In-Reply-To: <6e4f44220802242144p3a712e54x4915f46fadcdf0be@mail.gmail.com> References: <6e4f44220802242144p3a712e54x4915f46fadcdf0be@mail.gmail.com> Message-ID: <47C282FA.2030105@dev.mellanox.co.il> Hi. Please look below. Phillip Wilson wrote: > Internal loopback for HCA (with no external cable required) > > > <> > > 1.) Is it possible to modify the ibv_rc_pingpong program for internal loopback? > It supports internal loopback even today (if the daemon and client are being executed on the same host and they are using the same IB device + IB port). > 2.) If each mthca1 port is assigned a LID, can the ibv_rc_pingpong > program be used as is? > I think that the answer is yes.
I didn't check that internal loopback is working if a LID is assigned to the IB port and the port state is down, but it should work. Dotan From jsteel at biw-ag.de Mon Feb 25 01:56:50 2008 From: jsteel at biw-ag.de (jsteel at biw-ag.de) Date: Mon, 25 Feb 2008 16:56:50 +0700 Subject: [ofa-general] Become her #1! Message-ID: <002f01c87794$bddf1cc0$cc1ba71a@kplyf> Top names in meds at dirt low price! http://pdo.meanspend.com From reliability at city-soccer.de Mon Feb 25 02:14:39 2008 From: reliability at city-soccer.de (NOTICE) Date: Mon, 25 Feb 2008 10:14:39 +0000 Subject: [ofa-general] A wintry wonderland wraps up London Fashion Week Message-ID: <7a5601c87797$1ec4dfb0$2a4dfb29@[41.251.77.42]> The closest one might come to CS in an interview is when the subject is interrupted by a close friend or family member, or perhaps must answer the phone. CS is used in a completely unmonitored environment where the subject feels most comfortable and will use their natural vernacular without overtly thinking about it. Get Your $1000 in Gambling Credit Initially, all modern linguistics was historical in orientation - even the study of modern dialects involved looking at their origins. But Saussure drew a distinction between synchronic and diachronic linguistics, which is fundamental to the present day organization of the discipline. Get it Here: http://www.imyourfan.info The biological origin of language is in principle a concern of historical linguistics, but most linguists regard it as too remote to be reliably established by standard techniques of historical linguistics such as the comparative method. Less standard techniques, such as mass lexical comparison, are used by some linguists to overcome the limitations of the comparative method, but most linguists regard them as unreliable. -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: = ç}ºëÞ/Dßqt Type: image/jpeg Size: 2743 bytes Desc: not available URL: From sashak at voltaire.com Mon Feb 25 02:50:31 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 25 Feb 2008 10:50:31 +0000 Subject: [ofa-general] Re: [PATCH] opensm/scripts: Fixing location of generated opensm.init script In-Reply-To: <47C27EA1.4070302@dev.mellanox.co.il> References: <47C27EA1.4070302@dev.mellanox.co.il> Message-ID: <20080225105031.GI12954@sashak.voltaire.com> On 10:38 Mon 25 Feb , Yevgeny Kliteynik wrote: > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. Sasha From vlad at dev.mellanox.co.il Mon Feb 25 02:39:49 2008 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Mon, 25 Feb 2008 12:39:49 +0200 Subject: [ofa-general] Re: [PATCH] opensm/scripts: Fixing location of generated opensm.init script In-Reply-To: <20080225105031.GI12954@sashak.voltaire.com> References: <47C27EA1.4070302@dev.mellanox.co.il> <20080225105031.GI12954@sashak.voltaire.com> Message-ID: <47C29AF5.6090100@dev.mellanox.co.il> Sasha Khapyorsky wrote: > On 10:38 Mon 25 Feb , Yevgeny Kliteynik wrote: >> Signed-off-by: Yevgeny Kliteynik > > Applied. Thanks. > > Sasha Pulled into OFED. 
Regards, Vladimir From vlad at lists.openfabrics.org Mon Feb 25 03:05:31 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 25 Feb 2008 03:05:31 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080225-0200 daily build status Message-ID: <20080225110531.CD85BE6084A@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with 
linux-2.6.16 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.21.1 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From dotanb at dev.mellanox.co.il Mon Feb 25 03:19:49 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 25 Feb 2008 13:19:49 +0200 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <47A870A8.5050409@voltaire.com> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> <47A870A8.5050409@voltaire.com> Message-ID: <47C2A455.5070603@dev.mellanox.co.il> Or Gerlitz wrote: > Sean Hefty wrote: >> The verbs also support iWarp devices and are not necessarily >> restricted to the >> 1.2 IB spec definitions. It might make sense to state that the IB >> implementation is based on the 1.2 spec in an IB specific section, >> but keep the >> general documentation transport neutral at this point. > > Sure, the page would be changed to reflect that. > > Or. Sorry, i didn't find the time to get to it until now. I changed the problematic sentence to: "This library is an implementation of the verbs based on the Infiniband specification volume 1.2 chapter 11." I hope that this is good enough. 
thanks Dotan From chroniclesmc43 at nextiraone.de Sun Feb 24 03:44:26 2008 From: chroniclesmc43 at nextiraone.de (Shelia Weston) Date: Mon, 24 Feb 2008 20:44:26 +0900 Subject: [ofa-general] 0ver 3OO $tyles of //atches! Message-ID: <01c87726$0b2cb100$596056dc@chroniclesmc43> K nrp ing R jo ep ytf lic wn a K jbm in uzr gR wq ep yw li wk ca was established in early 1999 as a specialist onli wh ne store selling competitively priced brand name quality re zwa pl oo ic pr as. HIGH QUALITY R hiz EPL cdi ICA ay S:R avi ol ptx ex D enj ate hmc jus jhp tsR rnf ol kyi ex S km po pn rts M ob ode bnp lsAl gdm ai gby n S lbe ilbe vt rst ibl einA La thj n slx ge & So say hn gv eB iov ell & Ro on ssBv jra lg oty ariCh tu op zlz ardFr shg an qqq ck M xj ull ne erG bpv uc muv ciPa hf ne py raiAnd there are a lot of other beautiful things in a gift by New Year!!! All the p cx ric rap es you can find on our si jnd te! Shelia Weston -------------- next part -------------- An HTML attachment was scrubbed... URL: From KelliindeedDuarte at legalview.com Mon Feb 25 06:09:34 2008 From: KelliindeedDuarte at legalview.com (Lynne Baca) Date: Mon, 25 Feb 2008 13:09:34 -0100 Subject: [ofa-general] Business Loans Message-ID: <640501c877a7$5f8fc300$2101a8c0@acasa> If you have your own business and need IMMEDIATE ready money to spend ANY way you like or wish Extra money to give the business a boost or require A low interest loan - NO STRINGS ATTACHED! Do not worry about approval... your Your credit history will not disqualify you! http://pontig.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From diego.guella at sircomtech.com Mon Feb 25 04:38:50 2008 From: diego.guella at sircomtech.com (Diego Guella) Date: Mon, 25 Feb 2008 13:38:50 +0100 Subject: [ofa-general] 1x TCA using a 4x connector Message-ID: <016d01c877ab$60c2ef00$05c8a8c0@DIEGO> Hi all, I don't know if this is the right mailing list to ask this, but maybe someone can direct me to the right place. 
We are thinking about developing a very simple TCA, that can connect to a switch and regular HCAs from Mellanox or other manufacturers. Can our TCA provide a 1x port, but using a _physical_ 4x connector? In other words, our TCA will be a 1x SDR TCA, but we would like to use a 4x connector, and so use 4x cables to link it to our switch. In response to Subnet Management Packets, we will advertise a 1x SDR port, and we will behave exactly as a regular 1x TCA should do. What should we do with the unused physical lanes? Should we leave them floating? Should we connect them to GND? Or something else? Is there something particular that we should do to get this working? -------------- next part -------------- An HTML attachment was scrubbed... URL: From jackm at dev.mellanox.co.il Mon Feb 25 05:57:41 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Mon, 25 Feb 2008 15:57:41 +0200 Subject: [ofa-general] Another OFED 1.3 XRC bug with 2.6.9 kernel In-Reply-To: References: <200802191440.33559.jackm@dev.mellanox.co.il> Message-ID: <200802251557.41849.jackm@dev.mellanox.co.il> On Sunday 24 February 2008 23:51, Tang, Changqing wrote: > > Jack: > Mellanox installed RC5 on helios.mellanox.com for me, this is a 2.6.9-42 kernel > system. But I still see that when errno is not zero, and I call ibv_modify_xrc_rcv_qp(), > this function fails. > > If I clear errno to zero before I call ibv_modify_xrc_rcv_qp(), everything is fine. > > Can you take a look ? > > --CQ > Problem was that the ofed driver was not restarted on most of the cluster hosts after installation of RC5. (RC3 was therefore still running). We did the driver restart on all the hosts from here, and restarted opensm as well on the appropriate host. Things should work properly now.
- Jack From dotanb at dev.mellanox.co.il Mon Feb 25 06:16:39 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 25 Feb 2008 16:16:39 +0200 Subject: [ofa-general] [PATCH] libibverbs/man: add support to the new parameter that control the SL Message-ID: <200802251616.39251.dotanb@dev.mellanox.co.il> Updated the man pages with the new parameter (-l) that control the SL value from the command line. Signed-off-by: Dotan Barak --- diff --git a/man/ibv_rc_pingpong.1 b/man/ibv_rc_pingpong.1 index e83d635..d213c6f 100644 --- a/man/ibv_rc_pingpong.1 +++ b/man/ibv_rc_pingpong.1 @@ -6,11 +6,11 @@ ibv_rc_pingpong \- simple InfiniBand RC transport test .SH SYNOPSIS .B ibv_rc_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-r rx depth] -[\-n iters] [\-e] \fBHOSTNAME\fR +[\-n iters] [\-l sl] [\-e] \fBHOSTNAME\fR .B ibv_rc_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-r rx depth] -[\-n iters] [\-e] +[\-n iters] [\-l sl] [\-e] .SH DESCRIPTION .PP @@ -38,7 +38,10 @@ post \fIDEPTH\fR receives at a time (default 1000) .TP \fB\-n\fR, \fB\-\-iters\fR=\fIITERS\fR perform \fIITERS\fR message exchanges (default 1000) -.Tp +.TP +\fB\-l\fR, \fB\-\-sl\fR=\fISL\fR +use \fISL\fR as the service level value of the QP (default 0) +.TP \fB\-e\fR, \fB\-\-events\fR sleep while waiting for work completion events (default is to poll for completions) diff --git a/man/ibv_srq_pingpong.1 b/man/ibv_srq_pingpong.1 index 3624c6b..1d6d3b5 100644 --- a/man/ibv_srq_pingpong.1 +++ b/man/ibv_srq_pingpong.1 @@ -6,11 +6,11 @@ ibv_srq_pingpong \- simple InfiniBand shared receive queue test .SH SYNOPSIS .B ibv_srq_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-q num QPs] [\-r rx depth] -[\-n iters] [\-e] \fBHOSTNAME\fR +[\-n iters] [\-l sl] [\-e] \fBHOSTNAME\fR .B ibv_srq_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-q num QPs] [\-r rx depth] -[\-n iters] [\-e] +[\-n iters] [\-l sl] [\-e] .SH DESCRIPTION .PP @@ -43,6 +43,9 @@ post \fIDEPTH\fR 
receives at a time (default 1000) \fB\-n\fR, \fB\-\-iters\fR=\fIITERS\fR perform \fIITERS\fR message exchanges (default 1000) .TP +\fB\-l\fR, \fB\-\-sl\fR=\fISL\fR +use \fISL\fR as the service level value of the QP (default 0) +.TP \fB\-e\fR, \fB\-\-events\fR sleep while waiting for work completion events (default is to poll for completions) diff --git a/man/ibv_uc_pingpong.1 b/man/ibv_uc_pingpong.1 index 6b535dc..ec97eb0 100644 --- a/man/ibv_uc_pingpong.1 +++ b/man/ibv_uc_pingpong.1 @@ -6,11 +6,11 @@ ibv_uc_pingpong \- simple InfiniBand UC transport test .SH SYNOPSIS .B ibv_uc_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-r rx depth] -[\-n iters] [\-e] \fBHOSTNAME\fR +[\-n iters] [\-l sl] [\-e] \fBHOSTNAME\fR .B ibv_uc_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-r rx depth] -[\-n iters] [\-e] +[\-n iters] [\-l sl] [\-e] .SH DESCRIPTION .PP @@ -39,6 +39,9 @@ post \fIDEPTH\fR receives at a time (default 1000) \fB\-n\fR, \fB\-\-iters\fR=\fIITERS\fR perform \fIITERS\fR message exchanges (default 1000) .TP +\fB\-l\fR, \fB\-\-sl\fR=\fISL\fR +use \fISL\fR as the service level value of the QP (default 0) +.TP \fB\-e\fR, \fB\-\-events\fR sleep while waiting for work completion events (default is to poll for completions) diff --git a/man/ibv_ud_pingpong.1 b/man/ibv_ud_pingpong.1 index e5c0faa..28bcadc 100644 --- a/man/ibv_ud_pingpong.1 +++ b/man/ibv_ud_pingpong.1 @@ -6,11 +6,11 @@ ibv_ud_pingpong \- simple InfiniBand UD transport test .SH SYNOPSIS .B ibv_ud_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-r rx depth] -[\-n iters] [\-e] \fBHOSTNAME\fR +[\-n iters] [\-l sl] [\-e] \fBHOSTNAME\fR .B ibv_ud_pingpong [\-p port] [\-d device] [\-i ib port] [\-s size] [\-r rx depth] -[\-n iters] [\-e] +[\-n iters] [\-l sl] [\-e] .SH DESCRIPTION .PP @@ -39,6 +39,9 @@ post \fIDEPTH\fR receives at a time (default 500) \fB\-n\fR, \fB\-\-iters\fR=\fIITERS\fR perform \fIITERS\fR message exchanges (default 1000) .TP +\fB\-l\fR, 
\fB\-\-sl\fR=\fISL\fR +use \fISL\fR as the service level value of the AH (default 0) +.TP \fB\-e\fR, \fB\-\-events\fR sleep while waiting for work completion events (default is to poll for completions) From tziporet at mellanox.co.il Mon Feb 25 07:56:57 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 25 Feb 2008 17:56:57 +0200 Subject: [ofa-general] Agenda for OFED meeting today Message-ID: <6C2C79E72C305246B504CBA17B5500C90364179A@mtlexch01.mtl.com> Agenda for the OFED meeting today on OFED 1.3 GA readiness: 1. We are creating RC6 with the latest critical issues fixed (see list of changes at the end of this mail). It will be available later today. To have a release this week this must be the GOLD RC and no more code changes should be allowed. Proposed schedule: RC6 - today Feb 25 GA - Thursday Feb 28 2. Status update - all 3. bugs review (blocker & critical only): 917 cri P1 SLES jeremy.brown at qlogic.com ipath build error on ppc64 895 cri P3 Othe jim at mellanox.com kernel panic while running multiple test on sdp I think we should not hold the release for either bug 4. Open discussion List of changes since RC5: ========================== Kernel: 1. cm: flush workqueue when removing device (from Sean) 2. ipoib: set child MTU as the parent's (bug 927) 3. RDS: widen the RDMA user token to 64bits (from Olaf) 4. hw/nes: Update from Glenn (Neteffect) 5. hw/ipath: reset retry counter if a good response is seen (bug 874) and fix 4k MTU with IPoIB (bug 951) and general fixes from Ralph. User space: 1. srp_daemon: Added SRP_DAEMON_ENABLE parameter to the /etc/infiniband/openib.conf. Used to execute srp_daemon in case that HA is not required. 2. libibverbs: Fix minor issues to be consistent with the libibverbs trunk 3.
Opensm: rename OpenSM startup script to opensmd and fixing location of generated opensm.init script (bug 939) From olaf.kirch at oracle.com Mon Feb 25 08:01:07 2008 From: olaf.kirch at oracle.com (Olaf Kirch) Date: Mon, 25 Feb 2008 17:01:07 +0100 Subject: [ofa-general] [PATCH] RDS: prevent oops when sending to local address Message-ID: <200802251701.09166.olaf.kirch@oracle.com> Hi, This is a bugfix for a kernel oops that Rick ran into last Friday. I know it's a close shave - is the window for 1.3 still open? Olaf --------------------------------------------------- From: Olaf Kirch Subject: RDS: prevent oops when sending to local address When a user process sends to a local IP address, RDS will quietly use the loopback transport instead of the transport originally bound to. However, loopback does not support RDMA, and hence c_trans->xmit_rdma is NULL. Trying to call xmit_rdma will give a nice and clean oops. So we really want to check early whether the transport associated with the connection does indeed support RDMA. Signed-off-by: Olaf Kirch --- net/rds/send.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) Index: ofa_kernel-1.3/net/rds/send.c =================================================================== --- ofa_kernel-1.3.orig/net/rds/send.c +++ ofa_kernel-1.3/net/rds/send.c @@ -635,7 +635,8 @@ out: return *queued; } -static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, +static int rds_cmsg_send(struct rds_sock *rs, struct rds_connection *conn, + struct rds_message *rm, struct msghdr *msg, int *allocated_mr) { @@ -649,16 +650,27 @@ static int rds_cmsg_send(struct rds_sock if (cmsg->cmsg_level != SOL_RDS) continue; + /* We explicitly check whether conn->c_trans supports + * RDMA, because when sending to a local address, conn + * will use the loopback transport which is not capable + * of RDMA. Calling xmit_rdma later would oops. 
+ */ switch (cmsg->cmsg_type) { case RDS_CMSG_RDMA_ARGS: + if (!conn->c_trans->xmit_rdma) + return -EOPNOTSUPP; ret = rds_cmsg_rdma_args(rs, rm, cmsg); break; case RDS_CMSG_RDMA_DEST: + if (!conn->c_trans->xmit_rdma) + return -EOPNOTSUPP; ret = rds_cmsg_rdma_dest(rs, rm, cmsg); break; case RDS_CMSG_RDMA_MAP: + if (!conn->c_trans->xmit_rdma) + return -EOPNOTSUPP; ret = rds_cmsg_rdma_map(rs, rm, cmsg); if (!ret) *allocated_mr = 1; @@ -727,11 +739,6 @@ int rds_sendmsg(struct kiocb *iocb, stru rm->m_daddr = daddr; - /* Parse any control messages the user may have included. */ - ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); - if (ret) - goto out; - /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ if ((conn = rs->rs_conn) == NULL || conn->c_faddr != daddr) { @@ -745,6 +752,11 @@ int rds_sendmsg(struct kiocb *iocb, stru rs->rs_conn = conn; } + /* Parse any control messages the user may have included. */ + ret = rds_cmsg_send(rs, conn, rm, msg, &allocated_mr); + if (ret) + goto out; + /* If the connection is down, trigger a connect. We may * have scheduled a delayed reconnect however - in this case * we should not interfere. -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax From hrosenstock at xsigo.com Mon Feb 25 08:05:21 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 25 Feb 2008 08:05:21 -0800 Subject: [ofa-general] Re: [PATCH] opensm: rename OpenSM startup script to opensmd In-Reply-To: <20080224141537.GI3116@sashak.voltaire.com> References: <20080224141537.GI3116@sashak.voltaire.com> Message-ID: <1203955521.8793.298.camel@hrosenstock-ws.xsigo.com> On Sun, 2008-02-24 at 14:15 +0000, Sasha Khapyorsky wrote: > Rename OpenSM startup script /etc/init.d/opensm -> /etc/init.d/opensmd. Does any OFED README or build or install script need updating due to the name change ? 
-- Hal > Signed-off-by: Sasha Khapyorsky [snip...] From okir at lst.de Mon Feb 25 08:07:02 2008 From: okir at lst.de (Olaf Kirch) Date: Mon, 25 Feb 2008 17:07:02 +0100 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <200802211755.41184.jackm@dev.mellanox.co.il> References: <200802202056.32451.okir@lst.de> <47BD9D74.6010007@voltaire.com> <200802211755.41184.jackm@dev.mellanox.co.il> Message-ID: <200802251707.03377.okir@lst.de> On Thursday 21 February 2008 16:55, Jack Morgenstein wrote: > On Thursday 21 February 2008 17:49, Or Gerlitz wrote: > > Is it possible to never re-initialize the key? if yes, what's the > > semantics of the M=max_map_per_fmr device attribute? I was thinking that > > after the fmr was mapped M times, something --has-- to be reinitialized, > > sorry if this is my misunderstanding, can you clarify that? > > > It does not have to be re-initialized. However, the cache needs to be flushed > (SYNC_TPT), so that we do not have the same 32-bit key multiple times in the > cache. > > The "something" which must be done is to flush the cache. Once the cache is > flushed, we again have max_map_per_fmr remap possibilities, and we don't care > what the initial sequence value is. However, the index value MUST be the > same as it was before. Okay, how can I help to move this forward? Do you want me to submit an updated patch that includes the Sinai? 
Olaf -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax From vlad at mellanox.co.il Mon Feb 25 08:09:21 2008 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Mon, 25 Feb 2008 18:09:21 +0200 Subject: [ofa-general] RE: [PATCH] RDS: prevent oops when sending to local address In-Reply-To: <200802251701.09166.olaf.kirch@oracle.com> References: <200802251701.09166.olaf.kirch@oracle.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C9036417B9@mtlexch01.mtl.com> > -----Original Message----- > From: Olaf Kirch [mailto:olaf.kirch at oracle.com] > Sent: Monday, February 25, 2008 6:01 PM > To: Vladimir Sokolovsky > Cc: rds-devel at oss.oracle.com; general at lists.openfabrics.org > Subject: [PATCH] RDS: prevent oops when sending to local address > > Hi, > > This is a bugfix for a kernel oops that Rick ran into last > Friday. I know it's a close shave - is the window for 1.3 > still open? > > Olaf The window is closed already. After the OFED-1.3 release I will take this patch and create the new daily build. So, this patch will be included in the OFED-1.3.1. Regards, Vladimir From jackm at dev.mellanox.co.il Mon Feb 25 08:06:52 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Mon, 25 Feb 2008 18:06:52 +0200 Subject: [ofa-general] [PATCH RFC] ib_mthca: avoid recycling FMR R_Keys too soon In-Reply-To: <200802251707.03377.okir@lst.de> References: <200802202056.32451.okir@lst.de> <200802211755.41184.jackm@dev.mellanox.co.il> <200802251707.03377.okir@lst.de> Message-ID: <200802251806.52615.jackm@dev.mellanox.co.il> On Monday 25 February 2008 18:07, Olaf Kirch wrote: > Okay, how can I help to move this forward? Do you want me to submit an > updated patch that includes the Sinai? > > Olaf > That would be fine. Also, please include one for mlx4. Thanks! 
- Jack From hrosenstock at xsigo.com Mon Feb 25 08:12:35 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 25 Feb 2008 08:12:35 -0800 Subject: [ofa-general] [PATCH][TRIVIAL] opensm/osm_sa_slvl_record.c: Cosmetic change to osm_log message Message-ID: <1203955955.8793.303.camel@hrosenstock-ws.xsigo.com> opensm/osm_sa_slvl_record.c: Cosmetic change to osm_log message Signed-off-by: Hal Rosenstock diff --git a/opensm/opensm/osm_sa_slvl_record.c b/opensm/opensm/osm_sa_slvl_record.c index 9d717ba..9b587a5 100644 --- a/opensm/opensm/osm_sa_slvl_record.c +++ b/opensm/opensm/osm_sa_slvl_record.c @@ -459,7 +459,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) sa->p_subn); if (status != IB_SUCCESS) { OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2606: " - "osm_vendor_send status = %s\n", + "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; } From sashak at voltaire.com Mon Feb 25 08:37:59 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 25 Feb 2008 16:37:59 +0000 Subject: [ofa-general] Re: [PATCH] opensm: rename OpenSM startup script to opensmd In-Reply-To: <1203955521.8793.298.camel@hrosenstock-ws.xsigo.com> References: <20080224141537.GI3116@sashak.voltaire.com> <1203955521.8793.298.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080225163759.GL12954@sashak.voltaire.com> On 08:05 Mon 25 Feb , Hal Rosenstock wrote: > On Sun, 2008-02-24 at 14:15 +0000, Sasha Khapyorsky wrote: > > Rename OpenSM startup script /etc/init.d/opensm -> /etc/init.d/opensmd. > > Does any OFED README or build or install script need updating due to the > name change ? Not that I know. In OFED-1.2 script was named with 'd'. 
Sasha From sashak at voltaire.com Mon Feb 25 08:42:47 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 25 Feb 2008 16:42:47 +0000 Subject: [ofa-general] Re: [PATCH][TRIVIAL] opensm/osm_sa_slvl_record.c: Cosmetic change to osm_log message In-Reply-To: <1203955955.8793.303.camel@hrosenstock-ws.xsigo.com> References: <1203955955.8793.303.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080225164247.GM12954@sashak.voltaire.com> Hi Hal, On 08:12 Mon 25 Feb , Hal Rosenstock wrote: > opensm/osm_sa_slvl_record.c: Cosmetic change to osm_log message > > Signed-off-by: Hal Rosenstock Hmm, I did similar commit two days ago: commit 540ce9fadaae95839613969996b2ae77f6fb77be Author: Sasha Khapyorsky Date: Fri Feb 22 20:32:49 2008 +0200 opensm/osm_sa_slvl_record.c: fix typo in log print Fix typo in OSM_LOG() print. Signed-off-by: Sasha Khapyorsky diff --git a/opensm/opensm/osm_sa_slvl_record.c b/opensm/opensm/osm_sa_slvl_record.c index 9d717ba..9b587a5 100644 --- a/opensm/opensm/osm_sa_slvl_record.c +++ b/opensm/opensm/osm_sa_slvl_record.c @@ -459,7 +459,7 @@ void osm_slvl_rec_rcv_process(IN void *ctx, IN void *data) sa->p_subn); if (status != IB_SUCCESS) { OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 2606: " - "osm_vendor_send status = %s\n", + "osm_sa_vendor_send status = %s\n", ib_get_err_str(status)); goto Exit; } Thanks Anyway! :) Sasha From olaf.kirch at oracle.com Mon Feb 25 08:38:23 2008 From: olaf.kirch at oracle.com (Olaf Kirch) Date: Mon, 25 Feb 2008 17:38:23 +0100 Subject: [ofa-general] Re: [PATCH] RDS: prevent oops when sending to local address In-Reply-To: <6C2C79E72C305246B504CBA17B5500C9036417B9@mtlexch01.mtl.com> References: <200802251701.09166.olaf.kirch@oracle.com> <6C2C79E72C305246B504CBA17B5500C9036417B9@mtlexch01.mtl.com> Message-ID: <200802251738.24970.olaf.kirch@oracle.com> Hi Vlad, On Monday 25 February 2008 17:09, Vladimir Sokolovsky wrote: > The window is closed already. 
> After the OFED-1.3 release I will take this patch and create the new > daily build. > So, this patch will be included in the OFED-1.3.1. Okay, no problem. Thanks a lot, Olaf -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax From hrosenstock at xsigo.com Mon Feb 25 08:45:21 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 25 Feb 2008 08:45:21 -0800 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <47C2A455.5070603@dev.mellanox.co.il> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> <47A870A8.5050409@voltaire.com> <47C2A455.5070603@dev.mellanox.co.il> Message-ID: <1203957921.8793.320.camel@hrosenstock-ws.xsigo.com> On Mon, 2008-02-25 at 13:19 +0200, Dotan Barak wrote: > Or Gerlitz wrote: > > Sean Hefty wrote: > >> The verbs also support iWarp devices and are not necessarily > >> restricted to the > >> 1.2 IB spec definitions. It might make sense to state that the IB > >> implementation is based on the 1.2 spec in an IB specific section, > >> but keep the > >> general documentation transport neutral at this point. > > > > Sure, the page would be changed to reflect that. > > > > Or. > Sorry, i didn't find the time to get to it until now. > > I changed the problematic sentence to: > "This library is an implementation of the verbs based on the Infiniband > specification volume 1.2 chapter 11." ^^^^^^^^^^ volume 1 Also, should this refer to IBA 1.2.1 rather than 1.2 (if that was what was intended by the 1.2 reference) ? > I hope that this is good enough. 
> > thanks > Dotan > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From xma at us.ibm.com Mon Feb 25 08:16:44 2008 From: xma at us.ibm.com (Shirley Ma) Date: Mon, 25 Feb 2008 08:16:44 -0800 Subject: ***SPAM*** Re: [ofa-general] Agenda for OFED meeting today In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90364179A@mtlexch01.mtl.com> Message-ID: Hello Tziporet, >fix 4k MTU with IPoIB (bug 951) According to the bug report: In RC5, IPoIB does not work on the QLogic 7140 SDR card. This needs to be fixed for the release. I didn't see any evidence this is a 4K MTU bug. How 4K MTU patch impact 7140 SDR card? And more inside? Thanks Shirley "Tziporet Koren" .co.il> cc Sent by: general at lists.openfabrics.org general-b Subject ounces at li [ofa-general] Agenda for OFED meeting today sts.openf abrics.or g 02/25/08 07:56 AM Agenda for the OFED meeting today on OFED 1.3 GA readiness: 1. We creating RC6 with the latest critical issues fixed (see list of changes at the end of this mail). It will be available later today. To have a release this week this must be the GOLD RC and no more code changes should be allowed. Proposed schedule: RC6 - today Feb 25 GA - Thursday Feb 28 2. Status update - all 3. bugs review (blocker & critical only): 917 cri P1 SLES jeremy.brown at qlogic.com ipath build error on ppc64 895 cri P3 Othe jim at mellanox.com kernel panic while running multiple test on sdp I think we should not hold the release for both bugs 4. Open discussion List of changes since RC5: ========================== Kernel: 1. cm: flush workqueue when removing device (from Sean) 2. ipoib: set child MTU as the parent's (bug 927) 3. RDS: widen the RDMA user token to 64bits (from Olaf) 4. hw/nes: Update from Glenn (Neteffect) 5. 
hw/ipath: reset retry counter if a good response is seen (bug 874) and fix 4k MTU with IPoIB (bug 951) and general fixes from Ralph. User space: 1. srp_daemon: Added SRP_DAEMON_ENABLE parameter to the /etc/infiniband/openib.conf. Used to execute srp_daemon in case that HA is not required. 2. libibverbs: Fix minor issues to be consistent with the libibverbs trunk 3. Opensm: rename OpenSM startup script to opensmd and fixing location of generated opensm.init script (bug 939) _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: graycol.gif Type: image/gif Size: 105 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: pic32221.gif Type: image/gif Size: 1255 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ecblank.gif Type: image/gif Size: 45 bytes Desc: not available URL: From jlentini at netapp.com Mon Feb 25 08:49:58 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 25 Feb 2008 11:49:58 -0500 (EST) Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: <20080211210044.GA4561@fieldses.org> References: <20080211173322.GA4755@fieldses.org> <20080211210044.GA4561@fieldses.org> Message-ID: On Mon, 11 Feb 2008, J. Bruce Fields wrote: > On Mon, Feb 11, 2008 at 03:56:17PM -0500, James Lentini wrote: > > > > > > On Mon, 11 Feb 2008, J. Bruce Fields wrote: > > > > > On Mon, Feb 11, 2008 at 12:25:14PM -0500, James Lentini wrote: > > > > > > > > Linux 2.6.25 will be the first official kernel release to contain the > > > > NFS/RDMA server. 
With the client and server now both available in > > > > 2.6.25-rc1, we've simplified our NFS/RDMA installation instructions. > > > > The new instructions are available here: > > > > > > > > http://nfs-rdma.sourceforge.net/Documents/README > > > > > > Any reason not to add that to the linux tree, say in > > > Documentation/filesystems/nfs-rdma.txt? > > > > > > --b. > > > > That sounds like a good idea Bruce. The current document is strictly a > > HOWTO. Should we add sections on the design and implementation? > > Sure, that'd be great. But I think it'd be fine to submit the howto > pretty much as it is and add the rest later. > > --b. Bruce, These are ready to include in what ever kernel version you see fit. Signed-off-by: James Lentini --- /dev/null 2008-01-17 11:34:08.491004516 -0500 +++ Documentation/nfs-rdma.txt 2008-02-25 11:43:45.705518000 -0500 @@ -0,0 +1,253 @@ +################################################################################ +# # +# NFS/RDMA README # +# # +################################################################################ + + Author: NetApp and Open Grid Computing + Date: February 11, 2008 + +Table of Contents +~~~~~~~~~~~~~~~~~ + - Overview + - Getting Help + - Installation + - Check RDMA and NFS Setup + - NFS/RDMA Setup + +Overview +~~~~~~~~ + + This document describes how to install and setup the Linux NFS/RDMA client + and server software. + + The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server + was first included in the following release, Linux 2.6.25. + + In our testing, we have obtained excellent performance results (full 10Gbit + wire bandwidth at minimal client CPU) under many workloads. The code passes + the full Connectathon test suite and operates over both Infiniband and iWARP + RDMA adapters. + +Getting Help +~~~~~~~~~~~~ + + If you get stuck, you can ask questions on the + + nfs-rdma-devel at lists.sourceforge.net + + mailing list. 
+ +Installation +~~~~~~~~~~~~ + + These instructions are a step by step guide to building a machine for + use with NFS/RDMA. + + - Install an OpenFabrics-compatible RDMA card + + Any device that is compatible with the OpenFabrics.org software stack is + acceptable. + + Testing has been performed using several Mellanox-based IB cards, the + Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter. + + - Install a Linux distribution and tools + + The first kernel release to contain both the NFS/RDMA client and server was + Linux 2.6.25. Therefore, a distribution compatible with this and subsequent + Linux kernel releases should be installed. + + The procedures described in this document have been tested with + distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). + + - Install nfs-utils-1.1.1 or greater on the client + + An NFS/RDMA mount point can only be obtained by using the mount.nfs + command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs + you are using, type: + + > /sbin/mount.nfs -V + + If the version is less than 1.1.1 or the command does not exist, + then you will need to install the latest version of nfs-utils. + + Download the latest package from: + + http://www.kernel.org/pub/linux/utils/nfs + + Uncompress the package and follow the installation instructions. + + If you will not be using GSS and NFSv4, the installation process + can be simplified by disabling these features when running configure: + + > ./configure --disable-gss --disable-nfsv4 + + For more information on this see the package's README and INSTALL files. + + After building the nfs-utils package, there will be a mount.nfs binary in + the utils/mount directory. This binary can be used to initiate NFS v2, v3, + or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4. + The standard technique is to create a symlink called mount.nfs4 to mount.nfs. 
+ + NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed + on the NFS client machine. You do not need this specific version of + nfs-utils on the server. Furthermore, only the mount.nfs command from + nfs-utils-1.1.1 is needed on the client. + + - Install a Linux kernel with NFS/RDMA + + The NFS/RDMA client and server are both included in the mainline Linux + kernel version 2.6.25 and later. This and other versions of the 2.6 Linux + kernel can be found at: + + ftp://ftp.kernel.org/pub/linux/kernel/v2.6/ + + Download the sources and place them in an appropriate location. + + - Configure the RDMA stack + + Make sure your kernel configuration has RDMA support enabled. Under + Device Drivers -> InfiniBand support, update the kernel configuration + to enable InfiniBand support [NOTE: the option name is misleading. Enabling + InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)]. + + Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or + iWARP adapter support (amso, cxgb3, etc.). + + If you are using InfiniBand, be sure to enable IP-over-InfiniBand support. + + - Configure the NFS client and server + + Your kernel configuration must also have NFS file system support and/or + NFS server support enabled. These and other NFS related configuration + options can be found under File Systems -> Network File Systems. + + - Build, install, reboot + + The NFS/RDMA code will be enabled automatically if NFS and RDMA + are turned on. The NFS/RDMA client and server are configured via the hidden + SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. 
The + value of SUNRPC_XPRT_RDMA will be: + + - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client + and server will not be built + - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M, + in this case the NFS/RDMA client and server will be built as modules + - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client + and server will be built into the kernel + + Therefore, if you have followed the steps above and turned on NFS and RDMA, + the NFS/RDMA client and server will be built. + + Build a new kernel, install it, boot it. + +Check RDMA and NFS Setup +~~~~~~~~~~~~~~~~~~~~~~~~ + + Before configuring the NFS/RDMA software, it is a good idea to test + your new kernel to ensure that the kernel is working correctly. + In particular, it is a good idea to verify that the RDMA stack + is functioning as expected and standard NFS over TCP/IP and/or UDP/IP + is working properly. + + - Check RDMA Setup + + If you built the RDMA components as modules, load them at + this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel + card: + + > modprobe ib_mthca + > modprobe ib_ipoib + + If you are using InfiniBand, make sure there is a Subnet Manager (SM) + running on the network. If your IB switch has an embedded SM, you can + use it. Otherwise, you will need to run an SM, such as OpenSM, on one + of your end nodes. + + If an SM is running on your network, you should see the following: + + > cat /sys/class/infiniband/driverX/ports/1/state + 4: ACTIVE + + where driverX is mthca0, ipath5, ehca3, etc. + + To further test the InfiniBand software stack, use IPoIB (this + assumes you have two IB hosts named host1 and host2): + + host1> ifconfig ib0 a.b.c.x + host2> ifconfig ib0 a.b.c.y + host1> ping a.b.c.y + host2> ping a.b.c.x + + For other device types, follow the appropriate procedures. 
+ + - Check NFS Setup + + For the NFS components enabled above (client and/or server), + test their functionality over standard Ethernet using TCP/IP or UDP/IP. + +NFS/RDMA Setup +~~~~~~~~~~~~~~ + + We recommend that you use two machines, one to act as the client and + one to act as the server. + + One time configuration: + + - On the server system, configure the /etc/exports file and + start the NFS/RDMA server. + + Exports entries with the following format have been tested: + + /vol0 10.97.103.47(rw,async) 192.168.0.47(rw,async,insecure,no_root_squash) + + Here the first IP address is the client's Ethernet address and the second + IP address is the client's IPoIB address. + + Each time a machine boots: + + - Load and configure the RDMA drivers + + For InfiniBand using a Mellanox adapter: + + > modprobe ib_mthca + > modprobe ib_ipoib + > ifconfig ib0 a.b.c.d + + NOTE: use unique addresses for the client and server + + - Start the NFS server + + If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA transport module: + + > modprobe svcrdma + + Regardless of how the server was built (module or built-in), start the server: + + > /etc/init.d/nfs start + + or + + > service nfs start + + Instruct the server to listen on the RDMA transport: + + > echo rdma 2050 > /proc/fs/nfsd/portlist + + - On the client system + + If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA client module: + + > modprobe xprtrdma + + Regardless of how the client was built (module or built-in), issue the mount.nfs command: + + > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050 + + To verify that the mount is using RDMA, run "cat /proc/mounts" and check the + "proto" field for the given mount. + + Congratulations! You're using NFS/RDMA! 
From gretnjonh at free.fr Mon Feb 25 09:01:55 2008 From: gretnjonh at free.fr (Ashlee Tuttle) Date: Mon, 25 Feb 2008 18:01:55 +0100 Subject: [ofa-general] Our Product Really Makes Wonders Message-ID: <655293931.42602197880192@free.fr> Imagine how happy you will be with a new cock size and how good your partner will feel. Order our VPXL and start a new life of success and happiness.http://geocities.com/gaineseddy/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From rdreier at cisco.com Mon Feb 25 09:04:04 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 25 Feb 2008 09:04:04 -0800 Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: (James Lentini's message of "Mon, 25 Feb 2008 11:49:58 -0500 (EST)") References: <20080211173322.GA4755@fieldses.org> <20080211210044.GA4561@fieldses.org> Message-ID: > + - Install an OpenFabrics-compatible RDMA card > + > + Any device that is compatible with the OpenFabrics.org software stack is > + acceptable. This seems a little odd in the context of instructions on how to use the code in the kernel. Maybe something like: - Install an RDMA device Any device supported by the drivers in drivers/infiniband/hw is acceptable. - R. From tiger at tmzg.zzn.com Mon Feb 25 05:00:55 2008 From: tiger at tmzg.zzn.com (Tiger) Date: Mon, 25 Feb 2008 08:00:55 -0500 Subject: [ofa-general] Video Now! Message-ID: <000901c877ae$75e29ea0$033da8c0@CCRO> 录相 下载Flash Player 9 1 2 1 2 3 Make your own CD 港人羡慕台湾争民主 回归十年争普选 为君鸣不平 More -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr.gif Type: image/gif Size: 1145 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: itunes.png Type: image/png Size: 296 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: podcast_rss.gif Type: image/gif Size: 1022 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: btnrss.gif Type: image/gif Size: 627 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv1.gif Type: image/gif Size: 1024 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv2.gif Type: image/gif Size: 1024 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv3.gif Type: image/gif Size: 1021 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv4.gif Type: image/gif Size: 1025 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv5.gif Type: image/gif Size: 1033 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr1-1.jpg Type: image/jpeg Size: 2045 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr2-1.jpg Type: image/jpeg Size: 1311 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr3-1.jpg Type: image/jpeg Size: 1468 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr4-1.jpg Type: image/jpeg Size: 1579 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr5-1.jpg Type: image/jpeg Size: 1754 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv6.gif Type: image/gif Size: 1022 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: jqrv7.gif Type: image/gif Size: 1023 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr5-2.jpg Type: image/jpeg Size: 1207 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr6-1.jpg Type: image/jpeg Size: 1641 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr6-2.jpg Type: image/jpeg Size: 1932 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 61210.jpg Type: image/jpeg Size: 1480 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr7.jpg Type: image/jpeg Size: 1779 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv8.gif Type: image/gif Size: 1062 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqrv9.gif Type: image/gif Size: 1037 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr8.jpg Type: image/jpeg Size: 2805 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jqr9.jpg Type: image/jpeg Size: 1936 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: iso.jpg Type: image/jpeg Size: 2166 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 007a.gif Type: image/gif Size: 1007 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 009.gif Type: image/gif Size: 1656 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 64tnmn.jpg Type: image/jpeg Size: 1661 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: jiangmaiguo.jpg Type: image/jpeg Size: 1049 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: xianmutaiwan.jpg Type: image/jpeg Size: 1742 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 1stofjuly.jpg Type: image/jpeg Size: 2219 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 625F.jpg Type: image/jpeg Size: 2344 bytes Desc: not available URL: From jlentini at netapp.com Mon Feb 25 09:20:13 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 25 Feb 2008 12:20:13 -0500 (EST) Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: References: <20080211173322.GA4755@fieldses.org> <20080211210044.GA4561@fieldses.org> Message-ID: On Mon, 25 Feb 2008, Roland Dreier wrote: > > + - Install an OpenFabrics-compatible RDMA card > > + > > + Any device that is compatible with the OpenFabrics.org software stack is > > + acceptable. > > This seems a little odd in the context of instructions on how to use > the code in the kernel. Maybe something like: > > - Install an RDMA device > > Any device supported by the drivers in drivers/infiniband/hw is > acceptable. > > - R. I agree. Your wording is clearer. Bruce, Here's an update incorporating Roland's suggestion. 
Signed-off-by: James Lentini --- /dev/null 2008-01-17 11:34:08.491004516 -0500 +++ Documentation/nfs-rdma.txt 2008-02-25 12:18:17.937932000 -0500 @@ -0,0 +1,252 @@ +################################################################################ +# # +# NFS/RDMA README # +# # +################################################################################ + + Author: NetApp and Open Grid Computing + Date: February 25, 2008 + +Table of Contents +~~~~~~~~~~~~~~~~~ + - Overview + - Getting Help + - Installation + - Check RDMA and NFS Setup + - NFS/RDMA Setup + +Overview +~~~~~~~~ + + This document describes how to install and set up the Linux NFS/RDMA client + and server software. + + The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server + was first included in the following release, Linux 2.6.25. + + In our testing, we have obtained excellent performance results (full 10Gbit + wire bandwidth at minimal client CPU) under many workloads. The code passes + the full Connectathon test suite and operates over both Infiniband and iWARP + RDMA adapters. + +Getting Help +~~~~~~~~~~~~ + + If you get stuck, you can ask questions on the + + nfs-rdma-devel at lists.sourceforge.net + + mailing list. + +Installation +~~~~~~~~~~~~ + + These instructions are a step by step guide to building a machine for + use with NFS/RDMA. + + - Install an RDMA device + + Any device supported by the drivers in drivers/infiniband/hw is acceptable. + + Testing has been performed using several Mellanox-based IB cards, the + Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter. + + - Install a Linux distribution and tools + + The first kernel release to contain both the NFS/RDMA client and server was + Linux 2.6.25. Therefore, a distribution compatible with this and subsequent + Linux kernel releases should be installed. + + The procedures described in this document have been tested with + distributions from Red Hat's Fedora Project (http://fedora.redhat.com/). 
+ + - Install nfs-utils-1.1.1 or greater on the client + + An NFS/RDMA mount point can only be obtained by using the mount.nfs + command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs + you are using, type: + + > /sbin/mount.nfs -V + + If the version is less than 1.1.1 or the command does not exist, + then you will need to install the latest version of nfs-utils. + + Download the latest package from: + + http://www.kernel.org/pub/linux/utils/nfs + + Uncompress the package and follow the installation instructions. + + If you will not be using GSS and NFSv4, the installation process + can be simplified by disabling these features when running configure: + + > ./configure --disable-gss --disable-nfsv4 + + For more information on this see the package's README and INSTALL files. + + After building the nfs-utils package, there will be a mount.nfs binary in + the utils/mount directory. This binary can be used to initiate NFS v2, v3, + or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4. + The standard technique is to create a symlink called mount.nfs4 to mount.nfs. + + NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed + on the NFS client machine. You do not need this specific version of + nfs-utils on the server. Furthermore, only the mount.nfs command from + nfs-utils-1.1.1 is needed on the client. + + - Install a Linux kernel with NFS/RDMA + + The NFS/RDMA client and server are both included in the mainline Linux + kernel version 2.6.25 and later. This and other versions of the 2.6 Linux + kernel can be found at: + + ftp://ftp.kernel.org/pub/linux/kernel/v2.6/ + + Download the sources and place them in an appropriate location. + + - Configure the RDMA stack + + Make sure your kernel configuration has RDMA support enabled. Under + Device Drivers -> InfiniBand support, update the kernel configuration + to enable InfiniBand support [NOTE: the option name is misleading. 
Enabling + InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)]. + + Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or + iWARP adapter support (amso, cxgb3, etc.). + + If you are using InfiniBand, be sure to enable IP-over-InfiniBand support. + + - Configure the NFS client and server + + Your kernel configuration must also have NFS file system support and/or + NFS server support enabled. These and other NFS related configuration + options can be found under File Systems -> Network File Systems. + + - Build, install, reboot + + The NFS/RDMA code will be enabled automatically if NFS and RDMA + are turned on. The NFS/RDMA client and server are configured via the hidden + SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The + value of SUNRPC_XPRT_RDMA will be: + + - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client + and server will not be built + - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M, + in this case the NFS/RDMA client and server will be built as modules + - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client + and server will be built into the kernel + + Therefore, if you have followed the steps above and turned on NFS and RDMA, + the NFS/RDMA client and server will be built. + + Build a new kernel, install it, boot it. + +Check RDMA and NFS Setup +~~~~~~~~~~~~~~~~~~~~~~~~ + + Before configuring the NFS/RDMA software, it is a good idea to test + your new kernel to ensure that the kernel is working correctly. + In particular, it is a good idea to verify that the RDMA stack + is functioning as expected and standard NFS over TCP/IP and/or UDP/IP + is working properly. + + - Check RDMA Setup + + If you built the RDMA components as modules, load them at + this time. 
For example, if you are using a Mellanox Tavor/Sinai/Arbel + card: + + > modprobe ib_mthca + > modprobe ib_ipoib + + If you are using InfiniBand, make sure there is a Subnet Manager (SM) + running on the network. If your IB switch has an embedded SM, you can + use it. Otherwise, you will need to run an SM, such as OpenSM, on one + of your end nodes. + + If an SM is running on your network, you should see the following: + + > cat /sys/class/infiniband/driverX/ports/1/state + 4: ACTIVE + + where driverX is mthca0, ipath5, ehca3, etc. + + To further test the InfiniBand software stack, use IPoIB (this + assumes you have two IB hosts named host1 and host2): + + host1> ifconfig ib0 a.b.c.x + host2> ifconfig ib0 a.b.c.y + host1> ping a.b.c.y + host2> ping a.b.c.x + + For other device types, follow the appropriate procedures. + + - Check NFS Setup + + For the NFS components enabled above (client and/or server), + test their functionality over standard Ethernet using TCP/IP or UDP/IP. + +NFS/RDMA Setup +~~~~~~~~~~~~~~ + + We recommend that you use two machines, one to act as the client and + one to act as the server. + + One time configuration: + + - On the server system, configure the /etc/exports file and + start the NFS/RDMA server. + + Exports entries with the following format have been tested: + + /vol0 10.97.103.47(rw,async) 192.168.0.47(rw,async,insecure,no_root_squash) + + Here the first IP address is the client's Ethernet address and the second + IP address is the client's IPoIB address. 
+ + Each time a machine boots: + + - Load and configure the RDMA drivers + + For InfiniBand using a Mellanox adapter: + + > modprobe ib_mthca + > modprobe ib_ipoib + > ifconfig ib0 a.b.c.d + + NOTE: use unique addresses for the client and server + + - Start the NFS server + + If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA transport module: + + > modprobe svcrdma + + Regardless of how the server was built (module or built-in), start the server: + + > /etc/init.d/nfs start + + or + + > service nfs start + + Instruct the server to listen on the RDMA transport: + + > echo rdma 2050 > /proc/fs/nfsd/portlist + + - On the client system + + If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config), + load the RDMA client module: + + > modprobe xprtrdma.ko + + Regardless of how the client was built (module or built-in), issue the mount.nfs command: + + > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050 + + To verify that the mount is using RDMA, run "cat /proc/mounts" and check the + "proto" field for the given mount. + + Congratulations! You're using NFS/RDMA! From bfields at fieldses.org Mon Feb 25 10:12:39 2008 From: bfields at fieldses.org (J. Bruce Fields) Date: Mon, 25 Feb 2008 13:12:39 -0500 Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: References: <20080211173322.GA4755@fieldses.org> <20080211210044.GA4561@fieldses.org> Message-ID: <20080225181239.GC12051@fieldses.org> On Mon, Feb 25, 2008 at 12:20:13PM -0500, James Lentini wrote: > > > On Mon, 25 Feb 2008, Roland Dreier wrote: > > > > + - Install an OpenFabrics-compatible RDMA card > > > + > > > + Any device that is compatible with the OpenFabrics.org software stack is > > > + acceptable. > > > > This seems a little odd in the context of instructions on how to use > > the code in the kernel. 
Maybe something like: > > > > - Install an RDMA device > > > > Any device supported by the drivers in drivers/infiniband/hw is > > acceptable. > > > > - R. > > I agree. Your wording is clearer. > > Bruce, > > Here's an update incorporating Roland's suggestion. > > Signed-off-by: James Lentini > > --- /dev/null 2008-01-17 11:34:08.491004516 -0500 > +++ Documentation/nfs-rdma.txt 2008-02-25 12:18:17.937932000 -0500 That actually puts the file in the top-level directory. I moved it to ./Documentation/filesystems/, and stripped trailing whitespace. Assuming that's OK, it's queued up for 2.6.26. Thanks! --b. From jlentini at netapp.com Mon Feb 25 10:15:11 2008 From: jlentini at netapp.com (James Lentini) Date: Mon, 25 Feb 2008 13:15:11 -0500 (EST) Subject: [ofa-general] Re: new NFS/RDMA instructions for 2.6.25-rc1 In-Reply-To: <20080225181239.GC12051@fieldses.org> References: <20080211173322.GA4755@fieldses.org> <20080211210044.GA4561@fieldses.org> <20080225181239.GC12051@fieldses.org> Message-ID: On Mon, 25 Feb 2008, J. Bruce Fields wrote: > On Mon, Feb 25, 2008 at 12:20:13PM -0500, James Lentini wrote: > > > > > > On Mon, 25 Feb 2008, Roland Dreier wrote: > > > > > > + - Install an OpenFabrics-compatible RDMA card > > > > + > > > > + Any device that is compatible with the OpenFabrics.org software stack is > > > > + acceptable. > > > > > > This seems a little odd in the context of instructions on how to use > > > the code in the kernel. Maybe something like: > > > > > > - Install an RDMA device > > > > > > Any device supported by the drivers in drivers/infiniband/hw is > > > acceptable. > > > > > > - R. > > > > I agree. Your wording is clearer. > > > > Bruce, > > > > Here's an update incorporating Roland's suggestion. > > > > Signed-off-by: James Lentini > > > > --- /dev/null 2008-01-17 11:34:08.491004516 -0500 > > +++ Documentation/nfs-rdma.txt 2008-02-25 12:18:17.937932000 -0500 > > That actually puts the file in the top-level directory. 
I moved it to > ./Documentation/filesystems/, and stripped trailing whitespace. > Assuming that's OK, Sounds good. > it's queued up for 2.6.26. Thanks! Great. > --b. > From weiny2 at llnl.gov Mon Feb 25 10:31:43 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 25 Feb 2008 10:31:43 -0800 Subject: [ofa-general] Re: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING In-Reply-To: <20080224121405.GD3116@sashak.voltaire.com> References: <20080221165655.0227c88c.weiny2@llnl.gov> <20080224121405.GD3116@sashak.voltaire.com> Message-ID: <20080225103143.2b7db98b.weiny2@llnl.gov> On Sun, 24 Feb 2008 12:14:05 +0000 Sasha Khapyorsky wrote: > Hi Ira, > > On 16:56 Thu 21 Feb , Ira Weiny wrote: > > From b8fb2151b92ddd4a7d2a4cc2ab38a6b34fffc7ab Mon Sep 17 00:00:00 2001 > > From: Ira K. Weiny > > Date: Thu, 21 Feb 2008 09:10:10 -0800 > > Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING > > > > > > + > > + osm_log(p_vend->p_log, OSM_LOG_INFO, > > + "osm_vendor_init: %d pending umads specified\n", > > + p_vend->mtbl.max); > > + > > + p_vend->mtbl.tbl = calloc(p_vend->mtbl.max, sizeof(*(p_vend->mtbl.tbl))); > > There is calloc(), I guess we also need free() somewhere? > You caught me... Revised patch attached, Ira >From 0c578c3062b3183dcd33e89aec0f1eb8a3a3a04e Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 21 Feb 2008 09:10:10 -0800 Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING Signed-off-by: Ira K. 
Weiny --- opensm/include/vendor/osm_vendor_ibumad.h | 4 ++-- opensm/libvendor/osm_vendor_ibumad.c | 28 +++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/opensm/include/vendor/osm_vendor_ibumad.h b/opensm/include/vendor/osm_vendor_ibumad.h index 84fd21a..3a3f070 100644 --- a/opensm/include/vendor/osm_vendor_ibumad.h +++ b/opensm/include/vendor/osm_vendor_ibumad.h @@ -141,12 +141,12 @@ typedef struct _umad_match { uint32_t version; } umad_match_t; -#define OSM_UMAD_MAX_PENDING 1000 +#define DEFAULT_OSM_UMAD_MAX_PENDING 1000 typedef struct vendor_match_tbl { - umad_match_t tbl[OSM_UMAD_MAX_PENDING]; uint32_t last_version; int max; + umad_match_t *tbl; } vendor_match_tbl_t; typedef struct _osm_vendor { diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index d51bd6d..a1fc140 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -451,6 +451,7 @@ ib_api_status_t osm_vendor_init(IN osm_vendor_t * const p_vend, IN osm_log_t * const p_log, IN const uint32_t timeout) { + char *max = NULL; int r, n_cas; OSM_LOG_ENTER(p_log); @@ -480,7 +481,31 @@ osm_vendor_init(IN osm_vendor_t * const p_vend, } p_vend->ca_count = n_cas; - p_vend->mtbl.max = OSM_UMAD_MAX_PENDING; + p_vend->mtbl.max = DEFAULT_OSM_UMAD_MAX_PENDING; + + if ((max = getenv("OSM_UMAD_MAX_PENDING")) != NULL) { + int tmp = strtol(max, NULL, 0); + if (tmp > 0) + p_vend->mtbl.max = tmp; + else + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_init: Error:" + "OSM_UMAD_MAX_PENDING=%d is invalid", + tmp); + } + + osm_log(p_vend->p_log, OSM_LOG_INFO, + "osm_vendor_init: %d pending umads specified\n", + p_vend->mtbl.max); + + p_vend->mtbl.tbl = calloc(p_vend->mtbl.max, sizeof(*(p_vend->mtbl.tbl))); + if (!p_vend->mtbl.tbl) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_init: Error:" + "failed to allocate vendor match table\n"); + r = IB_INSUFFICIENT_MEMORY; + goto Exit; + } Exit: 
OSM_LOG_EXIT(p_log); @@ -535,6 +560,7 @@ void osm_vendor_delete(IN osm_vendor_t ** const pp_vend) pthread_mutex_destroy(&(*pp_vend)->cb_mutex); pthread_mutex_destroy(&(*pp_vend)->match_tbl_mutex); + free((*pp_vend)->mtbl.tbl); free(*pp_vend); *pp_vend = NULL; } -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-opensm-libvendor-osm_vendor_ibumad.c-Add-environmen.patch Type: application/octet-stream Size: 2649 bytes Desc: not available URL: From robert.j.woodruff at intel.com Mon Feb 25 11:21:16 2008 From: robert.j.woodruff at intel.com (Woodruff, Robert J) Date: Mon, 25 Feb 2008 11:21:16 -0800 Subject: [ofa-general] Interoperability testing between OFED 1.3 and OFED 1.2.5 In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90364179A@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C90364179A@mtlexch01.mtl.com> Message-ID: There was a question in today's meeting about if anyone had tested ipoib between 1.2.5 and 1.3. I just tried this and it seems to work fine, at least on RedHat EL4/rocks cluster. I tested it using Intel MPI over IPoIB and also Intel MPI over uDAPL, both interoperate fine between 1.3 and 1.2.5. woody From tziporet at dev.mellanox.co.il Mon Feb 25 12:36:31 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 25 Feb 2008 22:36:31 +0200 Subject: [ofa-general] Re: [ewg] Interoperability testing between OFED 1.3 and OFED 1.2.5 In-Reply-To: References: <6C2C79E72C305246B504CBA17B5500C90364179A@mtlexch01.mtl.com> Message-ID: <47C326CF.7060304@mellanox.co.il> Woodruff, Robert J wrote: > There was a question in today's meeting about if anyone had > tested ipoib between 1.2.5 and 1.3. > > I just tried this and it seems to work fine, at least on > RedHat EL4/rocks cluster. I tested it using Intel MPI > over IPoIB and also Intel MPI over uDAPL, both interoperate > fine between 1.3 and 1.2.5. 
> > Many thanks Tziporet From tziporet at dev.mellanox.co.il Mon Feb 25 13:19:50 2008 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Mon, 25 Feb 2008 23:19:50 +0200 Subject: [ewg] ***SPAM*** Re: [ofa-general] Agenda for OFED meeting today In-Reply-To: References: Message-ID: <47C330F6.8020907@mellanox.co.il> Shirley Ma wrote: > > Hello Tziporet, > > >fix 4k MTU with IPoIB (bug 951) > According to the bug report: In RC5, IPoIB does not work on the QLogic > 7140 SDR card. This needs to be fixed for the release. > > I didn't see any evidence this is a 4K MTU bug. How 4K MTU patch > impact 7140 SDR card? And more inside? > The bug was in ipath driver (not IPoIB). Qlogic can provide more data Tziporet From kliteyn at dev.mellanox.co.il Mon Feb 25 14:11:02 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 26 Feb 2008 00:11:02 +0200 Subject: [ofa-general] [PATCH] opensm/doc: fixing version in release notes Message-ID: <47C33CF6.3030306@dev.mellanox.co.il> OpenSM release notes for OFED 1.3 refer to version 3.1.10 Signed-off-by: Yevgeny Kliteynik --- opensm/doc/opensm_release_notes-3.1.10.txt | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opensm/doc/opensm_release_notes-3.1.10.txt b/opensm/doc/opensm_release_notes-3.1.10.txt index 9ebae1d..d92c58d 100644 --- a/opensm/doc/opensm_release_notes-3.1.10.txt +++ b/opensm/doc/opensm_release_notes-3.1.10.txt @@ -1,4 +1,4 @@ - OpenSM Release Notes 3.1.9 + OpenSM Release Notes 3.1.10 ============================= Version: OpenFabrics Enterprise Distribution (OFED) 1.3 @@ -11,7 +11,7 @@ Date: February 2008 This document describes the contents of the OpenSM OFED 1.3 release. OpenSM is an InfiniBand compliant Subnet Manager and Administration, and runs on top of OpenIB. 
The OpenSM version for this release -is openib-3.1.9 +is openib-3.1.10 This document includes the following sections: 1 This Overview section (describing new features and software -- 1.5.1.4 From rdreier at cisco.com Mon Feb 25 14:29:34 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 25 Feb 2008 14:29:34 -0800 Subject: [ofa-general] [PATCH 2.6.25] RDMA/cxgb3: Shift calculation wrong for single sge entries. In-Reply-To: <20080220233148.25301.54457.stgit@dell3.ogc.int> (Steve Wise's message of "Wed, 20 Feb 2008 17:31:48 -0600") References: <20080220233148.25301.54457.stgit@dell3.ogc.int> Message-ID: Thanks, applied, although I assume based on the Signed-off-by line that you left out a From: Bryan Rosenburg at the top (to get the authorship in git correctly). > RDMA/cxgb3: Shift calculation wrong for single sge entries. BTW, there's no need to duplicate the subject line in the message body, but if you are going to do it, please put a "Subject:" before it. Otherwise I just have to edit it out by hand to avoid git putting the subject in twice. - R. From pw at osc.edu Mon Feb 25 14:53:30 2008 From: pw at osc.edu (Pete Wyckoff) Date: Mon, 25 Feb 2008 17:53:30 -0500 Subject: [ofa-general] fmr pool free_list empty Message-ID: <20080225225330.GA3316@osc.edu> I have a test code that breaks iser reliably, making it say this: iser: iser_reg_page_vec:ib_fmr_pool_map_phys failed: -11 in 2.6.25-rc1 plus varlen, bidi patches. The trick is to require it to use FMR and to keep a large number of operations in flight. Building an sglist with a bunch of pages that are not contiguous does the job. Increasing the pool size and/or decreasing the dirty watermark seem to have no effect. Looking at the FMR dirty list unmapping code in ib_fmr_batch_release(), there is a section that pulls all the dirty entries onto a list that it will later unmap and put back on the free list. 
But it also plans to unmap all the free entries that have ever been remapped: /* * The free_list may hold FMRs that have been put there * because they haven't reached the max_remap count. * Invalidate their mapping as well. */ list_for_each_entry_safe(fmr, next, &pool->free_list, list) { if (fmr->remap_count == 0) continue; hlist_del_init(&fmr->cache_node); fmr->remap_count = 0; list_add_tail(&fmr->fmr->list, &fmr_list); list_move(&fmr->list, &unmap_list); } Deleting that block of code makes the problem go away. The issue seems to be that the thread doing this batch_release() holds the spinlock while gathering up the unmap victims, then it drops it to go off and do the actual unmaps. Meanwhile, the thread from iser that wants to do ib_fmr_pool_map_phys() finds that the free list is now empty and complains. Presumably this optimization of remapping the aging free list entries helps in some workloads. But emptying the free list is not good for iser. Any ideas on this fix or suggestions for a better one? Maybe if ib_fmr_pool_unmap() put returning FMRs on the front of the list, it would help keep the remap_count more bimodal, and the unmap code above would not eagerly grab all of the free ones at once. Might keep the cache a bit hotter too. -- Pete From rdreier at cisco.com Mon Feb 25 15:02:40 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 25 Feb 2008 15:02:40 -0800 Subject: [ofa-general] fmr pool free_list empty In-Reply-To: <20080225225330.GA3316@osc.edu> (Pete Wyckoff's message of "Mon, 25 Feb 2008 17:53:30 -0500") References: <20080225225330.GA3316@osc.edu> Message-ID: Ugh. > Looking at the FMR dirty list unmapping code in > ib_fmr_batch_release(), there is a section that pulls all the dirty > entries onto a list that it will later unmap and put back on the > free list. > But it also plans to unmap all the free entries that have ever been > remapped: Yes, this came from a3cd7d90 ("IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs"). 
That solved a real problem for Olaf, because otherwise dirty FMRs that are not at the max map count might never get invalidated. It's not exactly an optimization but rather a correctness issue, because RDS relies on killing mappings eventually. On the other hand, this behavior clearly does lead to the possibility of leaving the free list temporarily empty for stupid reasons. I don't see a really good way to fix this at the moment, need to meditate a little. - R. From rdreier at cisco.com Mon Feb 25 15:57:19 2008 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 25 Feb 2008 15:57:19 -0800 Subject: [ofa-general] Re: [PATCH 2.6 8/8] RDMA/nes: Fix interrupt moderation low threshold In-Reply-To: <200802211434.m1LEYwnH005193@velma.neteffect.com> (gstreiff@neteffect.com's message of "Thu, 21 Feb 2008 08:34:58 -0600") References: <200802211434.m1LEYwnH005193@velma.neteffect.com> Message-ID: thanks, applied all 8 patches From phillipwils at gmail.com Mon Feb 25 17:25:52 2008 From: phillipwils at gmail.com (Phillip Wilson) Date: Mon, 25 Feb 2008 17:25:52 -0800 Subject: [ofa-general] Internal loopback for HCA (with no external cable required) In-Reply-To: <47C282FA.2030105@dev.mellanox.co.il> References: <6e4f44220802242144p3a712e54x4915f46fadcdf0be@mail.gmail.com> <47C282FA.2030105@dev.mellanox.co.il> Message-ID: <6e4f44220802251725l4a047f6n94c8379f90740ffa@mail.gmail.com> Dotan, the ibv_rc_pingpong program supports internal loopback as you stated. I verified this by removing the cable connected to the ports on the HCA (mthca0); this HCA (mthca0) was configured by the opensm program. Even though the HCA (mthca0) port states are "Down" and the Physical states are "Polling" for each port, the ibv_rc_pingpong program was able to perform internal loopback. Thanks, Phil Is there an easy way to force the opensm to configure HCAs that are not connected to a (no cables connect to any other device or itself) fabric? When I say configure, I mean assign a Base LID, LMC, and SM LID. 
Otherwise, I will modify one of the management diagnostic such as smpquery to set the portinfo fields mention above. Is this a good approach? Internal Loopback perform on 'mthca0' [root at hpdst165 ~]# ibstat CA 'mthca0' CA type: MT23108 Number of ports: 2 Firmware version: 3.3.12 Hardware version: a1 Node GUID: 0x001635ffffbf0944 System image GUID: 0x001635ffffbf0947 Port 1: State: Down Physical state: Polling Rate: 10 Base lid: 1 LMC: 0 SM lid: 1 Capability mask: 0x02510a68 Port GUID: 0x001635ffffbf0945 Port 2: State: Down Physical state: Polling Rate: 10 Base lid: 2 LMC: 0 SM lid: 1 Capability mask: 0x02510a68 Port GUID: 0x001635ffffbf0946 CA 'mthca1' CA type: MT23108 Number of ports: 2 Firmware version: 3.3.12 Hardware version: a1 Node GUID: 0x001635ffffbf1954 System image GUID: 0x001635ffffbf1957 Port 1: State: Down Physical state: Polling Rate: 2 Base lid: 0 LMC: 0 SM lid: 0 Capability mask: 0x02510a68 Port GUID: 0x001635ffffbf1955 Port 2: State: Down Physical state: Polling Rate: 2 Base lid: 0 LMC: 0 SM lid: 0 Capability mask: 0x02510a68 Port GUID: 0x001635ffffbf1956 On 2/25/08, Dotan Barak wrote: > Hi. > > please look bellow. > > Phillip Wilson wrote: > > Internal loopback for HCA (with no external cable required) > > > > > > <> > > > > 1.) Is it possible to modify the ibv_rc_pingpong program for internal loopback? > > > It supports internal loopback even today (if the daemon and client are > being executed on the same host > and they are using the same IB device + IB port). > > 2.) If each mthca1 port is assigned a LID, can the ibv_rc_pingpong > > program be used as is? > > > I think that the answer is yes. I didn't check that internal loopback is > working if a LID is assigned > to the IB port and the port state is down, but it should work. > > > > Dotan > -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From gonodhoofkeeping at cox.net Mon Feb 25 17:45:42 2008 From: gonodhoofkeeping at cox.net (Yosef Burks) Date: Mon, 25 Feb 2008 19:45:42 -0600 Subject: [ofa-general] Don't Wait! Make Your cock Bigger Message-ID: <894473294.22569554535820@cox.net> Dear openib-general at openib.orgSatisfy your lower like never before. Increase your cock. Make it thicker and harder with the product called VPXL. These patches are considered to be the safest enlargement method available. You don’t have to make expensive cock enlargement surgery or use other sometimes ineffective or even dangerous methods. Try our VPXL. It is absolutely the most potent patch you can buy. Don't hesitate, grab the chance of your lifetime and order our VPXL now.http://geocities.com/randallterry76/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From dwstadiometerm at stadiometer.com Mon Feb 25 19:53:37 2008 From: dwstadiometerm at stadiometer.com (Mollie Irwin) Date: Tue, 26 Feb 2008 10:53:37 +0700 Subject: [ofa-general] Experience for yourself the excitement of winning real money online. Message-ID: <01c87865$d6d4cf00$97a1177b@dwstadiometerm> Online gambling is not only fun and exciting. It can bring real money! All you have to do is to download free software, receive great $2400 welcome bonus and start playing. Enjoy the real casino atmosphere with Golden Gate Casino! We provide 24 hours a day, 7 days a week support and service! Truly fair play guaranteed for players. High level of security! http://geocities.com/FranciscaCleveland Enjoy pure pleasure of gambling from home without stress! From nickpiggin at yahoo.com.au Mon Feb 25 22:11:32 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Tue, 26 Feb 2008 17:11:32 +1100 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. 
for XPmem) In-Reply-To: <20080221105838.GJ11391@sgi.com> References: <20080215064859.384203497@sgi.com> <200802211520.03529.nickpiggin@yahoo.com.au> <20080221105838.GJ11391@sgi.com> Message-ID: <200802261711.33213.nickpiggin@yahoo.com.au> On Thursday 21 February 2008 21:58, Robin Holt wrote: > On Thu, Feb 21, 2008 at 03:20:02PM +1100, Nick Piggin wrote: > > > > So why can't you export a device from your xpmem driver, which > > > > can be mmap()ed to give out "anonymous" memory pages to be used > > > > for these communication buffers? > > > > > > Because we need to have heap and stack available as well. MPT does > > > not control all the communication buffer areas. I haven't checked, but > > > this is the same problem that IB will have. I believe they are > > > actually allowing any memory region be accessible, but I am not sure of > > > that. > > > > Then you should create a driver that the user program can register > > and unregister regions of their memory with. The driver can do a > > get_user_pages to get the pages, and then you'd just need to set up > > some kind of mapping so that userspace can unmap pages / won't leak > > memory (and an exit_mm notifier I guess). > > OK. You need to explain this better to me. How would this driver > supposedly work? What we have is an MPI library. It gets invoked at > process load time to establish its rank-to-rank communication regions. > It then turns control over to the processes main(). That is allowed to > run until it hits the > MPI_Init(&argc, &argv); > > The process is then totally under the users control until: > MPI_Send(intmessage, m_size, MPI_INT, my_rank+half, tag, MPI_COMM_WORLD); > MPI_Recv(intmessage, m_size, MPI_INT, my_rank+half,tag, MPI_COMM_WORLD, > &status); > > That is it. That is all our allowed interaction with the users process. 
OK, when you said something along the lines of "the MPT library has control of the comm buffer", then I assumed it was an area of virtual memory which is set up as part of initialization, rather than during runtime. I guess I jumped to conclusions. > That doesn't seem too unreasonable, except when you compare it to how the > driver currently works. Remember, this is done from a library which has > no insight into what the user has done to its own virtual address space. > As a result, each MPI_Send() would result in a system call (or we would > need to have a set of callouts for changes to a processes VMAs) which > would be a significant increase in communication overhead. > > Maybe I am missing what you intend to do, but what we need is a means of > tracking one processes virtual address space changes so other processes > can do direct memory accesses without the need for a system call on each > communication event. Yeah it's tricky. BTW. what is the performance difference between having a system call or no? > > Because you don't need to swap, you don't need coherency, and you > > are in control of the areas, then this seems like the best choice. > > It would allow you to use heap, stack, file-backed, anything. > > You are missing one point here. The MPI specifications that have > been out there for decades do not require the process use a library > for allocating the buffer. I realize that is a horrible shortcoming, > but that is the world we live in. Even if we could change that spec, Can you change the spec? Are you working on it? > we would still need to support the existing specs. As a result, the > user can change their virtual address space as they need and still expect > communications be cheap. That's true. How has it been supported up to now? Are you using these kind of notifiers in patched kernels? 
From glebn at voltaire.com Mon Feb 25 23:21:37 2008 From: glebn at voltaire.com (Gleb Natapov) Date: Tue, 26 Feb 2008 09:21:37 +0200 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <200802261711.33213.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <200802211520.03529.nickpiggin@yahoo.com.au> <20080221105838.GJ11391@sgi.com> <200802261711.33213.nickpiggin@yahoo.com.au> Message-ID: <20080226072137.GD26611@minantech.com> On Tue, Feb 26, 2008 at 05:11:32PM +1100, Nick Piggin wrote: > > You are missing one point here. The MPI specifications that have > > been out there for decades do not require the process use a library > > for allocating the buffer. I realize that is a horrible shortcoming, > > but that is the world we live in. Even if we could change that spec, > > Can you change the spec? Not really. It will break all existing codes. MPI-2 provides a call for memory allocation (and it's beneficial to use this call for some interconnects), but many (most?) applications are still written for MPI-1 and those that are written for MPI-2 mostly uses the old habit of allocating memory by malloc(), or even use stack or BSS memory for communication buffer purposes. -- Gleb. 
From dotanb at dev.mellanox.co.il Mon Feb 25 23:27:28 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 26 Feb 2008 09:27:28 +0200 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <1203957921.8793.320.camel@hrosenstock-ws.xsigo.com> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> <47A870A8.5050409@voltaire.com> <47C2A455.5070603@dev.mellanox.co.il> <1203957921.8793.320.camel@hrosenstock-ws.xsigo.com> Message-ID: <47C3BF60.3030104@dev.mellanox.co.il> Hal Rosenstock wrote: > On Mon, 2008-02-25 at 13:19 +0200, Dotan Barak wrote: > >> Or Gerlitz wrote: >> >>> Sean Hefty wrote: >>> >>>> The verbs also support iWarp devices and are not necessarily >>>> restricted to the >>>> 1.2 IB spec definitions. It might make sense to state that the IB >>>> implementation is based on the 1.2 spec in an IB specific section, >>>> but keep the >>>> general documentation transport neutral at this point. >>>> >>> Sure, the page would be changed to reflect that. >>> >>> Or. >>> >> Sorry, i didn't find the time to get to it until now. >> >> I changed the problematic sentence to: >> "This library is an implementation of the verbs based on the Infiniband >> specification volume 1.2 chapter 11." >> > ^^^^^^^^^^ > volume 1 > > Also, should this refer to IBA 1.2.1 rather than 1.2 (if that was what > was intended by the 1.2 reference) ? > I must admit that I didn't have a chance to check IB spec 1.2.1 BUT 1) Many of the features are implemented by the IB devices, and I don't know if all of them behave according to spec 1.2.1 2) Spec 1.2 introduced new features (such as SRQ and some more) which didn't exist in spec 1.1, so I wanted to make sure that they are supported. (as far as I know, most of the users don't have a copy of the IB spec) 3) If, one month from now, spec 1.2.2 is published, should we update this file? I think that sentence is good (spec 1.2) for now... 
Do you think that we should remove the spec version completely? (i don't think that it is wise to update the version unless there is a good reason for it ..) thanks Dotan From dotanb at dev.mellanox.co.il Mon Feb 25 23:41:12 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 26 Feb 2008 09:41:12 +0200 Subject: [ofa-general] [PATCH] libcxgb3: Remove duplicated line Message-ID: <200802260941.12366.dotanb@dev.mellanox.co.il> Remove duplicated line. Signed-off-by: Dotan Barak --- Index: ofa_1_3_dev_user/src/userspace/libcxgb3/src/cq.c =================================================================== --- ofa_1_3_dev_user.orig/src/userspace/libcxgb3/src/cq.c 2008-02-25 17:22:56.000000000 +0200 +++ ofa_1_3_dev_user/src/userspace/libcxgb3/src/cq.c 2008-02-26 09:36:57.000000000 +0200 @@ -373,7 +373,6 @@ __FUNCTION__, CQE_STATUS(cqe), chp->cq.cqid, CQE_QPID(cqe)); ret = -EINVAL; - ret = -EINVAL; } } out: From nickpiggin at yahoo.com.au Tue Feb 26 00:52:41 2008 From: nickpiggin at yahoo.com.au (Nick Piggin) Date: Tue, 26 Feb 2008 19:52:41 +1100 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <20080226072137.GD26611@minantech.com> References: <20080215064859.384203497@sgi.com> <200802261711.33213.nickpiggin@yahoo.com.au> <20080226072137.GD26611@minantech.com> Message-ID: <200802261952.42567.nickpiggin@yahoo.com.au> On Tuesday 26 February 2008 18:21, Gleb Natapov wrote: > On Tue, Feb 26, 2008 at 05:11:32PM +1100, Nick Piggin wrote: > > > You are missing one point here. The MPI specifications that have > > > been out there for decades do not require the process use a library > > > for allocating the buffer. I realize that is a horrible shortcoming, > > > but that is the world we live in. Even if we could change that spec, > > > > Can you change the spec? > > Not really. It will break all existing codes. I meant as in eg. 
submit changes to MPI-3 > MPI-2 provides a call for > memory allocation (and it's beneficial to use this call for some > interconnects), but many (most?) applications are still written for MPI-1 > and those that are written for MPI-2 mostly uses the old habit of > allocating memory by malloc(), or even use stack or BSS memory for > communication buffer purposes. OK, so MPI-2 already has some way to do that... I'm not saying that we can now completely dismiss the idea of using notifiers for this, but it is just a good data point to know. Thanks, Nick From glebn at voltaire.com Tue Feb 26 01:38:09 2008 From: glebn at voltaire.com (Gleb Natapov) Date: Tue, 26 Feb 2008 11:38:09 +0200 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <200802261952.42567.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <200802261711.33213.nickpiggin@yahoo.com.au> <20080226072137.GD26611@minantech.com> <200802261952.42567.nickpiggin@yahoo.com.au> Message-ID: <20080226093809.GF26611@minantech.com> On Tue, Feb 26, 2008 at 07:52:41PM +1100, Nick Piggin wrote: > On Tuesday 26 February 2008 18:21, Gleb Natapov wrote: > > On Tue, Feb 26, 2008 at 05:11:32PM +1100, Nick Piggin wrote: > > > > You are missing one point here. The MPI specifications that have > > > > been out there for decades do not require the process use a library > > > > for allocating the buffer. I realize that is a horrible shortcoming, > > > > but that is the world we live in. Even if we could change that spec, > > > > > > Can you change the spec? > > > > Not really. It will break all existing codes. > > I meant as in eg. submit changes to MPI-3 MPI spec tries to be backward compatible. And MPI-2 spec is 10 years old, but MPI-1 is still in a wider use. HPC is moving fast in terms of HW technology, but slow in terms of SW. Fortran is still hot there :) -- Gleb. 
From kosaki.motohiro at jp.fujitsu.com Tue Feb 26 01:52:00 2008 From: kosaki.motohiro at jp.fujitsu.com (KOSAKI Motohiro) Date: Tue, 26 Feb 2008 18:52:00 +0900 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <20080226093809.GF26611@minantech.com> References: <200802261952.42567.nickpiggin@yahoo.com.au> <20080226093809.GF26611@minantech.com> Message-ID: <20080226184914.FF3A.KOSAKI.MOTOHIRO@jp.fujitsu.com> > > > > Can you change the spec? > > > > > > Not really. It will break all existing codes. > > > > I meant as in eg. submit changes to MPI-3 > > MPI spec tries to be backward compatible. And MPI-2 spec is 10 years > old, but MPI-1 is still in a wider use. HPC is moving fast in terms of HW > technology, but slow in terms of SW. Fortran is still hot there :) Agreed. many many people dislike incompatible specification change. We should accept real world spec. - kosaki From vlad at lists.openfabrics.org Tue Feb 26 03:05:37 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 26 Feb 2008 03:05:37 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080226-0200 daily build status Message-ID: <20080226110538.20F25E60844@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.21.1 Passed on i686 with 
linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From sheryi_g2 at yahoo.fr Tue Feb 26 04:14:38 2008 From: sheryi_g2 at yahoo.fr (Sheryi Griffin) Date: Tue, 26 Feb 2008 13:14:38 +0100 (CET) Subject: [ofa-general] Confidentiel. Message-ID: <222904.97240.qm@web28207.mail.ukl.yahoo.com> SHERYI GRIFFIN. 
Très cher, Je suis SHERYI GRIFFIN agée de 22 ans et je vie en Côte d'Ivoire. Je suis fille unique de M. LUC GRIFFIN. je poccède dans une banque de la place la somme de 10.5 millions de dollars américains. Cette somme a été deposé par ma mère bien avant sa mort en Octobre 2005. Cette somme a été placée sur un compte de suspence dans une banque ici en Cote d'Ivoire mon pays. Je voudrais votre permision afin de vous remettre cet argent de sorte que je puisse venir dans votre pays pour investir et pouvoir continuer mes études. je promet de vous offrir 20% de ladite somme pour que votre precieuse aide. J'attends impatiemment votre reponse afin de vous faire parvenir un courrier pour plus d'informations concernant le transfert. sincères remerciements a vous. SHERYI GRIFFIN. --------------------------------- Yahoo! Mail innove : interface hyper pratique, messenger intégré, couleurs - découvrez la démo ! -------------- next part -------------- An HTML attachment was scrubbed... URL: From holt at sgi.com Tue Feb 26 04:28:50 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 26 Feb 2008 06:28:50 -0600 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <200802261952.42567.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <200802261711.33213.nickpiggin@yahoo.com.au> <20080226072137.GD26611@minantech.com> <200802261952.42567.nickpiggin@yahoo.com.au> Message-ID: <20080226122850.GP11391@sgi.com> On Tue, Feb 26, 2008 at 07:52:41PM +1100, Nick Piggin wrote: > On Tuesday 26 February 2008 18:21, Gleb Natapov wrote: > > On Tue, Feb 26, 2008 at 05:11:32PM +1100, Nick Piggin wrote: > > > > You are missing one point here. The MPI specifications that have > > > > been out there for decades do not require the process use a library > > > > for allocating the buffer. I realize that is a horrible shortcoming, > > > > but that is the world we live in. 
Even if we could change that spec, > > > > > > Can you change the spec? > > > > Not really. It will break all existing codes. > > I meant as in eg. submit changes to MPI-3 > > > > MPI-2 provides a call for > > memory allocation (and it's beneficial to use this call for some > > interconnects), but many (most?) applications are still written for MPI-1 > > and those that are written for MPI-2 mostly uses the old habit of > > allocating memory by malloc(), or even use stack or BSS memory for > > communication buffer purposes. > > OK, so MPI-2 already has some way to do that... I'm not saying that we > can now completely dismiss the idea of using notifiers for this, but it > is just a good data point to know. It is in MPI-2, but MPI-2 does not prohibit communication from regions not allocated by the MPI call. Thanks, Robin From holt at sgi.com Tue Feb 26 04:29:08 2008 From: holt at sgi.com (Robin Holt) Date: Tue, 26 Feb 2008 06:29:08 -0600 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <200802261711.33213.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <200802211520.03529.nickpiggin@yahoo.com.au> <20080221105838.GJ11391@sgi.com> <200802261711.33213.nickpiggin@yahoo.com.au> Message-ID: <20080226122908.GQ11391@sgi.com> > > That is it. That is all our allowed interaction with the users process. > > OK, when you said something along the lines of "the MPT library has > control of the comm buffer", then I assumed it was an area of virtual > memory which is set up as part of initialization, rather than during > runtime. I guess I jumped to conclusions. There are six regions the MPT library typically makes. The most basic one is a fixed size. It describes the MPT internal buffers, the stack, the heap, the application text, and finally the entire address space. That last region is seldom used. MPT only has control over the first two. 
> > That doesn't seem too unreasonable, except when you compare it to how the > > driver currently works. Remember, this is done from a library which has > > no insight into what the user has done to its own virtual address space. > > As a result, each MPI_Send() would result in a system call (or we would > > need to have a set of callouts for changes to a processes VMAs) which > > would be a significant increase in communication overhead. > > > > Maybe I am missing what you intend to do, but what we need is a means of > > tracking one processes virtual address space changes so other processes > > can do direct memory accesses without the need for a system call on each > > communication event. > > Yeah it's tricky. BTW. what is the performance difference between > having a system call or no? The system call takes many microseconds and still requires the same latency of the communication. Without it, our latency is usually below two microseconds. > > > Because you don't need to swap, you don't need coherency, and you > > > are in control of the areas, then this seems like the best choice. > > > It would allow you to use heap, stack, file-backed, anything. > > > > You are missing one point here. The MPI specifications that have > > been out there for decades do not require the process use a library > > for allocating the buffer. I realize that is a horrible shortcoming, > > but that is the world we live in. Even if we could change that spec, > > Can you change the spec? Are you working on it? Even if we changed the spec, the old specs will continue to be supported. I personally am not involved. Not sure if anybody else is working this issue. > > we would still need to support the existing specs. As a result, the > > user can change their virtual address space as they need and still expect > > communications be cheap. > > That's true. How has it been supported up to now? Are you using > these kind of notifiers in patched kernels? 
At fault time, we check to see if it is an anon or mspec vma. We pin the pages and insert them. The remote OS then loses synchronicity with the owning process's page tables. If an unmap, madvise, etc. occurs, the page tables are updated without regard to our references. Fork or exit (fork is caught using an LD_PRELOAD library) cause the user pages to be recalled from the remote side and put_page returns them to the kernel. We have documented that this loss of synchronicity is due to their action and not supported. Essentially, we rely upon the application being well behaved. To this point, that has remained true. Thanks, Robin From hrosenstock at xsigo.com Tue Feb 26 04:58:41 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 26 Feb 2008 04:58:41 -0800 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <47C3BF60.3030104@dev.mellanox.co.il> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> <47A870A8.5050409@voltaire.com> <47C2A455.5070603@dev.mellanox.co.il> <1203957921.8793.320.camel@hrosenstock-ws.xsigo.com> <47C3BF60.3030104@dev.mellanox.co.il> Message-ID: <1204030721.454.48.camel@hrosenstock-ws.xsigo.com> On Tue, 2008-02-26 at 09:27 +0200, Dotan Barak wrote: > Hal Rosenstock wrote: > > On Mon, 2008-02-25 at 13:19 +0200, Dotan Barak wrote: > > > >> Or Gerlitz wrote: > >> > >>> Sean Hefty wrote: > >>> > >>>> The verbs also support iWarp devices and are not necessarily > >>>> restricted to the > >>>> 1.2 IB spec definitions. It might make sense to state that the IB > >>>> implementation is based on the 1.2 spec in an IB specific section, > >>>> but keep the > >>>> general documentation transport neutral at this point. > >>>> > >>> Sure, the page would be changed to reflect that. > >>> > >>> Or. > >>> > >> Sorry, i didn't find the time to get to it until now. 
> >> > >> I changed the problematic sentence to: > >> "This library is an implementation of the verbs based on the Infiniband > >> specification volume 1.2 chapter 11." > >> > > ^^^^^^^^^^ > > volume 1 > > > > Also, should this refer to IBA 1.2.1 rather than 1.2 (if that was what > > was intended by the 1.2 reference) ? > > > I must admit that i didn't have a chance to check IB spec 1.2.1 BUT > 1) Many of the features are implemented by the IB devices, and i don't > know if all of them > behave according to spec 1.2.1 Aren't any of those changes optional so I'm not sure I see the issue here ? > 2) Spec 1.2 introduced new features (such as SRQ and some more) which > didn't exist in > spec 1.1, so i wanted to make to make sure that they are supported. Sure; there are spec changes going forward but the spec is backward compatible (and new features are optional). > (as much as i know, > most of the users don't have a copy of the IB spec ) The spec has been publically available for quite a while now (not just available to IBTA members). > 3) If one month from now, spec 1.2.2 will be published; should we update > this file? That's actually a larger question affecting more than this just this file. One approach would be to indicate the latest spec supported at the time of release. > I think that sentence is good (spec 1.2) for now... I'm not sure about its "goodness". In this particular place, there may not be much harm either way but in others it is misleading and inaccurate. > Do you think that we should remove the spec version completely? > (i don't think that it is wise to update the version unless there is a > good reason for it ..) That gets rid of the overhead of dealing with maintaining the spec version. However, eliminating the spec version leaves the version open which is not a good thing in all cases. 
-- Hal > thanks > Dotan From tziporet at mellanox.co.il Tue Feb 26 05:59:05 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 26 Feb 2008 15:59:05 +0200 Subject: [ofa-general] OFED Feb-25 meeting summary Message-ID: <6C2C79E72C305246B504CBA17B5500C903690E7A@mtlexch01.mtl.com> OFED Feb-25 meeting summary on OFED 1.3 GA readiness: 1. Agreed schedule: RC6 - Feb 25 - done GA - Feb 28 2. Status update Intel - weekend - run fine Qlogic - OK IBM - OK Neteffect - passing acceptance - no showstoppers Mellanox - OK Voltaire - no participation Chelsio - No participation 3. Bugs status: As of yesterday there are no bugs that should hold the release 4. Open discussion a. Need to test interop between OFED 1.2 and OFED 1.3 Woody from Intel already checked it and basic functionality is working. b. OFED to be used in the interop event and plug-fest: Rupert reported they are going to use OFED 1.3 There was a concern from Qlogic that they might need some change after RC6 but they found it is not needed c. Support (dot) releases: - All agreed that we might need dot releases in case of critical issues - Concern: the dot releases are less tested and QAed compared to the major releases - We should have at least 1 month between dot releases - Must ensure only bug fixes are included in dot releases (no API/base kernel changes) Tziporet From olga.shern at gmail.com Tue Feb 26 06:14:22 2008 From: olga.shern at gmail.com (Olga Shern) Date: Tue, 26 Feb 2008 16:14:22 +0200 Subject: [ofa-general] Re: [ewg] OFED Feb-25 meeting summary In-Reply-To: <6C2C79E72C305246B504CBA17B5500C903690E7A@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C903690E7A@mtlexch01.mtl.com> Message-ID: Hi Tziporet, Sorry I missed the meeting; I was in the middle of debugging something and didn't pay attention to the time. We are continuing the tests and haven't seen any additional critical issues except bug 962, which I found this morning. 
Olga (Voltaire) On 2/26/08, Tziporet Koren wrote: > > > OFED Feb-25 meeting summary on OFED 1.3 GA readiness: > > 1. Agreed schedule: > RC6 - Feb 25 - done > GA - Feb 28 > > 2. Status update > Intel - weekend - run fine > Qlogic - OK > IBM - OK > Neteffect - passing acceptance - no showstoppers > Mellanox - OK > Voltaire - no participation > Chelsio - No participation > > 3. Bugs status: > As of yesterday there are no bugs that should hold the release > > 4. Open discussion > a. Need to test interop between OFED 1.2 and OFED 1.3 > Woody from Intel already checked it and basic functionality > is working. > b. OFED to be used in the interop event and plug-fest: > Rupert reported they are going to use OFED 1.3 > There was a concern from Qlogic that they might need some > change after RC6 but they found it is not needed > c. Support (dot) releases: > - All agreed that we might need dot releases in case of > critical issues > - Concern: the dot releases are less tested and QAed > comparing to the major releases > - We should have at least 1 month between dot releases > - Must ensure only bug fixes are included in dot releases (no > API/base kernel changes) > > > Tziporet > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg > -------------- next part -------------- An HTML attachment was scrubbed... URL: From proone at mail.com Tue Feb 26 06:22:46 2008 From: proone at mail.com (Tonia Kent) Date: Tue, 26 Feb 2008 15:22:46 +0100 Subject: [ofa-general] You Can Afford to Buy Designer Footwear Message-ID: <829137414.44115560262696@mail.com> Attention, a 2008's Footwear Collection on offer! High quality, luxury, recognizable footwear for fashion-conscious people. http://geocities.com/gregoryhopkins16/ -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From swise at opengridcomputing.com Tue Feb 26 06:29:04 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 26 Feb 2008 08:29:04 -0600 Subject: [ofa-general] Re: [PATCH] Provide an empty stub for iwch_query_qp. In-Reply-To: <1203552300-21904-1-git-send-email-jon@opengridcomputing.com> References: <1203552300-21904-1-git-send-email-jon@opengridcomputing.com> Message-ID: <47C42230.8080105@opengridcomputing.com> Applied...Thanks. From swise at opengridcomputing.com Tue Feb 26 06:29:29 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 26 Feb 2008 08:29:29 -0600 Subject: [ofa-general] Re: [PATCH] libcxgb3: Remove duplicated line In-Reply-To: <200802260941.12366.dotanb@dev.mellanox.co.il> References: <200802260941.12366.dotanb@dev.mellanox.co.il> Message-ID: <47C42249.4090608@opengridcomputing.com> Applied...Thanks. From dotanb at dev.mellanox.co.il Tue Feb 26 07:09:37 2008 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 26 Feb 2008 17:09:37 +0200 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <1204030721.454.48.camel@hrosenstock-ws.xsigo.com> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> <47A870A8.5050409@voltaire.com> <47C2A455.5070603@dev.mellanox.co.il> <1203957921.8793.320.camel@hrosenstock-ws.xsigo.com> <47C3BF60.3030104@dev.mellanox.co.il> <1204030721.454.48.camel@hrosenstock-ws.xsigo.com> Message-ID: <47C42BB1.5010201@dev.mellanox.co.il> I suggest to leave verbs.h as is for OFED 1.3 and discuss this issue for the next OFED distributions. is it o.k. with you? 
thanks Dotan Hal Rosenstock wrote: > On Tue, 2008-02-26 at 09:27 +0200, Dotan Barak wrote: > >> Hal Rosenstock wrote: >> >>> On Mon, 2008-02-25 at 13:19 +0200, Dotan Barak wrote: >>> >>> >>>> Or Gerlitz wrote: >>>> >>>> >>>>> Sean Hefty wrote: >>>>> >>>>> >>>>>> The verbs also support iWarp devices and are not necessarily >>>>>> restricted to the >>>>>> 1.2 IB spec definitions. It might make sense to state that the IB >>>>>> implementation is based on the 1.2 spec in an IB specific section, >>>>>> but keep the >>>>>> general documentation transport neutral at this point. >>>>>> >>>>>> >>>>> Sure, the page would be changed to reflect that. >>>>> >>>>> Or. >>>>> >>>>> >>>> Sorry, i didn't find the time to get to it until now. >>>> >>>> I changed the problematic sentence to: >>>> "This library is an implementation of the verbs based on the Infiniband >>>> specification volume 1.2 chapter 11." >>>> >>>> >>> ^^^^^^^^^^ >>> volume 1 >>> >>> Also, should this refer to IBA 1.2.1 rather than 1.2 (if that was what >>> was intended by the 1.2 reference) ? >>> >>> >> I must admit that i didn't have a chance to check IB spec 1.2.1 BUT >> 1) Many of the features are implemented by the IB devices, and i don't >> know if all of them >> behave according to spec 1.2.1 >> > > Aren't any of those changes optional so I'm not sure I see the issue > here ? > > >> 2) Spec 1.2 introduced new features (such as SRQ and some more) which >> didn't exist in >> spec 1.1, so i wanted to make to make sure that they are supported. >> > > Sure; there are spec changes going forward but the spec is backward > compatible (and new features are optional). > > >> (as much as i know, >> most of the users don't have a copy of the IB spec ) >> > > The spec has been publically available for quite a while now (not just > available to IBTA members). > > >> 3) If one month from now, spec 1.2.2 will be published; should we update >> this file? 
>> > > That's actually a larger question affecting more than this just this > file. > > One approach would be to indicate the latest spec supported at the time > of release. > > >> I think that sentence is good (spec 1.2) for now... >> > > I'm not sure about its "goodness". In this particular place, there may > not be much harm either way but in others it is misleading and > inaccurate. > > >> Do you think that we should remove the spec version completely? >> (i don't think that it is wise to update the version unless there is a >> good reason for it ..) >> > > That gets rid of the overhead of dealing with maintaining the spec > version. However, eliminating the spec version leaves the version open > which is not a good thing in all cases. > > -- Hal > > >> thanks >> Dotan >> > > From hrosenstock at xsigo.com Tue Feb 26 07:17:16 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 26 Feb 2008 07:17:16 -0800 Subject: [ofa-general] [PATCH] libibverbs: Added the man page verbs.7 In-Reply-To: <47C42BB1.5010201@dev.mellanox.co.il> References: <200802031758.53692.dotanb@dev.mellanox.co.il> <000101c86750$1ffce090$a937170a@amr.corp.intel.com> <47A870A8.5050409@voltaire.com> <47C2A455.5070603@dev.mellanox.co.il> <1203957921.8793.320.camel@hrosenstock-ws.xsigo.com> <47C3BF60.3030104@dev.mellanox.co.il> <1204030721.454.48.camel@hrosenstock-ws.xsigo.com> <47C42BB1.5010201@dev.mellanox.co.il> Message-ID: <1204039036.454.70.camel@hrosenstock-ws.xsigo.com> On Tue, 2008-02-26 at 17:09 +0200, Dotan Barak wrote: > I suggest to leave verbs.h as is for OFED 1.3 and discuss this issue for > the next OFED distributions. > is it o.k. with you? Fine with me (clearly it's not a showstopper :-) but these things so often get lost and forgotten about... 
-- Hal > thanks > Dotan > > Hal Rosenstock wrote: > > On Tue, 2008-02-26 at 09:27 +0200, Dotan Barak wrote: > > > >> Hal Rosenstock wrote: > >> > >>> On Mon, 2008-02-25 at 13:19 +0200, Dotan Barak wrote: > >>> > >>> > >>>> Or Gerlitz wrote: > >>>> > >>>> > >>>>> Sean Hefty wrote: > >>>>> > >>>>> > >>>>>> The verbs also support iWarp devices and are not necessarily > >>>>>> restricted to the > >>>>>> 1.2 IB spec definitions. It might make sense to state that the IB > >>>>>> implementation is based on the 1.2 spec in an IB specific section, > >>>>>> but keep the > >>>>>> general documentation transport neutral at this point. > >>>>>> > >>>>>> > >>>>> Sure, the page would be changed to reflect that. > >>>>> > >>>>> Or. > >>>>> > >>>>> > >>>> Sorry, i didn't find the time to get to it until now. > >>>> > >>>> I changed the problematic sentence to: > >>>> "This library is an implementation of the verbs based on the Infiniband > >>>> specification volume 1.2 chapter 11." > >>>> > >>>> > >>> ^^^^^^^^^^ > >>> volume 1 > >>> > >>> Also, should this refer to IBA 1.2.1 rather than 1.2 (if that was what > >>> was intended by the 1.2 reference) ? > >>> > >>> > >> I must admit that i didn't have a chance to check IB spec 1.2.1 BUT > >> 1) Many of the features are implemented by the IB devices, and i don't > >> know if all of them > >> behave according to spec 1.2.1 > >> > > > > Aren't any of those changes optional so I'm not sure I see the issue > > here ? > > > > > >> 2) Spec 1.2 introduced new features (such as SRQ and some more) which > >> didn't exist in > >> spec 1.1, so i wanted to make to make sure that they are supported. > >> > > > > Sure; there are spec changes going forward but the spec is backward > > compatible (and new features are optional). > > > > > >> (as much as i know, > >> most of the users don't have a copy of the IB spec ) > >> > > > > The spec has been publically available for quite a while now (not just > > available to IBTA members). 
> > > > > >> 3) If one month from now, spec 1.2.2 will be published; should we update > >> this file? > >> > > > > That's actually a larger question affecting more than this just this > > file. > > > > One approach would be to indicate the latest spec supported at the time > > of release. > > > > > >> I think that sentence is good (spec 1.2) for now... > >> > > > > I'm not sure about its "goodness". In this particular place, there may > > not be much harm either way but in others it is misleading and > > inaccurate. > > > > > >> Do you think that we should remove the spec version completely? > >> (i don't think that it is wise to update the version unless there is a > >> good reason for it ..) > >> > > > > That gets rid of the overhead of dealing with maintaining the spec > > version. However, eliminating the spec version leaves the version open > > which is not a good thing in all cases. > > > > -- Hal > > > > > >> thanks > >> Dotan > >> > > > > > From sashak at voltaire.com Tue Feb 26 07:42:04 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 26 Feb 2008 15:42:04 +0000 Subject: [ofa-general] Re: [PATCH] opensm/doc: fixing version in release notes In-Reply-To: <47C33CF6.3030306@dev.mellanox.co.il> References: <47C33CF6.3030306@dev.mellanox.co.il> Message-ID: <20080226154204.GF3116@sashak.voltaire.com> On 00:11 Tue 26 Feb , Yevgeny Kliteynik wrote: > OpenSM release notes for OFED 1.3 refer to version 3.1.10 > > Signed-off-by: Yevgeny Kliteynik Applied to master only. Thanks. Sasha From meier3 at llnl.gov Tue Feb 26 08:46:18 2008 From: meier3 at llnl.gov (Timothy A. Meier) Date: Tue, 26 Feb 2008 08:46:18 -0800 Subject: [ofa-general] OpenSM Console Ideas? In-Reply-To: <1203689708.8793.159.camel@hrosenstock-ws.xsigo.com> References: <47BE16F2.8000507@llnl.gov> <1203689708.8793.159.camel@hrosenstock-ws.xsigo.com> Message-ID: <47C4425A.6050703@llnl.gov> Hi Hal, I haven't had very much feedback yet. Do you have any idea how many people use the console? 
Hal Rosenstock wrote: > Hi Tim, > > On Thu, 2008-02-21 at 16:27 -0800, Timothy A. Meier wrote: >> LLNL uses the remote console feature in OpenSM. We have a need to secure >> this remote connection with authentication/authorization and encryption >> (specifically PAM and OpenSSL). I have a working prototype, and would >> like to formalize it and share/include this with OpenSM. >> >> Before I go down this path too far, I would like to solicit ideas from >> others who use the console. >> >> Currently, the console can be used in local, loopback, or remote modes. >> If security is added, should it replace other modes, or be an additional mode? > > IMO the old modes should be preserved and I would view > authentication/authorization and encryption as an orthogonal dimension > to be supported with any of those modes. > This was my initial instinct as well. Honestly, however, once we have a secure connection, we will probably use it exclusively. I suppose the local console would also be necessary. I can preserve all modes. >> The intention is to use PAM for the AA framework, and OpenSSL for secure >> sockets. Are there any serious objections to this implementation plan? > > Is the license compatible with OpenFabrics ? > Well I am not a lawyer, but I believe that it is. OpenSSL has a dual license, both are BSD-style open source licenses (one for the toolkit, one for openssl). An alternate to OpenSSL is GNU TLS. GNU TLS is not as widely used, and has the GNU Lesser GPL which is supposed to be extremely lax. The PAM libraries are included with most linux distros, (RH, Debian, etc.) and have BSD style and GNU GPL licenses. >> The console feature has always been a configuration/command line option, >> but should the secure console be conditionally compiled/linked as well? >> (eliminate dependency on the PAM and OpenSSL libs, pam, pam_misc, cryto, ssl). > > This might depend on the licensing. 
Also, on one hand, it would be nice > to minimize the build options, but for those where space is an issue, > the separate configurability of this would be useful. (Not knowing the > additional size of this but it sounds like it will be large enough to > not make this a mandatory requirement of the console). > > -- Hal > Agreed. Should it NOT include the security stuff into the build, by default? And the Console be disabled by default, and if enabled, default to "local"? >> The secure console would require a relatively primitive client application, >> which I will probably package under opensm, just like osmtest. Make sense? >> >> Do you have any other ideas or suggestions for the remote console? >> > -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov From dwsanfordbryantm at sanfordbryant.com Tue Feb 26 09:58:38 2008 From: dwsanfordbryantm at sanfordbryant.com (Heriberto Kern) Date: Tue, 26 Feb 2008 19:58:38 +0200 Subject: [ofa-general] Gamble in the best online casino! Message-ID: <01c878b1$fa773500$64cda74e@dwsanfordbryantm> Play the most popular casino games at home! Black Jack, Slots, Roulette, Poker, Craps! Just download easy to use free software, register free account and play your favorite game. Receive free $2400 bonus to start play with! Great online casino Golden Gate is one of the leading casinos known for fair playing, excellent customer service available to contact 24 hour a day, 7 days a week and prompt payouts. http://geocities.com/richallen611 Start downloading free software now! From pw at osc.edu Tue Feb 26 10:26:55 2008 From: pw at osc.edu (Pete Wyckoff) Date: Tue, 26 Feb 2008 13:26:55 -0500 Subject: [PATCH 0/2] (was Re: [ofa-general] fmr pool free_list empty) In-Reply-To: References: <20080225225330.GA3316@osc.edu> Message-ID: <20080226182655.GD7033@osc.edu> rdreier at cisco.com wrote on Mon, 25 Feb 2008 15:02 -0800: > Ugh. 
[pw wrote:] > > Looking at the FMR dirty list unmapping code in > > ib_fmr_batch_release(), there is a section that pulls all the dirty > > entries onto a list that it will later unmap and put back on the > > free list. > > > But it also plans to unmap all the free entries that have ever been > > remapped: > > Yes, this came from a3cd7d90 ("IB/fmr_pool: ib_fmr_pool_flush() should > flush all dirty FMRs"). That solved a real problem for Olaf, because > otherwise dirty FMRs that are not at the max map count might never get > invalidated. It's not exactly an optimization but rather a > correctness issue, because RDS relies on killing mapping eventually. > > On the other hand, this behavior clearly does lead to the possibility > of leaving the free list temporarily empty for stupid reasons. > > I don't see a really good way to fix this at the moment, need to > meditate a little. Adding CCs in case some iser users are not on the openfabrics list. Original message is here: http://lists.openfabrics.org/pipermail/general/2008-February/047111.html This quoted commit is a regression for iSER. Not sure if it causes problems for the other FMR user, SRP. It went in after v2.6.24. Following this mail are two patches. One to revert the change, and one to attempt to do Olaf's patch in such a way that it does not cause problems for other FMR users. I haven't tested the patches with RDS. It apparently isn't in the tree yet. In fact, there are no users of ib_flush_fmr_pool() in the tree, which is the only function affected by the second patch. But iSER is working again in my scenario. As a side note, I don't remember seeing this patch on the openfabrics mailing list. Perhaps I missed it. Sometimes these sorts of interactions can be spotted if proposed changes get wider attention. -- Pete From hrosenstock at xsigo.com Tue Feb 26 09:59:08 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 26 Feb 2008 09:59:08 -0800 Subject: [ofa-general] OpenSM Console Ideas?
In-Reply-To: <47C4425A.6050703@llnl.gov> References: <47BE16F2.8000507@llnl.gov> <1203689708.8793.159.camel@hrosenstock-ws.xsigo.com> <47C4425A.6050703@llnl.gov> Message-ID: <1204048748.454.90.camel@hrosenstock-ws.xsigo.com> Hi Tim, On Tue, 2008-02-26 at 08:46 -0800, Timothy A. Meier wrote: > Hi Hal, > I haven't had very much feedback yet. Do you have any idea how many people > use the console? No idea. -- Hal > Hal Rosenstock wrote: > > Hi Tim, > > > > On Thu, 2008-02-21 at 16:27 -0800, Timothy A. Meier wrote: > >> LLNL uses the remote console feature in OpenSM. We have a need to secure > >> this remote connection with authentication/authorization and encryption > >> (specifically PAM and OpenSSL). I have a working prototype, and would > >> like to formalize it and share/include this with OpenSM. > >> > >> Before I go down this path too far, I would like to solicit ideas from > >> others who use the console. > >> > >> Currently, the console can be used in local, loopback, or remote modes. > >> If security is added, should it replace other modes, or be an additional mode? > > > > IMO the old modes should be preserved and I would view > > authentication/authorization and encryption as an orthogonal dimension > > to be supported with any of those modes. > > > This was my initial instinct as well. Honestly, however, once we have > a secure connection, we will probably use it exclusively. I suppose the > local console would also be necessary. I can preserve all modes. > > >> The intention is to use PAM for the AA framework, and OpenSSL for secure > >> sockets. Are there any serious objections to this implementation plan? > > > > Is the license compatible with OpenFabrics ? > > > Well I am not a lawyer, but I believe that it is. OpenSSL has a dual license, > both are BSD-style open source licenses (one for the toolkit, one for openssl). > An alternate to OpenSSL is GNU TLS. 
GNU TLS is not as widely used, and has > the GNU Lesser GPL which is supposed to be extremely lax. > > The PAM libraries are included with most linux distros, (RH, Debian, etc.) and > have BSD style and GNU GPL licenses. > > >> The console feature has always been a configuration/command line option, > >> but should the secure console be conditionally compiled/linked as well? > >> (eliminate dependency on the PAM and OpenSSL libs, pam, pam_misc, cryto, ssl). > > > > This might depend on the licensing. Also, on one hand, it would be nice > > to minimize the build options, but for those where space is an issue, > > the separate configurability of this would be useful. (Not knowing the > > additional size of this but it sounds like it will be large enough to > > not make this a mandatory requirement of the console). > > > > -- Hal > > > Agreed. Should it NOT include the security stuff into the build, by default? > And the Console be disabled by default, and if enabled, default to "local"? > > >> The secure console would require a relatively primitive client application, > >> which I will probably package under opensm, just like osmtest. Make sense? > >> > >> Do you have any other ideas or suggestions for the remote console? > >> > > > > From pw at osc.edu Tue Feb 26 10:27:31 2008 From: pw at osc.edu (Pete Wyckoff) Date: Tue, 26 Feb 2008 13:27:31 -0500 Subject: [ofa-general] [PATCH 1/2] Revert "IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs" In-Reply-To: <20080226182655.GD7033@osc.edu> References: <20080225225330.GA3316@osc.edu> <20080226182655.GD7033@osc.edu> Message-ID: <20080226182731.GE7033@osc.edu> This reverts commit a3cd7d9070be417a21905c997ee32d756d999b38. The original commit breaks iSER reliably, making it complain: iser: iser_reg_page_vec:ib_fmr_pool_map_phys failed: -11 The FMR cleanup thread runs ib_fmr_batch_release() as dirty entries build up. This commit causes clean but used FMR entries also to be purged. 
During that process, another thread can see that there are no free FMRs and fail, even though there should always have been enough available. Signed-off-by: Pete Wyckoff --- drivers/infiniband/core/fmr_pool.c | 21 ++++++--------------- 1 files changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 7f00347..4044fdf 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, static void ib_fmr_batch_release(struct ib_fmr_pool *pool) { int ret; - struct ib_pool_fmr *fmr, *next; + struct ib_pool_fmr *fmr; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); @@ -158,20 +158,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #endif } - /* - * The free_list may hold FMRs that have been put there - * because they haven't reached the max_remap count. - * Invalidate their mapping as well. - */ - list_for_each_entry_safe(fmr, next, &pool->free_list, list) { - if (fmr->remap_count == 0) - continue; - hlist_del_init(&fmr->cache_node); - fmr->remap_count = 0; - list_add_tail(&fmr->fmr->list, &fmr_list); - list_move(&fmr->list, &unmap_list); - } - list_splice(&pool->dirty_list, &unmap_list); INIT_LIST_HEAD(&pool->dirty_list); pool->dirty_len = 0; @@ -384,6 +370,11 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { + if (fmr->remap_count) { + INIT_LIST_HEAD(&fmr_list); + list_add_tail(&fmr->fmr->list, &fmr_list); + ib_unmap_fmr(&fmr_list); + } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); -- 1.5.4.1 From pw at osc.edu Tue Feb 26 10:27:53 2008 From: pw at osc.edu (Pete Wyckoff) Date: Tue, 26 Feb 2008 13:27:53 -0500 Subject: [ofa-general] [PATCH 2/2] ib fmr pool: flush used clean entries In-Reply-To: <20080226182655.GD7033@osc.edu> References: <20080225225330.GA3316@osc.edu> 
<20080226182655.GD7033@osc.edu> Message-ID: <20080226182753.GF7033@osc.edu> Commit a3cd7d9070be417a21905c997ee32d756d999b38 (IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs) caused a regression for iSER and was reverted in e5507736c6449b3253347eed6f8ea77a28cf688e. This change attempts to redo the original patch so that all used FMR entries are flushed when ib_flush_fmr_pool() is called, and other FMR users are not affected. Simply move used entries from the clean list onto the dirty list before letting the cleanup thread do its job. Signed-off-by: Pete Wyckoff --- drivers/infiniband/core/fmr_pool.c | 17 ++++++++++++++++- 1 files changed, 16 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 4044fdf..06d502c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -398,8 +398,23 @@ EXPORT_SYMBOL(ib_destroy_fmr_pool); */ int ib_flush_fmr_pool(struct ib_fmr_pool *pool) { - int serial = atomic_inc_return(&pool->req_ser); + int serial; + struct ib_pool_fmr *fmr, *next; + + /* + * The free_list holds FMRs that may have been used + * but have not been remapped enough times to be dirty. + * Put them on the dirty list now so that the cleanup + * thread will reap them too. + */ + spin_lock_irq(&pool->pool_lock); + list_for_each_entry_safe(fmr, next, &pool->free_list, list) { + if (fmr->remap_count > 0) + list_move(&fmr->list, &pool->dirty_list); + } + spin_unlock_irq(&pool->pool_lock); + serial = atomic_inc_return(&pool->req_ser); wake_up_process(pool->thread); if (wait_event_interruptible(pool->force_wait, -- 1.5.4.1 From ifiwasagecko at hotmail.co.uk Tue Feb 26 10:54:39 2008 From: ifiwasagecko at hotmail.co.uk (Herbert Duncan) Date: Tue, 26 Feb 2008 19:54:39 +0100 Subject: [ofa-general] Affordable Prices for Top Brand Shoes Message-ID: <198808993.35601546004442@hotmail.co.uk> Attention, a 2008's Footwear Collection on offer! 
Direct purchase will save you up to 60%. Buy brand names for less than wholesale prices! Armani, Bally, Ugg, Chanel and many more brands including TOP BRANDS! FREE worldwide shipping!High quality, luxury, recognizable footwear for fashion-conscious people. http://geocities.com/kerryreilly58/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From EvadesperateHeard at latimes.com Tue Feb 26 05:20:58 2008 From: EvadesperateHeard at latimes.com (Amber Clifton) Date: Tue, 26 Feb 2008 16:20:58 +0300 Subject: [ofa-general] No Hassle Lines of Credit Message-ID: <1c0d01c878ac$dd0576d0$0c01a8c0@va3a3484931c3e> Need A Business Loan? Reach Over 290 Lenders with One Easy Form. 5k-200k For Your Business! http://hinij.net.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From bhalevy at panasas.com Tue Feb 26 11:23:01 2008 From: bhalevy at panasas.com (Benny Halevy) Date: Tue, 26 Feb 2008 11:23:01 -0800 Subject: [ofa-general] Re: [PATCH 1/2] Revert "IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs" In-Reply-To: <20080226182731.GE7033@osc.edu> References: <20080225225330.GA3316@osc.edu> <20080226182655.GD7033@osc.edu> <20080226182731.GE7033@osc.edu> Message-ID: <47C46715.2090603@panasas.com> Pete, the subject says "PATCH 1/2" but I didn't see any follow-up message for PATCH 2/2. Just wondering :) Benny On Feb. 26, 2008, 10:27 -0800, Pete Wyckoff wrote: > This reverts commit a3cd7d9070be417a21905c997ee32d756d999b38. > > The original commit breaks iSER reliably, making it complain: > > iser: iser_reg_page_vec:ib_fmr_pool_map_phys failed: -11 > > The FMR cleanup thread runs ib_fmr_batch_release() as dirty > entries build up. This commit causes clean but used FMR > entries also to be purged. During that process, another thread > can see that there are no free FMRs and fail, even though > there should always have been enough available. 
> > Signed-off-by: Pete Wyckoff > --- > drivers/infiniband/core/fmr_pool.c | 21 ++++++--------------- > 1 files changed, 6 insertions(+), 15 deletions(-) > > diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c > index 7f00347..4044fdf 100644 > --- a/drivers/infiniband/core/fmr_pool.c > +++ b/drivers/infiniband/core/fmr_pool.c > @@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, > static void ib_fmr_batch_release(struct ib_fmr_pool *pool) > { > int ret; > - struct ib_pool_fmr *fmr, *next; > + struct ib_pool_fmr *fmr; > LIST_HEAD(unmap_list); > LIST_HEAD(fmr_list); > > @@ -158,20 +158,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) > #endif > } > > - /* > - * The free_list may hold FMRs that have been put there > - * because they haven't reached the max_remap count. > - * Invalidate their mapping as well. > - */ > - list_for_each_entry_safe(fmr, next, &pool->free_list, list) { > - if (fmr->remap_count == 0) > - continue; > - hlist_del_init(&fmr->cache_node); > - fmr->remap_count = 0; > - list_add_tail(&fmr->fmr->list, &fmr_list); > - list_move(&fmr->list, &unmap_list); > - } > - > list_splice(&pool->dirty_list, &unmap_list); > INIT_LIST_HEAD(&pool->dirty_list); > pool->dirty_len = 0; > @@ -384,6 +370,11 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) > > i = 0; > list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { > + if (fmr->remap_count) { > + INIT_LIST_HEAD(&fmr_list); > + list_add_tail(&fmr->fmr->list, &fmr_list); > + ib_unmap_fmr(&fmr_list); > + } > ib_dealloc_fmr(fmr->fmr); > list_del(&fmr->list); > kfree(fmr); From bhalevy at panasas.com Tue Feb 26 11:47:10 2008 From: bhalevy at panasas.com (Benny Halevy) Date: Tue, 26 Feb 2008 11:47:10 -0800 Subject: [ofa-general] Re: [PATCH 1/2] Revert "IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs" In-Reply-To: <20080226193901.GD6029@parisc-linux.org> References: <20080225225330.GA3316@osc.edu> 
<20080226182655.GD7033@osc.edu> <20080226182731.GE7033@osc.edu> <47C46715.2090603@panasas.com> <20080226193901.GD6029@parisc-linux.org> Message-ID: <47C46CBE.3070006@panasas.com> Diabolical ;-) Thanks for the pointer! Benny On Feb. 26, 2008, 11:39 -0800, Matthew Wilcox wrote: > On Tue, Feb 26, 2008 at 11:23:01AM -0800, Benny Halevy wrote: >> Pete, the subject says "PATCH 1/2" but I didn't see any follow-up message >> for PATCH 2/2. Just wondering :) > > I think the problem's on your end ... I got it and so did marc: > http://marc.info/?l=linux-scsi&m=120405067313933&w=2 > From rdreier at cisco.com Tue Feb 26 12:09:52 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 26 Feb 2008 12:09:52 -0800 Subject: [ofa-general] [PATCH 2/2] ib fmr pool: flush used clean entries In-Reply-To: <20080226182753.GF7033@osc.edu> (Pete Wyckoff's message of "Tue, 26 Feb 2008 13:27:53 -0500") References: <20080225225330.GA3316@osc.edu> <20080226182655.GD7033@osc.edu> <20080226182753.GF7033@osc.edu> Message-ID: This looks like a really nice approach to me. Olaf? - R. From dwsigmam at sigma.cl Tue Feb 26 13:32:04 2008 From: dwsigmam at sigma.cl (Pat Broussard) Date: Tue, 26 Feb 2008 21:32:04 +0000 Subject: [ofa-general] Want to be a hero in bed? Message-ID: <01c878bf$07806200$c9fbc75a@dwsigmam> Are U Tired with erectile dysfunction? Enhance your sexual life now! Want to be ready for sex in few minutes? Reproductive and ED problems solution http://geocities.com/kellykoch730 We are verified by VISA. Confidential purchase. 
From matthew at wil.cx Tue Feb 26 11:39:02 2008 From: matthew at wil.cx (Matthew Wilcox) Date: Tue, 26 Feb 2008 12:39:02 -0700 Subject: [ofa-general] Re: [PATCH 1/2] Revert "IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs" In-Reply-To: <47C46715.2090603@panasas.com> References: <20080225225330.GA3316@osc.edu> <20080226182655.GD7033@osc.edu> <20080226182731.GE7033@osc.edu> <47C46715.2090603@panasas.com> Message-ID: <20080226193901.GD6029@parisc-linux.org> On Tue, Feb 26, 2008 at 11:23:01AM -0800, Benny Halevy wrote: > Pete, the subject says "PATCH 1/2" but I didn't see any follow-up message > for PATCH 2/2. Just wondering :) I think the problem's on your end ... I got it and so did marc: http://marc.info/?l=linux-scsi&m=120405067313933&w=2 -- Intel are signing my paycheques ... these opinions are still mine "Bill, look, we understand that you're interested in selling us this operating system, but compare it to ours. We can't possibly take such a retrograde step." From olaf.kirch at oracle.com Tue Feb 26 13:58:25 2008 From: olaf.kirch at oracle.com (Olaf Kirch) Date: Tue, 26 Feb 2008 22:58:25 +0100 Subject: [ofa-general] [PATCH 2/2] ib fmr pool: flush used clean entries In-Reply-To: References: <20080225225330.GA3316@osc.edu> <20080226182753.GF7033@osc.edu> Message-ID: <200802262258.26536.olaf.kirch@oracle.com> On Tuesday 26 February 2008 21:09, Roland Dreier wrote: > This looks like a really nice approach to me. Olaf? Yes, this looks good. I haven't had a chance to test it, but it looks like the right approach. Olaf -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax From sashak at voltaire.com Tue Feb 26 16:39:32 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 27 Feb 2008 00:39:32 +0000 Subject: [ofa-general] OpenSM Console Ideas? 
In-Reply-To: <1204048748.454.90.camel@hrosenstock-ws.xsigo.com> References: <47BE16F2.8000507@llnl.gov> <1203689708.8793.159.camel@hrosenstock-ws.xsigo.com> <47C4425A.6050703@llnl.gov> <1204048748.454.90.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080227003932.GB27272@sashak.voltaire.com> Hi Tim, On 09:59 Tue 26 Feb , Hal Rosenstock wrote: > > I haven't had very much feedback yet. Do you have any idea how many people > > use the console? I don't know for sure. But don't expect that there are a lot of console users right now. I think the main reason is that it is disabled by default since it is not secure. Secure console should change this IMO. Sasha From rdreier at cisco.com Tue Feb 26 16:27:45 2008 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 26 Feb 2008 16:27:45 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get a couple of small post-2.6.25-rc3 fixes, mostly to the new nes driver: Adrian Bunk (3): RDMA/nes: Fix off-by-one RDMA/nes: Fix a memory leak in schedule_nes_timer() RDMA/nes: Fix a check-after-use in nes_probe() Bryan Rosenburg (1): RDMA/cxgb3: Fix shift calc in build_phys_page_list() for 1-entry page lists Chien Tung (1): RDMA/nes: Resurrect error path dead code Faisal Latif (2): RDMA/nes: Fix use-after-free in mini_cm_dec_refcnt_listen() RDMA/nes: Fix CRC endianness for RDMA connection establishment on big-endian Glenn Streiff (1): RDMA/nes: Fix use-after-free in nes_create_cq() John Lacombe (1): RDMA/nes: Fix interrupt moderation low threshold Roland Dreier (1): MAINTAINERS: neteffect update MAINTAINERS | 2 ++ drivers/infiniband/hw/cxgb3/iwch_mem.c | 10 ++-------- drivers/infiniband/hw/nes/nes.c | 2 +- drivers/infiniband/hw/nes/nes.h | 15 +++++++++++++++ 
drivers/infiniband/hw/nes/nes_cm.c | 15 +++++++++------ drivers/infiniband/hw/nes/nes_hw.c | 13 +++++-------- drivers/infiniband/hw/nes/nes_hw.h | 2 +- drivers/infiniband/hw/nes/nes_verbs.c | 10 ++++------ 8 files changed, 39 insertions(+), 30 deletions(-) From prescott at hpc.ufl.edu Tue Feb 26 17:44:05 2008 From: prescott at hpc.ufl.edu (Craig Prescott) Date: Tue, 26 Feb 2008 20:44:05 -0500 Subject: [ofa-general] SDP performance with bzcopy testing help needed In-Reply-To: <47B3D253.7010209@hpc.ufl.edu> References: <47B20F6F.8080302@hpc.ufl.edu> <47B376CB.6050404@hpc.ufl.edu> <47B3D253.7010209@hpc.ufl.edu> Message-ID: <47C4C065.8060108@hpc.ufl.edu> Craig Prescott wrote: > Scott Weitzenkamp (sweitzen) wrote: >>> But the effect is still clear. >>> >>> throughput: >>> >>> 64K 128K 1M >>> SDP 7602.40 7560.57 5791.56 >>> BZCOPY 5454.20 6378.48 7316.28 >> Looks unclear to me. Sometimes BZCOPY does better, sometimes worse. > Fair enough. > > While measuring a broader spectrum of message sizes, I noted a > big variation in throughput and send service demand for the SDP > Sorry for the slow follow up. I've tried to get to the bottom of this. I think what I've found could explain the mixed results that have been observed when people try to see any benefit from BZCOPY. In a nutshell, on my setup I found that normal SDP "Bcopy" throughput and send service demand is highly dependent upon which core the netperf client runs on (is it the same core handling ib_mthca interrupts?) and whether or not the pages mapped by the netperf client are on a node local to the core executing that client. BZCOPY, on the other hand, shows only weak dependence upon these variables. What I did was the following: 1) Fix the netserver and ib_mthca interrupt mappings on specific cores. 2) On the netperf client machine, run netperf on each core for message sizes from 64KiB to 4MiB serially while holding ib_mthca interrupt mappings constant. 
3) Repeat step 2) for all possible ib_mthca interrupt mappings. 4) Repeat step 2) and 3) with the client booted with "mem=1024M". Step 4) forces all pages mapped by the netperf client to come from node 0 (can see with /proc/<pid>/numa_maps). On my setup, BZCOPY performance began to be competitive with Bcopy once the message sizes reached about 256KiB. Somewhere between 1MiB and 2MiB, BZCOPY send service demand was *always* less than Bcopy - even under the most advantageous conditions for Bcopy. I documented the setup and results here: http://hpc.ufl.edu/benchmarks/ib_sdp/smp_aff.html The last plots on the page are the ones to look at if you don't want to read the whole thing. So there is a class of conditions for which SDP Bcopy will remain competitive with BZCOPY, even as message sizes become quite large. I think this may explain why some people see it, and some people don't. Cheers, Craig From prescott at hpc.ufl.edu Tue Feb 26 17:44:43 2008 From: prescott at hpc.ufl.edu (Craig Prescott) Date: Tue, 26 Feb 2008 20:44:43 -0500 Subject: [ofa-general] SDP performance with bzcopy testing help needed In-Reply-To: <8A71B368A89016469F72CD08050AD334026D6040@maui.asicdesigners.com> References: <47B20F6F.8080302@hpc.ufl.edu> <47B376CB.6050404@hpc.ufl.edu> <47B3D253.7010209@hpc.ufl.edu> <8A71B368A89016469F72CD08050AD334026D6040@maui.asicdesigners.com> Message-ID: <47C4C08B.9040204@hpc.ufl.edu> Hi Felix; I'm really sorry for such a slow reply. Thank you for looking at the results! Indeed, the performance of sockets over TOE is quite impressive. I did manage to update the web page last week to fix the text regarding the memory region size. I apologize for getting it wrong, and thank you for setting me straight. I also added some acknowledgements. Cheers, Craig Felix Marti wrote: > Hi Craig, > > Thank you for pulling the data together on a website. I believe the > results are quite interesting.
It is probably worthwhile to point out a > few performance points: > > Sockets over TOE: > - gets line rate for small IO size (<1KB) with 1/2 line rate just north > of 256B > - cpu utilization drops to about 25% for receive and about 12.5% for > transmit - out of a single core; various folks would prolly reports this > as 8% and 3% when considering the processing power of the entire > machine. > - 1B latency is about 10usecs > > Sockets of SDP: > - gets line rate for IO sizes of about 16KB (ZCOPY disabled) and 64KB > (ZCOPY enabled) > - cpu utilization is about 100%, even for large IO and the benefit of > ZCOPY is limited (about 12.5%) > - 1B latency is about 20usecs > > You can make the same comparison for Sockets over NIC as well. > > I believe that these numbers show the benefit of running sockets apps > directly over the T3 TOE interface (instead of mapping a TCP streaming > interface to a RDMA interface and then eventually back to a TCP stream > :) which is very efficient, i.e. a lot of folks believe that TOE > provides little benefit, and even less benefit for small IO (which is so > crucial for many apps) but these results really prove them wrong. Note > that the NIC requires an IO size of 4KB to reach line rate and > performance falls off again as the IO sizes increases (beyond CPU cache > sizes). This might even be more surprising as you use a MTU of 9KB > (jumbo frames) and the NIC vs TOE comparison would tip in the TOE's > favor even faster if you were to run with MTU 1500. > > Note that there is a little correction with respect to T3 and DMA > address range (for iWarp). T3 does not have any address limitation and > can DMA to/from any 64b address. However, memory region sizes are > limited to 4GB. OFED currently attempts to map the entire address space > for DMA (which, IMHO, is questionable as the entire address space is > opened up for DMA - what about UNIX security semantics? :-/). 
It would > prolly be better (more secure) if apps were only to map address ranges > that they really want to DMA to/from and then a 4GB region size > limitation seems adequate. > > Regards, > felix > > > >> -----Original Message----- >> From: general-bounces at lists.openfabrics.org [mailto:general- >> bounces at lists.openfabrics.org] On Behalf Of Craig Prescott >> Sent: Wednesday, February 13, 2008 9:32 PM >> To: Scott Weitzenkamp (sweitzen) >> Cc: general at lists.openfabrics.org; jim at mellanox.com >> Subject: Re: [ofa-general] SDP performance with bzcopy testing help >> needed >> >> Scott Weitzenkamp (sweitzen) wrote: >>>> But the effect is still clear. >>>> >>>> throughput: >>>> >>>> 64K 128K 1M >>>> SDP 7602.40 7560.57 5791.56 >>>> BZCOPY 5454.20 6378.48 7316.28 >>>> >>> Looks unclear to me. Sometimes BZCOPY does better, sometimes worse. >>> >>> >> Fair enough. >> >> While measuring a broader spectrum of message sizes, I noted a >> big variation in throughput and send service demand for the SDP >> case as a function of which core/CPU the netperf ran on. >> Particularly, which CPU the netperf ran on relative to which >> CPU was handling the interrupts for ib_mthca. >> >> Netperf has an option (-T) to allow for local and remote cpu >> binding. So I used it to force the client and server to run on >> CPU 0. Further, I mapped all ib_mthca interrupts to CPU 1 (irqbalance >> was already disabled). This appears to have reduced the statistical >> error between netperf runs to negligible amounts. I'll do more runs >> to verify this and check out the other permutations, but this is what >> has come out so far. 
>> >> TPUT = throughput (Mbits/sec) >> LCL = send service demand (usec/KB) >> RMT = recv service demand (usec/KB) >> >> "-T 0,0" option given to netperf client: >> >> SDP BZCOPY >> -------------------- -------------------- >> MESGSIZE TPUT LCL RMT TPUT LCL RMT >> -------- ------- ----- ----- ------- ----- ----- >> 64K 7581.14 0.746 1.105 5547.66 1.491 1.495 >> 128K 7478.37 0.871 1.116 6429.84 1.282 1.291 >> 256K 7427.38 0.946 1.115 6917.20 1.197 1.201 >> 512K 7310.14 1.122 1.129 7229.13 1.145 1.150 >> 1M 7251.29 1.143 1.129 7457.95 0.996 1.109 >> 2M 7249.27 1.146 1.133 7340.26 0.502 1.105 >> 4M 7217.26 1.156 1.136 7322.63 0.397 1.096 >> >> In this case, BZCOPY send service demand is significantly >> less for the largest message sizes, though the throughput >> for large messages is not very different. >> >> However, with "-T 2,2", the result looks like this: >> >> SDP BZCOPY >> -------------------- -------------------- >> MESGSIZE TPUT LCL RMT TPUT LCL RMT >> -------- ------- ----- ----- ------- ----- ----- >> 64K 7599.40 0.841 1.114 5493.56 1.510 1.585 >> 128K 7556.53 1.039 1.121 6483.12 1.274 1.325 >> 256K 7155.13 1.128 1.180 6996.30 1.180 1.220 >> 512K 5984.26 1.357 1.277 7285.86 1.130 1.166 >> 1M 5641.28 1.443 1.343 7250.43 0.811 1.141 >> 2M 5657.98 1.439 1.387 7265.85 0.492 1.127 >> 4M 5623.94 1.447 1.370 7274.43 0.385 1.112 >> >> For BZCOPY, the results are pretty similar; but for SDP, >> the service demands are much higher, and the throughputs >> have dropped dramatically relative to "-T 0,0". >> >> In either case, though, BZCOPY is more efficient for >> large messages. >> >> Cheers, >> Craig From 9kn3d28y at hotmail.com Tue Feb 26 19:03:46 2008 From: 9kn3d28y at hotmail.com (Joanna Hurst) Date: Wed, 27 Feb 2008 11:03:46 +0800 Subject: [ofa-general] Can we talk? Message-ID: <027451694.53208852021655@hotmail.com> Hello! I am tired tonight. I am nice girl that would like to chat with you. 
Email me at Yvonne at ThePaganDoorway.info only, because I am using my friend's email to write this. Mind me sending some of my pictures to you? From costaesuriflowers.com at usedoffset.com Tue Feb 26 10:06:05 2008 From: costaesuriflowers.com at usedoffset.com (Evan Brooks) Date: Tue, 26 Feb 2008 23:06:05 +0500 Subject: [ofa-general] Need S0ftware? Message-ID: <000a01c87923$14cf3800$0100007f@wrpol> Here: Microsoft Windows Vista Ultimate $89 Windows XP Pro + SP2 $49 MS Office Enterprice 2007 $79 MS Office 2003 Professional $69 Acrobat Reader 8 Pro $79 Macromedia Flash Professional 8 $49 http://beulahalderpm.blogspot.com Adobe Premiere 2.O $59 Corel Grafix Suite X3 $59 Adobe Il1ustrator CS2 $59 Adobe Photoshop CS2 V9.O $69 Adobe Photoshop CS3 Extended $89 http://beulahalderpm.blogspot.com Macromedia Studio 8 $99 Autodesk Autocad 2OO7 $129 Adobe Creative Suite 2 $149 Adobe Creative Suite 3 Premium $269 http://beulahalderpm.blogspot.com and for Mac here: Adobe Acrobat PRO 7 $69 Adobe After Effects $49 Macromedia Flash Pro 8 $49 Adobe Creative Suite 2 Premium $49 http://beulahalderpm.blogspot.com Ableton Live 5.O.1 $49 Adobe Photoshop CS $49 From 9nestboxaviary at gb.lightspeedpanel.com Wed Feb 27 02:41:38 2008 From: 9nestboxaviary at gb.lightspeedpanel.com (Lewis Messer) Date: Wed, 27 Feb 2008 18:41:38 +0800 Subject: [ofa-general] I'd like to show you my pic Message-ID: <766291324.95236372287161@gb.lightspeedpanel.com> Hello! I am tired this evening. I am nice girl that would like to chat with you. Email me at Ingeborg at DigitalDoorwayDesign.info only, because I am using my friend's email to write this. Wanna see some pictures of me? 
From vlad at lists.openfabrics.org Wed Feb 27 03:05:51 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 27 Feb 2008 03:05:51 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080227-0200 daily build status Message-ID: <20080227110552.09D56E608C8@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed 
on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.22 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.24 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From mail.xrkb at sofort-mail.de Wed Feb 27 03:41:38 2008 From: mail.xrkb at sofort-mail.de (Brunhild) Date: Wed, 27 Feb 2008 03:41:38 -0800 Subject: [ofa-general] Sexpartner gesucht? Message-ID: <4C17C438.BF5ECE33@sofort-mail.de> omuqoa plenegv vsvut ajjnhjyw ekfy jvcmdkzq vzjnck yzr Jetzt eine Affaire ? Dann schau mal hier: http://jetzt-eine-affaire02.tk Hier findest du mit Sicherheit einen geilen Seitensprungpartner in deiner Region! Liebe Gr��e Brunhild fdxlol pbpajqh kkrhv lokltdqa wxyy xjyelgwc ureosm qzu From Johncoleman63 at yahoo.com Wed Feb 27 05:26:45 2008 From: Johncoleman63 at yahoo.com (John Coleman) Date: Wed, 27 Feb 2008 08:26:45 -0500 (EST) Subject: [ofa-general] Urgent Notification Legal!!! Message-ID: <200802271326.m1RDQjU14813@uf1.urbanfriends.com> Fidelity Investments International Oak hill house, 130 Tonbridge, Hilden borough. Kent TN119DZ, United Kingdom. www.fidelity-international.com ================================ Attn: I am John Coleman, funds control manager of Fidelity Investment International, one of the world's largest fund management company with over �1.2 Trillion capital investment funds. 
Nevertheless, as funds control manager of Fidelity Investment International, I handle all our investor's direct capital funds and secretly extracted 1.2% Excess Maximum Return Capital Profit (EMRCP) per annum on each of the investor's marginal capital funds. As an expert, I have made over �85, 745, 000, 00 from the Investor's EMRCP and hereby looking for someone to trust who will stand as an investor to receive the fund as Annual Investment Proceeds from Fidelity marginal capital funds. Meanwhile, I have worked out the modalities and technicalities whereby the funds can be claimed in any of our 6 Clearing Houses without any hitches. Our sharing ratio will be 50-50. If you are interested, you should furnish me with your names in full, your contact address and your direct phone number so we! could discuss more on telephone as regard the transaction. N: B; Please all correspondence must be through my private telephone and E-mail, for more contact: johncolemanint at live.com Phone: +44 7024030210, +7031895354 For more information please contact the bank as follows: Mr. Harbert Koenrad International Banking Officer Dexia Bank of Netherlands. Direct Phone+31627340999 website: www.dexia.nl Email: harbert_dexia at yahoo.de I WILL BE LOOKING FORWARD TO HEARING FROM YOU IN THE HOPE OF A FUTURE COLLABORATION. Yours sincerely, John Coleman. From HNGUYEN at de.ibm.com Wed Feb 27 06:22:31 2008 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Wed, 27 Feb 2008 15:22:31 +0100 Subject: [ofa-general] Can we provide a qp_num when creating a QP ? In-Reply-To: Message-ID: Hi, > I have asked this question before. To wire a QP connection, > each side need to know > peer's side port lid and qp_num. For MPI with many ranks, this is an > alltoall exchange. > > If we can create a QP with provided qp_num, then MPI does > not need the qp_num > exchange, for a QP-pair between two processes, MPI can figure out > peer QP's qp_num. 
So > we can eliminate the third-party channel to exchange information, > and speedup the startup > time. > > Curently qp_num is always a return value from the driver. If > we can suggest a qp_num > when creating a QP, and the qp_num is already used, then IBV can > either error out, or > pick up another number for app. We can not support this for ehca. Regards Nam From jackm at dev.mellanox.co.il Wed Feb 27 06:20:41 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 27 Feb 2008 16:20:41 +0200 Subject: [ofa-general] [PATCH 0 of 2] update mlx4 and mthca driver version to 1.0 and release date to now (GA) Message-ID: <200802271620.41669.jackm@dev.mellanox.co.il> Roland, The mlx4_core, mlx4_ib, and ib_mthca modules are all currently GA. As such, the module versions should be upgraded to 1.0 . As long as we're at it, I also modified the release date to Feb 28, 2008. These patches are meant for kernel 2.6.25 . - Jack From jackm at dev.mellanox.co.il Wed Feb 27 06:20:53 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 27 Feb 2008 16:20:53 +0200 Subject: [ofa-general] [PATCH 2 of 2] mlx4: update module version and release date (for 2.6.25) Message-ID: <200802271620.53952.jackm@dev.mellanox.co.il> mlx4: Change release date to 2008, and version to 1.0 (since is GA). 
Signed-off-by: Jack Morgenstein Index: infiniband/drivers/infiniband/hw/mlx4/main.c =================================================================== --- infiniband.orig/drivers/infiniband/hw/mlx4/main.c 2008-02-10 08:59:00.000000000 +0200 +++ infiniband/drivers/infiniband/hw/mlx4/main.c 2008-02-27 16:11:34.000000000 +0200 @@ -44,8 +44,8 @@ #include "user.h" #define DRV_NAME "mlx4_ib" -#define DRV_VERSION "0.01" -#define DRV_RELDATE "May 1, 2006" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "February 28, 2008" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); Index: infiniband/drivers/net/mlx4/mlx4.h =================================================================== --- infiniband.orig/drivers/net/mlx4/mlx4.h 2008-01-28 13:07:57.000000000 +0200 +++ infiniband/drivers/net/mlx4/mlx4.h 2008-02-27 16:11:34.000000000 +0200 @@ -46,8 +46,8 @@ #define DRV_NAME "mlx4_core" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.01" -#define DRV_RELDATE "May 1, 2007" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "February 28, 2008" enum { MLX4_HCR_BASE = 0x80680, From jackm at dev.mellanox.co.il Wed Feb 27 06:20:49 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 27 Feb 2008 16:20:49 +0200 Subject: [ofa-general] [PATCH 1 of 2] mthca: update module version and release date (for 2.6.25) Message-ID: <200802271620.50041.jackm@dev.mellanox.co.il> mthca: Change release date to 2008, and version to 1.0 (since is GA). 
Signed-off-by: Jack Morgenstein Index: infiniband/drivers/infiniband/hw/mthca/mthca_dev.h =================================================================== --- infiniband.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2008-01-27 10:43:56.000000000 +0200 +++ infiniband/drivers/infiniband/hw/mthca/mthca_dev.h 2008-02-27 16:07:19.000000000 +0200 @@ -54,8 +54,8 @@ #define DRV_NAME "ib_mthca" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.08" -#define DRV_RELDATE "February 14, 2006" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "February 28, 2008" enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, From HNGUYEN at de.ibm.com Wed Feb 27 06:35:27 2008 From: HNGUYEN at de.ibm.com (Hoang-Nam Nguyen) Date: Wed, 27 Feb 2008 15:35:27 +0100 Subject: [ofa-general] post_recv question In-Reply-To: Message-ID: Hi, > I have a question regarding exactly _when_ a posted recv buffer is > available for the HW to use: > Consider that the post_recv methods usually just program a hw-specific > WR in the RQ, then ring a doorbell, then return. There is a delta > period between when the app returns from the post_recv call and when the > HW actually DMA's the WR and programs up the HW to enable that buffer. > (I'm assumming a specific HW design here, but I _think_ most HW behaves > this way?). > If this is all true, then from the apps point of view, the buffer isn't > really available when it returns from post_recv. This can lead to > conditions where the app advertises that recv buffer to the peer via > some out of band channel, and the peer posts a SEND which arrives > _before_ the HW has actually setup the RECV buffer. > Granted, this hole is small, but does it exist for nes, mthca, ehca, and > ipath libs/drivers? Or do they _not_ have this issue? This is true for ehca hw. 
Regards Nam From kliteyn at dev.mellanox.co.il Wed Feb 27 06:42:40 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 27 Feb 2008 16:42:40 +0200 Subject: [ofa-general] QoS management in OpenSM - doc Message-ID: <47C576E0.8050405@dev.mellanox.co.il> Hi Sasha, The following doc describes QoS management in OpenSM. This doc (named QoS_management_in_OpenSM.txt) has been added to the OFED docs, along with the QoS_in_OFED.txt. I'd like to add this info to OpenSM man pages as well. I'm including the text here as is, so it will be easier to follow possible changes. When those will be done, I'll fix the format to match the OpenSM man pages and post a patch. The only problem is that the whole OpenSM man has ~850 lines, while this QoS management file has ~500 lines... :) Please review. -- Yevgeny ------------------------------- QoS Management in OpenSM ============================================================================== Table of contents ============================================================================== 1. Overview 2. Full QoS Policy File 3. Simplified QoS Policy Definition 4. Policy File Syntax Guidelines 5. Examples of Full Policy File 6. Simplified QoS Policy - Details and Examples 7. SL2VL Mapping and VL Arbitration ============================================================================== 1. Overview ============================================================================== When QoS in OpenSM is enabled (-Q or --qos), OpenSM looks for QoS Policy file. The default name of OpenSM QoS policy file is /usr/local/etc/opensm/qos-policy.conf. The default may be changed by using -Y or --qos_policy_file option with OpenSM. During fabric initialization and at every heavy sweep OpenSM parses the QoS policy file, applies its settings to the discovered fabric elements, and enforces the provided policy on client requests. 
The overall flow for such requests is: - The request is matched against the defined matching rules such that the QoS Level definition is found. - Given the QoS Level, path(s) search is performed with the given restrictions imposed by that level. There are two ways to define QoS policy: - Full policy, where the policy file syntax provides an administrator various ways to match PathRecord/MultiPathRecord (PR/MPR) request and enforce various QoS constraints on the requested PR/MPR - Simplified QoS policy definition, where an administrator would be able to match PR/MPR requests by various ULPs and applications running on top of these ULPs. While the full policy syntax is very flexible, in many cases the simplified policy definition would be sufficient. ============================================================================== 2. Full QoS Policy File ============================================================================== QoS policy file has the following sections: I) Port Groups (denoted by port-groups). This section defines zero or more port groups that can be referred later by matching rules (see below). Port group lists ports by: - Port GUID - Port name, which is a combination of NodeDescription and IB port number - PKey, which means that all the ports in the subnet that belong to partition with a given PKey belong to this port group - Partition name, which means that all the ports in the subnet that belong to partition with a given name belong to this port group - Node type, where possible node types are: CA, SWITCH, ROUTER, ALL, and SELF (SM's port). II) QoS Setup (denoted by qos-setup). This section describes how to set up SL2VL and VL Arbitration tables on various nodes in the fabric. However, this is not supported in OFED 1.3. SL2VL and VLArb tables should be configured in the OpenSM options file (default location - /var/cache/opensm/opensm.opts). III) QoS Levels (denoted by qos-levels). 
Each QoS Level defines Service Level (SL) and a few optional fields: - MTU limit - Rate limit - PKey - Packet lifetime When path(s) search is performed, it is done with regards to restriction that these QoS Level parameters impose. One QoS level that is mandatory to define is a DEFAULT QoS level. It is applied to a PR/MPR query that does not match any existing match rule. Similar to any other QoS Level, it can also be explicitly referred by any match rule. IV) QoS Matching Rules (denoted by qos-match-rules). Each PathRecord/MultiPathRecord query that OpenSM receives is matched against the set of matching rules. Rules are scanned in order of appearance in the QoS policy file such as the first match takes precedence. Each rule has a name of QoS level that will be applied to the matching query. A default QoS level is applied to a query that did not match any rule. Queries can be matched by: - Source port group (whether a source port is a member of a specified group) - Destination port group (same as above, only for destination port) - PKey - QoS class - Service ID To match a certain matching rule, PR/MPR query has to match ALL the rule's criteria. However, not all the fields of the PR/MPR query have to appear in the matching rule. For instance, if the rule has a single criterion - Service ID, it will match any query that has this Service ID, disregarding rest of the query fields. However, if a certain query has only Service ID (which means that this is the only bit in the PR/MPR component mask that is on), it will not match any rule that has other matching criteria besides Service ID. ============================================================================== 3. Simplified QoS Policy Definition ============================================================================== Simplified QoS policy definition comprises of a single section denoted by qos-ulps. 
Similar to the full QoS policy, it has a list of match rules and their QoS Level, but in this case a match rule has only one criterion - its goal is to match a certain ULP (or a certain application on top of this ULP) PR/MPR request, and QoS Level has only one constraint - Service Level (SL). The simplified policy section may appear in the policy file in combination with the full policy, or as a stand-alone policy definition. See more details and list of match rule criteria below. ============================================================================== 4. Policy File Syntax Guidelines ============================================================================== - Empty lines are ignored. - Leading and trailing blanks, as well as empty lines, are ignored, so the indentation in the example is just for better readability. - Comments are started with the pound sign (#) and terminated by EOL. - Any keyword should be the first non-blank in the line, unless it's a comment. - Keywords that denote section/subsection start have matching closing keywords. - Having a QoS Level named "DEFAULT" is a must - it is applied to PR/MPR requests that didn't match any of the matching rules. - Any section/subsection of the policy file is optional. ============================================================================== 5. Examples of Full Policy File ============================================================================== As mentioned earlier, any section of the policy file is optional, and the only mandatory part of the policy file is a default QoS Level. Here's an example of the shortest policy file: qos-levels qos-level name: DEFAULT sl: 0 end-qos-level end-qos-levels Port groups section is missing because there are no match rules, which means that port groups are not referred to anywhere, and there is no need to define them. And since this policy file doesn't have any matching rules, PR/MPR query won't match any rule, and OpenSM will enforce default QoS level.
Essentially, the above example is equivalent to not having QoS policy file at all. The following example shows all the possible options and keywords in the policy file and their syntax: # # See the comments in the following example. # They explain different keywords and their meaning. # port-groups port-group # using port GUIDs name: Storage # "use" is just a description that is used for logging # Other than that, it is just a comment use: SRP Targets port-guid: 0x10000000000001, 0x10000000000005-0x1000000000FFFA port-guid: 0x1000000000FFFF end-port-group port-group name: Virtual Servers # The syntax of the port name is as follows: # "node_description/Pnum". # node_description is compared to the NodeDescription of the node, # and "Pnum" is a port number on that node. port-name: vs1 HCA-1/P1, vs2 HCA-1/P1 end-port-group # using partitions defined in the partition policy port-group name: Partitions partition: Part1 pkey: 0x1234 end-port-group # using node types: CA, ROUTER, SWITCH, SELF (for node that runs SM) # or ALL (for all the nodes in the subnet) port-group name: CAs and SM node-type: CA, SELF end-port-group end-port-groups qos-setup # This section of the policy file describes how to set up SL2VL and VL # Arbitration tables on various nodes in the fabric. # However, this is not supported in OFED 1.3 - the section is parsed # and ignored. SL2VL and VLArb tables should be configured in the # OpenSM options file (by default - /var/cache/opensm/opensm.opts). end-qos-setup qos-levels # Having a QoS Level named "DEFAULT" is a must - it is applied to # PR/MPR requests that didn't match any of the matching rules. qos-level name: DEFAULT use: default QoS Level sl: 0 end-qos-level # the whole set: SL, MTU-Limit, Rate-Limit, PKey, Packet Lifetime qos-level name: WholeSet sl: 1 mtu-limit: 4 rate-limit: 5 pkey: 0x1234 packet-life: 8 end-qos-level end-qos-levels # Match rules are scanned in order of their apperance in the policy file. # First matched rule takes precedence. 
qos-match-rules # matching by single criteria: QoS class qos-match-rule use: by QoS class qos-class: 7-9,11 # Name of qos-level to apply to the matching PR/MPR qos-level-name: WholeSet end-qos-match-rule # show matching by destination group and service id qos-match-rule use: Storage targets destination: Storage service-id: 0x10000000000001, 0x10000000000008-0x10000000000FFF qos-level-name: WholeSet end-qos-match-rule qos-match-rule source: Storage use: match by source group only qos-level-name: DEFAULT end-qos-match-rule qos-match-rule use: match by all parameters qos-class: 7-9,11 source: Virtual Servers destination: Storage service-id: 0x0000000000010000-0x000000000001FFFF pkey: 0x0F00-0x0FFF qos-level-name: WholeSet end-qos-match-rule end-qos-match-rules ============================================================================== 6. Simplified QoS Policy - Details and Examples ============================================================================== Simplified QoS policy match rules are tailored for matching ULPs (or some application on top of a ULP) PR/MPR requests. This section has a list of per-ULP (or per-application) match rules and the SL that should be enforced on the matched PR/MPR query. 
Match rules include: - Default match rule that is applied to PR/MPR query that didn't match any of the other match rules - SDP - SDP application with a specific target TCP/IP port range - SRP with a specific target IB port GUID - RDS - iSER - iSER application with a specific target TCP/IP port range - IPoIB with a default PKey - IPoIB with a specific PKey - any ULP/application with a specific Service ID in the PR/MPR query - any ULP/application with a specific PKey in the PR/MPR query - any ULP/application with a specific target IB port GUID in the PR/MPR query Since any section of the policy file is optional, as long as basic rules of the file are kept (such as no referring to nonexisting port group, having default QoS Level, etc), the simplified policy section (qos-ulps) can serve as a complete QoS policy file. The shortest policy file in this case would be as follows: qos-ulps default : 0 #default SL end-qos-ulps It is equivalent to the previous example of the shortest policy file, and it is also equivalent to not having policy file at all. 
Below is an example of simplified QoS policy with all the possible keywords: qos-ulps default : 0 # default SL sdp, port-num 30000 : 0 # SL for application running on top # of SDP when a destination # TCP/IP port is 30000 sdp, port-num 10000-20000 : 0 sdp : 1 # default SL for any other # application running on top of SDP rds : 2 # SL for RDS traffic iser, port-num 900 : 0 # SL for iSER with a specific target # port iser : 3 # default SL for iSER ipoib, pkey 0x0001 : 0 # SL for IPoIB on partition with # pkey 0x0001 ipoib : 4 # default IPoIB partition, # pkey=0x7FFF any, service-id 0x6234 : 6 # match any PR/MPR query with a # specific Service ID any, pkey 0x0ABC : 6 # match any PR/MPR query with a # specific PKey srp, target-port-guid 0x1234 : 5 # SRP when SRP Target is located on # a specified IB port GUID any, target-port-guid 0x0ABC-0xFFFFF : 6 # match any PR/MPR query with # a specific target port GUID end-qos-ulps Similar to the full policy definition, matching of PR/MPR queries is done in order of appearance in the QoS policy file such that the first match takes precedence, except for the "default" rule, which is applied only if the query didn't match any other rule. All other sections of the QoS policy file take precedence over the qos-ulps section. That is, if a policy file has both qos-match-rules and qos-ulps sections, then any query is matched first against the rules in the qos-match-rules section, and only if there was no match, the query is matched against the rules in qos-ulps section. Note that some of these match rules may overlap, so in order to use the simplified QoS definition effectively, it is important to understand how each of the ULPs is matched: 6.1 IPoIB IPoIB query is matched by PKey. Default PKey for IPoIB partition is 0x7fff, so the following three match rules are equivalent: ipoib : <SL> ipoib, pkey 0x7fff : <SL> any, pkey 0x7fff : <SL> 6.2 SDP SDP PR query is matched by Service ID.
The Service-ID for SDP is 0x000000000001PPPP, where PPPP are 4 hex digits holding the remote TCP/IP Port Number to connect to. The following two match rules are equivalent: sdp : <SL> any, service-id 0x0000000000010000-0x000000000001ffff : <SL> 6.3 RDS Similar to SDP, RDS PR query is matched by Service ID. The Service ID for RDS is 0x000000000106PPPP, where PPPP are 4 hex digits holding the remote TCP/IP Port Number to connect to. Default port number for RDS is 0x48CA, which makes a default Service-ID 0x00000000010648CA. The following two match rules are equivalent: rds : <SL> any, service-id 0x00000000010648CA : <SL> 6.4 iSER Similar to RDS, iSER query is matched by Service ID, where the Service ID is also 0x000000000106PPPP. Default port number for iSER is 0x035C, which makes a default Service-ID 0x000000000106035C. The following two match rules are equivalent: iser : <SL> any, service-id 0x000000000106035C : <SL> 6.5 SRP Service ID for SRP varies from storage vendor to vendor, thus SRP query is matched by the target IB port GUID. The following two match rules are equivalent: srp, target-port-guid 0x1234 : <SL> any, target-port-guid 0x1234 : <SL> Note that any of the above ULPs might contain target port GUID in the PR query, so in order for these queries not to be recognized by the QoS manager as SRP, the SRP match rule (or any match rule that refers to the target port guid only) should be placed at the end of the qos-ulps match rules. 6.6 MPI SL for MPI is manually configured by MPI admin. OpenSM is not forcing any SL on the MPI traffic, and that's why it is the only ULP that did not appear in the qos-ulps section. ============================================================================== 7. SL2VL Mapping and VL Arbitration ============================================================================== OpenSM cached options file has a set of QoS related configuration parameters, that are used to configure SL2VL mapping and VL arbitration on IB ports.
These parameters are: - Max VLs: the maximum number of VLs that will be on the subnet. - High limit: the limit of High Priority component of VL Arbitration table (IBA 7.6.9). - VLArb low table: Low priority VL Arbitration table (IBA 7.6.9) template. - VLArb high table: High priority VL Arbitration table (IBA 7.6.9) template. - SL2VL: SL2VL Mapping table (IBA 7.6.6) template. It is a list of VLs corresponding to SLs 0-15 (Note that VL15 used here means drop this SL). There are separate QoS configuration parameters sets for various target types: CAs, routers, switch external ports, and switch's enhanced port 0. The names of such parameters are prefixed by "qos_<type>_" string. Here is a full list of the currently supported sets: qos_ca_ - QoS configuration parameters set for CAs. qos_rtr_ - parameters set for routers. qos_sw0_ - parameters set for switches' port 0. qos_swe_ - parameters set for switches' external ports. Here's the example of typical default values for CAs and switches' external ports (hard-coded in OpenSM initialization): qos_ca_max_vls=15 qos_ca_high_limit=0 qos_ca_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 qos_ca_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 qos_ca_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 qos_swe_max_vls=15 qos_swe_high_limit=0 qos_swe_vlarb_high=0:4,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0 qos_swe_vlarb_low=0:0,1:4,2:4,3:4,4:4,5:4,6:4,7:4,8:4,9:4,10:4,11:4,12:4,13:4,14:4 qos_swe_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 VL arbitration tables (both high and low) are lists of VL/Weight pairs. Each list entry contains a VL number (values from 0-14), and a weighting value (values 0-255), indicating the number of 64 byte units (credits) which may be transmitted from that VL when its turn in the arbitration occurs. A weight of 0 indicates that this entry should be skipped.
If a list entry is programmed for VL15 or for a VL that is not supported or is not currently configured by the port, the port may either skip that entry or send from any supported VL for that entry. Note, that the same VLs may be listed multiple times in the High or Low priority arbitration tables, and, further, it can be listed in both tables. The limit of high-priority VLArb table (qos_<type>_high_limit) indicates the number of high-priority packets that can be transmitted without an opportunity to send a low-priority packet. Specifically, the number of bytes that can be sent is high_limit times 4K bytes. A high_limit value of 255 indicates that the byte limit is unbounded. Note: if the 255 value is used, the low priority VLs may be starved. A value of 0 indicates that only a single packet from the high-priority table may be sent before an opportunity is given to the low-priority table. Keep in mind that ports usually transmit packets of size equal to MTU. For instance, for 4KB MTU a single packet will require 64 credits, so in order to achieve effective VL arbitration for packets of 4KB MTU, the weighting values for each VL should be multiples of 64. Below is an example of SL2VL and VL Arbitration configuration on subnet: qos_ca_max_vls=15 qos_ca_high_limit=6 qos_ca_vlarb_high=0:4 qos_ca_vlarb_low=0:0,1:64,2:128,3:192,4:0,5:64,6:64,7:64 qos_ca_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 qos_swe_max_vls=15 qos_swe_high_limit=6 qos_swe_vlarb_high=0:4 qos_swe_vlarb_low=0:0,1:64,2:128,3:192,4:0,5:64,6:64,7:64 qos_swe_sl2vl=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,7 In this example, there are 8 VLs configured on subnet: VL0 to VL7. VL0 is defined as a high priority VL, and it is limited to 6 x 4KB = 24KB in a single transmission burst. Such configuration would suit a VL that needs low latency and uses small MTU when transmitting packets. The rest of the VLs are defined as low priority VLs with different weights, while VL4 is effectively turned off.
From jackm at dev.mellanox.co.il Wed Feb 27 06:48:02 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 27 Feb 2008 16:48:02 +0200 Subject: [ofa-general] [PATCH 0 of 2] print "event received for bogus qp" messages in debug mode only (mthca and mlx4) Message-ID: <200802271648.02911.jackm@dev.mellanox.co.il> Roland, The HCA hardware may generate an async event for a QP while the driver is in the process of destroying that QP. When the qp event handler attempts to handle the event, it notices that the QP no longer exists, prints out a message (in /var/log/messages) that an event was received for a non-existent QP, and returns (i.e., the event is ignored). The following are QP events: EVENT_TYPE_PATH_MIG EVENT_TYPE_COMM_EST EVENT_TYPE_SQ_DRAINED EVENT_TYPE_SRQ_QP_LAST_WQE EVENT_TYPE_WQ_CATAS_ERROR EVENT_TYPE_PATH_MIG_FAILED EVENT_TYPE_WQ_INVAL_REQ_ERROR EVENT_TYPE_WQ_ACCESS_ERROR Once the QP is destroyed, they are of no interest, since the QP is gone. This patch set simply changes the message to print out only when running in debug mode. - Jack From jackm at dev.mellanox.co.il Wed Feb 27 06:48:07 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 27 Feb 2008 16:48:07 +0200 Subject: [ofa-general] [PATCH 1 of 2] mlx4_core: for events for non-existent QPs, print a message only in debug mode Message-ID: <200802271648.07444.jackm@dev.mellanox.co.il> mlx4: print message only in debug mode for events received for non-existent QPs Events received for non-existent QPs are of interest only when debugging. Print messages when this occurs only if running in debug mode. In addition, add the event number to the printout. 
Signed-off-by: Jack Morgenstein Index: infiniband/drivers/net/mlx4/qp.c =================================================================== --- infiniband.orig/drivers/net/mlx4/qp.c 2008-01-28 13:07:57.000000000 +0200 +++ infiniband/drivers/net/mlx4/qp.c 2008-02-27 16:25:07.000000000 +0200 @@ -55,7 +55,8 @@ void mlx4_qp_event(struct mlx4_dev *dev, spin_unlock(&qp_table->lock); if (!qp) { - mlx4_warn(dev, "Async event for bogus QP %08x\n", qpn); + mlx4_dbg(dev, "Async event %d for bogus QP %08x\n", + event_type, qpn); return; } From jackm at dev.mellanox.co.il Wed Feb 27 06:48:08 2008 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 27 Feb 2008 16:48:08 +0200 Subject: [ofa-general] [PATCH 2 of 2] mthca: for events for non-existent QPs, print a message only in debug mode Message-ID: <200802271648.08253.jackm@dev.mellanox.co.il> mthca: print message only in debug mode for events received for non-existent QPs Events received for non-existent QPs are of interest only when debugging. Print messages when this occurs only if running in debug mode. In addition, add the event number to the printout. 
Signed-off-by: Jack Morgenstein Index: infiniband/drivers/infiniband/hw/mthca/mthca_qp.c =================================================================== --- infiniband.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2008-02-05 09:32:14.000000000 +0200 +++ infiniband/drivers/infiniband/hw/mthca/mthca_qp.c 2008-02-27 16:22:46.000000000 +0200 @@ -249,7 +249,8 @@ void mthca_qp_event(struct mthca_dev *de spin_unlock(&dev->qp_table.lock); if (!qp) { - mthca_warn(dev, "Async event for bogus QP %08x\n", qpn); + mthca_dbg(dev, "Async event %d for bogus QP %08x\n", + (int) event_type, qpn); return; } From eli at dev.mellanox.co.il Wed Feb 27 07:07:01 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Wed, 27 Feb 2008 17:07:01 +0200 Subject: [ofa-general] checksum offload patches Message-ID: <1204124821.3358.9.camel@mtls03> Roland, I am sending modified checksum offload patches for ipoib and mlx4. Let me know if you think this is the right approach and I will generate a patch also for mthca. Eli From eli at dev.mellanox.co.il Wed Feb 27 07:07:08 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Wed, 27 Feb 2008 17:07:08 +0200 Subject: [ofa-general] [PATCH] IB/ipoib: Add checksum offload support Message-ID: <1204124828.3358.10.camel@mtls03> IB/ipoib: Add checksum offload support If the HCA supports checksum offload it will have IB_DEVICE_UD_IP_CSUM set in its capability flags and this will cause ipoib to set NETIF_F_IP_CSUM. It also means that the HCA can verify checksum of incoming packets. 
Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 7 ++++++ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 12 ++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 +++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 054fab8..19a41ff 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -87,6 +87,7 @@ enum { IPOIB_MCAST_STARTED = 8, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, + IPOIB_FLAG_CSUM = 11, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -318,6 +319,7 @@ struct ipoib_dev_priv { struct dentry *mcg_dentry; struct dentry *path_dentry; #endif + int hca_caps; }; struct ipoib_ah { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 52b1beb..3bdb4dd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1380,6 +1380,9 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); ipoib_warn(priv, "enabling connected mode " "will cause multicast packet drops\n"); + + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG); + ipoib_flush_paths(dev); return count; } @@ -1388,6 +1391,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); dev->mtu = min(priv->mcast_mtu, dev->mtu); ipoib_flush_paths(dev); + + if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; + return count; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 08c4396..7450ce0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -37,6 +37,7 @@ #include #include +#include #include @@ -231,6 +232,11 @@ static void 
ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb->dev = dev; /* XXX get correct PACKET_ type here */ skb->pkt_type = PACKET_HOST; + + /* check rx csum */ + if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + netif_receive_skb(skb); repost: @@ -442,6 +448,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, return; } + if (dev->flags & NETIF_F_IP_CSUM && + skb->ip_summed == CHECKSUM_PARTIAL) + priv->tx_wr.send_flags |= IB_SEND_IP_CSUM; + else + priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; + if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), address->ah, qpn, tx_req->mapping, skb_headlen(skb), diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f96477a..2c2a922 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1100,11 +1100,26 @@ int ipoib_add_pkey_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_pkey); } +static void set_csum(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) + return; + + if (!(priv->hca_caps & IB_DEVICE_UD_IP_CSUM)) + return; + + dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + set_bit(IPOIB_FLAG_CSUM, &priv->flags); +} + static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; int result = -ENOMEM; + struct ib_device_attr *device_attr; priv = ipoib_intf_alloc(format); if (!priv) @@ -1119,6 +1134,23 @@ static struct net_device *ipoib_add_port(const char *format, goto device_init_failed; } + device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL); + if (!device_attr) { + printk(KERN_WARNING "%s: allocation of %lu bytes failed\n", + hca->name, sizeof *device_attr); + goto device_init_failed; + } + + result = ib_query_device(hca, device_attr); + if (result) { + 
printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", + hca->name, port, result); + kfree(device_attr); + goto device_init_failed; + } + priv->hca_caps = device_attr->device_cap_flags; + kfree(device_attr); + /* * Set the full membership bit, so that we join the right * broadcast group, etc. @@ -1136,6 +1168,7 @@ static struct net_device *ipoib_add_port(const char *format, } else memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); + set_csum(priv->dev); result = ipoib_dev_init(priv->dev, hca, port); if (result < 0) { -- 1.5.4.2 From eli at dev.mellanox.co.il Wed Feb 27 07:07:11 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Wed, 27 Feb 2008 17:07:11 +0200 Subject: [ofa-general] [PATCH] IB/mlx4: Add checksum offload support Message-ID: <1204124831.3358.11.camel@mtls03> IB/mlx4: Add checksum offload support ConnectX devices support checksum generation and verification of TCP,UDP/IP packets. This patch checks if the HCA supports this and sets a capability flag. 
Signed-off-by: Eli Cohen Signed-off-by: Ali Ayub --- drivers/infiniband/hw/mlx4/cq.c | 9 +++++++++ drivers/infiniband/hw/mlx4/main.c | 2 ++ drivers/infiniband/hw/mlx4/qp.c | 3 +++ drivers/net/mlx4/fw.c | 3 +++ include/linux/mlx4/cq.h | 4 ++-- include/linux/mlx4/qp.h | 2 ++ 6 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 7360bba..561c526 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -309,6 +309,11 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, int is_error; u32 g_mlpath_rqpn; u16 wqe_ctr; + __be32 status; + +#define CSUM_MASK_BITS cpu_to_be32(0x13c00000) +#define CSUM_VAL_BITS cpu_to_be32(0x10400000) +#define CSUM_MASK2_BITS cpu_to_be32(0x0c000000) cqe = next_cqe_sw(cq); if (!cqe) @@ -434,6 +439,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; + status = cqe->ipoib_status; + wc->csum_ok = (status & CSUM_MASK_BITS) == CSUM_VAL_BITS && + (status & CSUM_MASK2_BITS) && + cqe->checksum == 0xffff; } return 0; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 96a39b5..ef5e9db 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -99,6 +99,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 958e205..8c1b29e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c 
+++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1436,6 +1436,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | (wr->send_flags & IB_SEND_SOLICITED ? cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | + ((wr->send_flags & IB_SEND_IP_CSUM) ? + cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | + MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; if (wr->opcode == IB_WR_SEND_WITH_IMM || diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 61dc495..63edf48 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -730,6 +730,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3); + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000); if (err) diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 0181e0a..5fdc859 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -45,11 +45,11 @@ struct mlx4_cqe { u8 sl; u8 reserved1; __be16 rlid; - u32 reserved2; + __be32 ipoib_status; __be32 byte_cnt; __be16 wqe_index; __be16 checksum; - u8 reserved3[3]; + u8 reserved2[3]; u8 owner_sr_opcode; }; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 09a2230..f30e965 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -162,6 +162,8 @@ enum { MLX4_WQE_CTRL_FENCE = 1 << 6, MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, MLX4_WQE_CTRL_SOLICITED = 1 << 1, + MLX4_WQE_CTRL_IP_CSUM = 1 << 4, + MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, }; struct mlx4_wqe_ctrl_seg { -- 1.5.4.2 From changquing.tang at hp.com Wed Feb 27 07:12:22 2008 From: changquing.tang at hp.com (Tang, Changqing) Date: Wed, 27 Feb 2008 15:12:22 +0000 Subject: [ofa-general] Can we provide a qp_num when creating a QP ? 
In-Reply-To: References: Message-ID: Thanks. I am thinking the similar thing of reserved port in TCP/IP world. --CQ > -----Original Message----- > From: Hoang-Nam Nguyen [mailto:HNGUYEN at de.ibm.com] > Sent: Wednesday, February 27, 2008 8:23 AM > To: Tang, Changqing > Cc: general at lists.openfabrics.org; > general-bounces at lists.openfabrics.org > Subject: Re: [ofa-general] Can we provide a qp_num when > creating a QP ? > > Hi, > > I have asked this question before. To wire a QP connection, > > each side need to know peer's side port lid and qp_num. For > MPI with > > many ranks, this is an alltoall exchange. > > > > If we can create a QP with provided qp_num, then > MPI does not > > need the qp_num exchange, for a QP-pair between two > processes, MPI can > > figure out peer QP's qp_num. So we can eliminate the third-party > > channel to exchange information, and speedup the startup time. > > > > Curently qp_num is always a return value from the > driver. If > > we can suggest a qp_num when creating a QP, and the qp_num > is already > > used, then IBV can either error out, or pick up another number for > > app. > We can not support this for ehca. > Regards > Nam > > From kliteyn at dev.mellanox.co.il Wed Feb 27 07:13:52 2008 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 27 Feb 2008 17:13:52 +0200 Subject: [ofa-general] [PATCH] opensm/man: added -Y/--qos_policy_file option to OSM man Message-ID: <47C57E30.4070103@dev.mellanox.co.il> Sasha, Added -Y/--qos_policy_file option description to OpenSM manual. Please apply to ofed_1_3 and master. 
Signed-off-by: Yevgeny Kliteynik --- opensm/man/opensm.8.in | 9 +++++++-- 1 files changed, 7 insertions(+), 2 deletions(-) diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in index 97eecc8..5322ab7 100644 --- a/opensm/man/opensm.8.in +++ b/opensm/man/opensm.8.in @@ -1,4 +1,4 @@ -.TH OPENSM 8 "Aug 16, 2007" "OpenIB" "OpenIB Management" +.TH OPENSM 8 "Feb 27, 2008" "OpenIB" "OpenIB Management" .SH NAME opensm \- InfiniBand subnet manager and administration (SM/SA) @@ -17,7 +17,8 @@ opensm \- InfiniBand subnet manager and administration (SM/SA) [\-console [off | local | socket | loopback]] [\-console-port ] [\-i(gnore-guids) ] [\-f | \-\-log_file] [\-L | \-\-log_limit ] [\-e(rase_log_file)] [\-P(config)] -[\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] +[\-Q | \-\-qos [\-Y | \-\-qos_policy_file ]] +[\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] [\-B | \-\-daemon] [\-I | \-\-inactive] [\-\-perfmgr] [\-\-perfmgr_sweep_time_s ] [\-\-prefix_routes_file ] @@ -212,6 +213,10 @@ The default path is \fB\%@CONF_DIR@/prefix\-routes.conf\fP. \fB\-Q\fR, \fB\-\-qos\fR This option enables QoS setup. It is disabled by default. .TP +\fB\-Y\fR, \fB\-\-qos_policy_file\fR +This option defines the optional QoS policy file. The default +name is \fB\%@CONF_DIR@/@QOS_POLICY_FILE@\fP. +.TP \fB\-N\fR, \fB\-\-no_part_enforce\fR This option disables partition enforcement on switch external ports. .TP -- 1.5.1.4 From typesettersn4 at wernke.com Wed Feb 27 07:54:24 2008 From: typesettersn4 at wernke.com (Heriberto Kendall) Date: Wed, 27 Feb 2008 10:54:24 -0500 Subject: [ofa-general] JoySuperDick Message-ID: <01c8792f$1d2d7000$63199fbe@typesettersn4> EliSizableFuckstick http://www.bnidaieatw.com From weiny2 at llnl.gov Wed Feb 27 09:10:03 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 27 Feb 2008 09:10:03 -0800 Subject: [ofa-general] OpenSM Console Ideas? 
In-Reply-To: <1204048748.454.90.camel@hrosenstock-ws.xsigo.com> References: <47BE16F2.8000507@llnl.gov> <1203689708.8793.159.camel@hrosenstock-ws.xsigo.com> <47C4425A.6050703@llnl.gov> <1204048748.454.90.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080227091003.0cd4f0ad.weiny2@llnl.gov> On Tue, 26 Feb 2008 09:59:08 -0800 Hal Rosenstock wrote: > Hi Tim, > > On Tue, 2008-02-26 at 08:46 -0800, Timothy A. Meier wrote: > > Hi Hal, > > I haven't had very much feedback yet. Do you have any idea how many people > > use the console? > > No idea. I will chime in; here at LLNL we use the console extensively. Perhaps this is just because I was so involved in the development of it, but one of the first things I do when we are having IB problems is check the "status" of OpenSM via the console. The two most useful things you can do are: 1) get "status" (sweeping, idle, etc.) 2) change the log level We have found it is _very_ useful to find out how OpenSM views the fabric. As well as using the diag tools. Furthermore as OpenSM improves we can use this interface for faster and less "fabric invasive" information gathering than the diags can provide. Finally, we like the ability to control the SM. Changes such as log level, forcing a "resweep", changing SM priority, or turning the perfmgr on and off have been used for a variety of testing. This ties into what I asked in Boomtown; Do users just "hope IB works"? Perhaps IB does "just work" for many installations but as IB grows I think we will see a need for more _live_ network debugging techniques. That is one of the needs we have here. As such, we are always coming up with ideas for commands and diags which could benefit from a secure console interface. However, we hesitate to go forward without a secure and "standard" (read on by default) way to access this information. > > -- Hal > > > Hal Rosenstock wrote: > > > Hi Tim, > > > > > > On Thu, 2008-02-21 at 16:27 -0800, Timothy A. 
Meier wrote: > > >> LLNL uses the remote console feature in OpenSM. We have a need to secure > > >> this remote connection with authentication/authorization and encryption > > >> (specifically PAM and OpenSSL). I have a working prototype, and would > > >> like to formalize it and share/include this with OpenSM. > > >> > > >> Before I go down this path too far, I would like to solicit ideas from > > >> others who use the console. > > >> > > >> Currently, the console can be used in local, loopback, or remote modes. > > >> If security is added, should it replace other modes, or be an additional mode? > > > > > > IMO the old modes should be preserved and I would view > > > authentication/authorization and encryption as an orthogonal dimension > > > to be supported with any of those modes. > > > > > This was my initial instinct as well. Honestly, however, once we have > > a secure connection, we will probably use it exclusively. I suppose the > > local console would also be necessary. I can preserve all modes. Personally, I think we should support only local and secure. Local is useful for debug and development, while the secure connection can be used to connect to a live system. > > > > >> The intention is to use PAM for the AA framework, and OpenSSL for secure > > >> sockets. Are there any serious objections to this implementation plan? > > > > > > Is the license compatible with OpenFabrics ? > > > > > Well I am not a lawyer, but I believe that it is. OpenSSL has a dual license, > > both are BSD-style open source licenses (one for the toolkit, one for openssl). > > An alternate to OpenSSL is GNU TLS. GNU TLS is not as widely used, and has > > the GNU Lesser GPL which is supposed to be extremely lax. > > > > The PAM libraries are included with most linux distros, (RH, Debian, etc.) and > > have BSD style and GNU GPL licenses. I would like to provide an interface into OpenSM which can be on by default (compile and runtime). (Sasha already said this but...) 
My belief is; if we provide this interface to the users "by default" I think it will cause A) more users to use it and B) more development of uses/ideas for the interface. But this means making the connection secure. > > > > >> The console feature has always been a configuration/command line option, > > >> but should the secure console be conditionally compiled/linked as well? > > >> (eliminate dependency on the PAM and OpenSSL libs, pam, pam_misc, cryto, ssl). > > > > > > This might depend on the licensing. Also, on one hand, it would be nice > > > to minimize the build options, but for those where space is an issue, > > > the separate configurability of this would be useful. (Not knowing the > > > additional size of this but it sounds like it will be large enough to > > > not make this a mandatory requirement of the console). > > > > > > -- Hal > > > > > Agreed. Should it NOT include the security stuff into the build, by default? > > And the Console be disabled by default, and if enabled, default to "local"? See above; I would really like to have this stuff enabled by default. I think a lot of users don't use the tools because they don't know about them or they take too much effort to configure. Having to recompile OpenSM with the --enable-console is really too much trouble for some users. Perhaps the license issues will not allow this to happen but I don't think that is true. > > > > >> The secure console would require a relatively primitive client application, > > >> which I will probably package under opensm, just like osmtest. Make sense? > > >> > > >> Do you have any other ideas or suggestions for the remote console? > > >> A couple of things I can think of off the top of my head: 1) More ability to control OpenSM without restart. 
2) ability to read OpenSM's routing information Thanks, Ira From sashak at voltaire.com Wed Feb 27 10:43:41 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 27 Feb 2008 18:43:41 +0000 Subject: [ofa-general] Re: [PATCH] opensm/man: added -Y/--qos_policy_file option to OSM man In-Reply-To: <47C57E30.4070103@dev.mellanox.co.il> References: <47C57E30.4070103@dev.mellanox.co.il> Message-ID: <20080227184341.GB24401@sashak.voltaire.com> On 17:13 Wed 27 Feb , Yevgeny Kliteynik wrote: > Sasha, > > Added -Y/--qos_policy_file option description to OpenSM manual. > Please apply to ofed_1_3 and master. > > Signed-off-by: Yevgeny Kliteynik Applies. Thanks. Sasha From andrea at qumranet.com Wed Feb 27 11:26:10 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 27 Feb 2008 20:26:10 +0100 Subject: [ofa-general] [PATCH] mmu notifiers #v7 In-Reply-To: <20080221161028.GA14220@sgi.com> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> Message-ID: <20080227192610.GF28483@v2.random> Hello, I hope this can be considered final for .25 and be merged. Risk is zero, the only discussion here is to make an API that will last forever, functionality-wise all these patches provide zero risk and zero overhead when MMU_NOTIFIER=n. This last patch covers KVM and GRU and hopefully all other non-blocking users optimally, and the below API will hopefully last forever (but even if it lasts just for .25 and .26 is changed that's fine with us, it's a kernel _internal_ API anyway, there's absolutely nothing visible to userland).
What Christoph needs to do when he's back from vacation to support sleepable mmu notifiers is to add a CONFIG_XPMEM config option that will switch the i_mmap_lock from a semaphore to a mutex (any other change to this patch will be minor compared to that) so XPMEM hardware will have kernels compiled that way. I don't see other sane ways to remove the "atomic" parameter from the API (apparently required by Andrew for merging something not restricted to the xpmem current usage with only anonymous memory) and I don't want to have such a locking-change intrusive dependency for all other non-blocking users that are fine without having to alter how the VM works (for example KVM and GRU). Very minor changes will be required to this patch to make it work after the VM locking is altered (for example the CONFIG_XPMEM should also switch the mmu_register/unregister locking from RCU to mutex as well). XPMEM then will only compile if CONFIG_XPMEM=y and in turn the invalidate_range_* will support scheduling inside. I don't think pretending to merge all in one block (I mean including xpmem support that requires blocking methods) is a good idea anymore as long as we agree the "atomic" parameter shouldn't be merged. But we can quite easily agree on the below to be optimal for GRU/KVM and trivially extendible once a CONFIG_XPMEM is added. So this first part can go in now I think.
Signed-off-by: Andrea Arcangeli Signed-off-by: Christoph Lameter diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -228,6 +229,8 @@ struct mm_struct { #ifdef CONFIG_CGROUP_MEM_CONT struct mem_cgroup *mem_cgroup; #endif + + struct mmu_notifier_head mmu_notifier; /* MMU notifier list */ }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 --- /dev/null +++ b/include/linux/mmu_notifier.h @@ -0,0 +1,159 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include +#include + +struct mmu_notifier; + +struct mmu_notifier_ops { + /* + * Called when nobody can register any more notifier in the mm + * and after the "mn" notifier has been disarmed already. + */ + void (*release)(struct mmu_notifier *mn, + struct mm_struct *mm); + + /* + * invalidate_page is called in atomic context after any pte + * has been updated and before dropping the PT lock required + * to update any Linux pte. Once the PT lock will be released + * the pte will have its final value to export through the + * secondary MMU. Before this is invoked any secondary MMU is + * still ok to read/write to the page previously pointed by + * the Linux pte because the old page hasn't been freed yet. + * If required set_page_dirty has to be called internally to + * this method. + */ + void (*invalidate_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * Age page is called in atomic context inside the PT lock + * right after the VM is test-and-clearing the young/accessed + * bitflag in the pte. This way the VM will provide proper + * aging to the accesses to the page through the secondary + * MMUs and not only to the ones through the Linux pte. 
+ */ + int (*age_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * invalidate_range_begin() and invalidate_range_end() must be + * paired. Multiple invalidate_range_begin/ends may be nested + * or called concurrently. + */ + void (*invalidate_range_begin)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); + void (*invalidate_range_end)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); +}; + +struct mmu_notifier { + struct hlist_node hlist; + const struct mmu_notifier_ops *ops; +}; + +#ifdef CONFIG_MMU_NOTIFIER + +struct mmu_notifier_head { + struct hlist_head head; + spinlock_t lock; +}; + +#include + +/* + * RCU is used to traverse the list. A quiescent period needs to pass + * before the notifier is guaranteed to be visible to all threads. + */ +extern void mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +/* + * RCU is used to traverse the list. A quiescent period needs to pass + * before the "struct mmu_notifier" can be freed. Alternatively it + * can be synchronously freed inside ->release when the list can't + * change anymore and nobody could possibly walk it. + */ +extern void mmu_notifier_unregister(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void mmu_notifier_release(struct mm_struct *mm); +extern int mmu_notifier_age_page(struct mm_struct *mm, + unsigned long address); + +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) +{ + INIT_HLIST_HEAD(&mnh->head); + spin_lock_init(&mnh->lock); +} + +#define mmu_notifier(function, mm, args...) 
\ + do { \ + struct mmu_notifier *__mn; \ + struct hlist_node *__n; \ + \ + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ + rcu_read_lock(); \ + hlist_for_each_entry_rcu(__mn, __n, \ + &(mm)->mmu_notifier.head, \ + hlist) \ + if (__mn->ops->function) \ + __mn->ops->function(__mn, \ + mm, \ + args); \ + rcu_read_unlock(); \ + } \ + } while (0) + +#define ptep_clear_flush_notify(__vma, __address, __ptep) \ +({ \ + pte_t __pte; \ + __pte = ptep_clear_flush(__vma, __address, __ptep); \ + mmu_notifier(invalidate_page, (__vma)->vm_mm, __address); \ + __pte; \ +}) + +#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ +({ \ + int __young; \ + __young = ptep_clear_flush_young(__vma, __address, __ptep); \ + __young |= mmu_notifier_age_page((__vma)->vm_mm, __address); \ + __young; \ +}) + +#else /* CONFIG_MMU_NOTIFIER */ + +struct mmu_notifier_head {}; + +#define mmu_notifier_register(mn, mm) do {} while(0) +#define mmu_notifier_unregister(mn, mm) do {} while (0) +#define mmu_notifier_release(mm) do {} while (0) +#define mmu_notifier_age_page(mm, address) ({ 0; }) +#define mmu_notifier_head_init(mmh) do {} while (0) + +/* + * Notifiers that use the parameters that they were passed so that the + * compiler does not complain about unused variables but does proper + * parameter checks even if !CONFIG_MMU_NOTIFIER. + * Macros generate no code. + */ +#define mmu_notifier(function, mm, args...) 
\ + do { \ + if (0) { \ + struct mmu_notifier *__mn; \ + \ + __mn = (struct mmu_notifier *)(0x00ff); \ + __mn->ops->function(__mn, mm, args); \ + }; \ + } while (0) + +#define ptep_clear_flush_young_notify ptep_clear_flush_young +#define ptep_clear_flush_notify ptep_clear_flush + +#endif /* CONFIG_MMU_NOTIFIER */ + +#endif /* _LINUX_MMU_NOTIFIER_H */ diff --git a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c +++ b/kernel/fork.c @@ -362,6 +362,7 @@ static struct mm_struct * mm_init(struct if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + mmu_notifier_head_init(&mm->mmu_notifier); return mm; } diff --git a/mm/Kconfig b/mm/Kconfig --- a/mm/Kconfig +++ b/mm/Kconfig @@ -193,3 +193,7 @@ config VIRT_TO_BUS config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS + +config MMU_NOTIFIER + def_bool y + bool "MMU notifier, for paging KVM/RDMA" diff --git a/mm/Makefile b/mm/Makefile --- a/mm/Makefile +++ b/mm/Makefile @@ -33,4 +33,4 @@ obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o - +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -194,7 +194,7 @@ __xip_unmap (struct address_space * mapp if (pte) { /* Nuke the page table entry. 
*/ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); page_remove_rmap(page, vma); dec_mm_counter(mm, file_rss); BUG_ON(pte_dirty(pteval)); diff --git a/mm/fremap.c b/mm/fremap.c --- a/mm/fremap.c +++ b/mm/fremap.c @@ -214,7 +214,9 @@ asmlinkage long sys_remap_file_pages(uns spin_unlock(&mapping->i_mmap_lock); } + mmu_notifier(invalidate_range_begin, mm, start, start + size); err = populate_range(mm, vma, start, size, pgoff); + mmu_notifier(invalidate_range_end, mm, start, start + size); if (!err && !(flags & MAP_NONBLOCK)) { if (unlikely(has_write_lock)) { downgrade_write(&mm->mmap_sem); diff --git a/mm/hugetlb.c b/mm/hugetlb.c --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -755,6 +755,7 @@ void __unmap_hugepage_range(struct vm_ar BUG_ON(start & ~HPAGE_MASK); BUG_ON(end & ~HPAGE_MASK); + mmu_notifier(invalidate_range_begin, mm, start, end); spin_lock(&mm->page_table_lock); for (address = start; address < end; address += HPAGE_SIZE) { ptep = huge_pte_offset(mm, address); @@ -775,6 +776,7 @@ void __unmap_hugepage_range(struct vm_ar } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); + mmu_notifier(invalidate_range_end, mm, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page); diff --git a/mm/memory.c b/mm/memory.c --- a/mm/memory.c +++ b/mm/memory.c @@ -611,6 +611,9 @@ int copy_page_range(struct mm_struct *ds if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier(invalidate_range_begin, src_mm, addr, end); + dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { @@ -621,6 +624,11 @@ int copy_page_range(struct mm_struct *ds vma, addr, next)) return -ENOMEM; } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier(invalidate_range_end, src_mm, + 
vma->vm_start, end); + return 0; } @@ -897,7 +905,9 @@ unsigned long zap_page_range(struct vm_a lru_add_drain(); tlb = tlb_gather_mmu(mm, 0); update_hiwater_rss(mm); + mmu_notifier(invalidate_range_begin, mm, address, end); end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); + mmu_notifier(invalidate_range_end, mm, address, end); if (tlb) tlb_finish_mmu(tlb, address, end); return end; @@ -1463,10 +1473,11 @@ int apply_to_page_range(struct mm_struct { pgd_t *pgd; unsigned long next; - unsigned long end = addr + size; + unsigned long start = addr, end = addr + size; int err; BUG_ON(addr >= end); + mmu_notifier(invalidate_range_begin, mm, start, end); pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); @@ -1474,6 +1485,7 @@ int apply_to_page_range(struct mm_struct if (err) break; } while (pgd++, addr = next, addr != end); + mmu_notifier(invalidate_range_end, mm, start, end); return err; } EXPORT_SYMBOL_GPL(apply_to_page_range); @@ -1675,7 +1687,7 @@ gotten: * seen in the presence of one thread doing SMC and another * thread doing COW. */ - ptep_clear_flush(vma, address, page_table); + ptep_clear_flush_notify(vma, address, page_table); set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1747,11 +1747,13 @@ static void unmap_region(struct mm_struc lru_add_drain(); tlb = tlb_gather_mmu(mm, 0); update_hiwater_rss(mm); + mmu_notifier(invalidate_range_begin, mm, start, end); unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, next? 
next->vm_start: 0); tlb_finish_mmu(tlb, start, end); + mmu_notifier(invalidate_range_end, mm, start, end); } /* @@ -2048,6 +2050,7 @@ void exit_mmap(struct mm_struct *mm) vm_unacct_memory(nr_accounted); free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); + mmu_notifier_release(mm); /* * Walk the list again, actually closing and freeing it, diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c new file mode 100644 --- /dev/null +++ b/mm/mmu_notifier.c @@ -0,0 +1,73 @@ +/* + * linux/mm/mmu_notifier.c + * + * Copyright (C) 2008 Qumranet, Inc. + * Copyright (C) 2008 SGI + * Christoph Lameter + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include + +/* + * No synchronization. This function can only be called when only a single + * process remains that performs teardown. + */ +void mmu_notifier_release(struct mm_struct *mm) +{ + struct mmu_notifier *mn; + struct hlist_node *n, *tmp; + + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { + hlist_for_each_entry_safe(mn, n, tmp, + &mm->mmu_notifier.head, hlist) { + hlist_del(&mn->hlist); + if (mn->ops->release) + mn->ops->release(mn, mm); + } + } +} + +/* + * If no young bitflag is supported by the hardware, ->age_page can + * unmap the address and return 1 or 0 depending if the mapping previously + * existed or not. 
+ */ +int mmu_notifier_age_page(struct mm_struct *mm, unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + int young = 0; + + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, + &mm->mmu_notifier.head, hlist) { + if (mn->ops->age_page) + young |= mn->ops->age_page(mn, mm, address); + } + rcu_read_unlock(); + } + + return young; +} + +void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier.lock); + hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier.head); + spin_unlock(&mm->mmu_notifier.lock); +} +EXPORT_SYMBOL_GPL(mmu_notifier_register); + +void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) +{ + spin_lock(&mm->mmu_notifier.lock); + hlist_del_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier.lock); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister); diff --git a/mm/mprotect.c b/mm/mprotect.c --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -198,10 +198,12 @@ success: dirty_accountable = 1; } + mmu_notifier(invalidate_range_begin, mm, start, end); if (is_vm_hugetlb_page(vma)) hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); + mmu_notifier(invalidate_range_end, mm, start, end); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; diff --git a/mm/mremap.c b/mm/mremap.c --- a/mm/mremap.c +++ b/mm/mremap.c @@ -74,6 +74,7 @@ static void move_ptes(struct vm_area_str struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + unsigned long old_start; if (vma->vm_file) { /* @@ -100,6 +101,9 @@ static void move_ptes(struct vm_area_str spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); arch_enter_lazy_mmu_mode(); + old_start = old_addr; + mmu_notifier(invalidate_range_begin, vma->vm_mm, + old_start, old_end); for (; old_addr < old_end; 
old_pte++, old_addr += PAGE_SIZE, new_pte++, new_addr += PAGE_SIZE) { if (pte_none(*old_pte)) @@ -108,6 +112,7 @@ static void move_ptes(struct vm_area_str pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); set_pte_at(mm, new_addr, new_pte, pte); } + mmu_notifier(invalidate_range_end, vma->vm_mm, old_start, old_end); arch_leave_lazy_mmu_mode(); if (new_ptl != old_ptl) diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c @@ -287,7 +287,7 @@ static int page_referenced_one(struct pa if (vma->vm_flags & VM_LOCKED) { referenced++; *mapcount = 1; /* break early from loop */ - } else if (ptep_clear_flush_young(vma, address, pte)) + } else if (ptep_clear_flush_young_notify(vma, address, pte)) referenced++; /* Pretend the page is referenced if the task has the @@ -454,7 +454,7 @@ static int page_mkclean_one(struct page pte_t entry; flush_cache_page(vma, address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, address, pte); + entry = ptep_clear_flush_notify(vma, address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); set_pte_at(mm, address, pte, entry); @@ -712,14 +712,14 @@ static int try_to_unmap_one(struct page * skipped over this mm) then we should reactivate it. */ if (!migration && ((vma->vm_flags & VM_LOCKED) || - (ptep_clear_flush_young(vma, address, pte)))) { + (ptep_clear_flush_young_notify(vma, address, pte)))) { ret = SWAP_FAIL; goto out_unmap; } /* Nuke the page table entry. */ flush_cache_page(vma, address, page_to_pfn(page)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* Move the dirty bit to the physical page now the pte is gone. */ if (pte_dirty(pteval)) @@ -844,12 +844,12 @@ static void try_to_unmap_cluster(unsigne page = vm_normal_page(vma, address, *pte); BUG_ON(!page || PageAnon(page)); - if (ptep_clear_flush_young(vma, address, pte)) + if (ptep_clear_flush_young_notify(vma, address, pte)) continue; /* Nuke the page table entry. 
*/ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* If nonlinear, store the file page offset in the pte. */ if (page->index != linear_page_index(vma, address)) From a-aarons at abcmax.com Wed Feb 27 12:05:19 2008 From: a-aarons at abcmax.com (Mike Ramsey) Date: Wed, 27 Feb 2008 12:05:19 -0800 Subject: [ofa-general] Don't miss to see my pic Message-ID: <01c87939$05b7af80$7509ce74@a-aarons> Hello! I am tired this afternoon. I am nice girl that would like to chat with you. Email me at Anette at ThePaganDoorway.info only, because I am using my friend's email to write this. Don't miss some of my naughty pictures. From a.p.zijlstra at chello.nl Wed Feb 27 12:04:43 2008 From: a.p.zijlstra at chello.nl (Peter Zijlstra) Date: Wed, 27 Feb 2008 21:04:43 +0100 Subject: [ofa-general] ***SPAM*** Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080227192610.GF28483@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> Message-ID: <1204142683.6242.410.camel@lappy> On Wed, 2008-02-27 at 20:26 +0100, Andrea Arcangeli wrote: > Hello, > > I hope this will can be considered final for .25 and be merged. Risk > is zero, the only discussion here is to make an API that will last > forever, functionality-wise all these patches provides zero risk and > zero overhead when MMU_NOTIFIER=n. This last patch covers KVM and GRU > and hopefully all other non-blocking users optimally, and the below > API will hopefully last forever (but even if it lasts just for .25 and > ..26 is changed that's fine with us, it's a kernel _internal_ API > anyway, there's absolutely nothing visible to userland). 
> > What Christoph need to do when he's back from vacations to support > sleepable mmu notifiers is to add a CONFIG_XPMEM config option that > will switch the i_mmap_lock from a semaphore to a mutex (any other > change to this patch will be minor compared to that) so XPMEM hardware > will have kernels compiled that way. I don't see other sane ways to > remove the "atomic" parameter from the API (apparently required by > Andrew for merging something not restricted to the xpmem current usage > with only anonymous memory) and I don't want to have such a > locking-change intrusive dependency for all other non-blocking users > that are fine without having to alter how the VM works (for example > KVM and GRU). Very minor changes will be required to this patch to > make it work after the VM locking will be altered (for example the > CONFIG_XPMEM should also switch the mmu_register/unregister locking > from RCU to mutex as well). XPMEM then will only compile if > CONFIG_XPMEM=y and in turn the invalidate_range_* will support > scheduling inside. > > I don't think pretending to merge all in one block (I mean including > xpmem support that requires blocking methods) is good idea anymore as > long as we agree the "atomic" parameter shouldn't be merged. But we > can quite easily agree on the below to be optimal for GRU/KVM and > trivially extendible once a CONFIG_XPMEM will be added. So this first > part can go in now I think. 
> > Signed-off-by: Andrea Arcangeli > Signed-off-by: Christoph Lameter Acked-by: Peter Zijlstra > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > --- a/include/linux/mm_types.h > +++ b/include/linux/mm_types.h > @@ -10,6 +10,7 @@ > #include > #include > #include > +#include > #include > #include > > @@ -228,6 +229,8 @@ struct mm_struct { > #ifdef CONFIG_CGROUP_MEM_CONT > struct mem_cgroup *mem_cgroup; > #endif > + > + struct mmu_notifier_head mmu_notifier; /* MMU notifier list */ > }; > > #endif /* _LINUX_MM_TYPES_H */ > diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h > new file mode 100644 > --- /dev/null > +++ b/include/linux/mmu_notifier.h > @@ -0,0 +1,159 @@ > +#ifndef _LINUX_MMU_NOTIFIER_H > +#define _LINUX_MMU_NOTIFIER_H > + > +#include > +#include > + > +struct mmu_notifier; > + > +struct mmu_notifier_ops { > + /* > + * Called when nobody can register any more notifier in the mm > + * and after the "mn" notifier has been disarmed already. > + */ > + void (*release)(struct mmu_notifier *mn, > + struct mm_struct *mm); > + > + /* > + * invalidate_page is called in atomic context after any pte > + * has been updated and before dropping the PT lock required > + * to update any Linux pte. Once the PT lock will be released > + * the pte will have its final value to export through the > + * secondary MMU. Before this is invoked any secondary MMU is > + * still ok to read/write to the page previously pointed by > + * the Linux pte because the old page hasn't been freed yet. > + * If required set_page_dirty has to be called internally to > + * this method. > + */ > + void (*invalidate_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); > + > + /* > + * Age page is called in atomic context inside the PT lock > + * right after the VM is test-and-clearing the young/accessed > + * bitflag in the pte. 
This way the VM will provide proper > + * aging to the accesses to the page through the secondary > + * MMUs and not only to the ones through the Linux pte. > + */ > + int (*age_page)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address); > + > + /* > + * invalidate_range_begin() and invalidate_range_end() must be > + * paired. Multiple invalidate_range_begin/ends may be nested > + * or called concurrently. > + */ > + void (*invalidate_range_begin)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end); > + void (*invalidate_range_end)(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end); > +}; > + > +struct mmu_notifier { > + struct hlist_node hlist; > + const struct mmu_notifier_ops *ops; > +}; > + > +#ifdef CONFIG_MMU_NOTIFIER > + > +struct mmu_notifier_head { > + struct hlist_head head; > + spinlock_t lock; > +}; > + > +#include > + > +/* > + * RCU is used to traverse the list. A quiescent period needs to pass > + * before the notifier is guaranteed to be visible to all threads. > + */ > +extern void mmu_notifier_register(struct mmu_notifier *mn, > + struct mm_struct *mm); > +/* > + * RCU is used to traverse the list. A quiescent period needs to pass > + * before the "struct mmu_notifier" can be freed. Alternatively it > + * can be synchronously freed inside ->release when the list can't > + * change anymore and nobody could possibly walk it. > + */ > +extern void mmu_notifier_unregister(struct mmu_notifier *mn, > + struct mm_struct *mm); > +extern void mmu_notifier_release(struct mm_struct *mm); > +extern int mmu_notifier_age_page(struct mm_struct *mm, > + unsigned long address); > + > +static inline void mmu_notifier_head_init(struct mmu_notifier_head *mnh) > +{ > + INIT_HLIST_HEAD(&mnh->head); > + spin_lock_init(&mnh->lock); > +} > + > +#define mmu_notifier(function, mm, args...) 
\ > + do { \ > + struct mmu_notifier *__mn; \ > + struct hlist_node *__n; \ > + \ > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > + rcu_read_lock(); \ > + hlist_for_each_entry_rcu(__mn, __n, \ > + &(mm)->mmu_notifier.head, \ > + hlist) \ > + if (__mn->ops->function) \ > + __mn->ops->function(__mn, \ > + mm, \ > + args); \ > + rcu_read_unlock(); \ > + } \ > + } while (0) > + > +#define ptep_clear_flush_notify(__vma, __address, __ptep) \ > +({ \ > + pte_t __pte; \ > + __pte = ptep_clear_flush(__vma, __address, __ptep); \ > + mmu_notifier(invalidate_page, (__vma)->vm_mm, __address); \ > + __pte; \ > +}) > + > +#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ > +({ \ > + int __young; \ > + __young = ptep_clear_flush_young(__vma, __address, __ptep); \ > + __young |= mmu_notifier_age_page((__vma)->vm_mm, __address); \ > + __young; \ > +}) > + > +#else /* CONFIG_MMU_NOTIFIER */ > + > +struct mmu_notifier_head {}; > + > +#define mmu_notifier_register(mn, mm) do {} while(0) > +#define mmu_notifier_unregister(mn, mm) do {} while (0) > +#define mmu_notifier_release(mm) do {} while (0) > +#define mmu_notifier_age_page(mm, address) ({ 0; }) > +#define mmu_notifier_head_init(mmh) do {} while (0) > + > +/* > + * Notifiers that use the parameters that they were passed so that the > + * compiler does not complain about unused variables but does proper > + * parameter checks even if !CONFIG_MMU_NOTIFIER. > + * Macros generate no code. > + */ > +#define mmu_notifier(function, mm, args...) 
\ > + do { \ > + if (0) { \ > + struct mmu_notifier *__mn; \ > + \ > + __mn = (struct mmu_notifier *)(0x00ff); \ > + __mn->ops->function(__mn, mm, args); \ > + }; \ > + } while (0) > + > +#define ptep_clear_flush_young_notify ptep_clear_flush_young > +#define ptep_clear_flush_notify ptep_clear_flush > + > +#endif /* CONFIG_MMU_NOTIFIER */ > + > +#endif /* _LINUX_MMU_NOTIFIER_H */ > diff --git a/kernel/fork.c b/kernel/fork.c > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -362,6 +362,7 @@ static struct mm_struct * mm_init(struct > > if (likely(!mm_alloc_pgd(mm))) { > mm->def_flags = 0; > + mmu_notifier_head_init(&mm->mmu_notifier); > return mm; > } > > diff --git a/mm/Kconfig b/mm/Kconfig > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -193,3 +193,7 @@ config VIRT_TO_BUS > config VIRT_TO_BUS > def_bool y > depends on !ARCH_NO_VIRT_TO_BUS > + > +config MMU_NOTIFIER > + def_bool y > + bool "MMU notifier, for paging KVM/RDMA" > diff --git a/mm/Makefile b/mm/Makefile > --- a/mm/Makefile > +++ b/mm/Makefile > @@ -33,4 +33,4 @@ obj-$(CONFIG_SMP) += allocpercpu.o > obj-$(CONFIG_SMP) += allocpercpu.o > obj-$(CONFIG_QUICKLIST) += quicklist.o > obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o > - > +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o > diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c > --- a/mm/filemap_xip.c > +++ b/mm/filemap_xip.c > @@ -194,7 +194,7 @@ __xip_unmap (struct address_space * mapp > if (pte) { > /* Nuke the page table entry. 
*/ > flush_cache_page(vma, address, pte_pfn(*pte)); > - pteval = ptep_clear_flush(vma, address, pte); > + pteval = ptep_clear_flush_notify(vma, address, pte); > page_remove_rmap(page, vma); > dec_mm_counter(mm, file_rss); > BUG_ON(pte_dirty(pteval)); > diff --git a/mm/fremap.c b/mm/fremap.c > --- a/mm/fremap.c > +++ b/mm/fremap.c > @@ -214,7 +214,9 @@ asmlinkage long sys_remap_file_pages(uns > spin_unlock(&mapping->i_mmap_lock); > } > > + mmu_notifier(invalidate_range_begin, mm, start, start + size); > err = populate_range(mm, vma, start, size, pgoff); > + mmu_notifier(invalidate_range_end, mm, start, start + size); > if (!err && !(flags & MAP_NONBLOCK)) { > if (unlikely(has_write_lock)) { > downgrade_write(&mm->mmap_sem); > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -755,6 +755,7 @@ void __unmap_hugepage_range(struct vm_ar > BUG_ON(start & ~HPAGE_MASK); > BUG_ON(end & ~HPAGE_MASK); > > + mmu_notifier(invalidate_range_begin, mm, start, end); > spin_lock(&mm->page_table_lock); > for (address = start; address < end; address += HPAGE_SIZE) { > ptep = huge_pte_offset(mm, address); > @@ -775,6 +776,7 @@ void __unmap_hugepage_range(struct vm_ar > } > spin_unlock(&mm->page_table_lock); > flush_tlb_range(vma, start, end); > + mmu_notifier(invalidate_range_end, mm, start, end); > list_for_each_entry_safe(page, tmp, &page_list, lru) { > list_del(&page->lru); > put_page(page); > diff --git a/mm/memory.c b/mm/memory.c > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -611,6 +611,9 @@ int copy_page_range(struct mm_struct *ds > if (is_vm_hugetlb_page(vma)) > return copy_hugetlb_page_range(dst_mm, src_mm, vma); > > + if (is_cow_mapping(vma->vm_flags)) > + mmu_notifier(invalidate_range_begin, src_mm, addr, end); > + > dst_pgd = pgd_offset(dst_mm, addr); > src_pgd = pgd_offset(src_mm, addr); > do { > @@ -621,6 +624,11 @@ int copy_page_range(struct mm_struct *ds > vma, addr, next)) > return -ENOMEM; > } while (dst_pgd++, src_pgd++, addr = 
next, addr != end); > + > + if (is_cow_mapping(vma->vm_flags)) > + mmu_notifier(invalidate_range_end, src_mm, > + vma->vm_start, end); > + > return 0; > } > > @@ -897,7 +905,9 @@ unsigned long zap_page_range(struct vm_a > lru_add_drain(); > tlb = tlb_gather_mmu(mm, 0); > update_hiwater_rss(mm); > + mmu_notifier(invalidate_range_begin, mm, address, end); > end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); > + mmu_notifier(invalidate_range_end, mm, address, end); > if (tlb) > tlb_finish_mmu(tlb, address, end); > return end; > @@ -1463,10 +1473,11 @@ int apply_to_page_range(struct mm_struct > { > pgd_t *pgd; > unsigned long next; > - unsigned long end = addr + size; > + unsigned long start = addr, end = addr + size; > int err; > > BUG_ON(addr >= end); > + mmu_notifier(invalidate_range_begin, mm, start, end); > pgd = pgd_offset(mm, addr); > do { > next = pgd_addr_end(addr, end); > @@ -1474,6 +1485,7 @@ int apply_to_page_range(struct mm_struct > if (err) > break; > } while (pgd++, addr = next, addr != end); > + mmu_notifier(invalidate_range_end, mm, start, end); > return err; > } > EXPORT_SYMBOL_GPL(apply_to_page_range); > @@ -1675,7 +1687,7 @@ gotten: > * seen in the presence of one thread doing SMC and another > * thread doing COW. > */ > - ptep_clear_flush(vma, address, page_table); > + ptep_clear_flush_notify(vma, address, page_table); > set_pte_at(mm, address, page_table, entry); > update_mmu_cache(vma, address, entry); > lru_cache_add_active(new_page); > diff --git a/mm/mmap.c b/mm/mmap.c > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1747,11 +1747,13 @@ static void unmap_region(struct mm_struc > lru_add_drain(); > tlb = tlb_gather_mmu(mm, 0); > update_hiwater_rss(mm); > + mmu_notifier(invalidate_range_begin, mm, start, end); > unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); > vm_unacct_memory(nr_accounted); > free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, > next? 
next->vm_start: 0); > tlb_finish_mmu(tlb, start, end); > + mmu_notifier(invalidate_range_end, mm, start, end); > } > > /* > @@ -2048,6 +2050,7 @@ void exit_mmap(struct mm_struct *mm) > vm_unacct_memory(nr_accounted); > free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); > tlb_finish_mmu(tlb, 0, end); > + mmu_notifier_release(mm); > > /* > * Walk the list again, actually closing and freeing it, > diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c > new file mode 100644 > --- /dev/null > +++ b/mm/mmu_notifier.c > @@ -0,0 +1,73 @@ > +/* > + * linux/mm/mmu_notifier.c > + * > + * Copyright (C) 2008 Qumranet, Inc. > + * Copyright (C) 2008 SGI > + * Christoph Lameter > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + */ > + > +#include > +#include > +#include > + > +/* > + * No synchronization. This function can only be called when only a single > + * process remains that performs teardown. > + */ > +void mmu_notifier_release(struct mm_struct *mm) > +{ > + struct mmu_notifier *mn; > + struct hlist_node *n, *tmp; > + > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > + hlist_for_each_entry_safe(mn, n, tmp, > + &mm->mmu_notifier.head, hlist) { > + hlist_del(&mn->hlist); > + if (mn->ops->release) > + mn->ops->release(mn, mm); > + } > + } > +} > + > +/* > + * If no young bitflag is supported by the hardware, ->age_page can > + * unmap the address and return 1 or 0 depending if the mapping previously > + * existed or not. 
> + */ > +int mmu_notifier_age_page(struct mm_struct *mm, unsigned long address) > +{ > + struct mmu_notifier *mn; > + struct hlist_node *n; > + int young = 0; > + > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > + rcu_read_lock(); > + hlist_for_each_entry_rcu(mn, n, > + &mm->mmu_notifier.head, hlist) { > + if (mn->ops->age_page) > + young |= mn->ops->age_page(mn, mm, address); > + } > + rcu_read_unlock(); > + } > + > + return young; > +} > + > +void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) > +{ > + spin_lock(&mm->mmu_notifier.lock); > + hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier.head); > + spin_unlock(&mm->mmu_notifier.lock); > +} > +EXPORT_SYMBOL_GPL(mmu_notifier_register); > + > +void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) > +{ > + spin_lock(&mm->mmu_notifier.lock); > + hlist_del_rcu(&mn->hlist); > + spin_unlock(&mm->mmu_notifier.lock); > +} > +EXPORT_SYMBOL_GPL(mmu_notifier_unregister); > diff --git a/mm/mprotect.c b/mm/mprotect.c > --- a/mm/mprotect.c > +++ b/mm/mprotect.c > @@ -198,10 +198,12 @@ success: > dirty_accountable = 1; > } > > + mmu_notifier(invalidate_range_begin, mm, start, end); > if (is_vm_hugetlb_page(vma)) > hugetlb_change_protection(vma, start, end, vma->vm_page_prot); > else > change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); > + mmu_notifier(invalidate_range_end, mm, start, end); > vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); > vm_stat_account(mm, newflags, vma->vm_file, nrpages); > return 0; > diff --git a/mm/mremap.c b/mm/mremap.c > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -74,6 +74,7 @@ static void move_ptes(struct vm_area_str > struct mm_struct *mm = vma->vm_mm; > pte_t *old_pte, *new_pte, pte; > spinlock_t *old_ptl, *new_ptl; > + unsigned long old_start; > > if (vma->vm_file) { > /* > @@ -100,6 +101,9 @@ static void move_ptes(struct vm_area_str > spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); > arch_enter_lazy_mmu_mode(); 
> > + old_start = old_addr; > + mmu_notifier(invalidate_range_begin, vma->vm_mm, > + old_start, old_end); > for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, > new_pte++, new_addr += PAGE_SIZE) { > if (pte_none(*old_pte)) > @@ -108,6 +112,7 @@ static void move_ptes(struct vm_area_str > pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); > set_pte_at(mm, new_addr, new_pte, pte); > } > + mmu_notifier(invalidate_range_end, vma->vm_mm, old_start, old_end); > > arch_leave_lazy_mmu_mode(); > if (new_ptl != old_ptl) > diff --git a/mm/rmap.c b/mm/rmap.c > --- a/mm/rmap.c > +++ b/mm/rmap.c > @@ -287,7 +287,7 @@ static int page_referenced_one(struct pa > if (vma->vm_flags & VM_LOCKED) { > referenced++; > *mapcount = 1; /* break early from loop */ > - } else if (ptep_clear_flush_young(vma, address, pte)) > + } else if (ptep_clear_flush_young_notify(vma, address, pte)) > referenced++; > > /* Pretend the page is referenced if the task has the > @@ -454,7 +454,7 @@ static int page_mkclean_one(struct page > pte_t entry; > > flush_cache_page(vma, address, pte_pfn(*pte)); > - entry = ptep_clear_flush(vma, address, pte); > + entry = ptep_clear_flush_notify(vma, address, pte); > entry = pte_wrprotect(entry); > entry = pte_mkclean(entry); > set_pte_at(mm, address, pte, entry); > @@ -712,14 +712,14 @@ static int try_to_unmap_one(struct page > * skipped over this mm) then we should reactivate it. > */ > if (!migration && ((vma->vm_flags & VM_LOCKED) || > - (ptep_clear_flush_young(vma, address, pte)))) { > + (ptep_clear_flush_young_notify(vma, address, pte)))) { > ret = SWAP_FAIL; > goto out_unmap; > } > > /* Nuke the page table entry. */ > flush_cache_page(vma, address, page_to_pfn(page)); > - pteval = ptep_clear_flush(vma, address, pte); > + pteval = ptep_clear_flush_notify(vma, address, pte); > > /* Move the dirty bit to the physical page now the pte is gone. 
*/ > if (pte_dirty(pteval)) > @@ -844,12 +844,12 @@ static void try_to_unmap_cluster(unsigne > page = vm_normal_page(vma, address, *pte); > BUG_ON(!page || PageAnon(page)); > > - if (ptep_clear_flush_young(vma, address, pte)) > + if (ptep_clear_flush_young_notify(vma, address, pte)) > continue; > > /* Nuke the page table entry. */ > flush_cache_page(vma, address, pte_pfn(*pte)); > - pteval = ptep_clear_flush(vma, address, pte); > + pteval = ptep_clear_flush_notify(vma, address, pte); > > /* If nonlinear, store the file page offset in the pte. */ > if (page->index != linear_page_index(vma, address)) > From andrea at qumranet.com Wed Feb 27 14:06:56 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Wed, 27 Feb 2008 23:06:56 +0100 Subject: [ofa-general] [PATCH] KVM swapping with mmu notifiers #v7 In-Reply-To: <20080220104517.GV7128@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220104517.GV7128@v2.random> Message-ID: <20080227220656.GJ28483@v2.random> Same as before but on one hand ported to #v7 API and on the other hand ported to latest kvm.git. 
Signed-off-by: Andrea Arcangeli diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 41962e7..e1287ab 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4583329..4067b0f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -642,6 +642,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) account_shadowed(kvm, gfn); } +static void kvm_unmap_spte(struct kvm *kvm, u64 *spte) +{ + struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); + get_page(page); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(kvm); + __free_page(page); +} + +static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte, *curr_spte; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!(*spte & PT_PRESENT_MASK)); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); + curr_spte = spte; + spte = rmap_next(kvm, rmapp, spte); + kvm_unmap_spte(kvm, curr_spte); + } +} + +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + int i; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
+ */ + spin_lock(&kvm->mmu_lock); + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]); + } + } + spin_unlock(&kvm->mmu_lock); +} + +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) +{ + u64 *spte; + int young = 0; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + int _young; + u64 _spte = *spte; + BUG_ON(!(_spte & PT_PRESENT_MASK)); + _young = _spte & PT_ACCESSED_MASK; + if (_young) { + young = !!_young; + set_shadow_pte(spte, _spte & ~PT_ACCESSED_MASK); + } + spte = rmap_next(kvm, rmapp, spte); + } + return young; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + int i; + int young = 0; + + /* + * If mmap_sem isn't taken, we can look the memslots with only + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
+ */ + spin_lock(&kvm->mmu_lock); + for (i = 0; i < kvm->nmemslots; i++) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + unsigned long start = memslot->userspace_addr; + unsigned long end; + + /* mmu_lock protects userspace_addr */ + if (!start) + continue; + + end = start + (memslot->npages << PAGE_SHIFT); + if (hva >= start && hva < end) { + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; + young |= kvm_age_rmapp(kvm, &memslot->rmap[gfn_offset]); + } + } + spin_unlock(&kvm->mmu_lock); + + if (young) + kvm_flush_remote_tlbs(kvm); + + return young; +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 17f9d16..b014b19 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -380,6 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int r; struct page *page; int largepage = 0; + unsigned mmu_seq; pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -415,6 +416,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } + mmu_seq = read_seqbegin(&vcpu->kvm->arch.mmu_notifier_invalidate_lock); page = gfn_to_page(vcpu->kvm, walker.gfn); up_read(&current->mm->mmap_sem); @@ -440,6 +442,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); spin_unlock(&vcpu->kvm->mmu_lock); + + if (read_seqretry(&vcpu->kvm->arch.mmu_notifier_invalidate_lock, mmu_seq)) { + down_read(&current->mm->mmap_sem); + if (page != gfn_to_page(vcpu->kvm, walker.gfn)) + BUG(); + up_read(&current->mm->mmap_sem); + kvm_release_page_clean(page); + } + up_read(&vcpu->kvm->slots_lock); return write_pt; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6f09840..6eafb74 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3319,6 +3319,47 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 
free_page((unsigned long)vcpu->arch.pio_data); } +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) +{ + struct kvm_arch *kvm_arch; + kvm_arch = container_of(mn, struct kvm_arch, mmu_notifier); + return container_of(kvm_arch, struct kvm, arch); +} + +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + BUG_ON(mm != kvm->mm); + write_seqlock(&kvm->arch.mmu_notifier_invalidate_lock); + kvm_unmap_hva(kvm, address); + write_sequnlock(&kvm->arch.mmu_notifier_invalidate_lock); +} + +void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + for (; start < end; start += PAGE_SIZE) + kvm_mmu_notifier_invalidate_page(mn, mm, start); +} + +int kvm_mmu_notifier_age_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + BUG_ON(mm != kvm->mm); + return kvm_age_hva(kvm, address); +} + +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_page = kvm_mmu_notifier_invalidate_page, + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, + .age_page = kvm_mmu_notifier_age_page, +}; + struct kvm *kvm_arch_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); @@ -3328,6 +3369,10 @@ struct kvm *kvm_arch_create_vm(void) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + kvm->arch.mmu_notifier.ops = &kvm_mmu_notifier_ops; + mmu_notifier_register(&kvm->arch.mmu_notifier, current->mm); + seqlock_init(&kvm->arch.mmu_notifier_invalidate_lock); + return kvm; } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 024b57c..305b7c3 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -303,6 +304,9 @@ struct kvm_arch{ struct page *apic_access_page; gpa_t 
wall_clock; + + struct mmu_notifier mmu_notifier; + seqlock_t mmu_notifier_invalidate_lock; }; struct kvm_vm_stat { @@ -422,6 +426,8 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); void kvm_mmu_zap_all(struct kvm *kvm); As usual (for completeness) I append the change to the memslot readonly locking through kvm->mmu_lock: Signed-off-by: Andrea Arcangeli diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6f09840..a519fd8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3379,16 +3379,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages && !old.rmap) { + unsigned long userspace_addr; + down_write(&current->mm->mmap_sem); - memslot->userspace_addr = do_mmap(NULL, 0, - npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, - 0); + userspace_addr = do_mmap(NULL, 0, + npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + 0); up_write(&current->mm->mmap_sem); - if (IS_ERR((void *)memslot->userspace_addr)) - return PTR_ERR((void *)memslot->userspace_addr); + if (IS_ERR((void *)userspace_addr)) + return PTR_ERR((void *)userspace_addr); + + /* set userspace_addr atomically for kvm_hva_to_rmapp */ + spin_lock(&kvm->mmu_lock); + memslot->userspace_addr = userspace_addr; + spin_unlock(&kvm->mmu_lock); } else { if (!old.user_alloc && old.rmap) { int ret; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 30bf832..8f3b6d6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -326,7 +326,15 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.rmap, 0, npages * sizeof(*new.rmap)); new.user_alloc = user_alloc; - new.userspace_addr =
mem->userspace_addr; + /* + * hva_to_rmmap() serializes with the mmu_lock and to be + * safe it has to ignore memslots with !user_alloc && + * !userspace_addr. + */ + if (user_alloc) + new.userspace_addr = mem->userspace_addr; + else + new.userspace_addr = 0; } if (npages && !new.lpage_info) { int largepages = npages / KVM_PAGES_PER_HPAGE; @@ -355,14 +363,18 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.dirty_bitmap, 0, dirty_bytes); } + spin_lock(&kvm->mmu_lock); if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; *memslot = new; + spin_unlock(&kvm->mmu_lock); r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); if (r) { + spin_lock(&kvm->mmu_lock); *memslot = old; + spin_unlock(&kvm->mmu_lock); goto out_free; } From clameter at sgi.com Wed Feb 27 14:11:19 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:11:19 -0800 (PST) Subject: [ofa-general] Re: Demand paging for memory regions In-Reply-To: <20080214000103.GG31435@obsidianresearch.com> References: <47B2174E.5000708@opengridcomputing.com> <20080212232329.GC31435@obsidianresearch.com> <20080213012638.GD31435@obsidianresearch.com> <20080213040905.GQ29340@mv.qlogic.com> <20080213232308.GB7597@osc.edu> <20080214000103.GG31435@obsidianresearch.com> Message-ID: On Wed, 13 Feb 2008, Jason Gunthorpe wrote: > Christoph: It seemed to me you were first talking about > freeing/swapping/faulting RDMA'able pages - but would pure migration > as a special hardware supported case be useful like Catilan suggested? That is a special case of the proposed solution. You could mlock the regions of interest. Those can then only be migrated but not swapped out. However, I think we need some limit on the number of pages one can mlock. Otherwise the VM can get into a situation where reclaim is not possible because the majority of memory is either mlocked or pinned by I/O etc.
From clameter at sgi.com Wed Feb 27 14:23:29 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:23:29 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080219133405.GH7128@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802191954.14874.nickpiggin@yahoo.com.au> <20080219133405.GH7128@v2.random> Message-ID: On Tue, 19 Feb 2008, Andrea Arcangeli wrote: > Yes, that's why I kept maintaining my patch and I posted the last > revision to Andrew. I use pte/tlb locking of the core VM, it's > unintrusive and obviously safe. Furthermore it can be extended with > Christoph's stuff in a 100% backwards compatible fashion later if needed. How would that work? You rely on the pte locking. Thus calls are all in an atomic context. I think we need a general scheme that allows sleeping when references are invalidated. Even the GRU has performance issues when using the KVM patch. From clameter at sgi.com Wed Feb 27 14:35:59 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:35:59 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <200802201008.49933.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> Message-ID: On Wed, 20 Feb 2008, Nick Piggin wrote: > On Friday 15 February 2008 17:49, Christoph Lameter wrote: > > The invalidation of address ranges in a mm_struct needs to be > > performed when pages are removed or permissions etc change. > > > > If invalidate_range_begin() is called with locks held then we > > pass a flag into invalidate_range() to indicate that no sleeping is > > possible. Locks are only held for truncate and huge pages. > > You can't sleep inside rcu_read_lock()! Could you be specific? This refers to page migration? Hmmm...
Guess we would need to inc the refcount there instead? > I must say that for a patch that is up to v8 or whatever and is > posted twice a week to such a big cc list, it is kind of slack to > not even test it and expect other people to review it. It was tested with the GRU and XPmem. Andrea also reported success. > Also, what we are going to need here are not skeleton drivers > that just do all the *easy* bits (of registering their callbacks), > but actual fully working examples that do everything that any > real driver will need to do. If not for the sanity of the driver > writer, then for the sanity of the VM developers (I don't want > to have to understand xpmem or infiniband in order to understand > how the VM works). There are 3 different drivers that can already use it but the code is complex and not easy to review. Skeletons are easy to allow people to get started with it. > > lru_add_drain(); > > tlb = tlb_gather_mmu(mm, 0); > > update_hiwater_rss(mm); > > + mmu_notifier(invalidate_range_begin, mm, address, end, atomic); > > end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); > > if (tlb) > > tlb_finish_mmu(tlb, address, end); > > + mmu_notifier(invalidate_range_end, mm, address, end, atomic); > > return end; > > } > > > > Where do you invalidate for munmap()? zap_page_range() called from unmap_vmas(). > Also, how to you resolve the case where you are not allowed to sleep? > I would have thought either you have to handle it, in which case nobody > needs to sleep; or you can't handle it, in which case the code is > broken. That can be done in a variety of ways: 1. Change VM locking 2. Not handle file backed mappings (XPmem could work mostly in such a config) 3. Keep the refcount elevated until pages are freed in another execution context. 
From clameter at sgi.com Wed Feb 27 14:39:46 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:39:46 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080220010038.GQ7128@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080220010038.GQ7128@v2.random> Message-ID: On Wed, 20 Feb 2008, Andrea Arcangeli wrote: > Well, xpmem requirements are complex. As as side effect of the > simplicity of my approach, my patch is 100% safe since #v1. Now it > also works for GRU and it cluster invalidates. The patch has to satisfy RDMA, XPMEM, GRU and KVM. I keep hearing that we have a KVM only solution that works 100% (which makes me just switch ignore the rest of the argument because 100% solutions usually do not exist). > rcu_read_lock), no "atomic" parameters, and it doesn't open a window > where sptes have a view on older pages and linux pte has view on newer > pages (this can happen with remap_file_pages with my KVM swapping > patch to use V8 Christoph's patch). Ok so you are now getting away from keeping the refcount elevated? That was your design decision.... > > Also, how to you resolve the case where you are not allowed to sleep? > > I would have thought either you have to handle it, in which case nobody > > needs to sleep; or you can't handle it, in which case the code is > > broken. > > I also asked exactly this, glad you reasked this too. It would have helped if you would have repeated my answers that you had already gotten before. You knew I was on vacation.... 
From wtantisi at cs.cmu.edu Wed Feb 27 14:39:54 2008 From: wtantisi at cs.cmu.edu (Wittawat Tantisiriroj) Date: Wed, 27 Feb 2008 17:39:54 -0500 Subject: [ofa-general] IPoIB Connected Mode Throughput Question Message-ID: <47C5E6BA.6090103@cs.cmu.edu> Hi, We have set up a small InfiniBand cluster to do several network storage experiments over IPoIB. However, we had the problem getting a good throughput with IPoIB-connected mode. So, we followed the same benchmark that Michael S. Tsirkin did in "http://lists.openfabrics.org/pipermail/general/2006-November/029500.html". We realize that even with a simple scenario we still get only ~620MB/s throughput from IPoIB-CM. I searched around with Google, but I cannot find any information regarding IPoIB-CM throughput. My question is: - Is this throughput typical/normal in most system? - Is there any necessary tweak tuning with TCP, ib_ipoib or kernel parameters in order to get ~900 MB/s? I have tried to TCP buffer size, but it still does not improve the throughput. - Should I use a OFED distribution instead of a standard built-in with a standard kernel? (I hope it does not matter) System ===== Processor: Intel(R) Pentium(R) D CPU 3.00GHz Memory: 4GB OS: Debian Etch with 2.6.24.2 kernel Network ====== Network card: Mellanox MT25204 (InfiniHost III Lx HCA) (4x, 10Gbps) Switch: Mellanox Gazelle (MTS9600) 96 ports with 4X (10 Gb/s) each Network Software Stack: Standard IPoIB built-in with the 2.6.24.2 kernel IPoIB Configuration: Connected mode with MTU=65520 Benchmark ======== Server: ib265 # ifconfig ib0 mtu 65520 # netserver Client: ib266 # ifconfig ib0 mtu 65520 # netperf -H ib265 -f M TCP STREAM TEST to ib265 Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 
MBytes/sec 87380 16384 16384 10.01 620.27 Thank in advance, Wittawat From steiner at sgi.com Wed Feb 27 14:42:32 2008 From: steiner at sgi.com (Jack Steiner) Date: Wed, 27 Feb 2008 16:42:32 -0600 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> Message-ID: <20080227224232.GA18581@sgi.com> > > > Also, what we are going to need here are not skeleton drivers > > that just do all the *easy* bits (of registering their callbacks), > > but actual fully working examples that do everything that any > > real driver will need to do. If not for the sanity of the driver > > writer, then for the sanity of the VM developers (I don't want > > to have to understand xpmem or infiniband in order to understand > > how the VM works). > > There are 3 different drivers that can already use it but the code is > complex and not easy to review. Skeletons are easy to allow people to get > started with it. I posted the full GRU driver late last week. It is a lot of code & somewhat difficult to understand w/o access to full chip specs (sorry). The code is fairly well commented & the parts related to TLB management should be understandable. From clameter at sgi.com Wed Feb 27 14:43:41 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:43:41 -0800 (PST) Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) In-Reply-To: <200802201055.21343.nickpiggin@yahoo.com.au> References: <20080215064859.384203497@sgi.com> <20080215064933.376635032@sgi.com> <200802201055.21343.nickpiggin@yahoo.com.au> Message-ID: On Wed, 20 Feb 2008, Nick Piggin wrote: > I don't know how this is supposed to solve anything. The sleeping > problem happens I guess mostly in truncate. And all you are doing > is putting these rmap callbacks in page_mkclean and try_to_unmap. 
truncate is handled by the range invalidates. This is special code to deal with the unmap/clean of an individual page. > That doesn't seem right. To start with, the new callbacks aren't > even called in the places where invalidate_page isn't allowed to > sleep. > > The problem is unmap_mapping_range, right? And unmap_mapping_range > must walk the rmaps with the mmap lock held, which is why it can't > sleep. And it can't hold any mmap_sem so it cannot prevent address Nope. unmap_mapping_range is already handled by the range callbacks. > So in the meantime, you could have eg. a fault come in and set up a > new page for one of the processes, and that page might even get > exported via the same external driver. And now you have a totally > inconsistent view. The situation that you are imagining has already been dealt with by the earlier patches. This is only to allow sleeping while unmapping individual pages. From clameter at sgi.com Wed Feb 27 14:50:50 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:50:50 -0800 (PST) Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219142725.GA23200@sgi.com> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219142725.GA23200@sgi.com> Message-ID: On Tue, 19 Feb 2008, Jack Steiner wrote: > In general, though, I agree. Most users of mmu_notifiers would likely > required a mutex or something equivalent. The skeletons show how to do most of it using a spinlock and a counter.
From clameter at sgi.com Wed Feb 27 14:55:29 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:55:29 -0800 (PST) Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219225923.GA18912@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219225923.GA18912@wotan.suse.de> Message-ID: On Tue, 19 Feb 2008, Nick Piggin wrote: > I thought that could be used by a non-sleeping user (not intending > to try supporting sleeping users). If it is useless then it should > go away (BTW. I didn't see your recent patch, some of my confusion > I think stems from Christoph's novel way of merging and splitting > patches). What is so novel about introducing functionality step by step? From clameter at sgi.com Wed Feb 27 14:56:37 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 14:56:37 -0800 (PST) Subject: [ofa-general] Re: [patch] my mmu notifiers In-Reply-To: <20080219231157.GC18912@wotan.suse.de> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> Message-ID: On Wed, 20 Feb 2008, Nick Piggin wrote: > But why does _anybody_ (why does Christoph's patches) need to invalidate > when they are going to be more permissive? This should be done lazily by > the driver, I would have thought. Correct. If you find such places then we can avoid the invalidates there. 
From clameter at sgi.com Wed Feb 27 15:06:10 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 15:06:10 -0800 (PST) Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080227192610.GF28483@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> Message-ID: On Wed, 27 Feb 2008, Andrea Arcangeli wrote: > I hope this will can be considered final for .25 and be merged. Risk > is zero, the only discussion here is to make an API that will last > forever, functionality-wise all these patches provides zero risk and > zero overhead when MMU_NOTIFIER=n. This last patch covers KVM and GRU > and hopefully all other non-blocking users optimally, and the below Ok so it somehow works slowly with GRU and you are happy with it. What about the RDMA folks etc etc? > API will hopefully last forever (but even if it lasts just for .25 and > .26 is changed that's fine with us, it's a kernel _internal_ API > anyway, there's absolutely nothing visible to userland). Would it not be better to have a solution that fits all instead of hacking something in now and then having to modify it later? > What Christoph need to do when he's back from vacations to support > sleepable mmu notifiers is to add a CONFIG_XPMEM config option that > will switch the i_mmap_lock from a semaphore to a mutex (any other > change to this patch will be minor compared to that) so XPMEM hardware > will have kernels compiled that way. 
I don't see other sane ways to > remove the "atomic" parameter from the API (apparently required by > Andrew for merging something not restricted to the xpmem current usage > with only anonymous memory) and I don't want to have such a > locking-change intrusive dependency for all other non-blocking users > that are fine without having to alter how the VM works (for example > KVM and GRU). Very minor changes will be required to this patch to > make it work after the VM locking will be altered (for example the > CONFIG_XPMEM should also switch the mmu_register/unregister locking > from RCU to mutex as well). XPMEM then will only compile if > CONFIG_XPMEM=y and in turn the invalidate_range_* will support > scheduling inside. Hmmm.. There were earlier discussions of changing the anon vma lock to a rw lock because of contention issues in large systems. Maybe we can just generally switch the locks taken while walking rmaps to semaphores? That would still require to put the invalidate outside of the pte lock. From xma at us.ibm.com Wed Feb 27 15:17:58 2008 From: xma at us.ibm.com (Shirley Ma) Date: Wed, 27 Feb 2008 15:17:58 -0800 Subject: [ofa-general] IPoIB Connected Mode Throughput Question In-Reply-To: <47C5E6BA.6090103@cs.cmu.edu> Message-ID: Hello Wittawat, You could try sysctl -w to set up big buffer for both send and recv, for example: net.ipv4.tcp_rmem = "4096 87380 4149248" net.ipv4.tcp_wmem = "4096 87380 4149248" And please try multiple streams test instead of 1, like iperf you might see 1.2GB/s or above. Good luck! Shirley -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From andrea at qumranet.com Wed Feb 27 15:43:17 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 00:43:17 +0100 Subject: [ofa-general] Re: [kvm-devel] [PATCH] mmu notifiers #v7 In-Reply-To: References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> Message-ID: <20080227234317.GM28483@v2.random> On Wed, Feb 27, 2008 at 03:06:10PM -0800, Christoph Lameter wrote: > Ok so it somehow works slowly with GRU and you are happy with it. What As far as GRU is concerned, performance is the same as with your patch (Jack can confirm). > about the RDMA folks etc etc? If RDMA/IB folks needed to block in invalidate_range, I guess they need to do so on top of tmpfs too, and that never worked with your patch anyway. > Would it not be better to have a solution that fits all instead of hacking > something in now and then having to modify it later? The whole point is that your solution fits only GRU and KVM too. XPMEM in your patch works in a hacked mode limited to anonymous memory only, Robin already received incoming mail asking to allow xpmem to work on more than anonymous memory, so your solution-that-fits-all doesn't actually fit some of Robin's customer needs. So if it doesn't even entirely satisfy xpmem users, imagine the other potential blocking-users of this code. > Hmmm.. There were earlier discussions of changing the anon vma lock to a > rw lock because of contention issues in large systems. Maybe we can just > generally switch the locks taken while walking rmaps to semaphores? That > would still require to put the invalidate outside of the pte lock. anon_vma lock can remain a spinlock unless you also want to schedule inside try_to_unmap. 
If converting the i_mmap_lock to a mutex is a big trouble, another way that might work to allow invalidate_range to block, would be to try to boost the mm_users to prevent the mmu_notifier_release to run in another cpu the moment after i_mmap_lock spinlock is unlocked. But even if that works, it'll run slower and the mmu notifiers RCU locking should be switched to a mutex, so it'd be nice to have it as a separate option. From andrea at qumranet.com Wed Feb 27 15:57:24 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 00:57:24 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802191954.14874.nickpiggin@yahoo.com.au> <20080219133405.GH7128@v2.random> Message-ID: <20080227235724.GA8091@v2.random> On Wed, Feb 27, 2008 at 02:23:29PM -0800, Christoph Lameter wrote: > How would that work? You rely on the pte locking. Thus calls are all in an I don't rely on the pte locking in #v7, exactly to satisfy GRU (so far purely theoretical) performance complains. > atomic context. I think we need a general scheme that allows sleeping when Calls are still in atomic context until we change the i_mmap_lock to a mutex under a CONFIG_XPMEM, or unless we boost mm_users, drop the lock and restart the loop at every different mm. In any case those changes should be under CONFIG_XPMEM IMHO given desktop users definitely don't need this (regular non-blocking mmu notifiers in my patch are all what a desktop user need as far as I can tell). > references are invalidates. Even the GRU has performance issues when using > the KVM patch. GRU will perform the same with #v7 or V8. 
From maidenhoodp006 at victoriagospelhall.com Wed Feb 27 16:10:01 2008 From: maidenhoodp006 at victoriagospelhall.com (Eloise Barnard) Date: Wed, 27 Feb 2008 19:10:01 -0500 Subject: [ofa-general] MacPenisBroad Message-ID: <01c87974$59cf8a80$bc2af0c9@maidenhoodp006> AvaWallopingCock http://www.zhbvdiaeg.com From clameter at sgi.com Wed Feb 27 16:08:07 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 16:08:07 -0800 (PST) Subject: [ofa-general] Re: [kvm-devel] [PATCH] mmu notifiers #v7 In-Reply-To: <20080227234317.GM28483@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080227234317.GM28483@v2.random> Message-ID: On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > If RDMA/IB folks needed to block in invalidate_range, I guess they > need to do so on top of tmpfs too, and that never worked with your > patch anyway. How about blocking in invalidate_page()? It can be made to work... > > Would it not be better to have a solution that fits all instead of hacking > > something in now and then having to modify it later? > > The whole point is that your solution fits only GRU and KVM too. Well so we do not address the issues? > XPMEM in your patch works in a hacked mode limited to anonymous memory > only, Robin already received incoming mail asking to allow xpmem to > work on more than anonymous memory, so your solution-that-fits-all > doesn't actually fit some of Robin's customer needs. So if it doesn't > even entirely satisfy xpmem users, imagine the other potential > blocking-users of this code. The solutions have been mentioned... > anon_vma lock can remain a spinlock unless you also want to schedule > inside try_to_unmap. 
Either that or a separate rmap as also mentioned before. From clameter at sgi.com Wed Feb 27 16:10:08 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 16:10:08 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> Message-ID: On Wed, 27 Feb 2008, Christoph Lameter wrote: > Could you be specific? This refers to page migration? Hmmm... Guess we > would need to inc the refcount there instead? Argh. No, it's the callback list scanning. Yuck. No one noticed. From andrea at qumranet.com Wed Feb 27 16:11:04 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 01:11:04 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> Message-ID: <20080228001104.GB8091@v2.random> On Wed, Feb 27, 2008 at 02:35:59PM -0800, Christoph Lameter wrote: > Could you be specific? This refers to page migration? Hmmm... Guess we If the reader schedules, the synchronize_rcu will return in the other cpu and the objects in the list will be freed and overwritten, and when the task is scheduled back in, it'll follow dangling pointers... You can't use RCU if you want any of your invalidate methods to schedule. Otherwise it's like having zero locking. > 2. Not handle file backed mappings (XPmem could work mostly in such a > config) IMHO that fits under your definition of "hacking something in now and then having to modify it later". > 3. Keep the refcount elevated until pages are freed in another execution > context.
Page refcount is not enough (the mmu_notifier_release will run in another cpu the moment after i_mmap_lock is unlocked) but mm_users may prevent us to change the i_mmap_lock to a mutex, but it'll slowdown truncate as it'll have to drop the lock and restart the radix tree walk every time so a change like this better fits as a separate CONFIG_XPMEM IMHO. From clameter at sgi.com Wed Feb 27 16:14:08 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 16:14:08 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080228001104.GB8091@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> Message-ID: On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > > 3. Keep the refcount elevated until pages are freed in another execution > > context. > > Page refcount is not enough (the mmu_notifier_release will run in > another cpu the moment after i_mmap_lock is unlocked) but mm_users may > prevent us to change the i_mmap_lock to a mutex, but it'll slowdown > truncate as it'll have to drop the lock and restart the radix tree > walk every time so a change like this better fits as a separate > CONFIG_XPMEM IMHO. Erm. This would also be needed by RDMA etc. 
From andrea at qumranet.com Wed Feb 27 16:21:21 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 01:21:21 +0100 Subject: [ofa-general] Re: [kvm-devel] [PATCH] mmu notifiers #v7 In-Reply-To: References: <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080227234317.GM28483@v2.random> Message-ID: <20080228002121.GC8091@v2.random> On Wed, Feb 27, 2008 at 04:08:07PM -0800, Christoph Lameter wrote: > On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > > > If RDMA/IB folks needed to block in invalidate_range, I guess they > > need to do so on top of tmpfs too, and that never worked with your > > patch anyway. > > How about blocking in invalidate_page()? It can be made to work... Yes, it can be made to work with even more extended VM changes than to only allow invalidate_range to schedule. Those core VM changes should only be done "by default" (w/o CONFIG_XPMEM=y), if they're doing good to the VM regardless of xpmem requirements. And I'm not really sure of that. I think they don't do any good or they would be a mutex already... > Well so we do not address the issues? I'm not suggesting not to address the issues, just that those issues requires VM core changes, and likely those changes should be switchable under a CONFIG_XPMEM, so I see no reason to delay the mmu notifier until those changes are done and merged too. It's kind of a separate problem. > Either that or a separate rmap as also mentioned before. DRI also wants invalidate_page by (mm,addr). 
From clameter at sgi.com Wed Feb 27 16:24:53 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 16:24:53 -0800 (PST) Subject: [ofa-general] Re: [kvm-devel] [PATCH] mmu notifiers #v7 In-Reply-To: <20080228002121.GC8091@v2.random> References: <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080227234317.GM28483@v2.random> <20080228002121.GC8091@v2.random> Message-ID: On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > I'm not suggesting not to address the issues, just that those issues > requires VM core changes, and likely those changes should be > switchable under a CONFIG_XPMEM, so I see no reason to delay the mmu > notifier until those changes are done and merged too. It's kind of a > separate problem. No its the core problem of the mmu notifier. It needs to be usable for a lot of scenarios. From andrea at qumranet.com Wed Feb 27 16:38:17 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 01:38:17 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080220010038.GQ7128@v2.random> Message-ID: <20080228003817.GD8091@v2.random> On Wed, Feb 27, 2008 at 02:39:46PM -0800, Christoph Lameter wrote: > On Wed, 20 Feb 2008, Andrea Arcangeli wrote: > > > Well, xpmem requirements are complex. As as side effect of the > > simplicity of my approach, my patch is 100% safe since #v1. Now it > > also works for GRU and it cluster invalidates. > > The patch has to satisfy RDMA, XPMEM, GRU and KVM. 
I keep hearing that we > have a KVM only solution that works 100% (which makes me just switch > ignore the rest of the argument because 100% solutions usually do not > exist). I only said 100% safe, I didn't imply anything other than it won't crash the kernel ;). #v6 and #v7 only leaves XPMEM out AFIK, and that can be supported later with a CONFIG_XPMEM that purely changes some VM locking. #v7 also provides maximum performance to GRU. > > rcu_read_lock), no "atomic" parameters, and it doesn't open a window > > where sptes have a view on older pages and linux pte has view on newer > > pages (this can happen with remap_file_pages with my KVM swapping > > patch to use V8 Christoph's patch). > > Ok so you are now getting away from keeping the refcount elevated? That > was your design decision.... No, I'm not getting away from it. If I would get away from it, I would be forced to implement invalidate_range_begin. However even if I don't get away from it, the fact I only implement invalidate_range_end, and that's called after the PT lock is dropped, opens a little window with lost coherency (which may not be detectable by userland anyway). But this little window is fine for KVM and it doesn't impose any security risk. But clearly proving the locking safe becomes a bit more complex in #v7 than in #v6. > It would have helped if you would have repeated my answers that you had > already gotten before. You knew I was on vacation.... I didn't remember the BUG_ON crystal clear sorry, but not sure why you think it was your call, this was a lowlevel XPMEM question and Robin promptly answered/reminded about it infact. From andrea at qumranet.com Wed Feb 27 16:42:26 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 01:42:26 +0100 Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. 
for XPmem) In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064933.376635032@sgi.com> <200802201055.21343.nickpiggin@yahoo.com.au> Message-ID: <20080228004226.GE8091@v2.random> On Wed, Feb 27, 2008 at 02:43:41PM -0800, Christoph Lameter wrote: > Nope. unmap_mapping_range is already handled by the range callbacks. But they're called with atomic=1 on anything but anonymous memory. I understood Andrew asked to remove the atomic param and to allow sleeping for all kind of vmas. I also understood certain XPMEM customers asked to use XPMEM on something more than anonymous memory. > The situation that you are imagining has already been dealt with [..] I guess there's some misunderstanding, I think Nick was referring to the above problem. From andrea at qumranet.com Wed Feb 27 16:52:50 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 01:52:50 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> Message-ID: <20080228005249.GF8091@v2.random> On Wed, Feb 27, 2008 at 04:14:08PM -0800, Christoph Lameter wrote: > Erm. This would also be needed by RDMA etc. The only RDMA I know is Quadrics, and Quadrics apparently doesn't need to schedule inside the invalidate methods AFIK, so I doubt the above is true. It'd be interesting to know if IB is like Quadrics and it also doesn't require blocking to invalidate certain remote mappings. From clameter at sgi.com Wed Feb 27 17:01:50 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 17:01:50 -0800 (PST) Subject: [ofa-general] Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. 
for XPmem) In-Reply-To: <20080228004226.GE8091@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064933.376635032@sgi.com> <200802201055.21343.nickpiggin@yahoo.com.au> <20080228004226.GE8091@v2.random> Message-ID: On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > On Wed, Feb 27, 2008 at 02:43:41PM -0800, Christoph Lameter wrote: > > Nope. unmap_mapping_range is already handled by the range callbacks. > > But they're called with atomic=1 on anything but anonymous memory. I > understood Andrew asked to remove the atomic param and to allow > sleeping for all kind of vmas. I also understood certain XPMEM > customers asked to use XPMEM on something more than anonymous memory. Yes but the patch that is discussed here does not handle that situation. From clameter at sgi.com Wed Feb 27 17:03:21 2008 From: clameter at sgi.com (Christoph Lameter) Date: Wed, 27 Feb 2008 17:03:21 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080228005249.GF8091@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> Message-ID: On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > On Wed, Feb 27, 2008 at 04:14:08PM -0800, Christoph Lameter wrote: > > Erm. This would also be needed by RDMA etc. > > The only RDMA I know is Quadrics, and Quadrics apparently doesn't need > to schedule inside the invalidate methods AFIK, so I doubt the above > is true. It'd be interesting to know if IB is like Quadrics and it > also doesn't require blocking to invalidate certain remote mappings. RDMA works across a network and I would assume that it needs confirmation that a connection has been torn down before pages can be unmapped. 
From andrea at qumranet.com Wed Feb 27 17:10:20 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 02:10:20 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> Message-ID: <20080228011020.GG8091@v2.random> On Wed, Feb 27, 2008 at 05:03:21PM -0800, Christoph Lameter wrote: > RDMA works across a network and I would assume that it needs confirmation > that a connection has been torn down before pages can be unmapped. Depends on the latency of the network, for example with page pinning it can even try to reduce the wait time, by tearing down the mapping in range_begin and spin waiting the ack only later in range_end. From ardavis at ichips.intel.com Wed Feb 27 18:32:06 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Wed, 27 Feb 2008 18:32:06 -0800 Subject: [ofa-general] Re: [PATCH][DAPL v2] memory leak fixes In-Reply-To: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> References: <20080219111912.27986.22505.stgit@b64-10.internal.keyresearch.com> Message-ID: <47C61D26.4070003@ichips.intel.com> Patrick Marchand Latifi wrote: > Hi all, > > Here's a set of patches for dapl 2.0.x. > Please have a look and let me know if there's any issue. > > All these patches apply against the master branch. Thanks, applied. 
-arlin From ardavis at ichips.intel.com Wed Feb 27 18:31:38 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Wed, 27 Feb 2008 18:31:38 -0800 Subject: [ofa-general] Re: [PATCH][DAPL v1] memory leak fixes In-Reply-To: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> References: <20080219111704.27820.3034.stgit@b64-10.internal.keyresearch.com> Message-ID: <47C61D0A.7070602@ichips.intel.com> Patrick Marchand Latifi wrote: > Hi all, > > Here's a set of patches for dapl 1.2.x. > Please have a look and let me know if there's any issue. Thanks, applied. -arlin From ardavis at ichips.intel.com Wed Feb 27 18:36:50 2008 From: ardavis at ichips.intel.com (Arlin Davis) Date: Wed, 27 Feb 2008 18:36:50 -0800 Subject: [ofa-general] Re: [PATCH] [DAPL v1] fix reuse of va_list in debugging mode In-Reply-To: <20080224050204.23945.75341.stgit@b64-10.internal.keyresearch.com> References: <20080224050204.23945.75341.stgit@b64-10.internal.keyresearch.com> Message-ID: <47C61E42.9070501@ichips.intel.com> Patrick Marchand Latifi wrote: > Make sure we reinitialize the va_list since va_list is undefined > if a function traverses the va_list with va_arg. > > This patch fixes the debugging case when both stdout and syslog > output is wanted. > Thanks, applied fixes for v1 and v2. -arlin From jeff at splitrockpr.com Wed Feb 27 20:57:12 2008 From: jeff at splitrockpr.com (Jeffrey Scott) Date: Wed, 27 Feb 2008 20:57:12 -0800 Subject: [ofa-general] Sonoma Workshop discount -- last chance Message-ID: <897E165C52EE4980A1CF62E3788BBE8B@Gaucho> The OpenFabrics Alliance is hosting the 4th Annual International Sonoma Workshop from April 7-9. The early bird registration period runs through Friday, February 29. You have two more days to register at the discounted rate of $495. To register now, visit http://www.acteva.com/booking.cfm?bevaID=149831. Membership in the Alliance is not necessary. 
At the workshop, you'll hear end users from a variety of industries discuss how the OFA is helping them address pain points in their enterprise data centers and high-performance computing environments. The Sonoma Workshop will also provide developers in the OFA community with an opportunity to hear from server and storage OEMs, distributors of major operating systems, and leading ISVs. Developers will learn about the support plans these companies have for the OpenFabrics software stack as well as their views on the requirements for the software stack moving forward. Last year's Sonoma Workshop was a huge success. We're looking forward to an even better workshop this spring, including a special presentation from Andy Bechtolsheim. The workshop will take place at The Lodge at Sonoma in the heart of California's wine country. Don't miss it! Register for the workshop and reserve a hotel room today. Be sure to sign up by February 29 for the early bird registration rate. More details about the workshop are available at www.OpenFabrics.org . The OpenFabrics Alliance looks forward to seeing you in Sonoma. ----------------------------------- Jeffrey Scott Split Rock Communications for the OpenFabrics Alliance -------------- next part -------------- An HTML attachment was scrubbed... URL: From ruebeneze at yahoo.fr Wed Feb 27 18:18:34 2008 From: ruebeneze at yahoo.fr (ruebeneze at yahoo.fr) Date: Thu, 28 Feb 2008 03:18:34 +0100 (CET) Subject: [ofa-general] Dear Friend, Message-ID: <20080228021834.4F724338172@fresno224.webperoni.de> Dear Friend, I am very happy to inform you about my success in getting the funds transfered to Mexico, for investements and business establishements. Now, I want you to contact my clark at the information bellow. NAME; Mr.Okwoba, Ask him to send you the total sum of ($800,000.00) United States Dollars in an International Bank Certified Draft, which I kept for your compensation. 
Fill the bellow information and contact him through his email (okwoba_onyibo007 at yahoo.fr ) immediately without any delays. 1. Your Full Names:................................. 2. Your Address:...... .......................... 3. Your Sex:...................................... 4. Your Age:...................................... 5. Your Marital Status:...................... 6 Your Occupation:............................. 7. Your Direct Phone Number:............................. 8. Your Resident City:........................................ 9. Your Resident State:........................................ 10. Your Country:................................................. Regards, Mr RUEBEN EZE From outshinefo9 at chrystell.com Wed Feb 27 22:59:25 2008 From: outshinefo9 at chrystell.com (Shelby Collins) Date: Thu, 28 Feb 2008 14:59:25 +0800 Subject: [ofa-general] Microsoft Windows Vista Ultimate includes Message-ID: <01c87a1a$8211c480$37bf8d3d@outshinefo9> Microsoft Windows Vista Ultimate new features: • Mobility-based operating system meets all your computing needs whether you're working from home, working on the road, or searching for entertainment options • Combines all the features of a business-focused operating system, all the efficiency features of a mobility-focused operating system, and all of the digital entertainment features of a consumer-focused operating system http://heathergordineerpc.blogspot.com System Requirements • 1-gigahertz (GHz) 32-bit (x86) processor or 1-GHz 64-bit (x64) processor • 1 GB RAM • 40-GB hard disk that has 15 GB of free hard disk space (the 15GB of free space provides room for temporary file storage during the install or upgrade.) 
• Internal or external DVD-burning hardware device From backtrackingi3 at syntheticfur.com Wed Feb 27 23:19:07 2008 From: backtrackingi3 at syntheticfur.com (Liz Salter) Date: Thu, 28 Feb 2008 09:19:07 +0200 Subject: [ofa-general] Microsoft Windows ready to download Message-ID: <01c879ea$f7fe3780$664ca24e@backtrackingi3> Microsoft Windows Vista Ultimate new features: • Mobility-based operating system meets all your computing needs whether you're working from home, working on the road, or searching for entertainment options • Combines all the features of a business-focused operating system, all the efficiency features of a mobility-focused operating system, and all of the digital entertainment features of a consumer-focused operating system http://helgahaughter.blogspot.com System Requirements • 1-gigahertz (GHz) 32-bit (x86) processor or 1-GHz 64-bit (x64) processor • 1 GB RAM • 40-GB hard disk that has 15 GB of free hard disk space (the 15GB of free space provides room for temporary file storage during the install or upgrade.) • Internal or external DVD-burning hardware device From izike at qumranet.com Thu Feb 28 00:42:06 2008 From: izike at qumranet.com (izik eidus) Date: Thu, 28 Feb 2008 10:42:06 +0200 Subject: [ofa-general] Re: [PATCH] KVM swapping with mmu notifiers #v7 In-Reply-To: <20080227220656.GJ28483@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080220104517.GV7128@v2.random> <20080227220656.GJ28483@v2.random> Message-ID: <47C673DE.6000902@qumranet.com> ציטוט Andrea Arcangeli: > Same as before but one one hand ported to #v7 API and on the other > hand ported to latest kvm.git. 
> > Signed-off-by: Andrea Arcangeli > > diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig > index 41962e7..e1287ab 100644 > --- a/arch/x86/kvm/Kconfig > +++ b/arch/x86/kvm/Kconfig > @@ -21,6 +21,7 @@ config KVM > tristate "Kernel-based Virtual Machine (KVM) support" > depends on HAVE_KVM && EXPERIMENTAL > select PREEMPT_NOTIFIERS > + select MMU_NOTIFIER > select ANON_INODES > ---help--- > Support hosting fully virtualized guest machines using hardware > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index 4583329..4067b0f 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -642,6 +642,110 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) > account_shadowed(kvm, gfn); > } > > +static void kvm_unmap_spte(struct kvm *kvm, u64 *spte) > +{ > + struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); > + get_page(page); > + rmap_remove(kvm, spte); > + set_shadow_pte(spte, shadow_trap_nonpresent_pte); > + kvm_flush_remote_tlbs(kvm); > + __free_page(page); > with large page support i think we need here put_page... > +} > + > +static void kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) > +{ > + u64 *spte, *curr_spte; > + > + spte = rmap_next(kvm, rmapp, NULL); > + while (spte) { > + BUG_ON(!(*spte & PT_PRESENT_MASK)); > + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); > + curr_spte = spte; > + spte = rmap_next(kvm, rmapp, spte); > + kvm_unmap_spte(kvm, curr_spte); > + } > +} > + > +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva) > +{ > + int i; > + > + /* > + * If mmap_sem isn't taken, we can look the memslots with only > + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
> + */ > + spin_lock(&kvm->mmu_lock); > + for (i = 0; i < kvm->nmemslots; i++) { > + struct kvm_memory_slot *memslot = &kvm->memslots[i]; > + unsigned long start = memslot->userspace_addr; > + unsigned long end; > + > + /* mmu_lock protects userspace_addr */ > + if (!start) > + continue; > + > + end = start + (memslot->npages << PAGE_SHIFT); > + if (hva >= start && hva < end) { > + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; > + kvm_unmap_rmapp(kvm, &memslot->rmap[gfn_offset]); > + } > + } > + spin_unlock(&kvm->mmu_lock); > +} > + > +static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) > +{ > + u64 *spte; > + int young = 0; > + > + spte = rmap_next(kvm, rmapp, NULL); > + while (spte) { > + int _young; > + u64 _spte = *spte; > + BUG_ON(!(_spte & PT_PRESENT_MASK)); > + _young = _spte & PT_ACCESSED_MASK; > + if (_young) { > + young = !!_young; > + set_shadow_pte(spte, _spte & ~PT_ACCESSED_MASK); > + } > + spte = rmap_next(kvm, rmapp, spte); > + } > + return young; > +} > + > +int kvm_age_hva(struct kvm *kvm, unsigned long hva) > +{ > + int i; > + int young = 0; > + > + /* > + * If mmap_sem isn't taken, we can look the memslots with only > + * the mmu_lock by skipping over the slots with userspace_addr == 0. 
> + */ > + spin_lock(&kvm->mmu_lock); > + for (i = 0; i < kvm->nmemslots; i++) { > + struct kvm_memory_slot *memslot = &kvm->memslots[i]; > + unsigned long start = memslot->userspace_addr; > + unsigned long end; > + > + /* mmu_lock protects userspace_addr */ > + if (!start) > + continue; > + > + end = start + (memslot->npages << PAGE_SHIFT); > + if (hva >= start && hva < end) { > + gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; > + young |= kvm_age_rmapp(kvm, &memslot->rmap[gfn_offset]); > + } > + } > + spin_unlock(&kvm->mmu_lock); > + > + if (young) > + kvm_flush_remote_tlbs(kvm); > + > + return young; > +} > + > #ifdef MMU_DEBUG > static int is_empty_shadow_page(u64 *spt) > { > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > index 17f9d16..b014b19 100644 > --- a/arch/x86/kvm/paging_tmpl.h > +++ b/arch/x86/kvm/paging_tmpl.h > @@ -380,6 +380,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, > int r; > struct page *page; > int largepage = 0; > + unsigned mmu_seq; > > pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code); > kvm_mmu_audit(vcpu, "pre page fault"); > @@ -415,6 +416,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, > largepage = 1; > } > } > + mmu_seq = read_seqbegin(&vcpu->kvm->arch.mmu_notifier_invalidate_lock); > page = gfn_to_page(vcpu->kvm, walker.gfn); > up_read(&current->mm->mmap_sem); > > @@ -440,6 +442,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, > ++vcpu->stat.pf_fixed; > kvm_mmu_audit(vcpu, "post page fault (fixed)"); > spin_unlock(&vcpu->kvm->mmu_lock); > + > + if (read_seqretry(&vcpu->kvm->arch.mmu_notifier_invalidate_lock, mmu_seq)) { > + down_read(&current->mm->mmap_sem); > + if (page != gfn_to_page(vcpu->kvm, walker.gfn)) > + BUG(); > + up_read(&current->mm->mmap_sem); > + kvm_release_page_clean(page); > + } > + > up_read(&vcpu->kvm->slots_lock); > > return write_pt; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6f09840..6eafb74 100644 > --- 
a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -3319,6 +3319,47 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) > free_page((unsigned long)vcpu->arch.pio_data); > } > > +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) > +{ > + struct kvm_arch *kvm_arch; > + kvm_arch = container_of(mn, struct kvm_arch, mmu_notifier); > + return container_of(kvm_arch, struct kvm, arch); > +} > + > +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address) > +{ > + struct kvm *kvm = mmu_notifier_to_kvm(mn); > + BUG_ON(mm != kvm->mm); > + write_seqlock(&kvm->arch.mmu_notifier_invalidate_lock); > + kvm_unmap_hva(kvm, address); > + write_sequnlock(&kvm->arch.mmu_notifier_invalidate_lock); > +} > + > +void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long start, unsigned long end) > +{ > + for (; start < end; start += PAGE_SIZE) > + kvm_mmu_notifier_invalidate_page(mn, mm, start); > +} > + > +int kvm_mmu_notifier_age_page(struct mmu_notifier *mn, > + struct mm_struct *mm, > + unsigned long address) > +{ > + struct kvm *kvm = mmu_notifier_to_kvm(mn); > + BUG_ON(mm != kvm->mm); > + return kvm_age_hva(kvm, address); > +} > + > +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { > + .invalidate_page = kvm_mmu_notifier_invalidate_page, > + .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, > + .age_page = kvm_mmu_notifier_age_page, > +}; > + > struct kvm *kvm_arch_create_vm(void) > { > struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); > @@ -3328,6 +3369,10 @@ struct kvm *kvm_arch_create_vm(void) > > INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); > > + kvm->arch.mmu_notifier.ops = &kvm_mmu_notifier_ops; > + mmu_notifier_register(&kvm->arch.mmu_notifier, current->mm); > + seqlock_init(&kvm->arch.mmu_notifier_invalidate_lock); > + > return kvm; > } > > diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h > 
index 024b57c..305b7c3 100644 > --- a/include/asm-x86/kvm_host.h > +++ b/include/asm-x86/kvm_host.h > @@ -13,6 +13,7 @@ > > #include > #include > +#include > > #include > #include > @@ -303,6 +304,9 @@ struct kvm_arch{ > struct page *apic_access_page; > > gpa_t wall_clock; > + > + struct mmu_notifier mmu_notifier; > + seqlock_t mmu_notifier_invalidate_lock; > }; > > struct kvm_vm_stat { > @@ -422,6 +426,8 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); > int kvm_mmu_setup(struct kvm_vcpu *vcpu); > void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); > > +void kvm_unmap_hva(struct kvm *kvm, unsigned long hva); > +int kvm_age_hva(struct kvm *kvm, unsigned long hva); > int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); > void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); > void kvm_mmu_zap_all(struct kvm *kvm); > > > As usual (for completeness) I append the change to the memslot > readonly locking through kvm->mmu_lock: > > Signed-off-by: Andrea Arcangeli > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6f09840..a519fd8 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -3379,16 +3379,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, > */ > if (!user_alloc) { > if (npages && !old.rmap) { > + unsigned long userspace_addr; > + > down_write(&current->mm->mmap_sem); > - memslot->userspace_addr = do_mmap(NULL, 0, > - npages * PAGE_SIZE, > - PROT_READ | PROT_WRITE, > - MAP_SHARED | MAP_ANONYMOUS, > - 0); > + userspace_addr = do_mmap(NULL, 0, > + npages * PAGE_SIZE, > + PROT_READ | PROT_WRITE, > + MAP_SHARED | MAP_ANONYMOUS, > + 0); > up_write(&current->mm->mmap_sem); > > - if (IS_ERR((void *)memslot->userspace_addr)) > - return PTR_ERR((void *)memslot->userspace_addr); > + if (IS_ERR((void *)userspace_addr)) > + return PTR_ERR((void *)userspace_addr); > + > + /* set userspace_addr atomically for kvm_hva_to_rmapp */ > + spin_lock(&kvm->mmu_lock); > + memslot->userspace_addr = userspace_addr; > + spin_unlock(&kvm->mmu_lock); > } 
else { > if (!old.user_alloc && old.rmap) { > int ret; > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 30bf832..8f3b6d6 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -326,7 +326,15 @@ int __kvm_set_memory_region(struct kvm *kvm, > memset(new.rmap, 0, npages * sizeof(*new.rmap)); > > new.user_alloc = user_alloc; > - new.userspace_addr = mem->userspace_addr; > + /* > + * hva_to_rmmap() serialzies with the mmu_lock and to be > + * safe it has to ignore memslots with !user_alloc && > + * !userspace_addr. > + */ > + if (user_alloc) > + new.userspace_addr = mem->userspace_addr; > + else > + new.userspace_addr = 0; > } > if (npages && !new.lpage_info) { > int largepages = npages / KVM_PAGES_PER_HPAGE; > @@ -355,14 +363,18 @@ int __kvm_set_memory_region(struct kvm *kvm, > memset(new.dirty_bitmap, 0, dirty_bytes); > } > > + spin_lock(&kvm->mmu_lock); > if (mem->slot >= kvm->nmemslots) > kvm->nmemslots = mem->slot + 1; > > *memslot = new; > + spin_unlock(&kvm->mmu_lock); > > r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); > if (r) { > + spin_lock(&kvm->mmu_lock); > *memslot = old; > + spin_unlock(&kvm->mmu_lock); > goto out_free; > } > > From eli at dev.mellanox.co.il Thu Feb 28 02:32:50 2008 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Thu, 28 Feb 2008 12:32:50 +0200 Subject: [ofa-general] IPoIB Connected Mode Throughput Question In-Reply-To: <47C5E6BA.6090103@cs.cmu.edu> References: <47C5E6BA.6090103@cs.cmu.edu> Message-ID: <1204194770.3358.43.camel@mtls03> On Wed, 2008-02-27 at 17:39 -0500, Wittawat Tantisiriroj wrote: > Hi, > We have set up a small InfiniBand cluster to do several network > storage experiments over IPoIB. However, we had the problem getting a > good throughput with IPoIB-connected mode. So, we followed the same > benchmark that Michael S. Tsirkin did in > "http://lists.openfabrics.org/pipermail/general/2006-November/029500.html". 
> We realize that even with a simple scenario we still get only ~620MB/s > throughput from IPoIB-CM. I searched around with Google, but I cannot > find any information regarding IPoIB-CM throughput. > > My question is: > > - Is this throughput typical/normal in most system? You can't say there is a typical result for this check -- it depends on the "strength" of your system. One thing you can do is watch how much of the CPU is used - you can use htop for that (it gives you per CPU utilization). If you use 100% CPU than stronger machines will give higher results. On my systems (AMD @2.4 Ghz) / mt25204 I get: [root at sw186 ~]# netperf -H 11.4.3.185 -f M TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.4.3.185 (11.4.3.185) port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. MBytes/sec 87380 16384 16384 10.01 755.20 On other systems with Arbel (mt25218) I get 980 MB/s etc. > > - Is there any necessary tweak tuning with TCP, ib_ipoib or kernel > parameters in order to get ~900 MB/s? I have tried to TCP buffer size, > but it still does not improve the throughput. > > - Should I use a OFED distribution instead of a standard built-in with a > standard kernel? (I hope it does not matter) > > System > ===== > Processor: Intel(R) Pentium(R) D CPU 3.00GHz > Memory: 4GB > OS: Debian Etch with 2.6.24.2 kernel > > Network > ====== > Network card: Mellanox MT25204 (InfiniHost III Lx HCA) (4x, 10Gbps) > Switch: Mellanox Gazelle (MTS9600) 96 ports with 4X (10 Gb/s) each > Network Software Stack: Standard IPoIB built-in with the 2.6.24.2 kernel > IPoIB Configuration: Connected mode with MTU=65520 > > Benchmark > ======== > Server: ib265 > # ifconfig ib0 mtu 65520 > # netserver > > Client: ib266 > # ifconfig ib0 mtu 65520 > # netperf -H ib265 -f M > > TCP STREAM TEST to ib265 > Recv Send Send > Socket Socket Message Elapsed > Size Size Size Time Throughput > bytes bytes bytes secs. 
MBytes/sec > 87380 16384 16384 10.01 620.27 > > Thank in advance, > Wittawat > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From holt at sgi.com Thu Feb 28 02:53:18 2008 From: holt at sgi.com (Robin Holt) Date: Thu, 28 Feb 2008 04:53:18 -0600 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080228005249.GF8091@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> Message-ID: <20080228105317.GS11391@sgi.com> On Thu, Feb 28, 2008 at 01:52:50AM +0100, Andrea Arcangeli wrote: > On Wed, Feb 27, 2008 at 04:14:08PM -0800, Christoph Lameter wrote: > > Erm. This would also be needed by RDMA etc. > > The only RDMA I know is Quadrics, and Quadrics apparently doesn't need > to schedule inside the invalidate methods AFIK, so I doubt the above > is true. It'd be interesting to know if IB is like Quadrics and it > also doesn't require blocking to invalidate certain remote mappings. We got an answer from the IB guys already. They do not track which of their handles are being used by remote processes so neither approach will work for their purposes with the exception of straight unmaps. In that case, they could use the callout to remove TLB information and rely on the lack of page table information to kill the users process. Without changes to their library spec, I don't believe anything further is possible. If they did change their library spec, I believe they could get things to work the same way that XPMEM has gotten things to work, where a message is sent to the remote side for TLB clearing and that will require sleeping. 
Thanks, Robin From vlad at lists.openfabrics.org Thu Feb 28 03:08:09 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 28 Feb 2008 03:08:09 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080228-0200 daily build status Message-ID: <20080228110810.1381BE60B8E@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with 
linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.24 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From uncorroborated84 at shoeiprint.com Thu Feb 28 03:49:41 2008 From: uncorroborated84 at shoeiprint.com (Edna Shepherd) Date: Thu, 28 Feb 2008 13:49:41 +0200 Subject: [ofa-general] Microsoft Windows Vista ready to download Message-ID: <01c87a10$c4360880$bb00fc58@uncorroborated84> Microsoft Windows Vista Ultimate new features: • Mobility-based operating system meets all your computing needs whether you're working from home, working on the road, or searching for entertainment options • Combines all the features of a business-focused operating system, all the efficiency features of a mobility-focused operating system, and all of the digital entertainment features of a consumer-focused operating system http://pollyzickefooseuo.blogspot.com System Requirements • 1-gigahertz (GHz) 32-bit (x86) processor or 1-GHz 64-bit (x64) processor • 1 GB RAM • 40-GB hard disk that has 15 GB of free hard disk space (the 15GB of free space provides room for temporary file storage during the install or upgrade.) 
• Internal or external DVD-burning hardware device From tziporet at mellanox.co.il Thu Feb 28 07:14:33 2008 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Thu, 28 Feb 2008 17:14:33 +0200 Subject: [ofa-general] OFED 1.3 GA release Message-ID: <6C2C79E72C305246B504CBA17B5500C9036E2C64@mtlexch01.mtl.com> I am happy to announce the OFED 1.3 GA release. The release can be found under: http://www.openfabrics.org/builds/ofed-1.3/release/OFED-1.3.tgz It will soon be available on the OpenFabrics download page: http://www.openfabrics.org/downloads.htm This release was produced by a joint effort of all the companies in the EWG group. I wish to thank all who contributed to its success. Tziporet ======================================================================== ======= OFED 1.3 GA Release summary: ============================ The OpenFabrics Enterprise Distribution (OFED) version 1.3 software package supports InfiniBand and iWARP fabrics. It is composed of several software modules intended for use on a computer cluster constructed as an InfiniBand subnet or an iWARP network. 
OFED package contains the following components: =============================================== The OFED Distribution package generates RPMs for installing the following: - OpenFabrics core and ULPs: - IB HCA drivers (mthca, mlx4, ipath, ehca) - iWARP RNIC driver (cxgb3, nes) - core - Upper Layer Protocols: IPoIB, SDP, SRP Initiator and target, iSER Initiator, RDS, uDAPL and qlgc_vnic - OpenFabrics utilities: - OpenSM (OSM): InfiniBand Subnet Manager - Diagnostic tools - Performance tests - MPI: - OSU MPI stack supporting the InfiniBand and iWARP interface - Open MPI stack supporting the InfiniBand and iWARP interface - OSU MVAPICH2 stack supporting the InfiniBand and iWARP interface - MPI benchmark tests (OSU benchmarks, Intel MPI benchmarks, Presta) - Extra packages: - open-iscsi: open-iscsi initiator with iSER support - ib-bonding: Bonding driver for IPoIB interface - Sources of all software modules (under conditions mentioned in the modules' LICENSE files) - Documentation Third Party Packages -------------------- The following third party packages have been tested with OFED 1.3: 1. Intel MPI, Version 3.0 - Package ID: l_mpi_p_3.0.043 2. HP MPI, Version 2.2.5.1 Main Changes from OFED 1.2.5 ============================ General changes: o Kernel code based on 2.6.24 o Quality of Service support in OpenSM, CMA, IPoIB, SRP, SDP and RDS. (See documents QoS_in_OFED.txt and QoS_management_in_OpenSM.txt for details) o Added Neteffect driver (nes) o Added SRP target package: Based on Mellanox IBGD SRPT and interfaces with SCST scsi target o New verbs to support Extended RC (XRC). o Updated SA cache Integrated with sa_query module; use of SA events (GID up/down) to provide additional synchronization; User-space control for dynamic enable/disable and update Package and install: o There is a new install.pl script. See OFED_Installation_Guide.txt for more details on the new installation and build procedures. 
o User space packages are now in different source RPMs (as opposed to one source RPM in previous OFED releases). o The option for a build without installing is no longer supported. o Added the script make-dist to generate a tarball with kernel sources for each kernel. ULPs and utilities: o See the attached release note for major changes in the ULPs and utilities. In addition, each component's release notes includes all changes from OFED 1.2.5. Supported Platforms and Operating Systems ========================================= CPU architectures: ------------------ - x86_64 - x86 - ppc64 - ia64 Linux Operating Systems: ------------------------ - RedHat EL4 up4: 2.6.9-42.ELsmp - RedHat EL4 up5: 2.6.9-55.ELsmp - RedHat EL4 up6: 2.6.9-67.ELsmp - RedHat EL5: 2.6.18-8.el5 - RedHat EL5 up1: 2.6.18-53.el5 - Fedora C6: 2.6.18-8.fc6 * - SLES10: 2.6.16.21-0.8-smp - SLES10 SP1: 2.6.16.46-0.12-smp - SLES10 SP1 up1: 2.6.16.53-0.16-smp - OpenSuSE 10.3: 2.6.22-*-* * - kernel.org: 2.6.23 and 2.6.24 * OSes that are partially tested HCAs and RNICs Supported ------------------------ This release supports IB HCAs by Mellanox Technologies, Qlogic and IBM as well as iWARP RNICs by NetEffect and Chelsio Communications . 
o Mellanox Technologies HCAs (SDR and DDR Modes are Supported): - InfiniHost (fw-23108 Rev 3.5.000) - InfiniHost III Ex (MemFree: fw-25218 Rev 5.3.000 with memory: fw-25208 Rev 4.8.200) - InfiniHost III Lx (fw-25204 Rev 1.2.000) - ConnectX IB (fw-25408 Rev 2.3.000) o Qlogic HCAs: - QHT6040 (PathScale InfiniPath HT-460) - QHT6140 (PathScale InfiniPath HT-465) - QLE6140 (PathScale InfiniPath PE-880) o IBM HCAs: - GX Dual-port SDR 4x IB HCA - GX Dual-port SDR 12x IB HCA - GX Dual-port DDR 4x IB HCA - GX Dual-port DDR 12x IB HCA o Chelsio RNICs: - S310/S320 10GbE Storage Accelerators - R310/R320 10GbE iWARP Adapters o NetEffect RNICs: - NE020 10Gb iWARP Adapter InfiniBand Switches Supported ----------------------------- This release was tested with switches and gateways provided by the following companies: - Cisco - Voltaire - Qlogic - Flextronics -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: OFED_release_notes.txt URL: From JuniormcleodBradford at micropersuasion.com Thu Feb 28 08:23:26 2008 From: JuniormcleodBradford at micropersuasion.com (Kendall Randall) Date: Thu, 28 Feb 2008 16:23:26 +0000 Subject: [ofa-general] Business Loans Message-ID: <1c0fb01c87a26$5bb89af0$1401a8c0@poste02> Need A Business Loan? Reach Over 290 Lenders with One Easy Form. 5k-200k For Your Business! http://yankig.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From bs at q-leap.de Thu Feb 28 09:42:19 2008 From: bs at q-leap.de (Bernd Schubert) Date: Thu, 28 Feb 2008 18:42:19 +0100 Subject: [ofa-general] page allocation failure Message-ID: <200802281842.19303.bs@q-leap.de> Hello, on several of our Lustre Servers we can see page allocation failures. This is with 2.6.22 + kernel modules from ofed 1.2.5 [44464.764559] Lustre: 24052:0:(ldlm_lib.c:698:target_handle_connect()) Skipped 16 previous similar messages [54132.351263] ib_cm/2: page allocation failure. 
order:0, mode:0x10d0 [54132.360738] [54132.360741] Call Trace: [54132.367803] [] show_trace+0x34/0x47 [54132.373235] [] dump_stack+0x12/0x17 [54132.378937] [] __alloc_pages+0x2a3/0x2bc [54132.386180] [] dma_alloc_pages+0x9b/0xbf [54132.395120] [] dma_alloc_coherent+0x76/0x1cc [54132.401651] [] :ib_mthca:mthca_buf_alloc+0x1bd/0x2a3 [54132.408897] [] :ib_mthca:mthca_alloc_qp_common+0x246/0x4e5 [54132.418884] [] :ib_mthca:mthca_alloc_qp+0xab/0x102 [54132.425774] [] :ib_mthca:mthca_create_qp+0x126/0x281 [54132.432716] [] :ib_core:ib_create_qp+0x17/0x91 [54132.439102] [] :rdma_cm:rdma_create_qp+0x2d/0x153 [54132.446301] [] :ko2iblnd:kiblnd_create_conn+0x81c/0x1250 [54132.456992] [] :ko2iblnd:kiblnd_passive_connect+0x605/0xdd0 [54132.469847] [] :ko2iblnd:kiblnd_cm_callback+0x255/0xeb0 [54132.478821] [] :rdma_cm:cma_req_handler+0x322/0x389 [54132.485637] [] :ib_cm:cm_process_work+0x17/0xad [54132.492182] [] :ib_cm:cm_req_handler+0x7ae/0x81b [54132.499236] [] :ib_cm:cm_work_handler+0x2d/0xbaa [54132.506690] [] run_workqueue+0x7f/0x10b [54132.512652] [] worker_thread+0xda/0xe4 [54132.520136] [] kthread+0x47/0x75 [54132.525570] [] child_rip+0xa/0x12 [54132.532975] [54132.535527] Mem-info: [54132.538157] Node 0 DMA per-cpu: [54132.542303] CPU 0: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.551752] CPU 1: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.561661] CPU 2: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.571154] CPU 3: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.580597] CPU 4: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.592354] CPU 5: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.601794] CPU 6: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.610719] CPU 7: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.619630] Node 0 DMA32 per-cpu: [54132.623551] CPU 0: Hot: hi: 186, btch: 31 usd: 49 Cold: hi: 62, btch: 15 usd: 49 [54132.632691] CPU 1: Hot: 
hi: 186, btch: 31 usd: 26 Cold: hi: 62, btch: 15 usd: 3 [54132.642680] CPU 2: Hot: hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 54 [54132.651897] CPU 3: Hot: hi: 186, btch: 31 usd: 1 Cold: hi: 62, btch: 15 usd: 13 [54132.663321] CPU 4: Hot: hi: 186, btch: 31 usd: 43 Cold: hi: 62, btch: 15 usd: 55 [54132.673282] CPU 5: Hot: hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 49 [54132.683636] CPU 6: Hot: hi: 186, btch: 31 usd: 25 Cold: hi: 62, btch: 15 usd: 1 [54132.693156] CPU 7: Hot: hi: 186, btch: 31 usd: 13 Cold: hi: 62, btch: 15 usd: 56 [54132.703412] Node 0 Normal per-cpu: [54132.707024] CPU 0: Hot: hi: 186, btch: 31 usd: 130 Cold: hi: 62, btch: 15 usd: 14 [54132.719317] CPU 1: Hot: hi: 186, btch: 31 usd: 81 Cold: hi: 62, btch: 15 usd: 1 [54132.729276] CPU 2: Hot: hi: 186, btch: 31 usd: 134 Cold: hi: 62, btch: 15 usd: 2 [54132.738819] CPU 3: Hot: hi: 186, btch: 31 usd: 124 Cold: hi: 62, btch: 15 usd: 8 [54132.748078] CPU 4: Hot: hi: 186, btch: 31 usd: 21 Cold: hi: 62, btch: 15 usd: 4 [54132.758029] CPU 5: Hot: hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 9 [54132.766855] CPU 6: Hot: hi: 186, btch: 31 usd: 120 Cold: hi: 62, btch: 15 usd: 13 [54132.776462] CPU 7: Hot: hi: 186, btch: 31 usd: 166 Cold: hi: 62, btch: 15 usd: 12 [54132.786009] Active:28507 inactive:62701 dirty:8386 writeback:27 unstable:0 [54132.786010] free:5586 slab:273528 mapped:2136 pagetables:699 bounce:0 [54132.803082] Node 0 DMA free:11192kB min:20kB low:24kB high:28kB active:0kB inactive:0kB present:10660kB pages_scanned:0 all_unreclaimable? yes [54132.816507] lowmem_reserve[]: 0 3255 4013 [54132.820811] Node 0 DMA32 free:9812kB min:6564kB low:8204kB high:9844kB active:52536kB inactive:134508kB present:3333728kB pages_scanned:0 all_unreclaimable? no [54132.839252] lowmem_reserve[]: 0 0 757 [54132.843205] Node 0 Normal free:1340kB min:1524kB low:1904kB high:2284kB active:61492kB inactive:116296kB present:775680kB pages_scanned:800 all_unreclaimable? 
no [54132.859932] lowmem_reserve[]: 0 0 0 [54132.863784] Node 0 DMA: 6*4kB 4*8kB 4*16kB 4*32kB 3*64kB 0*128kB 2*256kB 0*512kB 2*1024kB 0*2048kB 2*4096kB = 11192kB [54132.876957] Node 0 DMA32: 48*4kB 33*8kB 26*16kB 3*32kB 1*64kB 1*128kB 1*256kB 0*512kB 0*1024kB 0*2048kB 2*4096kB = 9608kB [54132.891138] Node 0 Normal: 0*4kB 0*8kB 1*16kB 1*32kB 0*64kB 1*128kB 1*256kB 0*512kB 1*1024kB 0*2048kB 0*4096kB = 1456kB [54132.903195] Swap cache: add 0, delete 0, find 0/0, race 0+0 [54132.909967] Free swap = 4200888kB [54132.913677] Total swap = 4200888kB [54132.917229] Free swap: 4200888kB [54132.967201] 1245184 pages of RAM [54132.971121] 231685 reserved pages [54132.974973] 58033 pages shared [54132.978329] 0 pages swap cached [54132.982267] LustreError: 4103:0:(o2iblnd.c:791:kiblnd_create_conn()) Can't create QP: -12 [54177.640441] ib_cm/5: page allocation failure. order:0, mode:0x10d0 [54177.648631] [54177.648632] Call Trace: [54177.653908] [] show_trace+0x34/0x47 [54177.660073] [] dump_stack+0x12/0x17 [54177.667176] [] __alloc_pages+0x2a3/0x2bc [54177.682952] [] dma_alloc_pages+0x9b/0xbf [54177.688811] [] dma_alloc_coherent+0x76/0x1cc [54177.695277] [] :ib_mthca:mthca_buf_alloc+0x1bd/0x2a3 [54177.702683] [] :ib_mthca:mthca_alloc_cq_buf+0x38/0x86 [54177.711034] [] :ib_mthca:mthca_init_cq+0x12a/0x397 [54177.718478] [] :ib_mthca:mthca_create_cq+0xf0/0x1be [54177.725601] [] :ib_core:ib_create_cq+0x27/0x56 [54177.732384] [] :ko2iblnd:kiblnd_create_conn+0x3b0/0x1250 [54177.739683] [] :ko2iblnd:kiblnd_passive_connect+0x605/0xdd0 [54177.748451] [] :ko2iblnd:kiblnd_cm_callback+0x255/0xeb0 [54177.757088] [] :rdma_cm:cma_req_handler+0x322/0x389 [54177.763985] [] :ib_cm:cm_process_work+0x17/0xad [54177.770664] [] :ib_cm:cm_req_handler+0x7ae/0x81b [54177.777248] [] :ib_cm:cm_work_handler+0x2d/0xbaa [54177.784045] [] run_workqueue+0x7f/0x10b [54177.790439] [] worker_thread+0xda/0xe4 [54177.799862] [] kthread+0x47/0x75 [54177.805672] [] child_rip+0xa/0x12 [54177.811717] [54177.813851] 
Mem-info: [54177.816666] Node 0 DMA per-cpu: [54177.820479] CPU 0: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.829621] CPU 1: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.839216] CPU 2: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.849488] CPU 3: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.859625] CPU 4: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.871977] CPU 5: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.881930] CPU 6: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.891980] CPU 7: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.902800] Node 0 DMA32 per-cpu: [54177.906462] CPU 0: Hot: hi: 186, btch: 31 usd: 10 Cold: hi: 62, btch: 15 usd: 58 [54177.916162] CPU 1: Hot: hi: 186, btch: 31 usd: 26 Cold: hi: 62, btch: 15 usd: 3 [54177.926049] CPU 2: Hot: hi: 186, btch: 31 usd: 139 Cold: hi: 62, btch: 15 usd: 54 [54177.936948] CPU 3: Hot: hi: 186, btch: 31 usd: 1 Cold: hi: 62, btch: 15 usd: 13 [54177.946968] CPU 4: Hot: hi: 186, btch: 31 usd: 56 Cold: hi: 62, btch: 15 usd: 55 [54177.956868] CPU 5: Hot: hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 57 [54177.965685] CPU 6: Hot: hi: 186, btch: 31 usd: 25 Cold: hi: 62, btch: 15 usd: 1 [54177.975412] CPU 7: Hot: hi: 186, btch: 31 usd: 13 Cold: hi: 62, btch: 15 usd: 56 [54177.986045] Node 0 Normal per-cpu: [54177.990527] CPU 0: Hot: hi: 186, btch: 31 usd: 128 Cold: hi: 62, btch: 15 usd: 14 [54178.002993] CPU 1: Hot: hi: 186, btch: 31 usd: 81 Cold: hi: 62, btch: 15 usd: 1 [54178.012136] CPU 2: Hot: hi: 186, btch: 31 usd: 113 Cold: hi: 62, btch: 15 usd: 2 [54178.022533] CPU 3: Hot: hi: 186, btch: 31 usd: 124 Cold: hi: 62, btch: 15 usd: 8 [54178.032316] CPU 4: Hot: hi: 186, btch: 31 usd: 27 Cold: hi: 62, btch: 15 usd: 4 [54178.041380] CPU 5: Hot: hi: 186, btch: 31 usd: 24 Cold: hi: 62, btch: 15 usd: 9 [54178.050941] CPU 6: Hot: hi: 186, btch: 31 usd: 120 Cold: hi: 62, btch: 15 usd: 13 
[54178.061180] CPU 7: Hot: hi: 186, btch: 31 usd: 166 Cold: hi: 62, btch: 15 usd: 12 [54178.072162] Active:28319 inactive:62389 dirty:8381 writeback:27 unstable:0 [54178.072163] free:5581 slab:273603 mapped:2117 pagetables:690 bounce:0 [54178.087805] Node 0 DMA free:11192kB min:20kB low:24kB high:28kB active:0kB inactive:0kB present:10660kB pages_scanned:0 all_unreclaimable? yes [54178.103794] lowmem_reserve[]: 0 3255 4013 [54178.108294] Node 0 DMA32 free:9784kB min:6564kB low:8204kB high:9844kB active:51792kB inactive:133260kB present:3333728kB pages_scanned:0 all_unreclaimable? no [54178.129648] lowmem_reserve[]: 0 0 757 [54178.133756] Node 0 Normal free:1348kB min:1524kB low:1904kB high:2284kB active:61484kB inactive:116296kB present:775680kB pages_scanned:728 all_unreclaimable? no [54178.154399] lowmem_reserve[]: 0 0 0 [54178.158450] Node 0 DMA: 6*4kB 4*8kB 4*16kB 4*32kB 3*64kB 0*128kB 2*256kB 0*512kB 2*1024kB 0*2048kB 2*4096kB = 11192kB [54178.172214] Node 0 DMA32: 65*4kB 17*8kB 37*16kB 6*32kB 0*64kB 0*128kB 1*256kB 0*512kB 0*1024kB 0*2048kB 2*4096kB = 9628kB [54178.188210] Node 0 Normal: 0*4kB 1*8kB 1*16kB 1*32kB 0*64kB 1*128kB 1*256kB 0*512kB 1*1024kB 0*2048kB 0*4096kB = 1464kB [54178.202288] Swap cache: add 0, delete 0, find 0/0, race 0+0 [54178.208654] Free swap = 4200888kB [54178.212390] Total swap = 4200888kB [54178.218597] Free swap: 4200888kB [54178.264623] 1245184 pages of RAM [54178.268302] 231685 reserved pages [54178.271793] 57602 pages shared [54178.275306] 0 pages swap cached [54178.278778] LustreError: 4106:0:(o2iblnd.c:732:kiblnd_create_conn()) Can't create CQ: -12 [54277.772930] ib_cm/2: page allocation failure. 
order:0, mode:0x10d0 [54277.781944] [54277.781945] Call Trace: [54277.788321] [] show_trace+0x34/0x47 [54277.793761] [] dump_stack+0x12/0x17 [54277.799744] [] __alloc_pages+0x2a3/0x2bc [54277.806044] [] dma_alloc_pages+0x9b/0xbf [54277.814225] [] dma_alloc_coherent+0x76/0x1cc [54277.821449] [] :ib_mthca:mthca_buf_alloc+0x1bd/0x2a3 [54277.831300] [] :ib_mthca:mthca_alloc_qp_common+0x246/0x4e5 [54277.838934] [] :ib_mthca:mthca_alloc_qp+0xab/0x102 [54277.846467] [] :ib_mthca:mthca_create_qp+0x126/0x281 [54277.854289] [] :ib_core:ib_create_qp+0x17/0x91 [54277.862274] [] :rdma_cm:rdma_create_qp+0x2d/0x153 [54277.870048] [] :ko2iblnd:kiblnd_create_conn+0x81c/0x1250 [54277.877973] [] :ko2iblnd:kiblnd_passive_connect+0x605/0xdd0 [54277.886679] [] :ko2iblnd:kiblnd_cm_callback+0x255/0xeb0 [54277.895646] [] :rdma_cm:cma_req_handler+0x322/0x389 [54277.903470] [] :ib_cm:cm_process_work+0x17/0xad [54277.910567] [] :ib_cm:cm_req_handler+0x7ae/0x81b [54277.918121] [] :ib_cm:cm_work_handler+0x2d/0xbaa [54277.926378] [] run_workqueue+0x7f/0x10b [54277.932202] [] worker_thread+0xda/0xe4 [54277.938003] [] kthread+0x47/0x75 [54277.944032] [] child_rip+0xa/0x12 [54277.950581] Any ideas? Thanks, Bernd -- Bernd Schubert Q-Leap Networks GmbH From bs at q-leap.de Thu Feb 28 09:44:07 2008 From: bs at q-leap.de (Bernd Schubert) Date: Thu, 28 Feb 2008 18:44:07 +0100 Subject: [ofa-general] page allocation failure In-Reply-To: <200802281842.19303.bs@q-leap.de> References: <200802281842.19303.bs@q-leap.de> Message-ID: <200802281844.08148.bs@q-leap.de> On Thursday 28 February 2008 18:42:19 Bernd Schubert wrote: > Hello, > > on several on our Lustre Servers we can see page allocation failures. > > This is with 2.6.22 + kernel modules from ofed 1.2.5 Er, correction, it's 1.2.5.5 > > > [44464.764559] Lustre: 24052:0:(ldlm_lib.c:698:target_handle_connect()) > Skipped 16 previous similar messages [54132.351263] ib_cm/2: page > allocation failure. 
order:0, mode:0x10d0 [54132.360738] > [54132.360741] Call Trace: > [54132.367803] [] show_trace+0x34/0x47 > [54132.373235] [] dump_stack+0x12/0x17 > [54132.378937] [] __alloc_pages+0x2a3/0x2bc > [54132.386180] [] dma_alloc_pages+0x9b/0xbf > [54132.395120] [] dma_alloc_coherent+0x76/0x1cc > [54132.401651] [] :ib_mthca:mthca_buf_alloc+0x1bd/0x2a3 > [54132.408897] [] > :ib_mthca:mthca_alloc_qp_common+0x246/0x4e5 [54132.418884] > [] :ib_mthca:mthca_alloc_qp+0xab/0x102 [54132.425774] > [] :ib_mthca:mthca_create_qp+0x126/0x281 [54132.432716] > [] :ib_core:ib_create_qp+0x17/0x91 [54132.439102] > [] :rdma_cm:rdma_create_qp+0x2d/0x153 [54132.446301] > [] :ko2iblnd:kiblnd_create_conn+0x81c/0x1250 > [54132.456992] [] > :ko2iblnd:kiblnd_passive_connect+0x605/0xdd0 [54132.469847] > [] :ko2iblnd:kiblnd_cm_callback+0x255/0xeb0 > [54132.478821] [] :rdma_cm:cma_req_handler+0x322/0x389 > [54132.485637] [] :ib_cm:cm_process_work+0x17/0xad > [54132.492182] [] :ib_cm:cm_req_handler+0x7ae/0x81b > [54132.499236] [] :ib_cm:cm_work_handler+0x2d/0xbaa > [54132.506690] [] run_workqueue+0x7f/0x10b > [54132.512652] [] worker_thread+0xda/0xe4 > [54132.520136] [] kthread+0x47/0x75 > [54132.525570] [] child_rip+0xa/0x12 > [54132.532975] > [54132.535527] Mem-info: > [54132.538157] Node 0 DMA per-cpu: > [54132.542303] CPU 0: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: > 0, btch: 1 usd: 0 [54132.551752] CPU 1: Hot: hi: 0, btch: 1 > usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.561661] CPU 2: Hot: > hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 > [54132.571154] CPU 3: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: > 0, btch: 1 usd: 0 [54132.580597] CPU 4: Hot: hi: 0, btch: 1 > usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.592354] CPU 5: Hot: > hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 > [54132.601794] CPU 6: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: > 0, btch: 1 usd: 0 [54132.610719] CPU 7: Hot: hi: 0, btch: 1 > usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54132.619630] Node 0 DMA32 > per-cpu: > [54132.623551] CPU 0: Hot: hi: 
186, btch: 31 usd: 49 Cold: hi: > 62, btch: 15 usd: 49 [54132.632691] CPU 1: Hot: hi: 186, btch: 31 > usd: 26 Cold: hi: 62, btch: 15 usd: 3 [54132.642680] CPU 2: Hot: > hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 54 > [54132.651897] CPU 3: Hot: hi: 186, btch: 31 usd: 1 Cold: hi: > 62, btch: 15 usd: 13 [54132.663321] CPU 4: Hot: hi: 186, btch: 31 > usd: 43 Cold: hi: 62, btch: 15 usd: 55 [54132.673282] CPU 5: Hot: > hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 49 > [54132.683636] CPU 6: Hot: hi: 186, btch: 31 usd: 25 Cold: hi: > 62, btch: 15 usd: 1 [54132.693156] CPU 7: Hot: hi: 186, btch: 31 > usd: 13 Cold: hi: 62, btch: 15 usd: 56 [54132.703412] Node 0 Normal > per-cpu: > [54132.707024] CPU 0: Hot: hi: 186, btch: 31 usd: 130 Cold: hi: > 62, btch: 15 usd: 14 [54132.719317] CPU 1: Hot: hi: 186, btch: 31 > usd: 81 Cold: hi: 62, btch: 15 usd: 1 [54132.729276] CPU 2: Hot: > hi: 186, btch: 31 usd: 134 Cold: hi: 62, btch: 15 usd: 2 > [54132.738819] CPU 3: Hot: hi: 186, btch: 31 usd: 124 Cold: hi: > 62, btch: 15 usd: 8 [54132.748078] CPU 4: Hot: hi: 186, btch: 31 > usd: 21 Cold: hi: 62, btch: 15 usd: 4 [54132.758029] CPU 5: Hot: > hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 9 > [54132.766855] CPU 6: Hot: hi: 186, btch: 31 usd: 120 Cold: hi: > 62, btch: 15 usd: 13 [54132.776462] CPU 7: Hot: hi: 186, btch: 31 > usd: 166 Cold: hi: 62, btch: 15 usd: 12 [54132.786009] Active:28507 > inactive:62701 dirty:8386 writeback:27 unstable:0 [54132.786010] free:5586 > slab:273528 mapped:2136 pagetables:699 bounce:0 [54132.803082] Node 0 DMA > free:11192kB min:20kB low:24kB high:28kB active:0kB inactive:0kB > present:10660kB pages_scanned:0 all_unreclaimable? yes [54132.816507] > lowmem_reserve[]: 0 3255 4013 > [54132.820811] Node 0 DMA32 free:9812kB min:6564kB low:8204kB high:9844kB > active:52536kB inactive:134508kB present:3333728kB pages_scanned:0 > all_unreclaimable? 
no [54132.839252] lowmem_reserve[]: 0 0 757 > [54132.843205] Node 0 Normal free:1340kB min:1524kB low:1904kB high:2284kB > active:61492kB inactive:116296kB present:775680kB pages_scanned:800 > all_unreclaimable? no [54132.859932] lowmem_reserve[]: 0 0 0 > [54132.863784] Node 0 DMA: 6*4kB 4*8kB 4*16kB 4*32kB 3*64kB 0*128kB 2*256kB > 0*512kB 2*1024kB 0*2048kB 2*4096kB = 11192kB [54132.876957] Node 0 DMA32: > 48*4kB 33*8kB 26*16kB 3*32kB 1*64kB 1*128kB 1*256kB 0*512kB 0*1024kB > 0*2048kB 2*4096kB = 9608kB [54132.891138] Node 0 Normal: 0*4kB 0*8kB 1*16kB > 1*32kB 0*64kB 1*128kB 1*256kB 0*512kB 1*1024kB 0*2048kB 0*4096kB = 1456kB > [54132.903195] Swap cache: add 0, delete 0, find 0/0, race 0+0 > [54132.909967] Free swap = 4200888kB > [54132.913677] Total swap = 4200888kB > [54132.917229] Free swap: 4200888kB > [54132.967201] 1245184 pages of RAM > [54132.971121] 231685 reserved pages > [54132.974973] 58033 pages shared > [54132.978329] 0 pages swap cached > [54132.982267] LustreError: 4103:0:(o2iblnd.c:791:kiblnd_create_conn()) > Can't create QP: -12 [54177.640441] ib_cm/5: page allocation failure. 
> order:0, mode:0x10d0 [54177.648631] > [54177.648632] Call Trace: > [54177.653908] [] show_trace+0x34/0x47 > [54177.660073] [] dump_stack+0x12/0x17 > [54177.667176] [] __alloc_pages+0x2a3/0x2bc > [54177.682952] [] dma_alloc_pages+0x9b/0xbf > [54177.688811] [] dma_alloc_coherent+0x76/0x1cc > [54177.695277] [] :ib_mthca:mthca_buf_alloc+0x1bd/0x2a3 > [54177.702683] [] :ib_mthca:mthca_alloc_cq_buf+0x38/0x86 > [54177.711034] [] :ib_mthca:mthca_init_cq+0x12a/0x397 > [54177.718478] [] :ib_mthca:mthca_create_cq+0xf0/0x1be > [54177.725601] [] :ib_core:ib_create_cq+0x27/0x56 > [54177.732384] [] > :ko2iblnd:kiblnd_create_conn+0x3b0/0x1250 [54177.739683] > [] :ko2iblnd:kiblnd_passive_connect+0x605/0xdd0 > [54177.748451] [] > :ko2iblnd:kiblnd_cm_callback+0x255/0xeb0 [54177.757088] > [] :rdma_cm:cma_req_handler+0x322/0x389 [54177.763985] > [] :ib_cm:cm_process_work+0x17/0xad [54177.770664] > [] :ib_cm:cm_req_handler+0x7ae/0x81b [54177.777248] > [] :ib_cm:cm_work_handler+0x2d/0xbaa [54177.784045] > [] run_workqueue+0x7f/0x10b > [54177.790439] [] worker_thread+0xda/0xe4 > [54177.799862] [] kthread+0x47/0x75 > [54177.805672] [] child_rip+0xa/0x12 > [54177.811717] > [54177.813851] Mem-info: > [54177.816666] Node 0 DMA per-cpu: > [54177.820479] CPU 0: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: > 0, btch: 1 usd: 0 [54177.829621] CPU 1: Hot: hi: 0, btch: 1 > usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.839216] CPU 2: Hot: > hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 > [54177.849488] CPU 3: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: > 0, btch: 1 usd: 0 [54177.859625] CPU 4: Hot: hi: 0, btch: 1 > usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.871977] CPU 5: Hot: > hi: 0, btch: 1 usd: 0 Cold: hi: 0, btch: 1 usd: 0 > [54177.881930] CPU 6: Hot: hi: 0, btch: 1 usd: 0 Cold: hi: > 0, btch: 1 usd: 0 [54177.891980] CPU 7: Hot: hi: 0, btch: 1 > usd: 0 Cold: hi: 0, btch: 1 usd: 0 [54177.902800] Node 0 DMA32 > per-cpu: > [54177.906462] CPU 0: Hot: hi: 186, btch: 31 usd: 10 Cold: hi: > 62, btch: 15 usd: 58 
[54177.916162] CPU 1: Hot: hi: 186, btch: 31 > usd: 26 Cold: hi: 62, btch: 15 usd: 3 [54177.926049] CPU 2: Hot: > hi: 186, btch: 31 usd: 139 Cold: hi: 62, btch: 15 usd: 54 > [54177.936948] CPU 3: Hot: hi: 186, btch: 31 usd: 1 Cold: hi: > 62, btch: 15 usd: 13 [54177.946968] CPU 4: Hot: hi: 186, btch: 31 > usd: 56 Cold: hi: 62, btch: 15 usd: 55 [54177.956868] CPU 5: Hot: > hi: 186, btch: 31 usd: 30 Cold: hi: 62, btch: 15 usd: 57 > [54177.965685] CPU 6: Hot: hi: 186, btch: 31 usd: 25 Cold: hi: > 62, btch: 15 usd: 1 [54177.975412] CPU 7: Hot: hi: 186, btch: 31 > usd: 13 Cold: hi: 62, btch: 15 usd: 56 [54177.986045] Node 0 Normal > per-cpu: > [54177.990527] CPU 0: Hot: hi: 186, btch: 31 usd: 128 Cold: hi: > 62, btch: 15 usd: 14 [54178.002993] CPU 1: Hot: hi: 186, btch: 31 > usd: 81 Cold: hi: 62, btch: 15 usd: 1 [54178.012136] CPU 2: Hot: > hi: 186, btch: 31 usd: 113 Cold: hi: 62, btch: 15 usd: 2 > [54178.022533] CPU 3: Hot: hi: 186, btch: 31 usd: 124 Cold: hi: > 62, btch: 15 usd: 8 [54178.032316] CPU 4: Hot: hi: 186, btch: 31 > usd: 27 Cold: hi: 62, btch: 15 usd: 4 [54178.041380] CPU 5: Hot: > hi: 186, btch: 31 usd: 24 Cold: hi: 62, btch: 15 usd: 9 > [54178.050941] CPU 6: Hot: hi: 186, btch: 31 usd: 120 Cold: hi: > 62, btch: 15 usd: 13 [54178.061180] CPU 7: Hot: hi: 186, btch: 31 > usd: 166 Cold: hi: 62, btch: 15 usd: 12 [54178.072162] Active:28319 > inactive:62389 dirty:8381 writeback:27 unstable:0 [54178.072163] free:5581 > slab:273603 mapped:2117 pagetables:690 bounce:0 [54178.087805] Node 0 DMA > free:11192kB min:20kB low:24kB high:28kB active:0kB inactive:0kB > present:10660kB pages_scanned:0 all_unreclaimable? yes [54178.103794] > lowmem_reserve[]: 0 3255 4013 > [54178.108294] Node 0 DMA32 free:9784kB min:6564kB low:8204kB high:9844kB > active:51792kB inactive:133260kB present:3333728kB pages_scanned:0 > all_unreclaimable? 
no [54178.129648] lowmem_reserve[]: 0 0 757 > [54178.133756] Node 0 Normal free:1348kB min:1524kB low:1904kB high:2284kB > active:61484kB inactive:116296kB present:775680kB pages_scanned:728 > all_unreclaimable? no [54178.154399] lowmem_reserve[]: 0 0 0 > [54178.158450] Node 0 DMA: 6*4kB 4*8kB 4*16kB 4*32kB 3*64kB 0*128kB 2*256kB > 0*512kB 2*1024kB 0*2048kB 2*4096kB = 11192kB [54178.172214] Node 0 DMA32: > 65*4kB 17*8kB 37*16kB 6*32kB 0*64kB 0*128kB 1*256kB 0*512kB 0*1024kB > 0*2048kB 2*4096kB = 9628kB [54178.188210] Node 0 Normal: 0*4kB 1*8kB 1*16kB > 1*32kB 0*64kB 1*128kB 1*256kB 0*512kB 1*1024kB 0*2048kB 0*4096kB = 1464kB > [54178.202288] Swap cache: add 0, delete 0, find 0/0, race 0+0 > [54178.208654] Free swap = 4200888kB > [54178.212390] Total swap = 4200888kB > [54178.218597] Free swap: 4200888kB > [54178.264623] 1245184 pages of RAM > [54178.268302] 231685 reserved pages > [54178.271793] 57602 pages shared > [54178.275306] 0 pages swap cached > [54178.278778] LustreError: 4106:0:(o2iblnd.c:732:kiblnd_create_conn()) > Can't create CQ: -12 [54277.772930] ib_cm/2: page allocation failure. 
> order:0, mode:0x10d0 [54277.781944] > [54277.781945] Call Trace: > [54277.788321] [] show_trace+0x34/0x47 > [54277.793761] [] dump_stack+0x12/0x17 > [54277.799744] [] __alloc_pages+0x2a3/0x2bc > [54277.806044] [] dma_alloc_pages+0x9b/0xbf > [54277.814225] [] dma_alloc_coherent+0x76/0x1cc > [54277.821449] [] :ib_mthca:mthca_buf_alloc+0x1bd/0x2a3 > [54277.831300] [] > :ib_mthca:mthca_alloc_qp_common+0x246/0x4e5 [54277.838934] > [] :ib_mthca:mthca_alloc_qp+0xab/0x102 [54277.846467] > [] :ib_mthca:mthca_create_qp+0x126/0x281 [54277.854289] > [] :ib_core:ib_create_qp+0x17/0x91 [54277.862274] > [] :rdma_cm:rdma_create_qp+0x2d/0x153 [54277.870048] > [] :ko2iblnd:kiblnd_create_conn+0x81c/0x1250 > [54277.877973] [] > :ko2iblnd:kiblnd_passive_connect+0x605/0xdd0 [54277.886679] > [] :ko2iblnd:kiblnd_cm_callback+0x255/0xeb0 > [54277.895646] [] :rdma_cm:cma_req_handler+0x322/0x389 > [54277.903470] [] :ib_cm:cm_process_work+0x17/0xad > [54277.910567] [] :ib_cm:cm_req_handler+0x7ae/0x81b > [54277.918121] [] :ib_cm:cm_work_handler+0x2d/0xbaa > [54277.926378] [] run_workqueue+0x7f/0x10b > [54277.932202] [] worker_thread+0xda/0xe4 > [54277.938003] [] kthread+0x47/0x75 > [54277.944032] [] child_rip+0xa/0x12 > [54277.950581] > > > Any ideas? > > Thanks, > Bernd -- Bernd Schubert Q-Leap Networks GmbH From rdreier at cisco.com Thu Feb 28 10:10:06 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 28 Feb 2008 10:10:06 -0800 Subject: [ofa-general] [PATCH 1 of 2] mlx4_core: for events for non-existent QPs, print a message only in debug mode In-Reply-To: <200802271648.07444.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Wed, 27 Feb 2008 16:48:07 +0200") References: <200802271648.07444.jackm@dev.mellanox.co.il> Message-ID: > Events received for non-existent QPs are of interest only when debugging. What if we just move modifying the QP to RESET to before we clear the QP out of the table? 
Then events for bogus QPs should never happen, and we can leave the output as a warning. - R. From rdreier at cisco.com Thu Feb 28 10:12:03 2008 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 28 Feb 2008 10:12:03 -0800 Subject: [ofa-general] [PATCH 1 of 2] mlx4_core: for events for non-existent QPs, print a message only in debug mode In-Reply-To: <200802271648.07444.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Wed, 27 Feb 2008 16:48:07 +0200") References: <200802271648.07444.jackm@dev.mellanox.co.il> Message-ID: actually it seems for mlx4 it really indicates something wrong if we get an event for a non-existent QP... for mthca I see how it can happen but have you seen it for mlx4? From clameter at sgi.com Thu Feb 28 10:43:54 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 28 Feb 2008 10:43:54 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080228011020.GG8091@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> Message-ID: On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > On Wed, Feb 27, 2008 at 05:03:21PM -0800, Christoph Lameter wrote: > > RDMA works across a network and I would assume that it needs confirmation > > that a connection has been torn down before pages can be unmapped. > > Depends on the latency of the network, for example with page pinning > it can even try to reduce the wait time, by tearing down the mapping > in range_begin and spin waiting the ack only later in range_end. What about invalidate_page()? 
From clameter at sgi.com Thu Feb 28 11:48:10 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 28 Feb 2008 11:48:10 -0800 (PST) Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080227192610.GF28483@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> Message-ID: On Wed, 27 Feb 2008, Andrea Arcangeli wrote: > What Christoph need to do when he's back from vacations to support > sleepable mmu notifiers is to add a CONFIG_XPMEM config option that > will switch the i_mmap_lock from a semaphore to a mutex (any other > change to this patch will be minor compared to that) so XPMEM hardware > will have kernels compiled that way. I don't see other sane ways to > remove the "atomic" parameter from the API (apparently required by > Andrew for merging something not restricted to the xpmem current usage > with only anonymous memory) and I don't want to have such a > locking-change intrusive dependency for all other non-blocking users > that are fine without having to alter how the VM works (for example > KVM and GRU). Very minor changes will be required to this patch to > make it work after the VM locking will be altered (for example the > CONFIG_XPMEM should also switch the mmu_register/unregister locking > from RCU to mutex as well). XPMEM then will only compile if > CONFIG_XPMEM=y and in turn the invalidate_range_* will support > scheduling inside. This is not going to work even if the mutex would work as easily as you think since the patch here still does an rcu_lock/unlock around a callback. 
> I don't think pretending to merge all in one block (I mean including > xpmem support that requires blocking methods) is good idea anymore as > long as we agree the "atomic" parameter shouldn't be merged. But we > can quite easily agree on the below to be optimal for GRU/KVM and > trivially extendible once a CONFIG_XPMEM will be added. So this first > part can go in now I think. Changing the locking for the callouts for users of the mmu notifier that f.e. require a response via the network (RDMA, XPMEM etc) is not trivial at all. RCU lock cannot be used. So we are looking at totally disjunct methods for those users who have to sleep. > +struct mmu_notifier_ops { > + /* > + * Called when nobody can register any more notifier in the mm > + * and after the "mn" notifier has been disarmed already. > + */ > + void (*release)(struct mmu_notifier *mn, > + struct mm_struct *mm); Who disarms the notifier? Why is the method not called to disarm the notifier on exit? > +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o > diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c > --- a/mm/filemap_xip.c > +++ b/mm/filemap_xip.c > @@ -194,7 +194,7 @@ __xip_unmap (struct address_space * mapp > if (pte) { > /* Nuke the page table entry. */ > flush_cache_page(vma, address, pte_pfn(*pte)); > - pteval = ptep_clear_flush(vma, address, pte); > + pteval = ptep_clear_flush_notify(vma, address, pte); > page_remove_rmap(page, vma); > dec_mm_counter(mm, file_rss); > BUG_ON(pte_dirty(pteval)); Well a bit better but now we have to modify both the macro and the code in the VM. It would be easier to put the notify call in here. > @@ -2048,6 +2050,7 @@ void exit_mmap(struct mm_struct *mm) > vm_unacct_memory(nr_accounted); > free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); > tlb_finish_mmu(tlb, 0, end); > + mmu_notifier_release(mm); The release should be called much earlier to allow the driver to release all resources in one go. This way each vma must be processed individually.
For our gobs of memory this method may create a scaling problem on exit(). From charlie9 at pitih.com Thu Feb 28 12:21:17 2008 From: charlie9 at pitih.com (Gabrielle Vickers) Date: Fri, 29 Feb 2008 04:21:17 +0800 Subject: [ofa-general] WorldwideWorldwidePhentrimine Message-ID: <01c87a8a$870e5c80$0c70c87c@charlie9> InfoMedsOnlineDrugstore http://ellenamersondg.blogspot.com From wtantisi at cs.cmu.edu Thu Feb 28 13:32:42 2008 From: wtantisi at cs.cmu.edu (Wittawat Tantisiriroj) Date: Thu, 28 Feb 2008 16:32:42 -0500 Subject: [ofa-general] IPoIB Connected Mode Throughput Question In-Reply-To: <1204194770.3358.43.camel@mtls03> References: <47C5E6BA.6090103@cs.cmu.edu> <1204194770.3358.43.camel@mtls03> Message-ID: <47C7287A.3020301@cs.cmu.edu> Thanks Eli, I hadn't thought that the CPU was our bottleneck. So, I focused on tuning TCP and kernel parameters. However, it is the case. With iperf, I can create two threads simultaneously and use both cores to handle the network traffic. As a result, it yields much better throughput ~850 MB/s. Sadly, this is likely to be a huge issue with TCP when 10GE becomes widely used. Wittawat On 02/28/2008 05:32 AM, Eli Cohen wrote: > On Wed, 2008-02-27 at 17:39 -0500, Wittawat Tantisiriroj wrote: > >> Hi, >> We have set up a small InfiniBand cluster to do several network >> storage experiments over IPoIB. However, we had the problem getting a >> good throughput with IPoIB-connected mode. So, we followed the same >> benchmark that Michael S. Tsirkin did in >> "http://lists.openfabrics.org/pipermail/general/2006-November/029500.html". >> We realize that even with a simple scenario we still get only ~620MB/s >> throughput from IPoIB-CM. I searched around with Google, but I cannot >> find any information regarding IPoIB-CM throughput. >> >> My question is: >> >> - Is this throughput typical/normal in most system? >> > You can't say there is a typical result for this check -- it depends on > the "strength" of your system.
One thing you can do is watch how much of > the CPU is used - you can use htop for that (it gives you per CPU > utilization). If you use 100% CPU than stronger machines will give > higher results. > > On my systems (AMD @2.4 Ghz) / mt25204 I get: > [root at sw186 ~]# netperf -H 11.4.3.185 -f M > TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.4.3.185 > (11.4.3.185) port 0 AF_INET > Recv Send Send > Socket Socket Message Elapsed > Size Size Size Time Throughput > bytes bytes bytes secs. MBytes/sec > > 87380 16384 16384 10.01 755.20 > > On other systems with Arbel (mt25218) I get 980 MB/s etc. >> Is there any necessary tweak tuning with TCP, ib_ipoib or kernel >> parameters in order to get ~900 MB/s? I have tried to TCP buffer size, >> but it still does not improve the throughput. >> >> - Should I use a OFED distribution instead of a standard built-in with a >> standard kernel? (I hope it does not matter) >> >> System >> ===== >> Processor: Intel(R) Pentium(R) D CPU 3.00GHz >> Memory: 4GB >> OS: Debian Etch with 2.6.24.2 kernel >> >> Network >> ====== >> Network card: Mellanox MT25204 (InfiniHost III Lx HCA) (4x, 10Gbps) >> Switch: Mellanox Gazelle (MTS9600) 96 ports with 4X (10 Gb/s) each >> Network Software Stack: Standard IPoIB built-in with the 2.6.24.2 kernel >> IPoIB Configuration: Connected mode with MTU=65520 >> >> Benchmark >> ======== >> Server: ib265 >> # ifconfig ib0 mtu 65520 >> # netserver >> >> Client: ib266 >> # ifconfig ib0 mtu 65520 >> # netperf -H ib265 -f M >> >> TCP STREAM TEST to ib265 >> Recv Send Send >> Socket Socket Message Elapsed >> Size Size Size Time Throughput >> bytes bytes bytes secs. 
MBytes/sec >> 87380 16384 16384 10.01 620.27 >> >> Thank in advance, >> Wittawat >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >> > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From andrea at qumranet.com Thu Feb 28 13:52:57 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Thu, 28 Feb 2008 22:52:57 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> Message-ID: <20080228215257.GJ8091@v2.random> On Thu, Feb 28, 2008 at 11:48:10AM -0800, Christoph Lameter wrote: > > make it work after the VM locking will be altered (for example the ^^^^^^^^^^^^^^^ > > CONFIG_XPMEM should also switch the mmu_register/unregister locking ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > from RCU to mutex as well). XPMEM then will only compile if ^^^^^^^^^^^^^^^^^^^^^^^^^ > > CONFIG_XPMEM=y and in turn the invalidate_range_* will support > > scheduling inside. > > This is not going to work even if the mutex would work as easily as you > think since the patch here still does an rcu_lock/unlock around a callback. See underlined. > > +struct mmu_notifier_ops { > > + /* > > + * Called when nobody can register any more notifier in the mm > > + * and after the "mn" notifier has been disarmed already. 
> > + */ > > + void (*release)(struct mmu_notifier *mn, > > + struct mm_struct *mm); > > Who disarms the notifier? Why is the method not called to disarm the > notifier on exit? The notifier is auto-disarmed by mmu_notifier_release, your patch works the same way. ->release is further called just in case anybody wants to know the notifier was disarmed. > > @@ -2048,6 +2050,7 @@ void exit_mmap(struct mm_struct *mm) > > vm_unacct_memory(nr_accounted); > > free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); > > tlb_finish_mmu(tlb, 0, end); > > + mmu_notifier_release(mm); > > The release should be called much earlier to allow the driver to release > all resources in one go. This way each vma must be processed individually. > For our gobs of memory this method may create a scaling problem on exit(). Good point, it has to be called earlier for GRU, but it's not a performance issue. GRU doesn't pin the pages so it should make the global invalidate in ->release _before_ unmap_vmas. Linux can't fault in the ptes anymore because mm_users is zero so there's no need of a ->release_begin/end, the _begin is enough. In #v6 I was invalidating inside unmap_vmas so it was ok. The performance issues you're talking about refers to #v6 I guess, for #v7 there's a single call. Thanks! 
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2039,6 +2039,7 @@ void exit_mmap(struct mm_struct *mm) unsigned long end; /* mm's last user has gone, and its about to be pulled down */ + mmu_notifier_release(mm); arch_exit_mmap(mm); lru_add_drain(); @@ -2050,7 +2051,6 @@ void exit_mmap(struct mm_struct *mm) vm_unacct_memory(nr_accounted); free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); tlb_finish_mmu(tlb, 0, end); - mmu_notifier_release(mm); /* * Walk the list again, actually closing and freeing it, From xma at us.ibm.com Thu Feb 28 13:59:40 2008 From: xma at us.ibm.com (Shirley Ma) Date: Thu, 28 Feb 2008 13:59:40 -0800 Subject: [ofa-general] IPoIB Connected Mode Throughput Question In-Reply-To: <47C7287A.3020301@cs.cmu.edu> Message-ID: The cpu utilization is so high? Do you have a connectX to test? Hopefully tcp offloading, hw checksum could bring cpu utilization down. Thanks Shirley -------------- next part -------------- An HTML attachment was scrubbed... URL: From clameter at sgi.com Thu Feb 28 14:00:24 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 28 Feb 2008 14:00:24 -0800 (PST) Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080228215257.GJ8091@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080228215257.GJ8091@v2.random> Message-ID: On Thu, 28 Feb 2008, Andrea Arcangeli wrote: > > This is not going to work even if the mutex would work as easily as you > > think since the patch here still does an rcu_lock/unlock around a callback. > > See underlined. Mutex is not acceptable for performance reasons. 
I think we can just drop the RCU lock if we simply unregister the mmu notifier in release and forbid the drivers from removing themselves from the notification chain. They can simply do nothing until release. At that time there is no concurrency and thus it's safe to remove even without rcu locking. > Good point, it has to be called earlier for GRU, but it's not a > performance issue. GRU doesn't pin the pages so it should make the > global invalidate in ->release _before_ unmap_vmas. Linux can't fault > in the ptes anymore because mm_users is zero so there's no need of a > ->release_begin/end, the _begin is enough. I do not follow you about the _begin without end but the following fix seems okay. From clameter at sgi.com Thu Feb 28 15:05:30 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 28 Feb 2008 15:05:30 -0800 (PST) Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080227192610.GF28483@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> Message-ID: On Wed, 27 Feb 2008, Andrea Arcangeli wrote: > +struct mmu_notifier_head { > + struct hlist_head head; > + spinlock_t lock; > +}; Still think that the lock here is not of too much use and can be easily replaced by mmap_sem. > +#define mmu_notifier(function, mm, args...)
\ > + do { \ > + struct mmu_notifier *__mn; \ > + struct hlist_node *__n; \ > + \ > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > + rcu_read_lock(); \ > + hlist_for_each_entry_rcu(__mn, __n, \ > + &(mm)->mmu_notifier.head, \ > + hlist) \ > + if (__mn->ops->function) \ > + __mn->ops->function(__mn, \ > + mm, \ > + args); \ > + rcu_read_unlock(); \ > + } \ > + } while (0) Andrew recommended local variables for parameters used multiple times. This means the mm parameter here. > +/* > + * Notifiers that use the parameters that they were passed so that the > + * compiler does not complain about unused variables but does proper > + * parameter checks even if !CONFIG_MMU_NOTIFIER. > + * Macros generate no code. > + */ > +#define mmu_notifier(function, mm, args...) \ > + do { \ > + if (0) { \ > + struct mmu_notifier *__mn; \ > + \ > + __mn = (struct mmu_notifier *)(0x00ff); \ > + __mn->ops->function(__mn, mm, args); \ > + }; \ > + } while (0) Note also Andrew's comments on the use of 0x00ff... > +/* > + * No synchronization. This function can only be called when only a single > + * process remains that performs teardown. > + */ > +void mmu_notifier_release(struct mm_struct *mm) > +{ > + struct mmu_notifier *mn; > + struct hlist_node *n, *tmp; > + > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > + hlist_for_each_entry_safe(mn, n, tmp, > + &mm->mmu_notifier.head, hlist) { > + hlist_del(&mn->hlist); > + if (mn->ops->release) > + mn->ops->release(mn, mm); > + } > + } > +} One could avoid a hlist_for_each_entry_safe here by simply always deleting the first object. Also re the _notify variants: The binding to pte_clear_flush_young etc will become a problem for notifiers that want to sleep because pte_clear_flush is usually called with the pte lock held. See f.e. try_to_unmap_one, page_mkclean_one etc. It would be better if the notifier calls could be moved outside of the pte lock.
From weiny2 at llnl.gov Thu Feb 28 15:10:04 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 28 Feb 2008 15:10:04 -0800 Subject: [ofa-general] [PATCH] Fix bug with "-S" option which prevented some formated GUIDs from being found. Message-ID: <20080228151004.651e1b0a.weiny2@llnl.gov> We have found that some tools and log files print guids without leading 0's (eg 0x8f10400411f56 vs 0x0008f10400411f56) The perl script "-S" option were failing without leading 0's. This patch formats the guids properly before performing lookups. Ira >From f1ab294d12569f81913c3c169ecefb003c11f714 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 28 Feb 2008 15:03:23 -0800 Subject: [PATCH] Fix bug with "-S" option which prevented some formated GUIDs from being found. Signed-off-by: Ira K. Weiny --- infiniband-diags/scripts/IBswcountlimits.pm | 17 ++++++++++++++++- infiniband-diags/scripts/iblinkinfo.pl | 8 +++++--- infiniband-diags/scripts/ibqueryerrors.pl | 4 +++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/infiniband-diags/scripts/IBswcountlimits.pm b/infiniband-diags/scripts/IBswcountlimits.pm index bddc421..3fd1efa 100755 --- a/infiniband-diags/scripts/IBswcountlimits.pm +++ b/infiniband-diags/scripts/IBswcountlimits.pm @@ -431,6 +431,21 @@ sub get_num_ports } # ========================================================================= +# format_switch_guid(guid) +# The diags store the switch guids as strings. This converts the guid supplied +# to the correct string format. +# eg: 0x0008f10400411f56 and 0x8f10400411f56 Should be equal but the strings +# are not. 
+# +sub format_switch_guid +{ + my $guid = $_[0]; + my $guid_str = ""; + sprintf($guid_str, "0x%0X", $guid); + return ($guid_str); +} + +# ========================================================================= # convert_dr_to_guid(direct_route) # sub convert_dr_to_guid @@ -442,7 +457,7 @@ sub convert_dr_to_guid foreach my $line (@lines) { if ($line =~ /^PortGuid:\.+(.*)/) { $guid = $1; } } - return $guid; + return format_switch_guid($guid); } # ========================================================================= diff --git a/infiniband-diags/scripts/iblinkinfo.pl b/infiniband-diags/scripts/iblinkinfo.pl index 195c8cf..93152d5 100755 --- a/infiniband-diags/scripts/iblinkinfo.pl +++ b/infiniband-diags/scripts/iblinkinfo.pl @@ -80,9 +80,11 @@ chomp $argv0; if (!getopts("hcpldRS:D:C:P:g")) { usage_and_exit $argv0; } if (defined $Getopt::Std::opt_h) { usage_and_exit $argv0; } -if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } -if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } +if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } +if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } +if (defined $Getopt::Std::opt_S) { + $single_switch = format_switch_guid($Getopt::Std::opt_S); +} if (defined $Getopt::Std::opt_d) { $only_down_links = $Getopt::Std::opt_d; } if (defined $Getopt::Std::opt_l) { $line_mode = $Getopt::Std::opt_l; } if (defined $Getopt::Std::opt_p) { $print_add_switch = $Getopt::Std::opt_p; } diff --git a/infiniband-diags/scripts/ibqueryerrors.pl b/infiniband-diags/scripts/ibqueryerrors.pl index ef61e9b..249fba3 100755 --- a/infiniband-diags/scripts/ibqueryerrors.pl +++ b/infiniband-diags/scripts/ibqueryerrors.pl @@ -171,7 +171,9 @@ if (defined $Getopt::Std::opt_c) { if (defined $Getopt::Std::opt_r) { $report_port_info = $Getopt::Std::opt_r; } if (defined $Getopt::Std::opt_R) { 
$regenerate_map = $Getopt::Std::opt_R; } if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } +if (defined $Getopt::Std::opt_S) { + $single_switch = format_switch_guid($Getopt::Std::opt_S); +} if (defined $Getopt::Std::opt_d) { $include_data_counters = $Getopt::Std::opt_d; } -- 1.5.1 From steiner at sgi.com Thu Feb 28 15:17:33 2008 From: steiner at sgi.com (Jack Steiner) Date: Thu, 28 Feb 2008 17:17:33 -0600 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080228215257.GJ8091@v2.random> References: <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080228215257.GJ8091@v2.random> Message-ID: <20080228231732.GA21604@sgi.com> > > The release should be called much earlier to allow the driver to release > > all resources in one go. This way each vma must be processed individually. > > For our gobs of memory this method may create a scaling problem on exit(). > > Good point, it has to be called earlier for GRU, but it's not a > performance issue. GRU doesn't pin the pages so it should make the > global invalidate in ->release _before_ unmap_vmas. Linux can't fault > in the ptes anymore because mm_users is zero so there's no need of a > ->release_begin/end, the _begin is enough. > I disagree. The location of the callout IS a performance issue. In simple comparisons of the 2 patches (Christoph's vs. Andrea's), Andrea's has a 7X increase in the number of TLB purges being issued to the GRU. TLB flushing is slow and can impact the performance of tasks using the GRU.
--- jack From weiny2 at llnl.gov Thu Feb 28 15:21:10 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 28 Feb 2008 15:21:10 -0800 Subject: [ofa-general] [PATCH] Create script to automate perltidy command Message-ID: <20080228152110.50a6895c.weiny2@llnl.gov> >From 37ff9757607389ed6fbc250d825732093ca49e5f Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 28 Feb 2008 15:19:51 -0800 Subject: [PATCH] Create script to automate perltidy command Signed-off-by: Ira K. Weiny --- infiniband-diags/perltidy.sh | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 61 insertions(+), 0 deletions(-) create mode 100755 infiniband-diags/perltidy.sh diff --git a/infiniband-diags/perltidy.sh b/infiniband-diags/perltidy.sh new file mode 100755 index 0000000..77564df --- /dev/null +++ b/infiniband-diags/perltidy.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# +# Copyright (c) 2006 The Regents of the University of California. +# +# Produced at Lawrence Livermore National Laboratory. +# Written by Ira Weiny . +# +# This software is available to you under a choice of one of two +# licenses. You may choose to be licensed under the terms of the GNU +# General Public License (GPL) Version 2, available from the file +# COPYING in the main directory of this source tree, or the +# OpenIB.org BSD license below: +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above +# copyright notice, this list of conditions and the following +# disclaimer. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +tidy_cmd="perltidy -pt=2 -sbt=2 -bt=2 -nsfs -b -t -nola -ce -sbl -nbbc" + +argv0=`basename $0` +scripts_dir=`dirname $0`/scripts + +if [ "$1" == "-h" ]; then + echo "$argv0 [-h]" + echo " Run perltidy on all perl scripts and modules in the scripts directory" + exit 1 +fi + +cd $scripts_dir + +for file in *.pl ; do + echo "tidy : $scripts_dir/$file" + $tidy_cmd $file +done + +for file in *.pm ; do + echo "tidy : $scripts_dir/$file" + $tidy_cmd $file +done + +exit 0 + -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Create-script-to-automate-perltidy-command.patch Type: application/octet-stream Size: 2670 bytes Desc: not available URL: From andrea at qumranet.com Thu Feb 28 16:24:59 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 01:24:59 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080228231732.GA21604@sgi.com> References: <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080228215257.GJ8091@v2.random> <20080228231732.GA21604@sgi.com> Message-ID: <20080229001126.GL8091@v2.random> On Thu, Feb 28, 2008 at 05:17:33PM -0600, Jack Steiner wrote: > I disagree. The location of the callout IS a performance issue. In simple > comparisons of the 2 patches (Christoph's vs. 
Andrea's), Andrea's has a 7X > increase in the number of TLB purges being issued to the GRU. TLB flushing Are you sure that you're referring to #v7? From andrea at qumranet.com Thu Feb 28 16:40:01 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 01:40:01 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> Message-ID: <20080229004001.GN8091@v2.random> On Thu, Feb 28, 2008 at 03:05:30PM -0800, Christoph Lameter wrote: > Still think that the lock here is not of too much use and can be easily > replaced by mmap_sem. I can use the mmap_sem. > > +#define mmu_notifier(function, mm, args...) \ > > + do { \ > > + struct mmu_notifier *__mn; \ > > + struct hlist_node *__n; \ > > + \ > > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > > + rcu_read_lock(); \ > > + hlist_for_each_entry_rcu(__mn, __n, \ > > + &(mm)->mmu_notifier.head, \ > > + hlist) \ > > + if (__mn->ops->function) \ > > + __mn->ops->function(__mn, \ > > + mm, \ > > + args); \ > > + rcu_read_unlock(); \ > > + } \ > > + } while (0) > > Andrew recomended local variables for parameters used multile times. This > means the mm parameter here. I don't exactly see what "buggy macro" meant? I already use parenthesis as needed to avoid the need of local variables to be safe. Not really sure what's buggy, sorry! > Note also Andrew's comments on the use of 0x00ff... I thought I tried the (void) but it didn't work and your solution worked, but perhaps I did something wrong, I'll try again with (void) nevertheless. > > +/* > > + * No synchronization. 
This function can only be called when only a single > > + * process remains that performs teardown. > > + */ > > +void mmu_notifier_release(struct mm_struct *mm) > > +{ > > + struct mmu_notifier *mn; > > + struct hlist_node *n, *tmp; > > + > > + if (unlikely(!hlist_empty(&mm->mmu_notifier.head))) { > > + hlist_for_each_entry_safe(mn, n, tmp, > > + &mm->mmu_notifier.head, hlist) { > > + hlist_del(&mn->hlist); > > + if (mn->ops->release) > > + mn->ops->release(mn, mm); > > + } > > + } > > +} > > One could avoid a hlist_for_each_entry_safe here by simply always deleting > the first object. Agreed, the current construct come from the fact we previously didn't assume nobody could ever call mmu_notifier_unregister by the time mm_users is 0. > Also re the _notify variants: The binding to pte_clear_flush_young etc > will become a problem for notifiers that want to sleep because > pte_clear_flush is usually called with the pte lock held. See f.e. > try_to_unmap_one, page_mkclean_one etc. Calling __free_page out of the PT lock is much bigger change. do_wp_page will require changes anyway when the sleepable notifiers are merged. > It would be better if the notifier calls could be moved outside of the > pte lock. The point is that it can't make a difference right now, and my objective was to avoid unnecessary source code duplication (later it will be necessary, right now it isn't). By the time you rework do_wp_page, removing _notify will be a very minor detail compared to the rest of the changes to do_wp_page IMHO. Expanding it now won't provide a real advantage later. 
From andrea at qumranet.com Thu Feb 28 16:55:30 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 01:55:30 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> Message-ID: <20080229005530.GO8091@v2.random> On Thu, Feb 28, 2008 at 10:43:54AM -0800, Christoph Lameter wrote: > What about invalidate_page()? That would just spin waiting an ack (just like the smp-tlb-flushing invalidates in numa already does). Thinking more about this, we could also parallelize it with an invalidate_page_before/end. If it takes 1usec to flush remotely, scheduling would be overkill, but spending 1usec in a while loop isn't nice if we can parallelize that 1usec with the ipi-tlb-flush. Not sure if it makes sense... it certainly would be quick to add it (especially thanks to _notify ;). From akpm at linux-foundation.org Thu Feb 28 16:56:08 2008 From: akpm at linux-foundation.org (Andrew Morton) Date: Thu, 28 Feb 2008 16:56:08 -0800 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080229004001.GN8091@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080229004001.GN8091@v2.random> Message-ID: <20080228165608.de7c8ae4.akpm@linux-foundation.org> On Fri, 29 Feb 2008 01:40:01 +0100 Andrea Arcangeli wrote: > > > +#define mmu_notifier(function, mm, args...) 
\ > > > + do { \ > > > + struct mmu_notifier *__mn; \ > > > + struct hlist_node *__n; \ > > > + \ > > > + if (unlikely(!hlist_empty(&(mm)->mmu_notifier.head))) { \ > > > + rcu_read_lock(); \ > > > + hlist_for_each_entry_rcu(__mn, __n, \ > > > + &(mm)->mmu_notifier.head, \ > > > + hlist) \ > > > + if (__mn->ops->function) \ > > > + __mn->ops->function(__mn, \ > > > + mm, \ > > > + args); \ > > > + rcu_read_unlock(); \ > > > + } \ > > > + } while (0) > > > > Andrew recomended local variables for parameters used multile times. This > > means the mm parameter here. > > I don't exactly see what "buggy macro" meant? Multiple references to the argument, so mmu_notifier(foo, bar(), zot); will call bar() either once or twice. Unlikely in this case, but bad practice. Easily fixable by using another temporary. From clameter at sgi.com Thu Feb 28 16:59:59 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 28 Feb 2008 16:59:59 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080229005530.GO8091@v2.random> References: <20080215064859.384203497@sgi.com> <20080215064932.620773824@sgi.com> <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > On Thu, Feb 28, 2008 at 10:43:54AM -0800, Christoph Lameter wrote: > > What about invalidate_page()? > > That would just spin waiting an ack (just like the smp-tlb-flushing > invalidates in numa already does). And thus the device driver may stop receiving data on a UP system? It will never get the ack. > Thinking more about this, we could also parallelize it with an > invalidate_page_before/end. If it takes 1usec to flush remotely, > scheduling would be overkill, but spending 1usec in a while loop isn't > nice if we can parallelize that 1usec with the ipi-tlb-flush.
Not sure > if it makes sense... it certainly would be quick to add it (especially > thanks to _notify ;). invalidate_page_before/end could be realized as an invalidate_range_begin/end on a page sized range? From clameter at sgi.com Thu Feb 28 17:03:01 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 28 Feb 2008 17:03:01 -0800 (PST) Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080229004001.GN8091@v2.random> References: <20080219084357.GA22249@wotan.suse.de> <20080219135851.GI7128@v2.random> <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080229004001.GN8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > > Also re the _notify variants: The binding to pte_clear_flush_young etc > > will become a problem for notifiers that want to sleep because > > pte_clear_flush is usually called with the pte lock held. See f.e. > > try_to_unmap_one, page_mkclean_one etc. > > Calling __free_page out of the PT lock is much bigger > change. do_wp_page will require changes anyway when the sleepable > notifiers are merged. I thought you wanted to get rid of the sync via pte lock? What changes to do_wp_page do you envision? > > It would be better if the notifier calls could be moved outside of the > > pte lock. > > The point is that it can't make a difference right now, and my > objective was to avoid unnecessary source code duplication (later it > will be necessary, right now it isn't). By the time you rework > do_wp_page, removing _notify will be a very minor detail compared to > the rest of the changes to do_wp_page IMHO. Expanding it now won't > provide a real advantage later. What is the trouble with the current do_wp_page modifications? There is no need for invalidate_page() there so far. 
invalidate_range() does the trick there. From clameter at sgi.com Thu Feb 28 17:13:02 2008 From: clameter at sgi.com (Christoph Lameter) Date: Thu, 28 Feb 2008 17:13:02 -0800 (PST) Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080229001126.GL8091@v2.random> References: <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080228215257.GJ8091@v2.random> <20080228231732.GA21604@sgi.com> <20080229001126.GL8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > On Thu, Feb 28, 2008 at 05:17:33PM -0600, Jack Steiner wrote: > > I disagree. The location of the callout IS a performance issue. In simple > > comparisons of the 2 patches (Christoph's vs. Andrea's), Andrea's has a 7X > > increase in the number of TLB purges being issued to the GRU. TLB flushing > > Are you sure that you're referring to #v7? Jack: AFAICT Andrea moved the release callout and things will be fine in the next release. 
From polyplus.com at ethanfan.com Thu Feb 28 20:00:00 2008 From: polyplus.com at ethanfan.com (Phillip Williams) Date: Thu, 28 Feb 2008 22:00:00 -0600 Subject: [ofa-general] Buy OEM Software Message-ID: <000401c87a87$524cd600$0100007f@peitb> Here: Microsoft Windows Vista Ultimate $89 Windows XP Pro + SP2 $49 MS Office Enterprice 2007 $79 MS Office 2003 Professional $69 Acrobat Reader 8 Pro $79 Macromedia Flash Professional 8 $49 http://karlarathburnpg.blogspot.com Adobe Premiere 2.O $59 Corel Grafix Suite X3 $59 Adobe Il1ustrator CS2 $59 Adobe Photoshop CS2 V9.O $69 Adobe Photoshop CS3 Extended $89 http://karlarathburnpg.blogspot.com Macromedia Studio 8 $99 Autodesk Autocad 2OO7 $129 Adobe Creative Suite 2 $149 Adobe Creative Suite 3 Premium $269 http://karlarathburnpg.blogspot.com and for Mac here: Adobe Acrobat PRO 7 $69 Adobe After Effects $49 Macromedia Flash Pro 8 $49 Adobe Creative Suite 2 Premium $49 http://karlarathburnpg.blogspot.com Ableton Live 5.O.1 $49 Adobe Photoshop CS $49 From chu11 at llnl.gov Thu Feb 28 20:17:19 2008 From: chu11 at llnl.gov (Albert Chu) Date: Thu, 28 Feb 2008 20:17:19 -0800 (PST) Subject: [ofa-general] [OpenSM] updn routing performance fix??? Message-ID: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> Hey Sasha, While doing some other development, I noticed that some switch ports were not used in routing even though they were up/healthy. I wrote a script (will try to submit to infiniband-diags when I clean it up) that analyzes dump_lfts to see what ports are used in routing. Here's an output chunk: Unbalanced Switch Port Usage: MT47396 Infiniscale-III Mellanox Technologies, 0x000b8cffff004662, 40 Port 013: 12 Port 014: 12 Port 015: 12 Port 016: 12 Port 017: 12 Port 018: 12 Port 019: 12 Port 020: 12 Port 021: 12 Port 022: 0 Port 023: 11 Port 024: 11 In the above example, Port 022 is not used for routing at all on this switch. Naturally, we think this is bad. 
After some investigation, I found out that after the initial heavy sweep is done, some of the ports on some switches are down (I assume hardware racing during bringup), and thus opensm does not route through those ports. When opensm does a heavy resweep later on (I assume b/c some traps are received when those down ports come up), opensm keeps the same old forwarding tables from before b/c ignore_existing_lfts is FALSE and b/c the least hops are the same (other ports on the switch go to the same parent). Thus, we get healthy ports not forwarding to a parent switch. There are multiple ways to deal with this. I made the attached patch which solved the problem on one of our test clusters. It's pretty simple. Store all of the "bad ports" that were found during a switch configuration. During the next heavy resweep, if some of those "bad ports" are now up, I set ignore_existing_lfts to TRUE for just that switch, leading to a completely new forwarding table of the switch. During my performance testing on this patch, performance with a few mpibench tests is actually worse by a few percent with this patch. I am only using 120 of 144 nodes on this cluster. It's not a big cluster, has two levels worth of switches (24 port switches going up to a 288 port switch. Yup, the cluster is not "filled out" yet :-). So there is some randomness on which specific nodes run the job and if the lid routing layout is better/worse for that specific set of nodes. Intuitively, we think this will be better as a whole even though my current testing can't show it. Can you think of anything that would make this patch worse for performance as a whole? Could you see some side effect leading to a lot more traffic on the network? Al -- Albert Chu chu11 at llnl.gov 925-422-5311 Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Do-not-ignore-existing-lfts-when-new-ports-exist.patch Type: text/x-patch Size: 6677 bytes Desc: not available URL: From panda at cse.ohio-state.edu Thu Feb 28 21:17:48 2008 From: panda at cse.ohio-state.edu (Dhabaleswar Panda) Date: Fri, 29 Feb 2008 00:17:48 -0500 (EST) Subject: [ofa-general] Announcing the release of MVAPICH 1.0 Message-ID: The MVAPICH team is pleased to announce the availability of MVAPICH 1.0 with the following NEW features: - New Scalable and robust job startup - Enhanced and robust mpirun_rsh framework to provide scalable launching on multi-thousand core clusters - Running time of `MPI Hello World' program on 1K cores is around 4 sec and on 32K cores is around 80 sec - Available for OpenFabrics/Gen2, OpenFabrics/Gen2-UD and QLogic InfiniPath devices - Performance graph at: http://mvapich.cse.ohio-state.edu/performance/startup.shtml - Enhanced support for SLURM - Available for OpenFabrics/Gen2, OpenFabrics/Gen2-UD and QLogic InfiniPath devices - New OpenFabrics Gen2 Unreliable-Datagram (UD)-based design for large-scale InfiniBand clusters (multi-thousand cores) - delivers performance and scalability with constant memory footprint for communication contexts - Only 40MB per process even with 16K processes connected to each other - Performance graph at: http://mvapich.cse.ohio-state.edu/performance/mvapich/ud_memory.shtml - zero-copy protocol for large data transfer - shared memory communication between cores within a node - multi-core optimized collectives (MPI_Bcast, MPI_Barrier, MPI_Reduce and MPI_Allreduce) - enhanced MPI_Allgather collective - New features for OpenFabrics Gen2-IB interface - enhanced coalescing support with varying degree of coalescing - support for ConnectX adapter - support for asynchronous progress at both sender and receiver to overlap computation and communication - multi-core optimized collectives (MPI_Bcast) - tuned 
collectives (MPI_Allgather, MPI_Bcast) based on network adapter characteristics - Performance graph at: http://mvapich.cse.ohio-state.edu/performance/collective.shtml - network-level fault tolerance with Automatic Path Migration (APM) for tolerating intermittent network failures over InfiniBand. - New Support for QLogic InfiniPath adapters - high-performance point-to-point communication - optimized collectives (MPI_Bcast and MPI_Barrier) with k-nomial algorithms while exploiting multi-core architecture - Optimized and high-performance ADIO driver for Lustre - This MPI-IO support is a contribution from Future Technologies Group, Oak Ridge National Laboratory. (http://ft.ornl.gov/doku/doku.php?id=ft:pio:start) - Performance graph at: http://mvapich.cse.ohio-state.edu/performance/mvapich/romio.shtml - Flexible user defined processor affinity for better resource utilization on multi-core systems - flexible process bindings to cores - allows memory-intensive applications to run with a subset of cores on each chip for better performance More details on all features and supported platforms can be obtained by visiting the following URL: http://mvapich.cse.ohio-state.edu/overview/mvapich/features.shtml MVAPICH 1.0 continues to deliver excellent performance. Sample performance numbers include: - with OpenFabrics/Gen2 on EM64T quad-core with PCIe and ConnectX-DDR: - 1.51 microsec one-way latency (4 bytes) - 1404 MB/sec unidirectional bandwidth - 2713 MB/sec bidirectional bandwidth - with PSM on Opteron with Hypertransport and QLogic-SDR: - 1.25 microsec one-way latency (4 bytes) - 953 MB/sec unidirectional bandwidth - 1891 MB/sec bidirectional bandwidth Performance numbers for all other platforms, system configurations and operations can be viewed by visiting `Performance' section of the project's web page. 
For downloading MVAPICH 1.0, associated user guide and accessing the anonymous SVN, please visit the following URL: http://mvapich.cse.ohio-state.edu All feedbacks, including bug reports and hints for performance tuning, are welcome. Please post it to the mvapich-discuss mailing list. Thanks, The MVAPICH Team ====================================================================== MVAPICH/MVAPICH2 project is currently supported with funding from U.S. National Science Foundation, U.S. DOE Office of Science, Mellanox, Intel, Cisco Systems, QLogic, Sun Microsystems and Linux Networx; and with equipment support from Advanced Clustering, AMD, Appro, Chelsio, Dell, Fujitsu, Fulcrum, IBM, Intel, Mellanox, Microway, NetEffect, QLogic and Sun Microsystems. Other technology partner includes Etnus. ====================================================================== From sebastian.schmitzdorff at hamburgnet.de Fri Feb 29 00:29:21 2008 From: sebastian.schmitzdorff at hamburgnet.de (Sebastian Schmitzdorff) Date: Fri, 29 Feb 2008 09:29:21 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? (fwd) In-Reply-To: <20080209221711.GB20332@cefeid.wcss.wroc.pl> References: <20080206154701.GA11384@cefeid.wcss.wroc.pl> <1202320491.14810.29.camel@trinity.ogc.int> <20080206213543.GA21176@cefeid.wcss.wroc.pl> <20080209221711.GB20332@cefeid.wcss.wroc.pl> Message-ID: <47C7C261.2060007@hamburgnet.de> hi pawel, I was wondering if you have achieved better nfs rdma benchmark results by now? regards Sebastian Pawel Dziekonski schrieb: > hi, > > the saga continues. ;) > > very basic benchmarks and surprising (at least for me) results - it > look's like reading is much slower than writing and NFS/RDMA is twice > slower in reading than classic NFS. :o > > results below - comments appreciated! > regards, Pawel > > > both nfs server and client have 8-cores, 16 GB RAM, Mellanox DDR HCAs > (MT25204) connected port-port (no switch). 
> > local_hdd - 2 sata2 disks in soft-raid0, > nfs_ipoeth - classic nfs over ethernet, > nfs_ipoib - classic nfs over IPoIB, > nfs_rdma - NFS/RDMA. > > simple write of 36GB file with dd (both machines have 16GB RAM): > /usr/bin/time -p dd if=/dev/zero of=/mnt/qqq bs=1M count=36000 > > local_hdd sys 54.52 user 0.04 real 254.59 > > nfs_ipoib sys 36.35 user 0.00 real 266.63 > nfs_rdma sys 39.03 user 0.02 real 323.77 > nfs_ipoeth sys 34.21 user 0.01 real 375.24 > > remount /mnt to clear cache and read a file from nfs share and > write it to /dev/: > /usr/bin/time -p dd if=/mnt/qqq of=/scratch/qqq bs=1M > > nfs_ipoib sys 59.04 user 0.02 real 571.57 > nfs_ipoeth sys 58.92 user 0.02 real 606.61 > nfs_rdma sys 62.57 user 0.03 real 1296.36 > > > > results from bonnie++: > > Version 1.03c ------Sequential Write ------ --Sequential Read -- --Random- > -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks-- > Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP > local_hdd 35G:128k 93353 12 58329 6 143293 7 243.6 1 > local_hdd 35G:256k 92283 11 58189 6 144202 8 172.2 2 > local_hdd 35G:512k 93879 12 57715 6 144167 8 128.2 4 > local_hdd 35G:1024k 93075 12 58637 6 144172 8 95.3 7 > nfs_ipoeth 35G:128k 91325 7 31848 4 64299 4 170.2 1 > nfs_ipoeth 35G:256k 90668 7 32036 5 64542 4 163.2 2 > nfs_ipoeth 35G:512k 93348 7 31757 5 64454 4 85.7 3 > nfs_ipoet 35G:1024k 91283 7 31869 5 64241 5 51.7 4 > nfs_ipoib 35G:128k 91733 7 36641 5 65839 4 178.4 2 > nfs_ipoib 35G:256k 92453 7 36567 6 66682 4 166.9 3 > nfs_ipoib 35G:512k 91157 7 37660 6 66318 4 86.8 3 > nfs_ipoib 35G:1024k 92111 7 35786 6 66277 5 53.3 4 > nfs_rdma 35G:128k 91152 8 29942 5 32147 2 187.0 1 > nfs_rdma 35G:256k 89772 7 30560 5 34587 2 158.4 3 > nfs_rdma 35G:512k 91290 7 29698 5 34277 2 60.9 2 > nfs_rdma 35G:1024k 91336 8 29052 5 31742 2 41.5 3 > ------Sequential Create------ --------Random Create-------- > -Create-- --Read--- -Delete-- -Create-- --Read--- -Delete-- > files:max:min /sec %CP /sec %CP 
/sec %CP /sec %CP /sec %CP /sec %CP > local_hdd 16 10587 36 +++++ +++ 8674 29 10727 35 +++++ +++ 7015 28 > local_hdd 16 11372 41 +++++ +++ 8490 29 11192 43 +++++ +++ 6881 27 > local_hdd 16 10789 35 +++++ +++ 8520 29 11468 46 +++++ +++ 6651 24 > local_hdd 16 10841 40 +++++ +++ 8443 28 11162 41 +++++ +++ 6441 22 > nfs_ipoeth 16 3753 7 13390 12 3795 7 3773 8 22181 16 3635 7 > nfs_ipoeth 16 3762 8 12358 7 3713 8 3753 7 20448 13 3632 6 > nfs_ipoeth 16 3834 7 12697 6 3729 8 3725 9 22807 11 3673 7 > nfs_ipoeth 16 3729 8 14260 10 3774 7 3744 7 25285 14 3688 7 > nfs_ipoib 16 6803 17 +++++ +++ 6843 15 6820 14 +++++ +++ 5834 11 > nfs_ipoib 16 6587 16 +++++ +++ 4959 9 6832 14 +++++ +++ 5608 12 > nfs_ipoib 16 6820 18 +++++ +++ 6636 15 6479 15 +++++ +++ 5679 13 > nfs_ipoib 16 6475 14 +++++ +++ 6435 14 5543 11 +++++ +++ 5431 11 > nfs_rdma 16 7014 15 +++++ +++ 6714 10 7001 14 +++++ +++ 5683 8 > nfs_rdma 16 7038 13 +++++ +++ 6713 12 6956 11 +++++ +++ 5488 8 > nfs_rdma 16 7058 12 +++++ +++ 6797 11 6989 14 +++++ +++ 5761 9 > nfs_rdma 16 7201 13 +++++ +++ 6821 12 7072 15 +++++ +++ 5609 9 > > > -- Hamburgnet, Geschäftsführer Sebastian Schmitzdorff http://www.hamburgnet.de Kottwitzstrasse 49 D-20253 Hamburg fon: 040 / 736 72 322 fax: 040 / 736 72 321 From slipsit71 at fhgs.net Fri Feb 29 02:58:13 2008 From: slipsit71 at fhgs.net (Roslyn Parks) Date: Fri, 29 Feb 2008 18:58:13 +0800 Subject: [ofa-general] AddtoCartNewProductsForOurCustomers Message-ID: <01c87b05$08a2d080$4bbaf9da@slipsit71> CustomerSupportMoneybackPolicyMeds http://aishaglossonmc.blogspot.com From vlad at lists.openfabrics.org Fri Feb 29 03:09:14 2008 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 29 Feb 2008 03:09:14 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20080229-0200 daily build status Message-ID: <20080229110915.08478E60AC5@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: 
ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.24 Passed on x86_64 with linux-2.6.22.5-31-default Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.24 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on 
ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.24 Failed: From general at openib.org Fri Feb 29 03:10:17 2008 From: general at openib.org (general at openib.org) Date: Fri, 29 Feb 2008 03:10:17 -0800 (PST) Subject: [ofa-general] Best Sales 2008! Message-ID: <20080229050938.3342.qmail@nym-nb-tcvetkov> An HTML attachment was scrubbed... URL: From sashak at voltaire.com Fri Feb 29 05:01:23 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 29 Feb 2008 13:01:23 +0000 Subject: [ofa-general] Re: [PATCH] Fix bug with "-S" option which prevented some formated GUIDs from being found. In-Reply-To: <20080228151004.651e1b0a.weiny2@llnl.gov> References: <20080228151004.651e1b0a.weiny2@llnl.gov> Message-ID: <20080229130123.GG27272@sashak.voltaire.com> Hi Ira, On 15:10 Thu 28 Feb , Ira Weiny wrote: > We have found that some tools and log files print guids without leading 0's > (eg 0x8f10400411f56 vs 0x0008f10400411f56) > > The perl script "-S" option were failing without leading 0's. This patch > formats the guids properly before performing lookups. > > Ira > > From f1ab294d12569f81913c3c169ecefb003c11f714 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 28 Feb 2008 15:03:23 -0800 > Subject: [PATCH] Fix bug with "-S" option which prevented some formated GUIDs from being found. > > > Signed-off-by: Ira K. 
Weiny > --- > infiniband-diags/scripts/IBswcountlimits.pm | 17 ++++++++++++++++- > infiniband-diags/scripts/iblinkinfo.pl | 8 +++++--- > infiniband-diags/scripts/ibqueryerrors.pl | 4 +++- > 3 files changed, 24 insertions(+), 5 deletions(-) > > diff --git a/infiniband-diags/scripts/IBswcountlimits.pm b/infiniband-diags/scripts/IBswcountlimits.pm > index bddc421..3fd1efa 100755 > --- a/infiniband-diags/scripts/IBswcountlimits.pm > +++ b/infiniband-diags/scripts/IBswcountlimits.pm > @@ -431,6 +431,21 @@ sub get_num_ports > } > > # ========================================================================= > +# format_switch_guid(guid) > +# The diags store the switch guids as strings. This converts the guid supplied > +# to the correct string format. > +# eg: 0x0008f10400411f56 and 0x8f10400411f56 Should be equal but the strings > +# are not. > +# > +sub format_switch_guid > +{ > + my $guid = $_[0]; > + my $guid_str = ""; > + sprintf($guid_str, "0x%0X", $guid); > + return ($guid_str); > +} It doesn't work for me (I used this with perl -d). This sub returns nothing, shouldn't this be: $guid_str = sprintf("0x%0X", $guid) ? Also $guid is string here. 
Sasha > + > +# ========================================================================= > # convert_dr_to_guid(direct_route) > # > sub convert_dr_to_guid > @@ -442,7 +457,7 @@ sub convert_dr_to_guid > foreach my $line (@lines) { > if ($line =~ /^PortGuid:\.+(.*)/) { $guid = $1; } > } > - return $guid; > + return format_switch_guid($guid); > } > > # ========================================================================= > diff --git a/infiniband-diags/scripts/iblinkinfo.pl b/infiniband-diags/scripts/iblinkinfo.pl > index 195c8cf..93152d5 100755 > --- a/infiniband-diags/scripts/iblinkinfo.pl > +++ b/infiniband-diags/scripts/iblinkinfo.pl > @@ -80,9 +80,11 @@ chomp $argv0; > > if (!getopts("hcpldRS:D:C:P:g")) { usage_and_exit $argv0; } > if (defined $Getopt::Std::opt_h) { usage_and_exit $argv0; } > -if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } > -if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } > -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } > +if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } > +if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } > +if (defined $Getopt::Std::opt_S) { > + $single_switch = format_switch_guid($Getopt::Std::opt_S); > +} > if (defined $Getopt::Std::opt_d) { $only_down_links = $Getopt::Std::opt_d; } > if (defined $Getopt::Std::opt_l) { $line_mode = $Getopt::Std::opt_l; } > if (defined $Getopt::Std::opt_p) { $print_add_switch = $Getopt::Std::opt_p; } > diff --git a/infiniband-diags/scripts/ibqueryerrors.pl b/infiniband-diags/scripts/ibqueryerrors.pl > index ef61e9b..249fba3 100755 > --- a/infiniband-diags/scripts/ibqueryerrors.pl > +++ b/infiniband-diags/scripts/ibqueryerrors.pl > @@ -171,7 +171,9 @@ if (defined $Getopt::Std::opt_c) { > if (defined $Getopt::Std::opt_r) { $report_port_info = $Getopt::Std::opt_r; } > if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } > if (defined 
$Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } > -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } > +if (defined $Getopt::Std::opt_S) { > + $single_switch = format_switch_guid($Getopt::Std::opt_S); > +} > if (defined $Getopt::Std::opt_d) { > $include_data_counters = $Getopt::Std::opt_d; > } > -- > 1.5.1 From sashak at voltaire.com Fri Feb 29 05:02:31 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 29 Feb 2008 13:02:31 +0000 Subject: [ofa-general] Re: [PATCH] Create script to automate perltidy command In-Reply-To: <20080228152110.50a6895c.weiny2@llnl.gov> References: <20080228152110.50a6895c.weiny2@llnl.gov> Message-ID: <20080229130231.GH27272@sashak.voltaire.com> On 15:21 Thu 28 Feb , Ira Weiny wrote: > From 37ff9757607389ed6fbc250d825732093ca49e5f Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 28 Feb 2008 15:19:51 -0800 > Subject: [PATCH] Create script to automate perltidy command > > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From andrea at qumranet.com Fri Feb 29 05:09:05 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 14:09:05 +0100 Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: References: <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080229004001.GN8091@v2.random> Message-ID: <20080229130905.GS8091@v2.random> On Thu, Feb 28, 2008 at 05:03:01PM -0800, Christoph Lameter wrote: > I thought you wanted to get rid of the sync via pte lock? Sure. _notify is happening inside the pt lock by coincidence, to reduce the changes to mm/* as long as the mmu notifiers aren't sleep capable. > What changes to do_wp_page do you envision? Converting it to invalidate_range_begin/end. > What is the trouble with the current do_wp_page modifications? 
There is > no need for invalidate_page() there so far. invalidate_range() does the > trick there. No trouble, it's just that I didn't want to mangle over the logic of do_wp_page unless it was strictly required, the patch has to be obviously safe. You need to keep that bit of your patch to make the mmu notifiers sleepable. From andrea at qumranet.com Fri Feb 29 05:13:02 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 14:13:02 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> Message-ID: <20080229131302.GT8091@v2.random> On Thu, Feb 28, 2008 at 04:59:59PM -0800, Christoph Lameter wrote: > And thus the device driver may stop receiving data on a UP system? It will > never get the ack. Not sure to follow, sorry. My idea was: post the invalidate in the mmio region of the device smp_call_function() while (mmio device wait-bitflag is on); Instead of the current: smp_call_function() post the invalidate in the mmio region of the device while (mmio device wait-bitflag is on); To decrease the wait loop time. > invalidate_page_before/end could be realized as an > invalidate_range_begin/end on a page sized range? If we go this route, once you add support to xpmem, you'll have to make the anon_vma lock a mutex too, that would be fine with me though. The main reason invalidate_page exists, is to allow you to leave it as non-sleep-capable even after you make invalidate_range sleep capable, and to implement the mmu_rmap_notifiers sleep capable in all the paths that invalidate_page would be called. That was the strategy you had in your patch. I'll try to drop invalidate_page. I wonder if then you won't need the mmu_rmap_notifiers anymore. 
From pawel.dziekonski at wcss.pl Fri Feb 29 05:18:24 2008 From: pawel.dziekonski at wcss.pl (Pawel Dziekonski) Date: Fri, 29 Feb 2008 14:18:24 +0100 Subject: [nfs-rdma-devel] [ofa-general] Status of NFS-RDMA ? (fwd) In-Reply-To: <47C7C261.2060007@hamburgnet.de> References: <20080206154701.GA11384@cefeid.wcss.wroc.pl> <1202320491.14810.29.camel@trinity.ogc.int> <20080206213543.GA21176@cefeid.wcss.wroc.pl> <20080209221711.GB20332@cefeid.wcss.wroc.pl> <47C7C261.2060007@hamburgnet.de> Message-ID: <20080229131824.GB27052@cefeid.wcss.wroc.pl> On Fri, 29 Feb 2008 at 09:29:21AM +0100, Sebastian Schmitzdorff wrote: > hi pawel, > > I was wondering if you have achieved better nfs rdma benchmark results by > now? no :( -- Pawel Dziekonski Wroclaw Centre for Networking & Supercomputing, HPC Department Politechnika Wr., pl. Grunwaldzki 9, bud. D2/101, 50-377 Wroclaw, POLAND phone: +48 71 3202043, fax: +48 71 3225797, http://www.wcss.wroc.pl From sashak at voltaire.com Fri Feb 29 05:38:14 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 29 Feb 2008 13:38:14 +0000 Subject: [ofa-general] Re: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING In-Reply-To: <20080225103143.2b7db98b.weiny2@llnl.gov> References: <20080221165655.0227c88c.weiny2@llnl.gov> <20080224121405.GD3116@sashak.voltaire.com> <20080225103143.2b7db98b.weiny2@llnl.gov> Message-ID: <20080229133814.GA1485@sashak.voltaire.com> On 10:31 Mon 25 Feb , Ira Weiny wrote: > > From 0c578c3062b3183dcd33e89aec0f1eb8a3a3a04e Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 21 Feb 2008 09:10:10 -0800 > Subject: [PATCH] opensm/libvendor/osm_vendor_ibumad.c: Add environment variable control for OSM_UMAD_MAX_PENDING > > Signed-off-by: Ira K. Weiny Applied. Thanks. 
Sasha From pericaecal at mo-sci.com Fri Feb 29 09:34:00 2008 From: pericaecal at mo-sci.com (Socorrito Campbell) Date: Fri, 29 Feb 2008 11:34:00 -0600 Subject: [ofa-general] Adobe Prem!ere Pro cs3 for MAC, XP, Vis+a 79. Retail 721 "save 2843" Message-ID: <000801c87af0$b5268780$0100007f@uvsqytn> Prince Harry is being pulled out of Afghanistan because of security concerns after media outlets revealed he had been deployed there since December with his British army regiment. Britain's Ministry of Defense said Friday that Harry presents a high-value target for the Taliban. " autodesk autocad electrical 2006 - 99 zend studio - 49 symantec norton antivirus 10.1 for mac - 29 microsoft visio 2007 professional - 39 visit "newsoftdeal. com" |n lnternet Explorer Delete " before you visit |n lnternet Explorer mindjet mindmanager pro 7.0 - 39 luxology modo 301 for mac - 129 adobe encore dvd 2 - 49 ulead photoImpact x3 - 29 avid newscutter xp 6.7.2 - 69 adobe encore dvd 2 - 49 adobe audition 2.0 - 49 " Kenyan Pact May Bring an End to Violent Unrest The race for the White House is being closely watched in places outside the U.S. Jesus Esquivel, of Mexico's Proceso political magazine, Suzanne Goldenberg, of the British newspaper The Guardian, and K.P. Nayar, of India's The Telegraph share their readers' sentiments. Turkey Pulls Troops from Iraq Senior Statesmen on Campaign to Abolish Nukes From swise at opengridcomputing.com Fri Feb 29 08:49:28 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 29 Feb 2008 10:49:28 -0600 Subject: [ofa-general] Re: [PATCH] iwch_create_cq off by one error In-Reply-To: <1204158583-22858-1-git-send-email-jon@opengridcomputing.com> References: <1204158583-22858-1-git-send-email-jon@opengridcomputing.com> Message-ID: <47C83798.3090702@opengridcomputing.com> Acked-by: Steve Wise Jon Mason wrote: > The cxbg3 driver is unnecessarily decreasing the number of cq entries > by one when creating a cq. 
This will cause an error of not having as > many cqs as requested by the user, if the user requests a power of 2 > cq length. > > Thanks, > Jon > > Signed-off-by: Jon Mason > --- > drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- > 1 files changed, 1 insertions(+), 1 deletions(-) > > diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c > index f0c7775..800ef6d 100644 > --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c > +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c > @@ -188,7 +188,7 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve > return ERR_PTR(-ENOMEM); > } > chp->rhp = rhp; > - chp->ibcq.cqe = (1 << chp->cq.size_log2) - 1; > + chp->ibcq.cqe = 1 << chp->cq.size_log2; > spin_lock_init(&chp->lock); > atomic_set(&chp->refcnt, 1); > init_waitqueue_head(&chp->wait); From swise at opengridcomputing.com Fri Feb 29 08:49:52 2008 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 29 Feb 2008 10:49:52 -0600 Subject: [ofa-general] Re: [PATCH] update max_inline_data when creating a qp In-Reply-To: <1204158583-22858-2-git-send-email-jon@opengridcomputing.com> References: <1204158583-22858-1-git-send-email-jon@opengridcomputing.com> <1204158583-22858-2-git-send-email-jon@opengridcomputing.com> Message-ID: <47C837B0.2040803@opengridcomputing.com> Acked-by: Steve Wise Jon Mason wrote: > mthca modifies the attributes passed in when creating the qp to have > (amongst other things) the max_inline_data. While cxgb3 does modify > the attributes of some of the parameters, it does not modify the > max_inline_data to be what the adapter supports. cxgb should conform > to the same behavior as mthca (as some user space programs are > expecting this) and return the max_inline_data. 
> > Signed-off-by: Jon Mason > --- > drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 +++ > 1 files changed, 3 insertions(+), 0 deletions(-) > > diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c > index 800ef6d..5f0c9d3 100644 > --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c > +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c > @@ -818,8 +818,11 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, > kfree(qhp); > return ERR_PTR(-ENOMEM); > } > + > attrs->cap.max_recv_wr = rqsize - 1; > attrs->cap.max_send_wr = sqsize; > + attrs->cap.max_inline_data = T3_MAX_INLINE; > + > qhp->rhp = rhp; > qhp->attr.pd = php->pdid; > qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; From sashak at voltaire.com Fri Feb 29 09:21:35 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 29 Feb 2008 17:21:35 +0000 Subject: [ofa-general] Re: [OpenSM] updn routing performance fix??? In-Reply-To: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> References: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> Message-ID: <20080229172135.GD1485@sashak.voltaire.com> Hi Al, On 20:17 Thu 28 Feb , Albert Chu wrote: > > After some investigation, I found out that after the initial heavy sweep > is done, some of the ports on some switches are down (I assume hardware > racing during bringup), and thus opensm does not route through those > ports. When opensm does a heavy resweep later on (I assume b/c some traps > are received when those down ports come up), opensm keeps the same old > forwarding tables from before b/c ignore_existing_lfts is FALSE and b/c > the least hops are the same (other ports on the switch go to the same > parent). Thus, we get healthy ports not forwarding to a parent switch. I see the problem. Actually I think it is even worse - for example if new switch(es) is connected to a fabric routing will not be rebalanced on existing ones. > There are multiple ways to deal with this. 
I made the attached patch > which solved the problem on one of our test clusters. It's pretty simple. > Store all of the "bad ports" that were found during a switch > configuration. During the next heavy resweep, if some of those "bad > ports" are now up, I set ignore_existing_lfts to TRUE for just that > switch, leading to a completely new forwarding table of the switch. Why to not keep is_bad flag on osm_physp_t itself - it would save some comparison loops? Hmm, thinking more about this - currently we are tracking port state migrations to INIT during subnet discovery. It is to keep port tables up to date. I think it could be used for 'ignore_exsting_lfts' update as well. Something like this (not tested): diff --git a/opensm/include/opensm/osm_switch.h b/opensm/include/opensm/osm_switch.h index e2fe86d..567ff6f 100644 --- a/opensm/include/opensm/osm_switch.h +++ b/opensm/include/opensm/osm_switch.h @@ -110,6 +110,7 @@ typedef struct _osm_switch { osm_mcast_tbl_t mcast_tbl; uint32_t discovery_count; unsigned need_update; + unsigned ignore_existing_lfts; void *priv; } osm_switch_t; /* diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c index ecac2a8..a1b547e 100644 --- a/opensm/opensm/osm_port_info_rcv.c +++ b/opensm/opensm/osm_port_info_rcv.c @@ -316,6 +316,9 @@ __osm_pi_rcv_process_switch_port(IN osm_sm_t * sm, if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw) p_node->sw->need_update = 0; + + if (p_physp->need_update) + p_node->sw->ignore_existing_lfts = 1; if (port_num == 0) pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp); diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index 38b2c4e..dec1d0a 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -148,6 +148,7 @@ __osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item, p_sw->discovery_count = 0; p_sw->need_update = 1; + p_sw->ignore_existing_lfts = 0; } 
/********************************************************************** diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c index d74cb6c..67223e5 100644 --- a/opensm/opensm/osm_switch.c +++ b/opensm/opensm/osm_switch.c @@ -101,6 +101,7 @@ osm_switch_init(IN osm_switch_t * const p_sw, p_sw->switch_info = *p_si; p_sw->num_ports = num_ports; p_sw->need_update = 1; + p_sw->ignore_existing_lfts = 1; status = osm_fwd_tbl_init(&p_sw->fwd_tbl, p_si); if (status != IB_SUCCESS) @@ -303,7 +304,7 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw, 3. the physical port has a remote port (the link is up) 4. the port has min-hops to the target (avoid loops) */ - if (!ignore_existing) { + if (!ignore_existing && !p_sw->ignore_existing_lfts) { port_num = osm_fwd_tbl_get(&p_sw->fwd_tbl, lid_ho); if (port_num != OSM_NO_PATH) { Here I added 'ignore_existing_lfts' flag per switch too. What do you think? Regardless to this it also could be useful to add to the console a command to set p_subn->ignore_existing_lfts up manually. > During my performance testing on this patch, performance with a few > mpibench tests is actually worse by a few percent with this patch. I am > only using 120 of 144 nodes on this cluster. It's not a big cluster, has > two levels worth of switches (24 port switches going up to a 288 port > switch. Yup, the cluster is not "filled out" yet :-). So there is some > randomness on which specific nodes run the job and if the lid routing > layout is better/worse for that specific set of nodes. > > Intuitively, we think this will be better as a whole even though my > current testing can't show it. Can you think of anything that would make > this patch worse for performance as a whole? Could you see some side > effect leading to a lot more traffic on the network? Hmm, interesting... Are you running mpibench during heavy sweep? If so could the degradation be due to a fact of path migration and potential packet drops? 
Sasha From chu11 at llnl.gov Fri Feb 29 09:58:23 2008 From: chu11 at llnl.gov (Albert Chu) Date: Fri, 29 Feb 2008 09:58:23 -0800 (PST) Subject: [ofa-general] Re: [OpenSM] updn routing performance fix??? In-Reply-To: <20080229172135.GD1485@sashak.voltaire.com> References: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> <20080229172135.GD1485@sashak.voltaire.com> Message-ID: <50643.128.15.244.112.1204307903.squirrel@127.0.0.1> Hey Sasha, > Why to not keep is_bad flag on osm_physp_t itself - it would save some > comparison loops? Oh, that's a lot simpler :-) > Here I added 'ignore_existing_lfts' flag per switch too. What do you > think? What you're trying to do is calculate "ignore_existing_lfts" when the port trap is received rather than during routing later on? Logically it looks fine. I tried to make a fix from the "trap side" instead of the "routing side" initially too, but I didn't see a clean way to do it (obviously I don't know the code as well). I'll try it out when I get a chance. (FYI, I noticed + if (p_physp->need_update) should probably be: + if (p_physp->need_update && p_node->sw) given the code a few lines above? ) > Regardless to this it also could be useful to add to the console a > command to set p_subn->ignore_existing_lfts up manually. Yeah, like you said above, this would especially be needed when a new switch is added to the network. I'll work with Ira on this. > Hmm, interesting... Are you running mpibench during heavy sweep? If so > could the degradation be due to a fact of path migration and potential > packet drops? Afraid not, it was after the heavy sweeps. I ran opensm in the foreground and saw nothing going on besides the occasional lite sweep. I've seen similar "inconsistencies" on performance when I've run ~120 node jobs on this cluster. So I personally think the tests are due to randomness of the nodes selected. I don't know if anything can be definitive until a 140+ node job is run (which I don't know if I can :-(). 
Thanks, Al > Hi Al, > > On 20:17 Thu 28 Feb , Albert Chu wrote: >> >> After some investigation, I found out that after the initial heavy sweep >> is done, some of the ports on some switches are down (I assume hardware >> racing during bringup), and thus opensm does not route through those >> ports. When opensm does a heavy resweep later on (I assume b/c some >> traps >> are received when those down ports come up), opensm keeps the same old >> forwarding tables from before b/c ignore_existing_lfts is FALSE and b/c >> the least hops are the same (other ports on the switch go to the same >> parent). Thus, we get healthy ports not forwarding to a parent switch. > > I see the problem. Actually I think it is even worse - for example if new > switch(es) is connected to a fabric routing will not be rebalanced on > existing ones. > >> There are multiple ways to deal with this. I made the attached patch >> which solved the problem on one of our test clusters. It's pretty >> simple. >> Store all of the "bad ports" that were found during a switch >> configuration. During the next heavy resweep, if some of those "bad >> ports" are now up, I set ignore_existing_lfts to TRUE for just that >> switch, leading to a completely new forwarding table of the switch. > > Why to not keep is_bad flag on osm_physp_t itself - it would save some > comparison loops? > > Hmm, thinking more about this - currently we are tracking port state > migrations to INIT during subnet discovery. It is to keep port tables > up to date. I think it could be used for 'ignore_exsting_lfts' update as > well. 
Something like this (not tested): > > diff --git a/opensm/include/opensm/osm_switch.h > b/opensm/include/opensm/osm_switch.h > index e2fe86d..567ff6f 100644 > --- a/opensm/include/opensm/osm_switch.h > +++ b/opensm/include/opensm/osm_switch.h > @@ -110,6 +110,7 @@ typedef struct _osm_switch { > osm_mcast_tbl_t mcast_tbl; > uint32_t discovery_count; > unsigned need_update; > + unsigned ignore_existing_lfts; > void *priv; > } osm_switch_t; > /* > diff --git a/opensm/opensm/osm_port_info_rcv.c > b/opensm/opensm/osm_port_info_rcv.c > index ecac2a8..a1b547e 100644 > --- a/opensm/opensm/osm_port_info_rcv.c > +++ b/opensm/opensm/osm_port_info_rcv.c > @@ -316,6 +316,9 @@ __osm_pi_rcv_process_switch_port(IN osm_sm_t * sm, > > if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw) > p_node->sw->need_update = 0; > + > + if (p_physp->need_update) > + p_node->sw->ignore_existing_lfts = 1; > > if (port_num == 0) > pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp); > diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c > index 38b2c4e..dec1d0a 100644 > --- a/opensm/opensm/osm_state_mgr.c > +++ b/opensm/opensm/osm_state_mgr.c > @@ -148,6 +148,7 @@ __osm_state_mgr_reset_switch_count(IN cl_map_item_t * > const p_map_item, > > p_sw->discovery_count = 0; > p_sw->need_update = 1; > + p_sw->ignore_existing_lfts = 0; > } > > /********************************************************************** > diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c > index d74cb6c..67223e5 100644 > --- a/opensm/opensm/osm_switch.c > +++ b/opensm/opensm/osm_switch.c > @@ -101,6 +101,7 @@ osm_switch_init(IN osm_switch_t * const p_sw, > p_sw->switch_info = *p_si; > p_sw->num_ports = num_ports; > p_sw->need_update = 1; > + p_sw->ignore_existing_lfts = 1; > > status = osm_fwd_tbl_init(&p_sw->fwd_tbl, p_si); > if (status != IB_SUCCESS) > @@ -303,7 +304,7 @@ osm_switch_recommend_path(IN const osm_switch_t * > const p_sw, > 3. 
the physical port has a remote port (the link is up) > 4. the port has min-hops to the target (avoid loops) > */ > - if (!ignore_existing) { > + if (!ignore_existing && !p_sw->ignore_existing_lfts) { > port_num = osm_fwd_tbl_get(&p_sw->fwd_tbl, lid_ho); > > if (port_num != OSM_NO_PATH) { > > > Here I added 'ignore_existing_lfts' flag per switch too. What do you > think? > > Regardless to this it also could be useful to add to the console a > command to set p_subn->ignore_existing_lfts up manually. > >> During my performance testing on this patch, performance with a few >> mpibench tests is actually worse by a few percent with this patch. I am >> only using 120 of 144 nodes on this cluster. It's not a big cluster, >> has >> two levels worth of switches (24 port switches going up to a 288 port >> switch. Yup, the cluster is not "filled out" yet :-). So there is some >> randomness on which specific nodes run the job and if the lid routing >> layout is better/worse for that specific set of nodes. >> >> Intuitively, we think this will be better as a whole even though my >> current testing can't show it. Can you think of anything that would >> make >> this patch worse for performance as a whole? Could you see some side >> effect leading to a lot more traffic on the network? > > Hmm, interesting... Are you running mpibench during heavy sweep? If so > could the degradation be due to a fact of path migration and potential > packet drops? > > Sasha > -- Albert Chu chu11 at llnl.gov 925-422-5311 Computer Scientist High Performance Systems Division Lawrence Livermore National Laboratory From hrosenstock at xsigo.com Fri Feb 29 10:10:00 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 29 Feb 2008 10:10:00 -0800 Subject: [ofa-general] Re: [OpenSM] updn routing performance fix??? 
In-Reply-To: <20080229172135.GD1485@sashak.voltaire.com> References: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> <20080229172135.GD1485@sashak.voltaire.com> Message-ID: <1204308601.6469.74.camel@hrosenstock-ws.xsigo.com> Sasha, On Fri, 2008-02-29 at 17:21 +0000, Sasha Khapyorsky wrote: > Hi Al, > > On 20:17 Thu 28 Feb , Albert Chu wrote: > > > > After some investigation, I found out that after the initial heavy sweep > > is done, some of the ports on some switches are down (I assume hardware > > racing during bringup), and thus opensm does not route through those > > ports. When opensm does a heavy resweep later on (I assume b/c some traps > > are received when those down ports come up), opensm keeps the same old > > forwarding tables from before b/c ignore_existing_lfts is FALSE and b/c > > the least hops are the same (other ports on the switch go to the same > > parent). Thus, we get healthy ports not forwarding to a parent switch. > > I see the problem. Actually I think it is even worse - for example if new > switch(es) is connected to a fabric routing will not be rebalanced on > existing ones. Also, would a console command to rebalance make sense ? -- Hal > > There are multiple ways to deal with this. I made the attached patch > > which solved the problem on one of our test clusters. It's pretty simple. > > Store all of the "bad ports" that were found during a switch > > configuration. During the next heavy resweep, if some of those "bad > > ports" are now up, I set ignore_existing_lfts to TRUE for just that > > switch, leading to a completely new forwarding table of the switch. > > Why to not keep is_bad flag on osm_physp_t itself - it would save some > comparison loops? > > Hmm, thinking more about this - currently we are tracking port state > migrations to INIT during subnet discovery. It is to keep port tables > up to date. I think it could be used for 'ignore_exsting_lfts' update as > well. 
Something like this (not tested): > > diff --git a/opensm/include/opensm/osm_switch.h b/opensm/include/opensm/osm_switch.h > index e2fe86d..567ff6f 100644 > --- a/opensm/include/opensm/osm_switch.h > +++ b/opensm/include/opensm/osm_switch.h > @@ -110,6 +110,7 @@ typedef struct _osm_switch { > osm_mcast_tbl_t mcast_tbl; > uint32_t discovery_count; > unsigned need_update; > + unsigned ignore_existing_lfts; > void *priv; > } osm_switch_t; > /* > diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c > index ecac2a8..a1b547e 100644 > --- a/opensm/opensm/osm_port_info_rcv.c > +++ b/opensm/opensm/osm_port_info_rcv.c > @@ -316,6 +316,9 @@ __osm_pi_rcv_process_switch_port(IN osm_sm_t * sm, > > if (ib_port_info_get_port_state(p_pi) > IB_LINK_INIT && p_node->sw) > p_node->sw->need_update = 0; > + > + if (p_physp->need_update) > + p_node->sw->ignore_existing_lfts = 1; > > if (port_num == 0) > pi_rcv_check_and_fix_lid(sm->p_log, p_pi, p_physp); > diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c > index 38b2c4e..dec1d0a 100644 > --- a/opensm/opensm/osm_state_mgr.c > +++ b/opensm/opensm/osm_state_mgr.c > @@ -148,6 +148,7 @@ __osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item, > > p_sw->discovery_count = 0; > p_sw->need_update = 1; > + p_sw->ignore_existing_lfts = 0; > } > > /********************************************************************** > diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c > index d74cb6c..67223e5 100644 > --- a/opensm/opensm/osm_switch.c > +++ b/opensm/opensm/osm_switch.c > @@ -101,6 +101,7 @@ osm_switch_init(IN osm_switch_t * const p_sw, > p_sw->switch_info = *p_si; > p_sw->num_ports = num_ports; > p_sw->need_update = 1; > + p_sw->ignore_existing_lfts = 1; > > status = osm_fwd_tbl_init(&p_sw->fwd_tbl, p_si); > if (status != IB_SUCCESS) > @@ -303,7 +304,7 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw, > 3. 
the physical port has a remote port (the link is up) > 4. the port has min-hops to the target (avoid loops) > */ > - if (!ignore_existing) { > + if (!ignore_existing && !p_sw->ignore_existing_lfts) { > port_num = osm_fwd_tbl_get(&p_sw->fwd_tbl, lid_ho); > > if (port_num != OSM_NO_PATH) { > > > Here I added 'ignore_existing_lfts' flag per switch too. What do you > think? > > Regardless to this it also could be useful to add to the console a > command to set p_subn->ignore_existing_lfts up manually. > > > During my performance testing on this patch, performance with a few > > mpibench tests is actually worse by a few percent with this patch. I am > > only using 120 of 144 nodes on this cluster. It's not a big cluster, has > > two levels worth of switches (24 port switches going up to a 288 port > > switch. Yup, the cluster is not "filled out" yet :-). So there is some > > randomness on which specific nodes run the job and if the lid routing > > layout is better/worse for that specific set of nodes. > > > > Intuitively, we think this will be better as a whole even though my > > current testing can't show it. Can you think of anything that would make > > this patch worse for performance as a whole? Could you see some side > > effect leading to a lot more traffic on the network? > > Hmm, interesting... Are you running mpibench during heavy sweep? If so > could the degradation be due to a fact of path migration and potential > packet drops? 
> > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From arthur.jones at qlogic.com Fri Feb 29 10:13:37 2008 From: arthur.jones at qlogic.com (Arthur Jones) Date: Fri, 29 Feb 2008 10:13:37 -0800 Subject: [ofa-general] [PATCH] IB/ipath - update MAINTAINERS Message-ID: <20080229181336.19479.19182.stgit@eng-46.internal.keyresearch.com> I'll be leaving QLogic soon for another job and Ralph has graciously offered to take over the IPath driver maintainership. Signed-off-by: Arthur Jones Signed-off-by: Ralph Campbell --- MAINTAINERS | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2340cfb..e58a976 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2087,7 +2087,7 @@ L: netdev at vger.kernel.org S: Maintained IPATH DRIVER: -P: Arthur Jones +P: Ralph Campbell M: infinipath at qlogic.com L: general at lists.openfabrics.org T: git git://git.qlogic.com/ipath-linux-2.6 From weiny2 at llnl.gov Fri Feb 29 10:23:03 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Fri, 29 Feb 2008 10:23:03 -0800 Subject: [ofa-general] Re: [PATCH] Fix bug with "-S" option which prevented some formated GUIDs from being found. In-Reply-To: <20080229130123.GG27272@sashak.voltaire.com> References: <20080228151004.651e1b0a.weiny2@llnl.gov> <20080229130123.GG27272@sashak.voltaire.com> Message-ID: <20080229102303.1d85b510.weiny2@llnl.gov> Your right... This does not work. On my test cluster it seemed to work because there is only 1 switch. 
:-( I will redo the fix, Ira On Fri, 29 Feb 2008 13:01:23 +0000 Sasha Khapyorsky wrote: > Hi Ira, > > On 15:10 Thu 28 Feb , Ira Weiny wrote: > > We have found that some tools and log files print guids without leading 0's > > (eg 0x8f10400411f56 vs 0x0008f10400411f56) > > > > The perl script "-S" option were failing without leading 0's. This patch > > formats the guids properly before performing lookups. > > > > Ira > > > > From f1ab294d12569f81913c3c169ecefb003c11f714 Mon Sep 17 00:00:00 2001 > > From: Ira K. Weiny > > Date: Thu, 28 Feb 2008 15:03:23 -0800 > > Subject: [PATCH] Fix bug with "-S" option which prevented some formated GUIDs from being found. > > > > > > Signed-off-by: Ira K. Weiny > > --- > > infiniband-diags/scripts/IBswcountlimits.pm | 17 ++++++++++++++++- > > infiniband-diags/scripts/iblinkinfo.pl | 8 +++++--- > > infiniband-diags/scripts/ibqueryerrors.pl | 4 +++- > > 3 files changed, 24 insertions(+), 5 deletions(-) > > > > diff --git a/infiniband-diags/scripts/IBswcountlimits.pm b/infiniband-diags/scripts/IBswcountlimits.pm > > index bddc421..3fd1efa 100755 > > --- a/infiniband-diags/scripts/IBswcountlimits.pm > > +++ b/infiniband-diags/scripts/IBswcountlimits.pm > > @@ -431,6 +431,21 @@ sub get_num_ports > > } > > > > # ========================================================================= > > +# format_switch_guid(guid) > > +# The diags store the switch guids as strings. This converts the guid supplied > > +# to the correct string format. > > +# eg: 0x0008f10400411f56 and 0x8f10400411f56 Should be equal but the strings > > +# are not. > > +# > > +sub format_switch_guid > > +{ > > + my $guid = $_[0]; > > + my $guid_str = ""; > > + sprintf($guid_str, "0x%0X", $guid); > > + return ($guid_str); > > +} > > It doesn't work for me (I used this with perl -d). This sub returns > nothing, shouldn't this be: $guid_str = sprintf("0x%0X", $guid) ? > Also $guid is string here. 
> > Sasha > > > + > > +# ========================================================================= > > # convert_dr_to_guid(direct_route) > > # > > sub convert_dr_to_guid > > @@ -442,7 +457,7 @@ sub convert_dr_to_guid > > foreach my $line (@lines) { > > if ($line =~ /^PortGuid:\.+(.*)/) { $guid = $1; } > > } > > - return $guid; > > + return format_switch_guid($guid); > > } > > > > # ========================================================================= > > diff --git a/infiniband-diags/scripts/iblinkinfo.pl b/infiniband-diags/scripts/iblinkinfo.pl > > index 195c8cf..93152d5 100755 > > --- a/infiniband-diags/scripts/iblinkinfo.pl > > +++ b/infiniband-diags/scripts/iblinkinfo.pl > > @@ -80,9 +80,11 @@ chomp $argv0; > > > > if (!getopts("hcpldRS:D:C:P:g")) { usage_and_exit $argv0; } > > if (defined $Getopt::Std::opt_h) { usage_and_exit $argv0; } > > -if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } > > -if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } > > -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } > > +if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } > > +if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } > > +if (defined $Getopt::Std::opt_S) { > > + $single_switch = format_switch_guid($Getopt::Std::opt_S); > > +} > > if (defined $Getopt::Std::opt_d) { $only_down_links = $Getopt::Std::opt_d; } > > if (defined $Getopt::Std::opt_l) { $line_mode = $Getopt::Std::opt_l; } > > if (defined $Getopt::Std::opt_p) { $print_add_switch = $Getopt::Std::opt_p; } > > diff --git a/infiniband-diags/scripts/ibqueryerrors.pl b/infiniband-diags/scripts/ibqueryerrors.pl > > index ef61e9b..249fba3 100755 > > --- a/infiniband-diags/scripts/ibqueryerrors.pl > > +++ b/infiniband-diags/scripts/ibqueryerrors.pl > > @@ -171,7 +171,9 @@ if (defined $Getopt::Std::opt_c) { > > if (defined $Getopt::Std::opt_r) { $report_port_info = $Getopt::Std::opt_r; } > > if 
(defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } > > if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } > > -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } > > +if (defined $Getopt::Std::opt_S) { > > + $single_switch = format_switch_guid($Getopt::Std::opt_S); > > +} > > if (defined $Getopt::Std::opt_d) { > > $include_data_counters = $Getopt::Std::opt_d; > > } > > -- > > 1.5.1 From dwdalanm at dalan.com Fri Feb 29 10:42:27 2008 From: dwdalanm at dalan.com (Sonia Roper) Date: Fri, 29 Feb 2008 22:42:27 +0400 Subject: [ofa-general] Prosper with University Degree Message-ID: <926524347.97388086907713@dalan.com> An HTML attachment was scrubbed... URL: From tofucius.com at thedoppkit.com Fri Feb 29 11:08:24 2008 From: tofucius.com at thedoppkit.com (Nickolas Wilson) Date: Fri, 29 Feb 2008 14:08:24 -0500 Subject: [ofa-general] FYI Message-ID: <000401c87b06$405e4800$0100007f@kayhm> See here: http://leonormcelvainnm.blogspot.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From weiny2 at llnl.gov Fri Feb 29 11:25:09 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Fri, 29 Feb 2008 11:25:09 -0800 Subject: [ofa-general] [PATCH] Fix bug which prevented some GUIDs from being found due to formating issues. Message-ID: <20080229112509.64f1bbb4.weiny2@llnl.gov> Correct (hopefully ;-) version of patch. I also found a couple more tools which were affected so they are included in this patch. Ira >From c98dc28daf141781ed0cd15f09c95f9f0eda8eae Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 28 Feb 2008 15:03:23 -0800 Subject: [PATCH] Fix bug which prevented some GUIDs from being found due to formating issues. Signed-off-by: Ira K. 
Weiny --- infiniband-diags/scripts/IBswcountlimits.pm | 16 +++++++++++++++- infiniband-diags/scripts/ibfindnodesusing.pl | 2 +- infiniband-diags/scripts/iblinkinfo.pl | 8 +++++--- infiniband-diags/scripts/ibprintca.pl | 2 +- infiniband-diags/scripts/ibprintrt.pl | 2 +- infiniband-diags/scripts/ibprintswitch.pl | 2 +- infiniband-diags/scripts/ibqueryerrors.pl | 4 +++- 7 files changed, 27 insertions(+), 9 deletions(-) diff --git a/infiniband-diags/scripts/IBswcountlimits.pm b/infiniband-diags/scripts/IBswcountlimits.pm index bddc421..b88867e 100755 --- a/infiniband-diags/scripts/IBswcountlimits.pm +++ b/infiniband-diags/scripts/IBswcountlimits.pm @@ -431,6 +431,20 @@ sub get_num_ports } # ========================================================================= +# format_guid(guid) +# The diags store the guids as strings. This converts the guid supplied +# to the correct string format. +# eg: 0x0008f10400411f56 == 0x8f10400411f56 +# +sub format_guid +{ + my $guid = hex $_[0]; + my $guid_str = ""; + $guid_str = sprintf("0x%016x", $guid); + return ($guid_str); +} + +# ========================================================================= # convert_dr_to_guid(direct_route) # sub convert_dr_to_guid @@ -442,7 +456,7 @@ sub convert_dr_to_guid foreach my $line (@lines) { if ($line =~ /^PortGuid:\.+(.*)/) { $guid = $1; } } - return $guid; + return format_guid($guid); } # ========================================================================= diff --git a/infiniband-diags/scripts/ibfindnodesusing.pl b/infiniband-diags/scripts/ibfindnodesusing.pl index 2521255..1bf0987 100755 --- a/infiniband-diags/scripts/ibfindnodesusing.pl +++ b/infiniband-diags/scripts/ibfindnodesusing.pl @@ -92,7 +92,7 @@ if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } if (defined $Getopt::Std::opt_C) { $ca_name = $Getopt::Std::opt_C; } if (defined $Getopt::Std::opt_P) { $ca_port = $Getopt::Std::opt_P; } -my $target_switch = $ARGV[0]; +my $target_switch = 
format_guid($ARGV[0]); my $target_port = $ARGV[1]; get_link_ends($regenerate_map, $ca_name, $ca_port); diff --git a/infiniband-diags/scripts/iblinkinfo.pl b/infiniband-diags/scripts/iblinkinfo.pl index 195c8cf..b2c90a1 100755 --- a/infiniband-diags/scripts/iblinkinfo.pl +++ b/infiniband-diags/scripts/iblinkinfo.pl @@ -80,9 +80,11 @@ chomp $argv0; if (!getopts("hcpldRS:D:C:P:g")) { usage_and_exit $argv0; } if (defined $Getopt::Std::opt_h) { usage_and_exit $argv0; } -if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } -if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } +if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } +if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } +if (defined $Getopt::Std::opt_S) { + $single_switch = format_guid($Getopt::Std::opt_S); +} if (defined $Getopt::Std::opt_d) { $only_down_links = $Getopt::Std::opt_d; } if (defined $Getopt::Std::opt_l) { $line_mode = $Getopt::Std::opt_l; } if (defined $Getopt::Std::opt_p) { $print_add_switch = $Getopt::Std::opt_p; } diff --git a/infiniband-diags/scripts/ibprintca.pl b/infiniband-diags/scripts/ibprintca.pl index 0c6ca0e..38b4330 100755 --- a/infiniband-diags/scripts/ibprintca.pl +++ b/infiniband-diags/scripts/ibprintca.pl @@ -67,7 +67,7 @@ if (defined $Getopt::Std::opt_l) { $list_hcas = $Getopt::Std::opt_l; } if (defined $Getopt::Std::opt_C) { $ca_name = $Getopt::Std::opt_C; } if (defined $Getopt::Std::opt_P) { $ca_port = $Getopt::Std::opt_P; } -my $target_hca = $ARGV[0]; +my $target_hca = format_guid($ARGV[0]); my $cache_file = get_cache_file($ca_name, $ca_port); diff --git a/infiniband-diags/scripts/ibprintrt.pl b/infiniband-diags/scripts/ibprintrt.pl index 2918dc6..86dcb64 100755 --- a/infiniband-diags/scripts/ibprintrt.pl +++ b/infiniband-diags/scripts/ibprintrt.pl @@ -67,7 +67,7 @@ if (defined $Getopt::Std::opt_l) { $list_rts = 
$Getopt::Std::opt_l; } if (defined $Getopt::Std::opt_C) { $ca_name = $Getopt::Std::opt_C; } if (defined $Getopt::Std::opt_P) { $ca_port = $Getopt::Std::opt_P; } -my $target_rt = $ARGV[0]; +my $target_rt = format_guid($ARGV[0]); my $cache_file = get_cache_file($ca_name, $ca_port); diff --git a/infiniband-diags/scripts/ibprintswitch.pl b/infiniband-diags/scripts/ibprintswitch.pl index 9548619..2dc0040 100755 --- a/infiniband-diags/scripts/ibprintswitch.pl +++ b/infiniband-diags/scripts/ibprintswitch.pl @@ -66,7 +66,7 @@ if (defined $Getopt::Std::opt_l) { $list_switches = $Getopt::Std::opt_l; } if (defined $Getopt::Std::opt_C) { $ca_name = $Getopt::Std::opt_C; } if (defined $Getopt::Std::opt_P) { $ca_port = $Getopt::Std::opt_P; } -my $target_switch = $ARGV[0]; +my $target_switch = format_guid($ARGV[0]); my $cache_file = get_cache_file($ca_name, $ca_port); diff --git a/infiniband-diags/scripts/ibqueryerrors.pl b/infiniband-diags/scripts/ibqueryerrors.pl index ef61e9b..200a40c 100755 --- a/infiniband-diags/scripts/ibqueryerrors.pl +++ b/infiniband-diags/scripts/ibqueryerrors.pl @@ -171,7 +171,9 @@ if (defined $Getopt::Std::opt_c) { if (defined $Getopt::Std::opt_r) { $report_port_info = $Getopt::Std::opt_r; } if (defined $Getopt::Std::opt_R) { $regenerate_map = $Getopt::Std::opt_R; } if (defined $Getopt::Std::opt_D) { $direct_route = $Getopt::Std::opt_D; } -if (defined $Getopt::Std::opt_S) { $single_switch = $Getopt::Std::opt_S; } +if (defined $Getopt::Std::opt_S) { + $single_switch = format_guid($Getopt::Std::opt_S); +} if (defined $Getopt::Std::opt_d) { $include_data_counters = $Getopt::Std::opt_d; } -- 1.5.1 From weiny2 at llnl.gov Fri Feb 29 11:34:58 2008 From: weiny2 at llnl.gov (Ira Weiny) Date: Fri, 29 Feb 2008 11:34:58 -0800 Subject: [ofa-general] [PATCH] infiniband-diags/scripts/ibprintswitch.pl: fix printing of ports Message-ID: <20080229113458.2bf6a194.weiny2@llnl.gov> >From 9bbe965428a44751c0068f536a03b43c723bb57c Mon Sep 17 00:00:00 2001 From: Ira K. 
Weiny Date: Fri, 29 Feb 2008 11:32:06 -0800 Subject: [PATCH] infiniband-diags/scripts/ibprintswitch.pl: fix printing of ports When there was only one switch in the system this would fail to print the switch ports as designed. This fixes this and makes the code consistent with printca and printrt. Signed-off-by: Ira K. Weiny --- infiniband-diags/scripts/ibprintswitch.pl | 9 +++++---- 1 files changed, 5 insertions(+), 4 deletions(-) diff --git a/infiniband-diags/scripts/ibprintswitch.pl b/infiniband-diags/scripts/ibprintswitch.pl index 2dc0040..6712201 100755 --- a/infiniband-diags/scripts/ibprintswitch.pl +++ b/infiniband-diags/scripts/ibprintswitch.pl @@ -97,9 +97,7 @@ sub main my $desc = $2; if ($in_switch eq "yes") { $in_switch = "no"; - foreach my $port (sort { $a <=> $b } (keys %ports)) { - print $ports{$port}; - } + goto DONE; } if ("0x$guid" eq $target_switch || $desc =~ /.*$target_switch.*/) { print $line; @@ -114,7 +112,10 @@ sub main } } - + DONE: + foreach my $port (sort { $a <=> $b } (keys %ports)) { + print $ports{$port}; + } if (!$found_switch) { print "Switch \"$target_switch\" not found\n"; print " Try running with the \"-R\" option.\n"; -- 1.5.1 From clameter at sgi.com Fri Feb 29 11:46:44 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 29 Feb 2008 11:46:44 -0800 (PST) Subject: [ofa-general] Re: [PATCH] mmu notifiers #v7 In-Reply-To: <20080229130905.GS8091@v2.random> References: <20080219231157.GC18912@wotan.suse.de> <20080220010941.GR7128@v2.random> <20080220103942.GU7128@v2.random> <20080221045430.GC15215@wotan.suse.de> <20080221144023.GC9427@v2.random> <20080221161028.GA14220@sgi.com> <20080227192610.GF28483@v2.random> <20080229004001.GN8091@v2.random> <20080229130905.GS8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > On Thu, Feb 28, 2008 at 05:03:01PM -0800, Christoph Lameter wrote: > > I thought you wanted to get rid of the sync via pte lock? > > Sure. 
_notify is happening inside the pt lock by coincidence, to > reduce the changes to mm/* as long as the mmu notifiers aren't > sleep capable. Ok if this is a coincidence then it would be better to separate the notifier callouts from the pte macro calls. From clameter at sgi.com Fri Feb 29 11:55:17 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 29 Feb 2008 11:55:17 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080229131302.GT8091@v2.random> References: <200802201008.49933.nickpiggin@yahoo.com.au> <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > On Thu, Feb 28, 2008 at 04:59:59PM -0800, Christoph Lameter wrote: > > And thus the device driver may stop receiving data on a UP system? It will > > never get the ack. > > Not sure to follow, sorry. > > My idea was: > > post the invalidate in the mmio region of the device > smp_call_function() > while (mmio device wait-bitflag is on); So the device driver on UP can only operate through interrupts? If you are hogging the only cpu then driver operations may not be possible. > > invalidate_page_before/end could be realized as an > > invalidate_range_begin/end on a page sized range? > > If we go this route, once you add support to xpmem, you'll have to > make the anon_vma lock a mutex too, that would be fine with me > though. The main reason invalidate_page exists, is to allow you to > leave it as non-sleep-capable even after you make invalidate_range > sleep capable, and to implement the mmu_rmap_notifiers sleep capable > in all the paths that invalidate_page would be called. That was the > strategy you had in your patch. I'll try to drop invalidate_page. I > wonder if then you won't need the mmu_rmap_notifiers anymore. 
I am mainly concerned with making the mmu notifier a generally useful feature for multiple users. Xpmem is one example of a different user. It should be considered as one example of a different type of callback user. It is not the gold standard that you make it to be. RDMA is another and there are likely scores of others (DMA engines etc) once it becomes clear that such a feature is available. In general the mmu notifier will allows us to fix the problems caused by memory pinning and mlock by various devices and other mechanisms that need to directly access memory. And yes I would like to get rid of the mmu_rmap_notifiers altogether. It would be much cleaner with just one mmu_notifier that can sleep in all functions. From andrea at qumranet.com Fri Feb 29 12:17:44 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 21:17:44 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> Message-ID: <20080229201744.GB8091@v2.random> On Fri, Feb 29, 2008 at 11:55:17AM -0800, Christoph Lameter wrote: > > post the invalidate in the mmio region of the device > > smp_call_function() > > while (mmio device wait-bitflag is on); > > So the device driver on UP can only operate through interrupts? If you are > hogging the only cpu then driver operations may not be possible. There was no irq involved in the above pseudocode, the irq if something would run in the remote system. Still irqs can run fine during the while loop like they run fine on top of smp_call_function. The send-irq and the following spin-on-a-bitflag works exactly as smp_call_function except this isn't a numa-CPU to invalidate. > And yes I would like to get rid of the mmu_rmap_notifiers altogether. 
It > would be much cleaner with just one mmu_notifier that can sleep in all > functions. Agreed. I just thought xpmem needed an invalidate-by-page, but I'm glad if xpmem can go in sync with the KVM/GRU/DRI model in this regard. From TamradiedEsposito at usps.com Fri Feb 29 02:33:24 2008 From: TamradiedEsposito at usps.com (Rosella Esposito) Date: Fri, 29 Feb 2008 15:33:24 +0500 Subject: [ofa-general] No Hassle Business Loans Message-ID: Need A Business Loan? Reach Over 290 Lenders with One Easy Form. 5k-200k For Your Business! http://hillib.com.cn/ -------------- next part -------------- An HTML attachment was scrubbed... URL: From clameter at sgi.com Fri Feb 29 13:03:16 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 29 Feb 2008 13:03:16 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080229201744.GB8091@v2.random> References: <20080228001104.GB8091@v2.random> <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> <20080229201744.GB8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > Agreed. I just thought xpmem needed an invalidate-by-page, but > I'm glad if xpmem can go in sync with the KVM/GRU/DRI model in this > regard. That means we need both the anon_vma locks and the i_mmap_lock to become semaphores. I think semaphores are better than mutexes. Rik and Lee saw some performance improvements because list can be traversed in parallel when the anon_vma lock is switched to be a rw lock. Sounds like we get to a conceptually clean version here? 
From andrea at qumranet.com Fri Feb 29 13:23:27 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 22:23:27 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> <20080229201744.GB8091@v2.random> Message-ID: <20080229212327.GC8091@v2.random> On Fri, Feb 29, 2008 at 01:03:16PM -0800, Christoph Lameter wrote: > That means we need both the anon_vma locks and the i_mmap_lock to become > semaphores. I think semaphores are better than mutexes. Rik and Lee saw > some performance improvements because list can be traversed in parallel > when the anon_vma lock is switched to be a rw lock. The improvement was with a rw spinlock IIRC, so I don't see how it's related to this. Perhaps the rwlock spinlock can be changed to a rw semaphore without measurable overscheduling in the fast path. However theoretically speaking the rw_lock spinlock is more efficient than a rw semaphore in case of a little contention during the page fault fast path because the critical section is just a list_add so it'd be overkill to schedule while waiting. That's why currently it's a spinlock (or rw spinlock). > Sounds like we get to a conceptually clean version here? I don't have a strong opinion if it should become a semaphore unconditionally or only with a CONFIG_XPMEM=y. 
But keep in mind preempt-rt runs quite a bit slower, or we could rip spinlocks out of the kernel in the first place ;) From rdreier at cisco.com Fri Feb 29 13:26:25 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 13:26:25 -0800 Subject: [ofa-general] Re: [PATCH] IB/ipath - update MAINTAINERS In-Reply-To: <20080229181336.19479.19182.stgit@eng-46.internal.keyresearch.com> (Arthur Jones's message of "Fri, 29 Feb 2008 10:13:37 -0800") References: <20080229181336.19479.19182.stgit@eng-46.internal.keyresearch.com> Message-ID: thanks, applied. From rdreier at cisco.com Fri Feb 29 13:28:25 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 13:28:25 -0800 Subject: [ofa-general] [PATCH] for-2.6.25: ib/cm: flush workqueue when removing device In-Reply-To: <000101c87582$6f5ea110$9c98070a@amr.corp.intel.com> (Sean Hefty's message of "Fri, 22 Feb 2008 10:40:45 -0800") References: <000101c87582$6f5ea110$9c98070a@amr.corp.intel.com> Message-ID: thanks, applied From rdreier at cisco.com Fri Feb 29 13:29:27 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 13:29:27 -0800 Subject: [ofa-general] Re: [PATCH 1/2] Revert "IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs" In-Reply-To: <20080226182731.GE7033@osc.edu> (Pete Wyckoff's message of "Tue, 26 Feb 2008 13:27:31 -0500") References: <20080225225330.GA3316@osc.edu> <20080226182655.GD7033@osc.edu> <20080226182731.GE7033@osc.edu> Message-ID: thanks, applied From clameter at sgi.com Fri Feb 29 13:29:22 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 29 Feb 2008 13:29:22 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080229212327.GC8091@v2.random> References: <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> <20080229201744.GB8091@v2.random> <20080229212327.GC8091@v2.random> Message-ID: On 
Fri, 29 Feb 2008, Andrea Arcangeli wrote: > I don't have a strong opinion if it should become a semaphore > unconditionally or only with a CONFIG_XPMEM=y. But keep in mind > preempt-rt runs quite a bit slower, or we could rip spinlocks out of > the kernel in the first place ;) D you just skip comments of people on the mmu_notifier? It took me to remind you about Andrew's comments to note those. And I just responded on the XPmem issue in the morning. Again for the gazillionth time: There will be no CONFIG_XPMEM because the functionality needs to be generic and not XPMEM specific. From rdreier at cisco.com Fri Feb 29 13:32:33 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 13:32:33 -0800 Subject: [ofa-general] [PATCH 2/2] ib fmr pool: flush used clean entries In-Reply-To: <20080226182753.GF7033@osc.edu> (Pete Wyckoff's message of "Tue, 26 Feb 2008 13:27:53 -0500") References: <20080225225330.GA3316@osc.edu> <20080226182655.GD7033@osc.edu> <20080226182753.GF7033@osc.edu> Message-ID: thanks, applied From clameter at sgi.com Fri Feb 29 13:34:34 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 29 Feb 2008 13:34:34 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080229212327.GC8091@v2.random> References: <20080228005249.GF8091@v2.random> <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> <20080229201744.GB8091@v2.random> <20080229212327.GC8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > On Fri, Feb 29, 2008 at 01:03:16PM -0800, Christoph Lameter wrote: > > That means we need both the anon_vma locks and the i_mmap_lock to become > > semaphores. I think semaphores are better than mutexes. Rik and Lee saw > > some performance improvements because list can be traversed in parallel > > when the anon_vma lock is switched to be a rw lock. 
> > The improvement was with a rw spinlock IIRC, so I don't see how it's > related to this. AFAICT The rw semaphore fastpath is similar in performance to a rw spinlock. > Perhaps the rwlock spinlock can be changed to a rw semaphore without > measurable overscheduling in the fast path. However theoretically Overscheduling? You mean overhead? > speaking the rw_lock spinlock is more efficient than a rw semaphore in > case of a little contention during the page fault fast path because > the critical section is just a list_add so it'd be overkill to > schedule while waiting. That's why currently it's a spinlock (or rw > spinlock). On the other hand a semaphore puts the process to sleep and may actually improve performance because there is less time spend in a busy loop. Other processes may do something useful and we stay off the contended cacheline reducing traffic on the interconnect. > preempt-rt runs quite a bit slower, or we could rip spinlocks out of > the kernel in the first place ;) The question is why that is the case and it seesm that there are issues with interrupt on/off that are important here and particularly significant with the SLAB allocator (significant hacks there to deal with that issue). The fastpath that we have in the works for SLUB may address a large part of that issue because it no longer relies on disabling interrupts. 
From andrea at qumranet.com Fri Feb 29 13:48:00 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 22:48:00 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> <20080229201744.GB8091@v2.random> <20080229212327.GC8091@v2.random> Message-ID: <20080229214800.GD8091@v2.random> On Fri, Feb 29, 2008 at 01:34:34PM -0800, Christoph Lameter wrote: > On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > > > On Fri, Feb 29, 2008 at 01:03:16PM -0800, Christoph Lameter wrote: > > > That means we need both the anon_vma locks and the i_mmap_lock to become > > > semaphores. I think semaphores are better than mutexes. Rik and Lee saw > > > some performance improvements because list can be traversed in parallel > > > when the anon_vma lock is switched to be a rw lock. > > > > The improvement was with a rw spinlock IIRC, so I don't see how it's > > related to this. > > AFAICT The rw semaphore fastpath is similar in performance to a rw > spinlock. read side is taken in the slow path. write side is taken in the fast path. pagefault is fast path, VM during swapping is slow path. > > Perhaps the rwlock spinlock can be changed to a rw semaphore without > > measurable overscheduling in the fast path. However theoretically > > Overscheduling? You mean overhead? The only possible overhead that a rw semaphore could ever generate vs a rw lock is overscheduling. > > speaking the rw_lock spinlock is more efficient than a rw semaphore in > > case of a little contention during the page fault fast path because > > the critical section is just a list_add so it'd be overkill to > > schedule while waiting. That's why currently it's a spinlock (or rw > > spinlock). > > On the other hand a semaphore puts the process to sleep and may actually > improve performance because there is less time spend in a busy loop. 
> Other processes may do something useful and we stay off the contended > cacheline reducing traffic on the interconnect. Yes, that's the positive side, the negative side is that you'll put the task in uninterruptible sleep and call schedule() and require a wakeup, because a list_add taking <1usec is running in the other cpu. No other downside. But that's the only reason it's a spinlock right now, infact there can't be any other reason. From rdreier at cisco.com Fri Feb 29 13:53:39 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 13:53:39 -0800 Subject: [ofa-general] Re: [PATCH] update max_inline_data when creating a qp In-Reply-To: <47C837B0.2040803@opengridcomputing.com> (Steve Wise's message of "Fri, 29 Feb 2008 10:49:52 -0600") References: <1204158583-22858-1-git-send-email-jon@opengridcomputing.com> <1204158583-22858-2-git-send-email-jon@opengridcomputing.com> <47C837B0.2040803@opengridcomputing.com> Message-ID: thanks, applied From rdreier at cisco.com Fri Feb 29 14:06:55 2008 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 14:06:55 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get some more small post-2.6.25-rc3 fixes: Arthur Jones (1): MAINTAINERS: update ipath owner Jon Mason (1): RDMA/cxgb3: Return correct max_inline_data when creating a QP Pete Wyckoff (2): Revert "IB/fmr_pool: ib_fmr_pool_flush() should flush all dirty FMRs" IB/fmr_pool: Flush all dirty FMRs from ib_fmr_pool_flush() Sean Hefty (1): IB/cm: Flush workqueue when removing device MAINTAINERS | 2 +- drivers/infiniband/core/cm.c | 3 +- drivers/infiniband/core/fmr_pool.c | 38 +++++++++++++++----------- drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 ++ 4 files changed, 28 
insertions(+), 18 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fed09b5..f229e16 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2143,7 +2143,7 @@ L: netdev at vger.kernel.org S: Maintained IPATH DRIVER: -P: Arthur Jones +P: Ralph Campbell M: infinipath at qlogic.com L: general at lists.openfabrics.org T: git git://git.qlogic.com/ipath-linux-2.6 diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b10ade9..4df4051 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3759,6 +3759,7 @@ static void cm_remove_one(struct ib_device *device) port = cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + flush_workqueue(cm.wq); cm_remove_port_fs(port); } kobject_put(&cm_dev->dev_obj); @@ -3813,6 +3814,7 @@ static void __exit ib_cm_cleanup(void) cancel_delayed_work(&timewait_info->work.work); spin_unlock_irq(&cm.lock); + ib_unregister_client(&cm_client); destroy_workqueue(cm.wq); list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { @@ -3820,7 +3822,6 @@ static void __exit ib_cm_cleanup(void) kfree(timewait_info); } - ib_unregister_client(&cm_client); class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 7f00347..06d502c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -139,7 +139,7 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, static void ib_fmr_batch_release(struct ib_fmr_pool *pool) { int ret; - struct ib_pool_fmr *fmr, *next; + struct ib_pool_fmr *fmr; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); @@ -158,20 +158,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool) #endif } - /* - * The free_list may hold FMRs that have been put there - * because they haven't reached the max_remap count. - * Invalidate their mapping as well. 
- */ - list_for_each_entry_safe(fmr, next, &pool->free_list, list) { - if (fmr->remap_count == 0) - continue; - hlist_del_init(&fmr->cache_node); - fmr->remap_count = 0; - list_add_tail(&fmr->fmr->list, &fmr_list); - list_move(&fmr->list, &unmap_list); - } - list_splice(&pool->dirty_list, &unmap_list); INIT_LIST_HEAD(&pool->dirty_list); pool->dirty_len = 0; @@ -384,6 +370,11 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { + if (fmr->remap_count) { + INIT_LIST_HEAD(&fmr_list); + list_add_tail(&fmr->fmr->list, &fmr_list); + ib_unmap_fmr(&fmr_list); + } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); @@ -407,8 +398,23 @@ EXPORT_SYMBOL(ib_destroy_fmr_pool); */ int ib_flush_fmr_pool(struct ib_fmr_pool *pool) { - int serial = atomic_inc_return(&pool->req_ser); + int serial; + struct ib_pool_fmr *fmr, *next; + + /* + * The free_list holds FMRs that may have been used + * but have not been remapped enough times to be dirty. + * Put them on the dirty list now so that the cleanup + * thread will reap them too. 
+ */ + spin_lock_irq(&pool->pool_lock); + list_for_each_entry_safe(fmr, next, &pool->free_list, list) { + if (fmr->remap_count > 0) + list_move(&fmr->list, &pool->dirty_list); + } + spin_unlock_irq(&pool->pool_lock); + serial = atomic_inc_return(&pool->req_ser); wake_up_process(pool->thread); if (wait_event_interruptible(pool->force_wait, diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index df1838f..ee3d63c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -819,8 +819,11 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, kfree(qhp); return ERR_PTR(-ENOMEM); } + attrs->cap.max_recv_wr = rqsize - 1; attrs->cap.max_send_wr = sqsize; + attrs->cap.max_inline_data = T3_MAX_INLINE; + qhp->rhp = rhp; qhp->attr.pd = php->pdid; qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; From clameter at sgi.com Fri Feb 29 14:12:57 2008 From: clameter at sgi.com (Christoph Lameter) Date: Fri, 29 Feb 2008 14:12:57 -0800 (PST) Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: <20080229214800.GD8091@v2.random> References: <20080228011020.GG8091@v2.random> <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> <20080229201744.GB8091@v2.random> <20080229212327.GC8091@v2.random> <20080229214800.GD8091@v2.random> Message-ID: On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > > AFAICT The rw semaphore fastpath is similar in performance to a rw > > spinlock. > > read side is taken in the slow path. Slowpath meaning VM slowpath or lock slow path? 
It seems that the rwsem read side path is pretty efficient:
From shindigqml04 at venturescfdc.com Fri Feb 29 14:25:33 2008 From: shindigqml04 at venturescfdc.com (Gene Werner) Date: Fri, 29 Feb 2008 16:25:33 -0600 Subject: [ofa-general] Annotation scaling and control of layers by viewport minimize workarounds Message-ID: <01c87aef$b4d9bc80$15d54818@shindigqml04> New Features: http://elisadilgerog.blogspot.com System Requirements • Intel® Pentium® 4 processor, 2.2 GHz Recommended • Microsoft® Windows Vista™, Windows® XP Home and Professional (SP2), Windows® 2000 (SP4) • 512 MB RAM • 1500 MB free disk space for installation • 1024x768 VGA with True Color From andrea at qumranet.com Fri Feb 29 14:41:44 2008 From: andrea at qumranet.com (Andrea Arcangeli) Date: Fri, 29 Feb 2008 23:41:44 +0100 Subject: [ofa-general] Re: [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges In-Reply-To: References: <20080229005530.GO8091@v2.random> <20080229131302.GT8091@v2.random> <20080229201744.GB8091@v2.random> <20080229212327.GC8091@v2.random> <20080229214800.GD8091@v2.random> Message-ID: <20080229224144.GE8091@v2.random> On Fri, Feb 29, 2008 at 02:12:57PM -0800, Christoph Lameter wrote: > On Fri, 29 Feb 2008, Andrea Arcangeli wrote: > > > > AFAICT The rw semaphore fastpath is similar in performance to a rw > > > spinlock. > > > > read side is taken in the slow path. > > Slowpath meaning VM slowpath or lock slow path? Its seems that the rwsem With slow path I meant the VM. Sorry if that was confusing given locks also have fast paths (no contention) and slow paths (contention). > read side path is pretty efficient: Yes. The assembly doesn't worry me at all. > > pagefault is fast path, VM during swapping is slow path. > > Not sure what you are saying here. A pagefault should be considered as a > fast path and swapping is not performance critical? Yes, swapping is I/O bound and it rarely becomes CPU hog in the common case. 
There are corner case workloads (including OOM) where swapping can become cpu bound (that's also where rwlock helps). But certainly the speed of fork() and a page fault, is critical for _everyone_, not just a few workloads and setups. > Ok too many calls to schedule() because the slow path (of the semaphore) > is taken? Yes, that's the only possible worry when converting a spinlock to mutex. > But that is only happening for the contended case. Certainly a spinlock is > better for 2p system but the more processors content for the lock (and > the longer the hold off is, typical for the processors with 4p or 8p or > more) the better a semaphore will work. Sure. That's also why the PT lock switches for >4way compiles. Config option helps to keep the VM optimal for everyone. Here it is possible it won't be necessary but I can't be sure given both i_mmap_lock and anon-vma lock are used in some many places. Some TPC comparison would be nice before making a default switch IMHO. From ostozbbq at tizzi.com Fri Feb 29 14:58:10 2008 From: ostozbbq at tizzi.com (laura) Date: Fri, 29 Feb 2008 14:58:10 -0800 Subject: [ofa-general] is it you? laura here Message-ID: <9614448_@TLZ5407604_@TLZ> Hi It`s laura again. Will you ever contact me? I made those nude pictures especially for you and I wont write to you again! If you wanna see them just drop me a line at: blaura87 at golovaonline.com From sashak at voltaire.com Fri Feb 29 17:46:35 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 1 Mar 2008 01:46:35 +0000 Subject: [ofa-general] Re: [OpenSM] updn routing performance fix??? 
In-Reply-To: <50643.128.15.244.112.1204307903.squirrel@127.0.0.1> References: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> <20080229172135.GD1485@sashak.voltaire.com> <50643.128.15.244.112.1204307903.squirrel@127.0.0.1> Message-ID: <20080301014635.GF1485@sashak.voltaire.com> On 09:58 Fri 29 Feb , Albert Chu wrote: > > What you're trying to do is calculate "ignore_existing_lfts" when the port > trap is received rather than during routing later on? Not trap but when PortInfo is received during subnet discovery phase of sweep (before routing configuration). > Logically it looks > fine. I tried to make a fix from the "trap side" instead of the "routing > side" initially too, but I didn't see a clean way to do it (obviously I > don't know the code as well). I'll try it out when I get a chance. > > (FYI, I noticed > + if (p_physp->need_update) > should probably be: > + if (p_physp->need_update && p_node->sw) > given the code a few lines above? > ) Yeah, this should be similar, but I don't understand yet why p_node->sw check is really needed few lines above - for switches PortInfo is queried only after SwitchInfo receive where p_node->sw is initialized. Probably we can just remove this check here. > > Regardless to this it also could be useful to add to the console a > > command to set p_subn->ignore_existing_lfts up manually. > > Yeah, like you said above, this would especially be needed when a new > switch is added to the network. I'll work with Ira on this. Thanks. > > Hmm, interesting... Are you running mpibench during heavy sweep? If so > > could the degradation be due to a fact of path migration and potential > > packet drops? > > Afraid not, it was after the heavy sweeps. I ran opensm in the foreground > and saw nothing going on besides the occasional lite sweep. > > I've seen similar "inconsistencies" on performance when I've run ~120 node > jobs on this cluster. So I personally think the tests are due to > randomness of the nodes selected. 
I don't know if anything can be > definitive until a 140+ node job is run (which I don't know if I can :-(). Ok. Sasha From sashak at voltaire.com Fri Feb 29 17:47:16 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 1 Mar 2008 01:47:16 +0000 Subject: [ofa-general] Re: [OpenSM] updn routing performance fix??? In-Reply-To: <1204308601.6469.74.camel@hrosenstock-ws.xsigo.com> References: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> <20080229172135.GD1485@sashak.voltaire.com> <1204308601.6469.74.camel@hrosenstock-ws.xsigo.com> Message-ID: <20080301014716.GG1485@sashak.voltaire.com> Hi Hal, On 10:10 Fri 29 Feb , Hal Rosenstock wrote: > > > > I see the problem. Actually I think it is even worse - for example if new > > switch(es) is connected to a fabric routing will not be rebalanced on > > existing ones. > > Also, would a console command to rebalance make sense ? Yes, I think it could be useful. Sasha From sashak at voltaire.com Fri Feb 29 18:04:41 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 1 Mar 2008 02:04:41 +0000 Subject: [ofa-general] Re: [PATCH] Fix bug which prevented some GUIDs from being found due to formating issues. In-Reply-To: <20080229112509.64f1bbb4.weiny2@llnl.gov> References: <20080229112509.64f1bbb4.weiny2@llnl.gov> Message-ID: <20080301020441.GI1485@sashak.voltaire.com> Hi Ira, On 11:25 Fri 29 Feb , Ira Weiny wrote: > > # ========================================================================= > +# format_guid(guid) > +# The diags store the guids as strings. This converts the guid supplied > +# to the correct string format. 
> +# eg: 0x0008f10400411f56 == 0x8f10400411f56 > +# > +sub format_guid > +{ > + my $guid = hex $_[0]; > + my $guid_str = ""; > + $guid_str = sprintf("0x%016x", $guid); > + return ($guid_str); > +} And now I have this on 32-bit machine: $ iblinkinfo.pl -S 0x8f10400410bc0 Integer overflow in hexadecimal number at /usr/lib/perl5/site_perl/5.8.5/i386-linux-thread-multi/IBswcountlimits.pm line 441. Sasha From sashak at voltaire.com Fri Feb 29 18:15:52 2008 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 1 Mar 2008 02:15:52 +0000 Subject: [ofa-general] Re: [PATCH] infiniband-diags/scripts/ibprintswitch.pl: fix printing of ports In-Reply-To: <20080229113458.2bf6a194.weiny2@llnl.gov> References: <20080229113458.2bf6a194.weiny2@llnl.gov> Message-ID: <20080301021552.GJ1485@sashak.voltaire.com> On 11:34 Fri 29 Feb , Ira Weiny wrote: > From 9bbe965428a44751c0068f536a03b43c723bb57c Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Fri, 29 Feb 2008 11:32:06 -0800 > Subject: [PATCH] infiniband-diags/scripts/ibprintswitch.pl: fix printing of ports > > When there was only one switch in the system this would fail to print the > switch ports as designed. This fixes this and makes the code consistent > with printca and printrt. > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From hrosenstock at xsigo.com Fri Feb 29 19:59:47 2008 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Fri, 29 Feb 2008 19:59:47 -0800 Subject: [ofa-general] Re: [OpenSM] updn routing performance fix??? In-Reply-To: <20080301014716.GG1485@sashak.voltaire.com> References: <60144.128.15.244.44.1204258639.squirrel@127.0.0.1> <20080229172135.GD1485@sashak.voltaire.com> <1204308601.6469.74.camel@hrosenstock-ws.xsigo.com> <20080301014716.GG1485@sashak.voltaire.com> Message-ID: <1204343987.6469.140.camel@hrosenstock-ws.xsigo.com> Hi Sasha, On Sat, 2008-03-01 at 01:47 +0000, Sasha Khapyorsky wrote: > Hi Hal, > > On 10:10 Fri 29 Feb , Hal Rosenstock wrote: > > > > > > I see the problem. 
Actually I think it is even worse - for example if new > > > switch(es) is connected to a fabric routing will not be rebalanced on > > > existing ones. > > > > Also, would a console command to rebalance make sense ? > > Yes, I think it could be useful. If that makes sense, then also query commands on this "state" would likely also. -- Hal > > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From rolandd at cisco.com Fri Feb 29 20:26:02 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:02 -0800 Subject: [ofa-general] [RFC PATCH 00/14] RFC drivers/infiniband cleanup patches Message-ID: <20082292026.bTA4eqBwzPpEEWH0@cisco.com> Here is a series of patches that fix up various compiler and sparse warnings for drivers/infiniband. I've provisionally put them in my for-2.6.26 branch for merging during the 2.6.26 merge window; if you see any problems with any of them, please let me know. This series cuts down on the warning noise quite a bit, but there are still some warnings left in case anyone else wants to join the fun. drivers/infiniband/hw/amso1100 and drivers/infiniband/hw/nes especially have a lot of sparse endianness warnings (build with "C=2 CF=-D__CHECK_ENDIAN__" to see them). From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 10/14] RFC IB/mlx4: Endianness annotations In-Reply-To: <20082292026.rdppSkcg8Vk0HOBr@cisco.com> Message-ID: <20082292026.QCkENmMgTjf04vR1@cisco.com> Trivial fixes to stamp_send_wqe(). 
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 958e205..ac965ab 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -122,7 +122,7 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) */ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) { - u32 *wqe; + __be32 *wqe; int i; int s; int ind; @@ -143,7 +143,7 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); for (i = 64; i < s; i += 64) { wqe = buf + i; - *wqe = 0xffffffff; + *wqe = cpu_to_be32(0xffffffff); } } } -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 04/14] RFC RDMA/amso1100: Don't use 0UL as a NULL pointer In-Reply-To: <20082292026.yJnERCjU6ZKdhSki@cisco.com> Message-ID: <20082292026.payxI6A60JrUgBpz@cisco.com> Write tests for NULL pointers as if (!ptr) instead of if (ptr == 0UL) to fix sparse warnings. 
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2.c | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index f283a9f..c50533b 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -1005,7 +1005,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev, /* Remap the adapter PCI registers in BAR4 */ mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, sizeof(struct c2_adapter_pci_regs)); - if (mmio_regs == 0UL) { + if (!mmio_regs) { printk(KERN_ERR PFX "Unable to remap adapter PCI registers in BAR4\n"); ret = -EIO; @@ -1109,7 +1109,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev, /* Remap the adapter HRXDQ PA space to kernel VA space */ c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET, C2_RXP_HRXDQ_SIZE); - if (c2dev->mmio_rxp_ring == 0UL) { + if (!c2dev->mmio_rxp_ring) { printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n"); ret = -EIO; goto bail6; @@ -1118,7 +1118,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev, /* Remap the adapter HTXDQ PA space to kernel VA space */ c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET, C2_TXP_HTXDQ_SIZE); - if (c2dev->mmio_txp_ring == 0UL) { + if (!c2dev->mmio_txp_ring) { printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n"); ret = -EIO; goto bail7; @@ -1129,7 +1129,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev, /* Remap the PCI registers in adapter BAR0 to kernel VA space */ c2dev->regs = ioremap_nocache(reg0_start, reg0_len); - if (c2dev->regs == 0UL) { + if (!c2dev->regs) { printk(KERN_ERR PFX "Unable to remap BAR0\n"); ret = -EIO; goto bail8; @@ -1139,7 +1139,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev, c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET; c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET, kva_map_size); - if (c2dev->kva == 0UL) { + if 
(!c2dev->kva) { printk(KERN_ERR PFX "Unable to remap BAR4\n"); ret = -EIO; goto bail9; -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 07/14] RFC IB/ipath: Fix sparse warning about pointer signedness In-Reply-To: <20082292026.wGjGlyABtVgGoyKV@cisco.com> Message-ID: <20082292026.nsb7AGzfyaeVkEEq@cisco.com> ipath_count_units() wants its third parameter to be a u32 *, so change the declaration of maxofallports in find_best_unit() to be a u32 instead of a signed int. This fixes drivers/infiniband/hw/ipath/ipath_file_ops.c:1654:47: warning: incorrect type in argument 3 (different signedness) drivers/infiniband/hw/ipath/ipath_file_ops.c:1654:47: expected unsigned int [usertype] *maxportsp drivers/infiniband/hw/ipath/ipath_file_ops.c:1654:47: got int * Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_file_ops.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 7e025c8..338733e 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1648,7 +1648,8 @@ static int find_best_unit(struct file *fp, const struct ipath_user_info *uinfo) { int ret = 0, i, prefunit = -1, devmax; - int maxofallports, npresent, nup; + int npresent, nup; + u32 maxofallports; int ndev; devmax = ipath_count_units(&npresent, &nup, &maxofallports); -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 01/14] RFC IB/mthca: Formatting cleanups In-Reply-To: <20082292026.bTA4eqBwzPpEEWH0@cisco.com> Message-ID: <20082292026.N52yKBug5E01ByRs@cisco.com> Fix a few whitespace and other coding style problems. 
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/infiniband/hw/mthca/mthca_dev.h | 10 +++++----- drivers/infiniband/hw/mthca/mthca_eq.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 09a30dd..667c35d 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -219,7 +219,7 @@ static void mthca_cmd_post_dbell(struct mthca_dev *dev, __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | (1 << HCA_E_BIT) | (op_modifier << HCR_OPMOD_SHIFT) | - op), ptr + offs[6]); + op), ptr + offs[6]); wmb(); __raw_writel((__force u32) 0, ptr + offs[7]); wmb(); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7bbdd1f..f82ed83 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -390,11 +390,11 @@ extern void __buggy_use_of_MTHCA_PUT(void); do { \ void *__p = (char *) (source) + (offset); \ switch (sizeof (dest)) { \ - case 1: (dest) = *(u8 *) __p; break; \ - case 2: (dest) = be16_to_cpup(__p); break; \ - case 4: (dest) = be32_to_cpup(__p); break; \ - case 8: (dest) = be64_to_cpup(__p); break; \ - default: __buggy_use_of_MTHCA_GET(); \ + case 1: (dest) = *(u8 *) __p; break; \ + case 2: (dest) = be16_to_cpup(__p); break; \ + case 4: (dest) = be32_to_cpup(__p); break; \ + case 8: (dest) = be64_to_cpup(__p); break; \ + default: __buggy_use_of_MTHCA_GET(); \ } \ } while (0) diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index b60eb5d..8bde7f9 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -232,9 +232,9 @@ static inline struct mthca_eqe *get_eqe(struct 
mthca_eq *eq, u32 entry) return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE; } -static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq) +static inline struct mthca_eqe *next_eqe_sw(struct mthca_eq *eq) { - struct mthca_eqe* eqe; + struct mthca_eqe *eqe; eqe = get_eqe(eq, eq->cons_index); return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe; } diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index acfa41d..8b7e83e 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -125,7 +125,7 @@ static void smp_snoop(struct ib_device *ibdev, event.device = ibdev; event.element.port_num = port_num; - if(pinfo->clientrereg_resv_subnetto & 0x80) + if (pinfo->clientrereg_resv_subnetto & 0x80) event.event = IB_EVENT_CLIENT_REREGISTER; else event.event = IB_EVENT_LID_CHANGE; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 252db08..d7d502d 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -412,7 +412,7 @@ err: if (table->icm[i]) { mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, - &status); + &status); mthca_free_icm(dev, table->icm[i], table->coherent); } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9e491df..ee9bc14 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -60,7 +60,7 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; - struct mthca_dev* mdev = to_mdev(ibdev); + struct mthca_dev *mdev = to_mdev(ibdev); u8 status; -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] 
[RFC PATCH 09/14] RFC RDMA/addr: Endianness annotations In-Reply-To: <20082292026.1yZE1Hi3o3kxPkrE@cisco.com> Message-ID: <20082292026.rdppSkcg8Vk0HOBr@cisco.com> Since def42ff4 ("[IPV4]: Make struct in_addr::s_addr __be32"), s_addr is __be32, so we need to make IPv4 addresses in ib_addr __be32 also. Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index a58ad8a..781ea59 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -154,7 +154,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in) { struct rtable *rt; struct flowi fl; - u32 dst_ip = dst_in->sin_addr.s_addr; + __be32 dst_ip = dst_in->sin_addr.s_addr; memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = dst_ip; @@ -169,8 +169,8 @@ static int addr_resolve_remote(struct sockaddr_in *src_in, struct sockaddr_in *dst_in, struct rdma_dev_addr *addr) { - u32 src_ip = src_in->sin_addr.s_addr; - u32 dst_ip = dst_in->sin_addr.s_addr; + __be32 src_ip = src_in->sin_addr.s_addr; + __be32 dst_ip = dst_in->sin_addr.s_addr; struct flowi fl; struct rtable *rt; struct neighbour *neigh; @@ -257,7 +257,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in, struct rdma_dev_addr *addr) { struct net_device *dev; - u32 src_ip = src_in->sin_addr.s_addr; + __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 03/14] RFC mlx4_core: Move opening brace of function onto a new line In-Reply-To: <20082292026.eEW3LXh3MYv5r7ph@cisco.com> Message-ID: <20082292026.yJnERCjU6ZKdhSki@cisco.com> Signed-off-by: Roland Dreier --- drivers/net/mlx4/cmd.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/drivers/net/mlx4/cmd.c 
b/drivers/net/mlx4/cmd.c index db49051..70dff94 100644 --- a/drivers/net/mlx4/cmd.c +++ b/drivers/net/mlx4/cmd.c @@ -106,7 +106,8 @@ struct mlx4_cmd_context { u16 token; }; -static int mlx4_status_to_errno(u8 status) { +static int mlx4_status_to_errno(u8 status) +{ static const int trans_table[] = { [CMD_STAT_INTERNAL_ERR] = -EIO, [CMD_STAT_BAD_OP] = -EPERM, -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 13/14] RFC RDMA/nes: Trivial endianness annotations In-Reply-To: <20082292026.eijz7K8Loa5odTxG@cisco.com> Message-ID: <20082292026.7sFK72neoXMJFbuC@cisco.com> Fix a couple of htonl() that should really be ntohl(). Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_cm.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 39adb26..d68aa0d 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -394,7 +394,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, } if (type == NES_TIMER_TYPE_SEND) { - new_send->seq_num = htonl(tcp_hdr(skb)->seq); + new_send->seq_num = ntohl(tcp_hdr(skb)->seq); atomic_inc(&new_send->skb->users); ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); @@ -419,7 +419,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } if (type == NES_TIMER_TYPE_RECV) { - new_send->seq_num = htonl(tcp_hdr(skb)->seq); + new_send->seq_num = ntohl(tcp_hdr(skb)->seq); new_send->timetosend = jiffies; spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_add_tail(&new_send->list, &cm_node->recv_list); -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 08/14] 
RFC IB/ipath: Fix sparse warning about shadowed symbol In-Reply-To: <20082292026.nsb7AGzfyaeVkEEq@cisco.com> Message-ID: <20082292026.1yZE1Hi3o3kxPkrE@cisco.com> Fix drivers/infiniband/hw/ipath/ipath_init_chip.c:526:10: warning: symbol 'val' shadows an earlier one drivers/infiniband/hw/ipath/ipath_init_chip.c:473:6: originally declared here by giving the second val a different name. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_init_chip.c | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 4471674..5428aff 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -523,16 +523,16 @@ static void enable_chip(struct ipath_devdata *dd, * initial values of the generation bit correct. */ for (i = 0; i < dd->ipath_pioavregs; i++) { - __le64 val; + __le64 pioavail; /* * Chip Errata bug 6641; even and odd qwords>3 are swapped. */ if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) - val = dd->ipath_pioavailregs_dma[i ^ 1]; + pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; else - val = dd->ipath_pioavailregs_dma[i]; - dd->ipath_pioavailshadow[i] = le64_to_cpu(val); + pioavail = dd->ipath_pioavailregs_dma[i]; + dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); } /* can get counters, stats, etc. */ dd->ipath_flags |= IPATH_PRESENT; -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 06/14] RFC IB: Make struct ib_uobject.id a signed int In-Reply-To: <20082292026.bgCw0jLHUAh50ca1@cisco.com> Message-ID: <20082292026.wGjGlyABtVgGoyKV@cisco.com> IDR IDs are signed, so struct ib_uobject.id should be signed. This avoids some sparse pointer signedness warnings. 
Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 701e7b4..40ff512 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -730,7 +730,7 @@ struct ib_uobject { struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ - u32 id; /* index into kernel idr */ + int id; /* index into kernel idr */ struct kref ref; struct rw_semaphore mutex; /* protects .live */ int live; -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 05/14] RFC RDMA/cxgb3: IDR IDs are signed In-Reply-To: <20082292026.payxI6A60JrUgBpz@cisco.com> Message-ID: <20082292026.bgCw0jLHUAh50ca1@cisco.com> Fix sparse warnings about pointer signedness by using a signed int when calling idr_get_new_above(). Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index caf4e60..9ad9b1e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -147,7 +147,7 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, void *handle, u32 id) { int ret; - u32 newid; + int newid; do { if (!idr_pre_get(idr, GFP_KERNEL)) { -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 14/14] RFC RDMA/nes: Delete unused variables In-Reply-To: <20082292026.7sFK72neoXMJFbuC@cisco.com> Message-ID: <20082292026.yXkK6fEAjkkUN7jE@cisco.com> None of the cqp_reqs_XXX counters were ever used anywhere, and neither was the nics_per_function variable. 
Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 7 ------- drivers/infiniband/hw/nes/nes.h | 9 --------- 2 files changed, 0 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index b2112f5..b23e22a 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -65,7 +65,6 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); int max_mtu = 9000; -int nics_per_function = 1; int interrupt_mod_interval = 0; @@ -96,12 +95,6 @@ LIST_HEAD(nes_adapter_list); LIST_HEAD(nes_dev_list); atomic_t qps_destroyed; -atomic_t cqp_reqs_allocated; -atomic_t cqp_reqs_freed; -atomic_t cqp_reqs_dynallocated; -atomic_t cqp_reqs_dynfreed; -atomic_t cqp_reqs_queued; -atomic_t cqp_reqs_redriven; static void nes_print_macaddr(struct net_device *netdev); static irqreturn_t nes_interrupt(int, void *); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index a48b288..5498b74 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -166,7 +166,6 @@ if (!(expr)) { \ #include "nes_cm.h" extern int max_mtu; -extern int nics_per_function; #define max_frame_len (max_mtu+ETH_HLEN) extern int interrupt_mod_interval; extern int nes_if_count; @@ -219,14 +218,6 @@ extern u32 int_mod_cq_depth_16; extern u32 int_mod_cq_depth_4; extern u32 int_mod_cq_depth_1; -extern atomic_t cqp_reqs_allocated; -extern atomic_t cqp_reqs_freed; -extern atomic_t cqp_reqs_dynallocated; -extern atomic_t cqp_reqs_dynfreed; -extern atomic_t cqp_reqs_queued; -extern atomic_t cqp_reqs_redriven; - - struct nes_device { struct nes_adapter *nesadapter; void __iomem *regs; -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 02/14] RFC IB/mlx4: Convert "if(foo)" to "if (foo)" In-Reply-To: <20082292026.N52yKBug5E01ByRs@cisco.com> Message-ID: 
<20082292026.eEW3LXh3MYv5r7ph@cisco.com> Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 0ed02b7..4c1e72f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -165,7 +165,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) event.device = ibdev; event.element.port_num = port_num; - if(pinfo->clientrereg_resv_subnetto & 0x80) + if (pinfo->clientrereg_resv_subnetto & 0x80) event.event = IB_EVENT_CLIENT_REREGISTER; else event.event = IB_EVENT_LID_CHANGE; -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 12/14] RFC RDMA/cma: Endianness annotations In-Reply-To: <20082292026.LIpsrWh15syGMtf1@cisco.com> Message-ID: <20082292026.eijz7K8Loa5odTxG@cisco.com> Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 20 ++++++++++---------- 1 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 34507da..9b24899 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -169,14 +169,14 @@ union cma_ip_addr { struct in6_addr ip6; struct { __u32 pad[3]; - __u32 addr; + __be32 addr; } ip4; }; struct cma_hdr { u8 cma_version; u8 ip_version; /* IP version: 7:4 */ - __u16 port; + __be16 port; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; }; @@ -186,7 +186,7 @@ struct sdp_hh { u8 sdp_version; /* Major version: 7:4 */ u8 ip_version; /* IP version: 7:4 */ u8 sdp_specific1[10]; - __u16 port; + __be16 port; __u16 sdp_specific2; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; @@ -663,7 +663,7 @@ static inline int cma_any_port(struct sockaddr *addr) } static int cma_get_net_info(void *hdr, enum rdma_port_space ps, - u8 
*ip_ver, __u16 *port, + u8 *ip_ver, __be16 *port, union cma_ip_addr **src, union cma_ip_addr **dst) { switch (ps) { @@ -695,7 +695,7 @@ static int cma_get_net_info(void *hdr, enum rdma_port_space ps, static void cma_save_net_info(struct rdma_addr *addr, struct rdma_addr *listen_addr, - u8 ip_ver, __u16 port, + u8 ip_ver, __be16 port, union cma_ip_addr *src, union cma_ip_addr *dst) { struct sockaddr_in *listen4, *ip4; @@ -996,7 +996,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct rdma_cm_id *id; struct rdma_route *rt; union cma_ip_addr *src, *dst; - __u16 port; + __be16 port; u8 ip_ver; if (cma_get_net_info(ib_event->private_data, listen_id->ps, @@ -1043,7 +1043,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct rdma_id_private *id_priv; struct rdma_cm_id *id; union cma_ip_addr *src, *dst; - __u16 port; + __be16 port; u8 ip_ver; int ret; @@ -1165,7 +1165,7 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, { struct cma_hdr *cma_data, *cma_mask; struct sdp_hh *sdp_data, *sdp_mask; - __u32 ip4_addr; + __be32 ip4_addr; struct in6_addr ip6_addr; memset(compare, 0, sizeof *compare); @@ -1181,12 +1181,12 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, sdp_set_ip_ver(sdp_data, 4); sdp_set_ip_ver(sdp_mask, 0xF); sdp_data->dst_addr.ip4.addr = ip4_addr; - sdp_mask->dst_addr.ip4.addr = ~0; + sdp_mask->dst_addr.ip4.addr = htonl(~0); } else { cma_set_ip_ver(cma_data, 4); cma_set_ip_ver(cma_mask, 0xF); cma_data->dst_addr.ip4.addr = ip4_addr; - cma_mask->dst_addr.ip4.addr = ~0; + cma_mask->dst_addr.ip4.addr = htonl(~0); } break; case AF_INET6: -- 1.5.4.2 From rolandd at cisco.com Fri Feb 29 20:26:03 2008 From: rolandd at cisco.com (Roland Dreier) Date: Fri, 29 Feb 2008 20:26:03 -0800 Subject: [ofa-general] [RFC PATCH 11/14] RFC IB/cm: Endianness annotations In-Reply-To: <20082292026.QCkENmMgTjf04vR1@cisco.com> Message-ID: 
<20082292026.LIpsrWh15syGMtf1@cisco.com> Mostly update the RB tree comparisons to force __be types to normal integers, but the change to cm_format_sidr_req() looks like a real fix: param->path->pkey is already __be16. Signed-off-by: Roland Dreier --- drivers/infiniband/core/cm.c | 65 +++++++++++++++++++++++++++++------------- 1 files changed, 45 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b10ade9..26a6f0c 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -393,7 +393,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv) spin_unlock_irqrestore(&cm.lock, flags); } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); - cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand); + cm_id_priv->id.local_id = (__force __be32) id ^ cm.random_id_operand; return ret; } @@ -467,6 +467,31 @@ static int cm_compare_private_data(u8 *private_data, return memcmp(src, dst_data->data, IB_CM_COMPARE_SIZE); } +/* + * Trivial helpers to strip endian annotation and compare; the + * endianness doesn't actually matter since we just need a stable + * order for the RB tree. 
+ */ +static int be32_lt(__be32 a, __be32 b) +{ + return (__force u32) a < (__force u32) b; +} + +static int be32_gt(__be32 a, __be32 b) +{ + return (__force u32) a > (__force u32) b; +} + +static int be64_lt(__be64 a, __be64 b) +{ + return (__force u64) a < (__force u64) b; +} + +static int be64_gt(__be64 a, __be64 b) +{ + return (__force u64) a > (__force u64) b; +} + static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.listen_service_table.rb_node; @@ -492,9 +517,9 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) link = &(*link)->rb_left; else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) link = &(*link)->rb_right; - else if (service_id < cur_cm_id_priv->id.service_id) + else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_left; - else if (service_id > cur_cm_id_priv->id.service_id) + else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_right; else if (data_cmp < 0) link = &(*link)->rb_left; @@ -527,9 +552,9 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device, node = node->rb_left; else if (device > cm_id_priv->id.device) node = node->rb_right; - else if (service_id < cm_id_priv->id.service_id) + else if (be64_lt(service_id, cm_id_priv->id.service_id)) node = node->rb_left; - else if (service_id > cm_id_priv->id.service_id) + else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; else if (data_cmp < 0) node = node->rb_left; @@ -552,13 +577,13 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_id_node); - if (remote_id < cur_timewait_info->work.remote_id) + if (be32_lt(remote_id, cur_timewait_info->work.remote_id)) link = &(*link)->rb_left; - else if (remote_id > cur_timewait_info->work.remote_id) + else if (be32_gt(remote_id, 
cur_timewait_info->work.remote_id)) link = &(*link)->rb_right; - else if (remote_ca_guid < cur_timewait_info->remote_ca_guid) + else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; - else if (remote_ca_guid > cur_timewait_info->remote_ca_guid) + else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return cur_timewait_info; @@ -578,13 +603,13 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, while (node) { timewait_info = rb_entry(node, struct cm_timewait_info, remote_id_node); - if (remote_id < timewait_info->work.remote_id) + if (be32_lt(remote_id, timewait_info->work.remote_id)) node = node->rb_left; - else if (remote_id > timewait_info->work.remote_id) + else if (be32_gt(remote_id, timewait_info->work.remote_id)) node = node->rb_right; - else if (remote_ca_guid < timewait_info->remote_ca_guid) + else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_left; - else if (remote_ca_guid > timewait_info->remote_ca_guid) + else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_right; else return timewait_info; @@ -605,13 +630,13 @@ static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_qp_node); - if (remote_qpn < cur_timewait_info->remote_qpn) + if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_left; - else if (remote_qpn > cur_timewait_info->remote_qpn) + else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_right; - else if (remote_ca_guid < cur_timewait_info->remote_ca_guid) + else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; - else if (remote_ca_guid > cur_timewait_info->remote_ca_guid) + else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return 
cur_timewait_info; @@ -635,9 +660,9 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, sidr_id_node); - if (remote_id < cur_cm_id_priv->id.remote_id) + if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_left; - else if (remote_id > cur_cm_id_priv->id.remote_id) + else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_right; else { int cmp; @@ -2848,7 +2873,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; - sidr_req_msg->pkey = cpu_to_be16(param->path->pkey); + sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) -- 1.5.4.2