[ofa-general] [RFC 2/2] ib/cm: add basic performance counters

Sean Hefty sean.hefty at intel.com
Thu Sep 13 10:40:00 PDT 2007


Add performance/debug counters to track sent/received messages, retries,
and duplicates.  Counters are tracked per CM message type, per port.

The counters are always enabled, so intrusive state tracking is avoided to keep
the overhead low; each counter update is a single atomic operation.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
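
Note on the counter layout: the counters sit in a per-port array with one
slot per CM message type, ordered by MAD attribute ID.  CM attribute IDs run
contiguously from 0x0010 (REQ) through 0x001a (APR), so the send and receive
handlers index the array directly with attr_id - CM_ATTR_ID_OFFSET.  A
minimal userspace sketch of that mapping, mirroring the enum added below
(illustrative only, not part of the patch):

	#include <stdint.h>

	#define CM_ATTR_ID_OFFSET	0x0010	/* REQ; IDs run through APR (0x001a) */

	/*
	 * Illustrative helper (not in the kernel): counter slot for a CM MAD,
	 * matching what cm_send_handler()/cm_recv_handler() compute inline.
	 */
	static inline unsigned int cm_counter_index(uint16_t attr_id)
	{
		return attr_id - CM_ATTR_ID_OFFSET;	/* e.g. REP (0x0013) -> CM_REP_COUNTER (3) */
	}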

 drivers/infiniband/core/cm.c |   87 ++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 83 insertions(+), 4 deletions(-)
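
Note on the send-side accounting in cm_send_handler(): msg->retries holds the
number of times the MAD layer resent a request (reported by patch 1/2 of this
series), so xmit is bumped by 1 + retries (every MAD that went on the wire)
and xmit_retries by retries alone.  Sends without a cm_id in context[0] are
responses regenerated because the peer re-sent its message, so they are
charged as one retry; REJ is excluded since cm_issue_rej() also sends
first-time REJs without a cm_id.  As a worked example (values assumed): a REQ
that completes after two MAD-layer resends accounts as

	counters[CM_REQ_COUNTER].xmit         += 3;	/* 1 original + 2 resends */
	counters[CM_REQ_COUNTER].xmit_retries += 2;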

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2e39236..0cebcb3 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -78,10 +78,35 @@ static struct ib_cm {
 	struct workqueue_struct *wq;
 } cm;
 
+/* Counter indexes ordered by attribute ID */
+enum {
+	CM_REQ_COUNTER,
+	CM_MRA_COUNTER,
+	CM_REJ_COUNTER,
+	CM_REP_COUNTER,
+	CM_RTU_COUNTER,
+	CM_DREQ_COUNTER,
+	CM_DREP_COUNTER,
+	CM_SIDR_REQ_COUNTER,
+	CM_SIDR_REP_COUNTER,
+	CM_LAP_COUNTER,
+	CM_APR_COUNTER,
+	CM_COUNTERS,
+	CM_ATTR_ID_OFFSET = 0x0010
+};
+
+struct cm_counter {
+	atomic_long_t xmit;
+	atomic_long_t xmit_retries;
+	atomic_long_t rcv;
+	atomic_long_t rcv_duplicates;
+};
+
 struct cm_port {
 	struct cm_device *cm_dev;
 	struct ib_mad_agent *mad_agent;
 	u8 port_num;
+	struct cm_counter counters[CM_COUNTERS];
 };
 
 struct cm_device {
@@ -1270,6 +1295,8 @@ static void cm_dup_req_handler(struct cm_work *work,
 	struct ib_mad_send_buf *msg = NULL;
 	int ret;
 
+	atomic_long_inc(&work->port->counters[CM_REQ_COUNTER].rcv_duplicates);
+
 	/* Quick state check to discard duplicate REQs. */
 	if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
 		return;
@@ -1616,6 +1643,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
 	if (!cm_id_priv)
 		return;
 
+	atomic_long_inc(&work->port->counters[CM_REP_COUNTER].rcv_duplicates);
 	ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
 	if (ret)
 		goto deref;
@@ -1781,6 +1809,8 @@ static int cm_rtu_handler(struct cm_work *work)
 	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
 	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
 		spin_unlock_irq(&cm_id_priv->lock);
+		atomic_long_inc(&work->port->counters[CM_RTU_COUNTER].
+				rcv_duplicates);
 		goto out;
 	}
 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
@@ -1958,6 +1988,8 @@ static int cm_dreq_handler(struct cm_work *work)
 	cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
 				   dreq_msg->local_comm_id);
 	if (!cm_id_priv) {
+		atomic_long_inc(&work->port->counters[CM_DREQ_COUNTER].
+				rcv_duplicates);
 		cm_issue_drep(work->port, work->mad_recv_wc);
 		return -EINVAL;
 	}
@@ -1977,6 +2009,8 @@ static int cm_dreq_handler(struct cm_work *work)
 	case IB_CM_MRA_REP_RCVD:
 		break;
 	case IB_CM_TIMEWAIT:
+		atomic_long_inc(&work->port->counters[CM_DREQ_COUNTER].
+				rcv_duplicates);
 		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
 			goto unlock;
 
@@ -1988,6 +2022,10 @@ static int cm_dreq_handler(struct cm_work *work)
 		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
 		goto deref;
+	case IB_CM_DREQ_RCVD:
+		atomic_long_inc(&work->port->counters[CM_DREQ_COUNTER].
+				rcv_duplicates);
+		goto unlock;
 	default:
 		goto unlock;
 	}
@@ -2339,10 +2377,19 @@ static int cm_mra_handler(struct cm_work *work)
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  cm_id_priv->msg, timeout))
+				  cm_id_priv->msg, timeout)) {
+			if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+				atomic_long_inc(&work->port->counters
+						[CM_MRA_COUNTER].rcv_duplicates);
 			goto out;
+		}
 		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
 		break;
+	case IB_CM_MRA_REQ_RCVD:
+	case IB_CM_MRA_REP_RCVD:
+		atomic_long_inc(&work->port->counters[CM_MRA_COUNTER].
+				rcv_duplicates);
+		/* fall through */
 	default:
 		goto out;
 	}
@@ -2502,6 +2549,8 @@ static int cm_lap_handler(struct cm_work *work)
 	case IB_CM_LAP_IDLE:
 		break;
 	case IB_CM_MRA_LAP_SENT:
+		atomic_long_inc(&work->port->counters[CM_LAP_COUNTER].
+				rcv_duplicates);
 		if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
 			goto unlock;
 
@@ -2515,6 +2564,10 @@ static int cm_lap_handler(struct cm_work *work)
 		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
 		goto deref;
+	case IB_CM_LAP_RCVD:
+		atomic_long_inc(&work->port->counters[CM_LAP_COUNTER].
+				rcv_duplicates);
+		goto unlock;
 	default:
 		goto unlock;
 	}
@@ -2796,6 +2849,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
 	if (cur_cm_id_priv) {
 		spin_unlock_irq(&cm.lock);
+		atomic_long_inc(&work->port->counters[CM_SIDR_REQ_COUNTER].
+				rcv_duplicates);
 		goto out; /* Duplicate message. */
 	}
 	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
@@ -2990,6 +3045,25 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
 			    struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+	struct cm_port *port;
+	u16 attr_index;
+
+	port = mad_agent->context;
+	attr_index = be16_to_cpu(((struct ib_mad_hdr *)
+				  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
+
+	/*
+	 * If the send was in response to a received message (context[0] is not
+	 * set to a cm_id), and is not a REJ, then it is a send that was
+	 * manually retried.
+	 */
+	if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+		msg->retries = 1;
+
+	atomic_long_add(1 + msg->retries, &port->counters[attr_index].xmit);
+	if (msg->retries)
+		atomic_long_add(msg->retries,
+				&port->counters[attr_index].xmit_retries);
 
 	switch (mad_send_wc->status) {
 	case IB_WC_SUCCESS:
@@ -3148,8 +3222,10 @@ EXPORT_SYMBOL(ib_cm_notify);
 static void cm_recv_handler(struct ib_mad_agent *mad_agent,
 			    struct ib_mad_recv_wc *mad_recv_wc)
 {
+	struct cm_port *port = mad_agent->context;
 	struct cm_work *work;
 	enum ib_cm_event_type event;
+	u16 attr_id;
 	int paths = 0;
 
 	switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
@@ -3194,6 +3270,9 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
 		return;
 	}
 
+	attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
+	atomic_long_inc(&port->counters[attr_id - CM_ATTR_ID_OFFSET].rcv);
+
 	work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
 		       GFP_KERNEL);
 	if (!work) {
@@ -3204,7 +3283,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
 	INIT_DELAYED_WORK(&work->work, cm_work_handler);
 	work->cm_event.event = event;
 	work->mad_recv_wc = mad_recv_wc;
-	work->port = (struct cm_port *)mad_agent->context;
+	work->port = port;
 	queue_delayed_work(cm.wq, &work->work, 0);
 }
 
@@ -3397,7 +3476,7 @@ static void cm_add_one(struct ib_device *device)
 	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;
 
-	cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
+	cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
 			 device->phys_port_cnt, GFP_KERNEL);
 	if (!cm_dev)
 		return;



