[ofa-general] [RFC 2/2] ib/cm: add basic performance counters
Sean Hefty
sean.hefty at intel.com
Thu Sep 13 10:40:00 PDT 2007
Add performance/debug counters to track sent/received messages, retries,
and duplicates. Counters are tracked per CM message type, per port.
The counters are always enabled, so intrusive state tracking is not done.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
drivers/infiniband/core/cm.c | 87 ++++++++++++++++++++++++++++++++++++++++--
1 files changed, 83 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 2e39236..0cebcb3 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2006 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -78,10 +78,35 @@ static struct ib_cm {
struct workqueue_struct *wq;
} cm;
+/* Counter indexes, ordered by CM attribute ID (attr_id - CM_ATTR_ID_OFFSET) */
+enum {
+ CM_REQ_COUNTER,
+ CM_MRA_COUNTER,
+ CM_REJ_COUNTER,
+ CM_REP_COUNTER,
+ CM_RTU_COUNTER,
+ CM_DREQ_COUNTER,
+ CM_DREP_COUNTER,
+ CM_SIDR_REQ_COUNTER,
+ CM_SIDR_REP_COUNTER,
+ CM_LAP_COUNTER,
+ CM_APR_COUNTER,
+ CM_COUNTERS, /* count of entries above; sizes cm_port.counters[] */
+ CM_ATTR_ID_OFFSET = 0x0010 /* attr_id - this = counter index */
+};
+
+struct cm_counter { /* statistics for one CM message type on one port */
+ atomic_long_t xmit; /* sends incl. retries; added in cm_send_handler */
+ atomic_long_t xmit_retries; /* retries only; added in cm_send_handler */
+ atomic_long_t rcv; /* incremented in cm_recv_handler */
+ atomic_long_t rcv_duplicates; /* incremented by handlers on duplicates */
+};
+
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
u8 port_num;
+ struct cm_counter counters[CM_COUNTERS]; /* indexed by CM_*_COUNTER */
};
struct cm_device {
@@ -1270,6 +1295,8 @@ static void cm_dup_req_handler(struct cm_work *work,
struct ib_mad_send_buf *msg = NULL;
int ret;
+ atomic_long_inc(&work->port->counters[CM_REQ_COUNTER].rcv_duplicates);
+
/* Quick state check to discard duplicate REQs. */
if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
return;
@@ -1616,6 +1643,7 @@ static void cm_dup_rep_handler(struct cm_work *work)
if (!cm_id_priv)
return;
+ atomic_long_inc(&work->port->counters[CM_REP_COUNTER].rcv_duplicates);
ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
if (ret)
goto deref;
@@ -1781,6 +1809,8 @@ static int cm_rtu_handler(struct cm_work *work)
if (cm_id_priv->id.state != IB_CM_REP_SENT &&
cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
spin_unlock_irq(&cm_id_priv->lock);
+ atomic_long_inc(&work->port->counters[CM_RTU_COUNTER].
+ rcv_duplicates);
goto out;
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
@@ -1958,6 +1988,8 @@ static int cm_dreq_handler(struct cm_work *work)
cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
dreq_msg->local_comm_id);
if (!cm_id_priv) {
+ atomic_long_inc(&work->port->counters[CM_DREQ_COUNTER].
+ rcv_duplicates);
cm_issue_drep(work->port, work->mad_recv_wc);
return -EINVAL;
}
@@ -1977,6 +2009,8 @@ static int cm_dreq_handler(struct cm_work *work)
case IB_CM_MRA_REP_RCVD:
break;
case IB_CM_TIMEWAIT:
+ atomic_long_inc(&work->port->counters[CM_DREQ_COUNTER].
+ rcv_duplicates);
if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
goto unlock;
@@ -1988,6 +2022,10 @@ static int cm_dreq_handler(struct cm_work *work)
if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
+ case IB_CM_DREQ_RCVD:
+ atomic_long_inc(&work->port->counters[CM_DREQ_COUNTER].
+ rcv_duplicates);
+ goto unlock;
default:
goto unlock;
}
@@ -2339,10 +2377,19 @@ static int cm_mra_handler(struct cm_work *work)
if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ cm_id_priv->msg, timeout)) {
+ if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ atomic_long_inc(&work->port->counters
+ [CM_MRA_COUNTER].rcv_duplicates);
goto out;
+ }
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
+ case IB_CM_MRA_REQ_RCVD:
+ case IB_CM_MRA_REP_RCVD:
+ atomic_long_inc(&work->port->counters[CM_MRA_COUNTER].
+ rcv_duplicates);
+ /* fall through */
default:
goto out;
}
@@ -2502,6 +2549,8 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_LAP_IDLE:
break;
case IB_CM_MRA_LAP_SENT:
+ atomic_long_inc(&work->port->counters[CM_LAP_COUNTER].
+ rcv_duplicates);
if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
goto unlock;
@@ -2515,6 +2564,10 @@ static int cm_lap_handler(struct cm_work *work)
if (ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
+ case IB_CM_LAP_RCVD:
+ atomic_long_inc(&work->port->counters[CM_LAP_COUNTER].
+ rcv_duplicates);
+ goto unlock;
default:
goto unlock;
}
@@ -2796,6 +2849,8 @@ static int cm_sidr_req_handler(struct cm_work *work)
cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
if (cur_cm_id_priv) {
spin_unlock_irq(&cm.lock);
+ atomic_long_inc(&work->port->counters[CM_SIDR_REQ_COUNTER].
+ rcv_duplicates);
goto out; /* Duplicate message. */
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
@@ -2990,6 +3045,25 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct cm_port *port;
+ u16 attr_index;
+
+ port = mad_agent->context;
+ attr_index = be16_to_cpu(((struct ib_mad_hdr *)
+ msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
+
+ /*
+ * If the send was in response to a received message (context[0] is not
+ * set to a cm_id), and is not a REJ, then it is a send that was
+ * manually retried.
+ */
+ if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+ msg->retries = 1;
+
+ atomic_long_add(1 + msg->retries, &port->counters[attr_index].xmit);
+ if (msg->retries)
+ atomic_long_add(msg->retries,
+ &port->counters[attr_index].xmit_retries);
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
@@ -3148,8 +3222,10 @@ EXPORT_SYMBOL(ib_cm_notify);
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
+ struct cm_port *port = mad_agent->context;
struct cm_work *work;
enum ib_cm_event_type event;
+ u16 attr_id;
int paths = 0;
switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
@@ -3194,6 +3270,9 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
return;
}
+ attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
+ atomic_long_inc(&port->counters[attr_id - CM_ATTR_ID_OFFSET].rcv);
+
work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
GFP_KERNEL);
if (!work) {
@@ -3204,7 +3283,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
INIT_DELAYED_WORK(&work->work, cm_work_handler);
work->cm_event.event = event;
work->mad_recv_wc = mad_recv_wc;
- work->port = (struct cm_port *)mad_agent->context;
+ work->port = port;
queue_delayed_work(cm.wq, &work->work, 0);
}
@@ -3397,7 +3476,7 @@ static void cm_add_one(struct ib_device *device)
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
return;
- cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
+ cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
device->phys_port_cnt, GFP_KERNEL);
if (!cm_dev)
return;
More information about the general mailing list