[ofa-general] [PATCH] infiniband-diags/libibnetdisc: remove all IBPANIC's and clean up error handling
Ira Weiny
weiny2 at llnl.gov
Wed Aug 12 16:53:20 PDT 2009
This patch applies after:
libibnetdisc: fix potential memory leak of port object
Which I sent last week but I don't think has made it up stream.
Ira
From: Ira Weiny <weiny2 at llnl.gov>
Date: Wed, 12 Aug 2009 16:13:56 -0700
Subject: [PATCH] infiniband-diags/libibnetdisc: remove all IBPANIC's and clean up error handling
Signed-off-by: Ira Weiny <weiny2 at llnl.gov>
---
infiniband-diags/libibnetdisc/src/chassis.c | 124 ++++++++++++++++---------
infiniband-diags/libibnetdisc/src/chassis.h | 2 +-
infiniband-diags/libibnetdisc/src/ibnetdisc.c | 75 ++++++++++-----
3 files changed, 132 insertions(+), 69 deletions(-)
diff --git a/infiniband-diags/libibnetdisc/src/chassis.c b/infiniband-diags/libibnetdisc/src/chassis.c
index 76a02a6..efa4ed5 100644
--- a/infiniband-diags/libibnetdisc/src/chassis.c
+++ b/infiniband-diags/libibnetdisc/src/chassis.c
@@ -323,7 +323,7 @@ char spine4_slot_2_slb[25] = { 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 0, 0
char anafa_spine4_slot_2_slb[25] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
/* reference { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }; */
-static void get_sfb_slot(struct ibnd_node *node, ibnd_port_t *lineport)
+static int get_sfb_slot(struct ibnd_node *node, ibnd_port_t *lineport)
{
ibnd_node_t *n = (ibnd_node_t *)node;
@@ -345,12 +345,14 @@ static void get_sfb_slot(struct ibnd_node *node, ibnd_port_t *lineport)
n->ch_slotnum = spine4_slot_2_slb[lineport->portnum];
n->ch_anafanum = anafa_spine4_slot_2_slb[lineport->portnum];
} else {
- IBPANIC("Unexpected node found: guid 0x%016" PRIx64,
- node->node.guid);
+ IBND_ERROR("Unexpected node found: guid 0x%016" PRIx64,
+ node->node.guid);
+ return (-1);
}
+ return (0);
}
-static void get_router_slot(struct ibnd_node *node, ibnd_port_t *spineport)
+static int get_router_slot(struct ibnd_node *node, ibnd_port_t *spineport)
{
ibnd_node_t *n = (ibnd_node_t *)node;
uint64_t guessnum = 0;
@@ -385,12 +387,14 @@ static void get_router_slot(struct ibnd_node *node, ibnd_port_t *spineport)
n->ch_slotnum = line_slot_2_sfb4[spineport->portnum];
n->ch_anafanum = ipr_slot_2_sfb4_port[spineport->portnum];
} else {
- IBPANIC("Unexpected node found: guid 0x%016" PRIx64,
- spineport->node->guid);
+ IBND_ERROR("Unexpected node found: guid 0x%016" PRIx64,
+ spineport->node->guid);
+ return (-1);
}
+ return (0);
}
-static void get_slb_slot(ibnd_node_t *n, ibnd_port_t *spineport)
+static int get_slb_slot(ibnd_node_t *n, ibnd_port_t *spineport)
{
n->ch_slot = LINE_CS;
if (is_spine_9096(CONV_NODE_INTERNAL(spineport->node))) {
@@ -410,9 +414,11 @@ static void get_slb_slot(ibnd_node_t *n, ibnd_port_t *spineport)
n->ch_slotnum = line_slot_2_sfb4[spineport->portnum];
n->ch_anafanum = anafa_line_slot_2_sfb4[spineport->portnum];
} else {
- IBPANIC("Unexpected node found: guid 0x%016" PRIx64,
- spineport->node->guid);
+ IBND_ERROR("Unexpected node found: guid 0x%016" PRIx64,
+ spineport->node->guid);
+ return (-1);
}
+ return (0);
}
/* forward declare this */
@@ -422,7 +428,7 @@ static void voltaire_portmap(ibnd_port_t *port);
It could be optimized so, but time overhead is very small
and its only diag.util
*/
-static void fill_voltaire_chassis_record(struct ibnd_node *node)
+static int fill_voltaire_chassis_record(struct ibnd_node *node)
{
ibnd_node_t *n = (ibnd_node_t *)node;
int p = 0;
@@ -430,7 +436,7 @@ static void fill_voltaire_chassis_record(struct ibnd_node *node)
struct ibnd_node *remnode = 0;
if (node->ch_found) /* somehow this node has already been passed */
- return;
+ return (0);
node->ch_found = 1;
/* node is router only in case of using unique lid */
@@ -456,7 +462,8 @@ static void fill_voltaire_chassis_record(struct ibnd_node *node)
}
if (!n->ch_type)
/* we assume here that remoteport belongs to line */
- get_sfb_slot(node, port->remoteport);
+ if (get_sfb_slot(node, port->remoteport))
+ return (-1);
/* we could break here, but need to find if more routers connected */
}
@@ -467,7 +474,8 @@ static void fill_voltaire_chassis_record(struct ibnd_node *node)
if (!port || port->portnum > 12 || !port->remoteport)
continue;
/* we assume here that remoteport belongs to spine */
- get_slb_slot(n, port->remoteport);
+ if (get_slb_slot(n, port->remoteport))
+ return (-1);
break;
}
}
@@ -480,15 +488,17 @@ static void fill_voltaire_chassis_record(struct ibnd_node *node)
voltaire_portmap(port);
}
- return;
+ return (0);
}
static int get_line_index(ibnd_node_t *node)
{
int retval = 3 * (node->ch_slotnum - 1) + node->ch_anafanum;
- if (retval > LINES_MAX_NUM || retval < 1)
- IBPANIC("Internal error");
+ if (retval > LINES_MAX_NUM || retval < 1) {
+ IBND_ERROR("Internal error\n");
+ return (-1);
+ }
return retval;
}
@@ -501,34 +511,44 @@ static int get_spine_index(ibnd_node_t *node)
else
retval = node->ch_slotnum;
- if (retval > SPINES_MAX_NUM || retval < 1)
- IBPANIC("Internal error");
+ if (retval > SPINES_MAX_NUM || retval < 1) {
+ IBND_ERROR("Internal error\n");
+ return (-1);
+ }
return retval;
}
-static void insert_line_router(ibnd_node_t *node, ibnd_chassis_t *chassis)
+static int insert_line_router(ibnd_node_t *node, ibnd_chassis_t *chassis)
{
int i = get_line_index(node);
+ if (i < 0)
+ return (i);
+
if (chassis->linenode[i])
- return; /* already filled slot */
+ return (0); /* already filled slot */
chassis->linenode[i] = node;
node->chassis = chassis;
+ return (0);
}
-static void insert_spine(ibnd_node_t *node, ibnd_chassis_t *chassis)
+static int insert_spine(ibnd_node_t *node, ibnd_chassis_t *chassis)
{
int i = get_spine_index(node);
+ if (i < 0)
+ return (i);
+
if (chassis->spinenode[i])
- return; /* already filled slot */
+ return (0); /* already filled slot */
chassis->spinenode[i] = node;
node->chassis = chassis;
+ return (0);
}
-static void pass_on_lines_catch_spines(ibnd_chassis_t *chassis)
+static int pass_on_lines_catch_spines(ibnd_chassis_t *chassis)
{
ibnd_node_t *node, *remnode;
ibnd_port_t *port;
@@ -549,12 +569,14 @@ static void pass_on_lines_catch_spines(ibnd_chassis_t *chassis)
if (!CONV_NODE_INTERNAL(remnode)->ch_found)
continue; /* some error - spine not initialized ? FIXME */
- insert_spine(remnode, chassis);
+ if (insert_spine(remnode, chassis))
+ return (-1);
}
}
+ return (0);
}
-static void pass_on_spines_catch_lines(ibnd_chassis_t *chassis)
+static int pass_on_spines_catch_lines(ibnd_chassis_t *chassis)
{
ibnd_node_t *node, *remnode;
ibnd_port_t *port;
@@ -572,9 +594,11 @@ static void pass_on_spines_catch_lines(ibnd_chassis_t *chassis)
if (!CONV_NODE_INTERNAL(remnode)->ch_found)
continue; /* some error - line/router not initialized ? FIXME */
- insert_line_router(remnode, chassis);
+ if (insert_line_router(remnode, chassis))
+ return (-1);
}
}
+ return (0);
}
/*
@@ -602,14 +626,15 @@ static void pass_on_spines_interpolate_chguid(ibnd_chassis_t *chassis)
in that chassis
chassis structure = structure of one standalone chassis
*/
-static void build_chassis(struct ibnd_node *node, ibnd_chassis_t *chassis)
+static int build_chassis(struct ibnd_node *node, ibnd_chassis_t *chassis)
{
int p = 0;
struct ibnd_node *remnode = 0;
ibnd_port_t *port = 0;
/* we get here with node = chassis_spine */
- insert_spine((ibnd_node_t *)node, chassis);
+ if (insert_spine((ibnd_node_t *)node, chassis))
+ return (-1);
/* loop: pass on all ports of node */
for (p = 1; p <= node->node.numports; p++ ) {
@@ -624,17 +649,23 @@ static void build_chassis(struct ibnd_node *node, ibnd_chassis_t *chassis)
insert_line_router(&(remnode->node), chassis);
}
- pass_on_lines_catch_spines(chassis);
+ if (pass_on_lines_catch_spines(chassis))
+ return (-1);
/* this pass needed for to catch routers, since routers connected only */
/* to spines in slot 1 or 4 and we could miss them first time */
- pass_on_spines_catch_lines(chassis);
+ if (pass_on_spines_catch_lines(chassis))
+ return (-1);
/* additional 2 passes needed for to overcome a problem of pure "in-chassis" */
/* connectivity - extra pass to ensure that all related chips/modules */
/* inserted into the chassis */
- pass_on_lines_catch_spines(chassis);
- pass_on_spines_catch_lines(chassis);
+ if (pass_on_lines_catch_spines(chassis))
+ return (-1);
+ if (pass_on_spines_catch_lines(chassis))
+ return (-1);
pass_on_spines_interpolate_chguid(chassis);
+
+ return (0);
}
/*========================================================*/
@@ -724,10 +755,12 @@ voltaire_portmap(ibnd_port_t *port)
port->ext_portnum = int2ext_map_slb8[chipnum][portnum];
}
-static void add_chassis(struct ibnd_fabric *fabric)
+static int add_chassis(struct ibnd_fabric *fabric)
{
- if (!(fabric->current_chassis = calloc(1, sizeof(ibnd_chassis_t))))
- IBPANIC("out of mem");
+ if (!(fabric->current_chassis = calloc(1, sizeof(ibnd_chassis_t)))) {
+ IBND_ERROR("OOM: failed to allocate chassis object\n");
+ return (-1);
+ }
if (fabric->first_chassis == NULL) {
fabric->first_chassis = fabric->current_chassis;
@@ -736,6 +769,7 @@ static void add_chassis(struct ibnd_fabric *fabric)
fabric->last_chassis->next = fabric->current_chassis;
fabric->last_chassis = fabric->current_chassis;
}
+ return (0);
}
static void
@@ -756,10 +790,9 @@ add_node_to_chassis(ibnd_chassis_t *chassis, ibnd_node_t *node)
3. pass on non Voltaire nodes (SystemImageGUID based grouping)
4. now group non Voltaire nodes by SystemImageGUID
Returns:
- Pointer to the first chassis in a NULL terminated list of chassis in
- the fabric specified.
+ 0 on success, -1 on failure
*/
-ibnd_chassis_t *group_nodes(struct ibnd_fabric *fabric)
+int group_nodes(struct ibnd_fabric *fabric)
{
struct ibnd_node *node;
int dist;
@@ -776,7 +809,8 @@ ibnd_chassis_t *group_nodes(struct ibnd_fabric *fabric)
for (dist = 0; dist <= fabric->fabric.maxhops_discovered; dist++) {
for (node = fabric->nodesdist[dist]; node; node = node->dnext) {
if (mad_get_field(node->node.info, 0, IB_NODE_VENDORID_F) == VTR_VENDOR_ID)
- fill_voltaire_chassis_record(node);
+ if (fill_voltaire_chassis_record(node))
+ return (-1);
}
}
@@ -791,9 +825,11 @@ ibnd_chassis_t *group_nodes(struct ibnd_fabric *fabric)
|| (node->node.chassis && node->node.chassis->chassisnum)
|| !is_spine(node))
continue;
- add_chassis(fabric);
+ if (add_chassis(fabric))
+ return (-1);
fabric->current_chassis->chassisnum = ++chassisnum;
- build_chassis(node, fabric->current_chassis);
+ if (build_chassis(node, fabric->current_chassis))
+ return (-1);
}
}
@@ -809,7 +845,8 @@ ibnd_chassis_t *group_nodes(struct ibnd_fabric *fabric)
chassis->nodecount++;
else {
/* Possible new chassis */
- add_chassis(fabric);
+ if (add_chassis(fabric))
+ return (-1);
fabric->current_chassis->chassisguid =
get_chassisguid((ibnd_node_t *)node);
fabric->current_chassis->nodecount = 1;
@@ -842,5 +879,6 @@ ibnd_chassis_t *group_nodes(struct ibnd_fabric *fabric)
dist++;
}
- return (fabric->first_chassis);
+ fabric->fabric.chassis = fabric->first_chassis;
+ return (0);
}
diff --git a/infiniband-diags/libibnetdisc/src/chassis.h b/infiniband-diags/libibnetdisc/src/chassis.h
index 16dad49..ecb21c9 100644
--- a/infiniband-diags/libibnetdisc/src/chassis.h
+++ b/infiniband-diags/libibnetdisc/src/chassis.h
@@ -80,6 +80,6 @@
enum ibnd_chassis_type { UNRESOLVED_CT, ISR9288_CT, ISR9096_CT, ISR2012_CT, ISR2004_CT };
enum ibnd_chassis_slot_type { UNRESOLVED_CS, LINE_CS, SPINE_CS, SRBD_CS };
-ibnd_chassis_t *group_nodes(struct ibnd_fabric *fabric);
+int group_nodes(struct ibnd_fabric *fabric);
#endif /* _CHASSIS_H_ */
diff --git a/infiniband-diags/libibnetdisc/src/ibnetdisc.c b/infiniband-diags/libibnetdisc/src/ibnetdisc.c
index 27ae9f3..6c31300 100644
--- a/infiniband-diags/libibnetdisc/src/ibnetdisc.c
+++ b/infiniband-diags/libibnetdisc/src/ibnetdisc.c
@@ -121,12 +121,13 @@ static int
query_node(struct ibmad_port *ibmad_port, struct ibnd_fabric *fabric,
struct ibnd_node *inode, struct ibnd_port *iport, ib_portid_t *portid)
{
+ int rc = 0;
ibnd_node_t *node = &(inode->node);
ibnd_port_t *port = &(iport->port);
void *nd = inode->node.nodedesc;
- if (query_node_info(ibmad_port, fabric, inode, portid))
- return -1;
+ if ((rc = query_node_info(ibmad_port, fabric, inode, portid)) != 0)
+ return rc;
port->portnum = mad_get_field(node->info, 0, IB_NODE_LOCAL_PORT_F);
port->guid = mad_get_field64(node->info, 0, IB_NODE_PORT_GUID_F);
@@ -169,8 +170,10 @@ query_node(struct ibmad_port *ibmad_port, struct ibnd_fabric *fabric,
static int
add_port_to_dpath(ib_dr_path_t *path, int nextport)
{
- if (path->cnt+2 >= sizeof(path->p))
+ if (path->cnt+2 >= sizeof(path->p)) {
+ IBND_ERROR("DR path has grown too long\n");
return -1;
+ }
++path->cnt;
path->p[path->cnt] = (uint8_t) nextport;
return path->cnt;
@@ -186,8 +189,10 @@ extend_dpath(struct ibmad_port *ibmad_port, struct ibnd_fabric *f,
/* If we were LID routed we need to set up the drslid */
if (!f->selfportid.lid)
if (ib_resolve_self_via(&f->selfportid, NULL, NULL,
- ibmad_port) < 0)
+ ibmad_port) < 0) {
+ IBND_ERROR("Failed to resolve self\n");
return -1;
+ }
portid->drpath.drslid = (uint16_t) f->selfportid.lid;
portid->drpath.drdlid = 0xFFFF;
@@ -413,8 +418,10 @@ create_node(struct ibnd_fabric *fabric, struct ibnd_node *temp, ib_portid_t *pat
struct ibnd_node *node;
node = malloc(sizeof(*node));
- if (!node)
- IBPANIC("OOM: node creation failed\n");
+ if (!node) {
+ IBND_ERROR("OOM: node creation failed\n");
+ return (NULL);
+ }
memcpy(node, temp, sizeof(*node));
node->node.dist = dist;
@@ -455,8 +462,10 @@ add_port_to_node(struct ibnd_fabric *fabric, struct ibnd_node *node, struct ibnd
}
port = malloc(sizeof(*port));
- if (!port)
+ if (!port) {
+ IBND_ERROR("Failed to allocate port\n");
return NULL;
+ }
memcpy(port, temp, sizeof(*port));
port->port.node = (ibnd_node_t *)node;
@@ -489,6 +498,7 @@ get_remote_node(struct ibmad_port *ibmad_port, struct ibnd_fabric *fabric,
struct ibnd_node *node, struct ibnd_port *port, ib_portid_t *path,
int portnum, int dist)
{
+ int rc = 0;
struct ibnd_node node_buf;
struct ibnd_port port_buf;
struct ibnd_node *remotenode, *oldnode;
@@ -501,43 +511,51 @@ get_remote_node(struct ibmad_port *ibmad_port, struct ibnd_fabric *fabric,
if (mad_get_field(port->port.info, 0, IB_PORT_PHYS_STATE_F)
!= IB_PORT_PHYS_STATE_LINKUP)
- return -1;
+ return 1; /* positive == non-fatal error */
if (extend_dpath(ibmad_port, fabric, path, portnum) < 0)
return -1;
if (query_node(ibmad_port, fabric, &node_buf, &port_buf, path)) {
- IBND_DEBUG("NodeInfo on %s failed, skipping port",
+ IBND_ERROR("Query remote node (%s) failed, skipping port\n",
portid2str(path));
path->drpath.cnt--; /* restore path */
- return -1;
+ return 1; /* positive == non-fatal error */
}
oldnode = find_existing_node(fabric, &node_buf);
if (oldnode)
remotenode = oldnode;
- else if (!(remotenode = create_node(fabric, &node_buf, path, dist + 1)))
- IBPANIC("no memory");
+ else if (!(remotenode = create_node(fabric, &node_buf, path, dist + 1))) {
+ rc = -1;
+ goto error;
+ }
oldport = find_existing_port_node(remotenode, &port_buf);
if (oldport) {
remoteport = oldport;
- } else if (!(remoteport = add_port_to_node(fabric, remotenode, &port_buf)))
- IBPANIC("no memory");
+ } else if (!(remoteport = add_port_to_node(fabric, remotenode,
+ &port_buf))) {
+ IBND_ERROR("OOM failed to add port to node\n");
+ rc = -1;
+ goto error;
+ }
dump_endnode(path, oldnode ? "known remote" : "new remote",
remotenode, remoteport);
link_ports(node, port, remotenode, remoteport);
+error:
path->drpath.cnt--; /* restore path */
- return 0;
+ return (rc);
}
ibnd_fabric_t *
ibnd_discover_fabric(struct ibmad_port *ibmad_port,
ib_portid_t *from, int hops)
{
+ int rc = 0;
struct ibnd_fabric *fabric = NULL;
ib_portid_t my_portid = {0};
struct ibnd_node node_buf;
@@ -563,8 +581,10 @@ ibnd_discover_fabric(struct ibmad_port *ibmad_port,
fabric = malloc(sizeof(*fabric));
- if (!fabric)
- IBPANIC("OOM: failed to malloc ibnd_fabric_t\n");
+ if (!fabric) {
+ IBND_ERROR("OOM: failed to malloc ibnd_fabric_t\n");
+ return (NULL);
+ }
memset(fabric, 0, sizeof(*fabric));
@@ -586,11 +606,14 @@ ibnd_discover_fabric(struct ibmad_port *ibmad_port,
port = add_port_to_node(fabric, node, &port_buf);
if (!port)
- IBPANIC("out of memory");
+ goto error;
- if(get_remote_node(ibmad_port, fabric, node, port, from,
+ rc = get_remote_node(ibmad_port, fabric, node, port, from,
mad_get_field(node->node.info, 0, IB_NODE_LOCAL_PORT_F),
- 0) < 0)
+ 0);
+ if (rc < 0)
+ goto error;
+ if (rc > 0) /* non-fatal error, nothing more to be done */
return ((ibnd_fabric_t *)fabric);
for (dist = 0; dist <= max_hops; dist++) {
@@ -608,7 +631,7 @@ ibnd_discover_fabric(struct ibmad_port *ibmad_port,
continue;
if (get_port_info(ibmad_port, fabric, &port_buf, i, path)) {
- IBND_DEBUG("can't reach node %s port %d", portid2str(path), i);
+ IBND_ERROR("can't reach node %s port %d", portid2str(path), i);
continue;
}
@@ -618,7 +641,7 @@ ibnd_discover_fabric(struct ibmad_port *ibmad_port,
port = add_port_to_node(fabric, node, &port_buf);
if (!port)
- IBPANIC("out of memory");
+ goto error;
/* If switch, set port GUID to node port GUID */
if (node->node.type == IB_NODE_SWITCH) {
@@ -626,13 +649,15 @@ ibnd_discover_fabric(struct ibmad_port *ibmad_port,
0, IB_NODE_PORT_GUID_F);
}
- get_remote_node(ibmad_port, fabric, node, port,
- path, i, dist);
+ if (get_remote_node(ibmad_port, fabric, node, port,
+ path, i, dist) < 0)
+ goto error;
}
}
}
- fabric->fabric.chassis = group_nodes(fabric);
+ if (group_nodes(fabric))
+ goto error;
return ((ibnd_fabric_t *)fabric);
error:
--
1.5.4.5
More information about the general
mailing list