[Openib-windows] [PATCH] ib_local_mad

Yossi Leybovich sleybo at mellanox.co.il
Mon May 15 07:27:29 PDT 2006


Fab
 
This patch adds ib_local_mad support to user mode.
The code only allows Get MADs to be issued.
The patch also includes some performance management definitions (ib_gmp_t,
ib_port_counters_t) that I added to ib_types.h.
I also changed vstat so that "vstat -c" prints out the error
counters of the ports.
We found this very useful when debugging big clusters, and hopefully this is
the first step on the way to adding the IB port counters to perfmon.
 
Please review.
 
Thanks,
Yossi 

Signed-off-by: Yossi Leybovich (sleybo at mellanox.co.il)

Index: core/al/al_dev.h
===================================================================
--- core/al/al_dev.h	(revision 1336)
+++ core/al/al_dev.h	(working copy)
@@ -257,7 +257,8 @@
 	ual_dereg_mad_pool_cmd,
 	ual_cancel_mad_cmd,
 	ual_mad_recv_comp_cmd,
-
+	ual_local_mad_cmd,
+	
 	al_subnet_maxops
 
 }	al_subnet_ops_t;
@@ -435,6 +436,7 @@
 #define UAL_CANCEL_MAD		IOCTL_CODE(ALDEV_KEY,
ual_cancel_mad_cmd)
 #define UAL_GET_SPL_QP_ALIAS IOCTL_CODE(ALDEV_KEY, ual_get_spl_qp_cmd)
 #define UAL_MAD_RECV_COMP	IOCTL_CODE(ALDEV_KEY,
ual_mad_recv_comp_cmd)
+#define UAL_LOCAL_MAD		IOCTL_CODE(ALDEV_KEY, ual_local_mad_cmd)
 
 /* CM Related ioctl commands */
 #define UAL_CM_LISTEN		IOCTL_CODE(ALDEV_KEY, ual_cm_listen_cmd)
Index: core/al/al_verbs.h
===================================================================
--- core/al/al_verbs.h	(revision 1336)
+++ core/al/al_verbs.h	(working copy)
@@ -531,11 +531,7 @@
 	IN		const	void* const
p_mad_in,
 	IN				void*
p_mad_out )
 {
-	UNUSED_PARAM( h_ca );
-	UNUSED_PARAM( port_num );
-	UNUSED_PARAM( p_mad_in );
-	UNUSED_PARAM( p_mad_out );
-	return IB_UNSUPPORTED;
+	return ual_local_mad(h_ca ,port_num ,p_mad_in ,p_mad_out);
 }
 
 #define check_local_mad(h_qp) \
Index: core/al/kernel/al_proxy_subnet.c
===================================================================
--- core/al/kernel/al_proxy_subnet.c	(revision 1336)
+++ core/al/kernel/al_proxy_subnet.c	(working copy)
@@ -1020,10 +1020,65 @@
 }
 
 
+
+/*
+ * Process the ioctl UAL_LOCAL_MAD:
+ */
+static cl_status_t
+proxy_local_mad(
+	IN		void
*p_open_context,
+	IN		cl_ioctl_handle_t		h_ioctl,
+	OUT		size_t
*p_ret_bytes )
+{
+	ual_local_mad_ioctl_t	*p_ioctl =
+			(ual_local_mad_ioctl_t *)cl_ioctl_in_buf(
h_ioctl );
+	al_dev_open_context_t	*p_context =
+
(al_dev_open_context_t *)p_open_context;
+	ib_ca_handle_t			h_ca;
+	ib_api_status_t			status;
+
+
+	CL_ENTER(AL_DBG_MAD , g_al_dbg_lvl );
+
+	/* Validate input buffers. */
+	if( !cl_ioctl_in_buf( h_ioctl ) || !cl_ioctl_out_buf( h_ioctl )
||
+		cl_ioctl_in_size( h_ioctl ) != sizeof(p_ioctl->in) ||
+		cl_ioctl_out_size( h_ioctl ) != sizeof(p_ioctl->out) )
+	{
+		CL_EXIT( AL_DBG_MAD , g_al_dbg_lvl );
+		return CL_INVALID_PARAMETER;
+	}
+
+/* Validate CA handle */
+	h_ca = (ib_ca_handle_t)
+		al_hdl_ref( p_context->h_al, p_ioctl->in.h_ca,
AL_OBJ_TYPE_H_CA );
+	if( !h_ca )
+	{
+		status = IB_INVALID_CA_HANDLE;
+		goto proxy_local_mad_err;
+	}
+	
+	/* Set the return bytes in all cases */
+	*p_ret_bytes = sizeof(p_ioctl->out);
+	
+	status = ib_local_mad(h_ca ,p_ioctl->in.port_num
,p_ioctl->in.mad_in ,p_ioctl->out.mad_out);
+
+proxy_local_mad_err:
+
+	if( h_ca )
+		deref_al_obj( &h_ca->obj );
+
+	p_ioctl->out.status = status;
+
+	CL_EXIT(AL_DBG_MAD ,g_al_dbg_lvl );
+	return CL_SUCCESS;
+}
+
+
 cl_status_t
 subnet_ioctl(
 	IN		cl_ioctl_handle_t		h_ioctl,
-		OUT	size_t
*p_ret_bytes )
+	OUT		size_t
*p_ret_bytes )
 {
 	cl_status_t cl_status;
 	IO_STACK_LOCATION		*p_io_stack;
@@ -1080,6 +1135,9 @@
 	case UAL_DEREG_MAD_POOL:
 		cl_status = proxy_dereg_mad_pool( p_context, h_ioctl,
p_ret_bytes );
 		break;
+	case UAL_LOCAL_MAD:
+		cl_status = proxy_local_mad( p_context, h_ioctl,
p_ret_bytes );
+		break;
 	default:
 		cl_status = CL_INVALID_PARAMETER;
 		break;
Index: core/al/user/ual_mad.c
===================================================================
--- core/al/user/ual_mad.c	(revision 1336)
+++ core/al/user/ual_mad.c	(working copy)
@@ -468,3 +468,53 @@
 }
 
 
+ib_api_status_t
+ual_local_mad(
+	IN		const ib_ca_handle_t
h_ca,
+	IN		const uint8_t
port_num,
+	IN		ib_mad_t* const
p_mad_in,
+	IN		ib_mad_t*
p_mad_out )
+{
+	/* Do we need to do any special checking here ?? */
+
+	ual_local_mad_ioctl_t		local_mad_ioctl;
+	uintn_t						bytes_ret;
+	cl_status_t					cl_status =
CL_SUCCESS;
+	ib_api_status_t				status = IB_SUCCESS;
+
+
+	AL_ENTER( AL_DBG_CA );
+
+	if(p_mad_in->method != IB_MAD_METHOD_GET)
+	{
+		AL_TRACE( AL_DBG_ERROR,
+			("UAL_LOCAL_MAD invalid method\n"));
+		status = IB_INVALID_PARAMETER;
+	}
+
+	local_mad_ioctl.in.h_ca = h_ca->obj.p_ci_ca->obj.hdl;
+	local_mad_ioctl.in.port_num = port_num;
+
cl_memcpy(local_mad_ioctl.in.mad_in,p_mad_in,sizeof(local_mad_ioctl.in.m
ad_in));
+
+	cl_status = do_al_dev_ioctl( UAL_LOCAL_MAD,
+		&local_mad_ioctl.in, sizeof(local_mad_ioctl.in),
&local_mad_ioctl.out, sizeof(local_mad_ioctl.out),
+		&bytes_ret );
+
+	if( cl_status != CL_SUCCESS || bytes_ret !=
sizeof(local_mad_ioctl.out) )
+	{
+		AL_TRACE( AL_DBG_ERROR,
+			("UAL_LOCAL_MAD IOCTL returned %s\n",
CL_STATUS_MSG(cl_status)) );
+		status = IB_ERROR;
+	}
+	else
+	{
+		status = local_mad_ioctl.out.status;
+
cl_memcpy(p_mad_out,local_mad_ioctl.out.mad_out,sizeof(local_mad_ioctl.o
ut.mad_out));
+	}
+
+	
+	AL_EXIT( AL_DBG_CA );
+	return status;
+}
+
+
Index: core/al/user/ual_mad.h
===================================================================
--- core/al/user/ual_mad.h	(revision 1336)
+++ core/al/user/ual_mad.h	(working copy)
@@ -86,4 +86,11 @@
 	IN		const	size_t
buf_size,
 		OUT			ib_mad_element_t** const
pp_mad_element );
 
+ib_api_status_t
+ual_local_mad(
+IN		const	ib_ca_handle_t				h_ca,
+	IN		const	uint8_t
port_num,
+	IN		const	void* const
p_mad_in,
+	IN				void*
p_mad_out );
+
 #endif /* __IB_UAL_MAD_H__ */
Index: hw/mthca/kernel/mthca_mad.c
===================================================================
--- hw/mthca/kernel/mthca_mad.c	(revision 1336)
+++ hw/mthca/kernel/mthca_mad.c	(working copy)
@@ -191,7 +191,7 @@
 	u8 status;
 	u16 slid = in_wc ? in_wc->recv.ud.remote_lid :
cl_ntoh16(IB_LID_PERMISSIVE);
 
-	HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD
,("mthca_process_mad: \n\tin: Class %02x, Method %02x, AttrId %x,
AttrMod %x, ClSpec %x, Tid %I64x\n",
+	HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD
,("mthca_process_mad: in: Class %02x, Method %02x, AttrId %x, AttrMod
%x, ClSpec %x, Tid %I64x\n",
 		(u32)in_mad->mad_hdr.mgmt_class,
(u32)in_mad->mad_hdr.method, 
 		(u32)in_mad->mad_hdr.attr_id, in_mad->mad_hdr.attr_mod, 
 		(u32)in_mad->mad_hdr.class_specific, in_mad->mad_hdr.tid
));
@@ -268,7 +268,7 @@
 	if (!out_mad->mad_hdr.status)
 		smp_snoop(ibdev, port_num, in_mad);
 
-	HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD,("mthca_process_mad:
\n\tout: Class %02x, Method %02x, AttrId %x, AttrMod %x, ClSpec %x, Tid
%I64x, Status %x\n",
+	HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD,("mthca_process_mad:
out: Class %02x, Method %02x, AttrId %x, AttrMod %x, ClSpec %x, Tid
%I64x, Status %x\n",
 		(u32)in_mad->mad_hdr.mgmt_class,
(u32)in_mad->mad_hdr.method, 
 		(u32)in_mad->mad_hdr.attr_id, in_mad->mad_hdr.attr_mod, 
 		(u32)in_mad->mad_hdr.class_specific,
in_mad->mad_hdr.tid,
Index: inc/iba/ib_al_ioctl.h
===================================================================
--- inc/iba/ib_al_ioctl.h	(revision 1336)
+++ inc/iba/ib_al_ioctl.h	(working copy)
@@ -2228,6 +2228,50 @@
 
 
 
+/****s* User-mode Access Layer/ual_local_mad_ioctl_t
+* NAME
+*	ual_local_mad_ioctl_t
+*
+* DESCRIPTION
+*	IOCTL structure containing the input and output parameters for
+*	ib_local_mad
+*
+* SYNOPSIS
+*/
+typedef union _ual_local_mad_ioctl
+{
+	struct _ual_local_mad_ioctl_in
+	{
+		uint64_t					h_ca;
+		uint8_t					port_num;
+		uint8_t
mad_in[MAD_BLOCK_SIZE];
+	}	in;
+	struct _ual_local_mad_ioctl_out
+	{
+		ib_api_status_t			status;
+		uint8_t
mad_out[MAD_BLOCK_SIZE];
+
+	}	out;
+
+}	ual_local_mad_ioctl_t;
+/*
+** FIELDS
+*	in.h_ca
+*		The handle to an open instance of CA returned via a
+*		ual_open_ca_ioctl structure.
+*	in.port_num
+*		Port number this MAD refers to.
+*	in.mad_in
+*		Mad structure from user mode to forward to local HCA.
+*
+**	out.status
+*		Status of the operation.
+*	out.mad_out
+*		Mad structure answer from local HCA for user mode.
+*****/
+
+
+
 /****s* User-mode Access Layer/ual_create_cep_ioctl_t
 * NAME
 *	ual_create_cep_ioctl_t
Index: inc/iba/ib_types.h
===================================================================
--- inc/iba/ib_types.h	(revision 1336)
+++ inc/iba/ib_types.h	(working copy)
@@ -7090,6 +7090,107 @@
 #define IB_SIDR_UNSUPPORTED_VER				5
 
 
+
+
+
+
+/************/
+/****s* IBA Base: Types/ib_gmp_t
+* NAME
+*	ib_gmp_t
+*
+* DESCRIPTION
+*	IBA defined GMP MAD format. (16.1.1)
+*
+* SYNOPSIS
+*/
+#define IB_GMP_DATA_SIZE 200
+
+#include <complib/cl_packon.h>
+typedef struct _ib_gmp
+{
+	uint8_t					base_ver;
+	uint8_t					mgmt_class;
+	uint8_t					class_ver;
+	uint8_t					method;
+	ib_net16_t				status;
+	ib_net16_t				resv;
+	ib_net64_t				trans_id;
+	ib_net16_t				attr_id;
+	ib_net16_t				resv1;
+	ib_net32_t				attr_mod;
+	uint8_t					resv2[40];
+	uint8_t					data[IB_GMP_DATA_SIZE];
+}	PACK_SUFFIX ib_gmp_t;
+#include <complib/cl_packoff.h>
+/**********/
+#define IB_GMP_MAD_HDR_SIZE (sizeof(ib_gmp_t) - IB_GMP_DATA_SIZE)
+
+
+
+/************/
+/****s* IBA Base: Types/ib_port_counters_t
+* NAME
+*	ib_port_counters_t
+*
+* DESCRIPTION
+*	IBA defined PortCounters MAD format. (16.1.3.5)
+*
+* SYNOPSIS
+*/
+
+#include <complib/cl_packon.h>
+typedef struct _ib_port_counters
+{
+	uint8_t			reserved0;
+	uint8_t			port_select;
+	ib_net16_t		counter_select;
+	ib_net16_t		symbol_error_counter; 
+	uint8_t			link_error_recovery_counter;
+	uint8_t			link_down_counter; 
+	ib_net16_t		port_rcv_errors; 
+	ib_net16_t		port_rcv_remote_physical_errors;
+	ib_net16_t		port_rcv_switch_relay_errors; 
+	ib_net16_t		port_xmit_discard; 
+	uint8_t			port_xmit_constraint_errors;
+	uint8_t			port_rcv_constraint_errors;
+	uint8_t			reserved1;
+	/* uint4_t excessive_buffer_overrun_errors;
+	uint4_t local_link_integrity_errors; */
+	uint8_t			lli_errors_exc_buf_errors;
+	ib_net16_t		reserved2; 
+	ib_net16_t		vl15_dropped;
+	ib_net32_t		port_xmit_data;
+	ib_net32_t		port_rcv_data;
+	ib_net32_t		port_xmit_pkts;
+	ib_net32_t		port_rcv_pkts;
+	ib_net32_t		reserved3[38];
+}	PACK_SUFFIX ib_port_counters_t;
+#include <complib/cl_packoff.h>
+
+
+#define IB_COUNTER_SYMBOL_ERROR				(1<<0)
+#define IB_COUNTER_LINK_RECOVERY_ERROR		(1<<1)
+#define IB_COUNTER_LINK_DOWN					(1<<2)
+#define IB_COUNTER_RCV_ERROR					(1<<3)
+#define IB_COUNTERT_RCV_RMT_PHY_ERROR		(1<<4)
+#define IB_COUNTER_RCV_SWITCH_RELAY_ERROR	(1<<5)
+#define IB_COUNTER_XMIT_DISCARD				(1<<6)
+#define IB_COUNTER_XMIT_CONSTRAIN				(1<<7)
+#define IB_COUNTER_RCV_CONSTRAIN				(1<<8)
+#define IB_COUNTER_LINK_INTEG_ERROR			(1<<9)
+#define IB_COUNTER_EXECE_BUF_ERROR			(1<<10)
+#define IB_COUNTER_VL15_DROP					(1<<11)
+#define IB_COUNTER_XMIT_DATA					(1<<12)
+#define IB_COUNTER_XMIT_PKT					(1<<13)
+#define IB_COUNTER_RCV_DATA					(1<<14)
+#define IB_COUNTER_RCV_PKT
(1<<15)
+#define IB_COUNTER_ALL
(0xff)
+
+
+
+
+
 /*
  *	The following definitions are shared between the Access Layer
and VPD
  */
Index: tools/vstat/user/vstat_main.c
===================================================================
--- tools/vstat/user/vstat_main.c	(revision 1336)
+++ tools/vstat/user/vstat_main.c	(working copy)
@@ -219,12 +219,63 @@
 }
 /* Internal Functions */
 
+void vstat_get_counters(ib_ca_handle_t h_ca,uint8_t port_num)
+{
+	ib_mad_t			*mad_in = NULL;
+	ib_mad_t			*mad_out = NULL;
+	ib_port_counters_t	*port_counters;
+	ib_api_status_t 	ib_status = IB_SUCCESS;
+	int i;
+	
+	mad_out = (ib_mad_t*)cl_zalloc(256);
+	CL_ASSERT(mad_out);
 
+	mad_in = (ib_mad_t*)cl_zalloc(256);
+	CL_ASSERT(mad_in);
 
+
+	mad_in->attr_id = IB_MAD_ATTR_PORT_CNTRS;
+	mad_in->method = IB_MAD_METHOD_GET;
+	mad_in->base_ver = 1;
+	mad_in->class_ver =1;
+	mad_in->mgmt_class = IB_MCLASS_PERF;
+
+	port_counters =
(ib_port_counters_t*)(((ib_gmp_t*)mad_in)->data);
+
+	port_counters->port_select= port_num;
+	port_counters->counter_select= 0xff;
+
+	ib_status = ib_local_mad(h_ca ,port_num ,mad_in ,mad_out);
+	if(ib_status != IB_SUCCESS)
+	{
+		printf("ib_local_mad failed with status = %d\n",
ib_status);
+		return;
+	}
+	
+	port_counters =
(ib_port_counters_t*)(((ib_gmp_t*)mad_out)->data);
+
+	printf("\nport counters for port %d\n",port_num);
+	printf("\tlink_error_recovery_counter\t0x%x
\n",port_counters->link_error_recovery_counter);
+	printf("\tlink_down_counter\t0x%x
\n",port_counters->link_down_counter);
+	printf("\tport_rcv_errors\t\t0x%x
\n",CL_NTOH16(port_counters->port_rcv_errors));
+	printf("\tport_rcv_remote_physical_errors\t0x%x
\n",CL_NTOH16(port_counters->port_rcv_remote_physical_errors));
+	printf("\tport_rcv_switch_relay_errors\t0x%x
\n",CL_NTOH16(port_counters->port_rcv_switch_relay_errors));
+	printf("\tport_xmit_discard\t\t0x%x
\n",CL_NTOH16(port_counters->port_xmit_discard));
+	printf("\tport_xmit_constraint_errors\t0x%x
\n",port_counters->port_xmit_constraint_errors);
+	printf("\tport_rcv_constraint_errors\t0x%x
\n",port_counters->port_rcv_constraint_errors);
+	printf("\tvl15_dropped\t\t\t0x%x
\n",CL_NTOH16(port_counters->vl15_dropped));
+	printf("\tport_rcv_data\t\t\t0x%x
\n",CL_NTOH32(port_counters->port_rcv_data));
+	printf("\tport_xmit_data\t\t\t0x%x
\n",CL_NTOH32(port_counters->port_xmit_data));
+	printf("\tport_rcv_pkts\t\t\t0x%x
\n",CL_NTOH32(port_counters->port_rcv_pkts));
+	printf("\tport_xmit_pkts\t\t\t0x%x
\n\n",CL_NTOH32(port_counters->port_xmit_pkts));
+	
+}
+
 ib_api_status_t
 vstat_ca_attr(
 	boolean_t modify_attr,
-	BOOLEAN fullPrint
+	BOOLEAN fullPrint,
+	BOOLEAN getCounters
 	)
 {
 	ib_al_handle_t		h_al = NULL;
@@ -237,10 +288,8 @@
 	ib_ca_handle_t 	h_ca = NULL;
 	uint32_t 			bsize;
 	ib_port_attr_mod_t port_attr_mod;
+	uint8_t			port_idx;
 
-
-
-
 	while(1)
 	{
 		/*
@@ -344,11 +393,17 @@
 				goto Cleanup2;
 			}
 
-			/* Print_ca_attributes */
 
+			
+
 			vstat_print_ca_attr((int)i, vstat_ca_attr,
fullPrint);
-
-
+			if(getCounters)
+			{
+				for(port_idx =0; port_idx<
vstat_ca_attr->num_ports;port_idx++){
+					vstat_get_counters(h_ca
,port_idx+1);
+				}
+			}
+			
 			/* Free the memory */
 			cl_free(vstat_ca_attr);
 			vstat_ca_attr = NULL;
@@ -398,18 +453,23 @@
 {
 	ib_api_status_t ib_status;
 	BOOLEAN fullPrint = FALSE;
+	BOOLEAN getCounters = FALSE;
 	if(argc>1){
 		int i = 2;
 		while(i<=argc){
 			if(!_stricmp(argv[i-1], "-v")){
 				fullPrint = TRUE;
 				i+=1;
+			}else if(!_stricmp(argv[i-1], "-c")){
+				getCounters = TRUE;
+				i+=1;
 			}else{
 				i+=2;
 			}
 		}
 	}
-	ib_status = vstat_ca_attr(FALSE, fullPrint);
+	ib_status = vstat_ca_attr(FALSE, fullPrint,getCounters);
+
 	return 0;
 }
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ib_local_mad.patch
Type: application/octet-stream
Size: 14798 bytes
Desc: ib_local_mad.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060515/214538fc/attachment.obj>


More information about the ofw mailing list