[ofw] IA64 vstat cmd execution induced system crash

Smith, Stan stan.smith at intel.com
Thu Sep 20 15:02:31 PDT 2007


Hello,
  After poking around with windbg using a minidump file, I found what
the problem is... drum roll, surprise, surprise: an IA64 misaligned
access.

The problem is that an unaligned (0xe0000165`3721def9) local MAD is passed
into mthca_process_mad() in 'mthca_mad.c'. The first few local MAD header
references are to u8 fields, but when the attr_id field (be16) is accessed @
line 229, the access is unaligned - oops!

After receiving mentoring from a well-qualified openib-windows SW coach,
I believe the problem is best fixed in
gen1\trunk\inc\iba\ib_al_ioctl.h @ line #2486, in the typedef union
_ual_local_mad_ioctl definition; see the enclosed files, as the email
tabs/spaces don't really line up well.

Testing on IA64 reveals no side effects from the patch, and vstat works
as expected.
Testing on x64 & x86 reveals no changes in system behavior, and vstat
commands continue to work.

Please advise if this patch is acceptable and I will commit.

Stan.


Index: ib_al_ioctl.h
===================================================================
--- ib_al_ioctl.h	(revision 825)
+++ ib_al_ioctl.h	(working copy)
@@ -2298,7 +2298,7 @@
 		uint64_t					h_av;
 		ib_mad_element_t* __ptr64		p_mad_element;
 		uint32_t					size;
-		void* __ptr64* __ptr64		ph_proxy;
+		void* __ptr64* __ptr64			ph_proxy;
 
 	}	in;
 	struct _ual_send_mad_ioctl_out
@@ -2488,14 +2488,15 @@
 	struct _ual_local_mad_ioctl_in
 	{
 		uint64_t				h_ca;
+		__declspec(align(8)) uint8_t mad_in[MAD_BLOCK_SIZE];
 		uint8_t				port_num;
-		uint8_t				mad_in[MAD_BLOCK_SIZE];
 
 	}	in;
 	struct _ual_local_mad_ioctl_out
 	{
 		ib_api_status_t			status;
-		uint8_t				mad_out[MAD_BLOCK_SIZE];
+		uint32_t				_pad; /* 8-byte alignment needed for ia64 */
+		__declspec(align(8)) uint8_t mad_out[MAD_BLOCK_SIZE];
 
 	}	out;
  

Offending call chain.

nt!KeBugCheck2+0x170
nt!KiSystemServiceHandler+0x190
nt!RtlpExecuteEmHandlerForException+0x50
nt!RtlDispatchException+0x580
nt!KiDispatchException+0x470
nt!KiExceptionDispatch+0x190
nt!KiGenericExceptionHandler+0x330
mthca!mthca_process_mad(struct ib_device * ibdev = 0xe0000165`3862f110,
int mad_flags = 0, unsigned char port_num = 0x01 '', struct _ib_wc *
in_wc = 0x00000000`00000000, struct _ib_grh * in_grh =
0x00000000`00000000, struct ib_mad * in_mad = 0xe0000165`3721def9,
struct ib_mad * out_mad = 0xe0000165`3721def4)+0x710
[d:\openib-windows-svn\769\gen1\trunk\hw\mthca\kernel\mthca_mad.c @ 229]

mthca!mlnx_local_mad(struct _ib_ca * h_ca = 0xe0000165`389dfd70,
unsigned char port_num = 0x01 '', struct _ib_av_attr * p_av_attr =
0x00000000`00000000, struct _ib_mad * p_mad_in = 0xe0000165`3721def9,
struct _ib_mad * p_mad_out = 0xe0000165`3721def4)+0x760
[d:\openib-windows-svn\769\gen1\trunk\hw\mthca\kernel\hca_verbs.c @
1541]

ibbus!al_local_mad(struct _ib_ca * h_ca = 0xe0000165`373ecf40, unsigned
char port_num = 0x01 '', struct _ib_av_attr * p_src_av_attr =
0x00000000`00000000, void * p_mad_in = 0xe0000165`3721def9, void *
p_mad_out = 0xe0000165`3721def4)+0x16c0
[d:\openib-windows-svn\769\gen1\trunk\core\al\al_mad.c @ 3229]
ibbus!ib_local_mad(struct _ib_ca * h_ca = 0xe0000165`373ecf40, unsigned
char port_num = 0x01 '', void * p_mad_in = 0xe0000165`3721def9, void *
p_mad_out = 0xe0000165`3721def4)+0xf80
[d:\openib-windows-svn\769\gen1\trunk\core\al\al_mad.c @ 3188]

ibbus!proxy_local_mad(void * p_open_context = 0xe0000165`38811140,
struct _IRP * h_ioctl = 0xe0000165`373c0cc0, unsigned int64 *
p_ret_bytes = 0xe0000165`24ef73a8)+0xbb0
[d:\openib-windows-svn\769\gen1\trunk\core\al\kernel\al_proxy_subnet.c @
1077]

ibbus!subnet_ioctl(struct _IRP * h_ioctl = 0xe0000165`373c0cc0, unsigned
int64 * p_ret_bytes = 0xe0000165`24ef73a8)+0xc50
[d:\openib-windows-svn\769\gen1\trunk\core\al\kernel\al_proxy_subnet.c @
1150]

ibbus!al_dev_ioctl(struct _IRP * h_ioctl = 0xe0000165`373c0cc0)+0xcd0
[d:\openib-windows-svn\769\gen1\trunk\core\al\kernel\al_dev.c @ 460]

ibbus!bus_drv_ioctl(struct _DEVICE_OBJECT * p_dev_obj =
0xe0000165`386d38a0, struct _IRP * p_irp = 0xe0000165`373c0cc0)+0x8b0
[d:\openib-windows-svn\769\gen1\trunk\core\bus\kernel\bus_driver.c @
402]

nt!IofCallDriver+0x120
nt!IopSynchronousServiceTail+0x230
nt!IopXxxControlFile+0x1140
nt!NtDeviceIoControlFile+0x80
nt!KiSystemServiceExit
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ib_al_ioctl.h
Type: application/octet-stream
Size: 65602 bytes
Desc: ib_al_ioctl.h
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20070920/412cdddb/attachment.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ib_al_ioctl.h.diff
Type: application/octet-stream
Size: 872 bytes
Desc: ib_al_ioctl.h.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20070920/412cdddb/attachment-0001.obj>


More information about the ofw mailing list