[ofw] RE: IA64 vstate cmd execution induced system crash
Smith, Stan
stan.smith at intel.com
Thu Sep 20 17:05:50 PDT 2007
Done - svn.826
Thank you for the review.
Stan.
Tzachi Dar wrote:
> Seems fine, please commit.
>
> Thanks
> Tzachi
>
>> -----Original Message-----
>> From: Smith, Stan [mailto:stan.smith at intel.com]
>> Sent: Friday, September 21, 2007 12:03 AM
>> To: Tzachi Dar
>> Cc: ofw at lists.openfabrics.org; arlin.r.davis at intel.com;
>> Woodruff, Robert J
>> Subject: IA64 vstate cmd execution induced system crash
>>
>> Hello,
>> After poking around with windbg using a minidump file, I
>> found what the problem is.....drum roll, surprise, surprise,
>> an IA64 misaligned access.
>>
>> The problem is that an unaligned (0xe0000165`3721def9) local MAD
>> is passed into mthca_process_mad() in 'mthca_mad.c'. The first
>> few local MAD header references are to u8 fields, but when the
>> attr_id field (be16) is accessed @ line 229 it's unaligned - oops!
>>
>> After receiving mentoring from a well qualified
>> openib-windows SW coach, the problem is best fixed in
>> gen1\trunk\inc\iba\ib_al_ioctl.h @ line #2486, the typedef
>> union _ual_local_mad_ioctl definition; see enclosed files as
>> the email tabs/spaces don't really line up well.
>>
>> Testing on IA64 reveals no side effects from the patch, and
>> vstat works as expected.
>> Testing on x64 & x86 reveals no changes in system behavior, and
>> vstat commands work there as well.
>>
>> Please advise if this patch is acceptable and I will commit.
>>
>> Stan.
>>
>>
>> Index: ib_al_ioctl.h
>> ===================================================================
>> --- ib_al_ioctl.h (revision 825)
>> +++ ib_al_ioctl.h (working copy)
>> @@ -2298,7 +2298,7 @@
>> uint64_t h_av;
>> ib_mad_element_t* __ptr64 p_mad_element;
>> uint32_t size;
>> - void* __ptr64* __ptr64 ph_proxy;
>> + void* __ptr64* __ptr64 ph_proxy;
>>
>> } in;
>> struct _ual_send_mad_ioctl_out
>> @@ -2488,14 +2488,15 @@
>> struct _ual_local_mad_ioctl_in
>> {
>> uint64_t h_ca;
>> + __declspec(align(8)) uint8_t mad_in[MAD_BLOCK_SIZE];
>> uint8_t port_num;
>> - uint8_t mad_in[MAD_BLOCK_SIZE];
>>
>> } in;
>> struct _ual_local_mad_ioctl_out
>> {
>> ib_api_status_t status;
>> - uint8_t mad_out[MAD_BLOCK_SIZE];
>> + uint32_t _pad; /* 8-byte alignment needed for ia64 */
>> + __declspec(align(8)) uint8_t mad_out[MAD_BLOCK_SIZE];
>>
>> } out;
>>
>>
>> Offending call chain.
>>
>> nt!KeBugCheck2+0x170
>> nt!KiSystemServiceHandler+0x190
>> nt!RtlpExecuteEmHandlerForException+0x50
>> nt!RtlDispatchException+0x580
>> nt!KiDispatchException+0x470
>> nt!KiExceptionDispatch+0x190
>> nt!KiGenericExceptionHandler+0x330
>> mthca!mthca_process_mad(struct ib_device * ibdev =
>> 0xe0000165`3862f110, int mad_flags = 0, unsigned char
>> port_num = 0x01 '', struct _ib_wc * in_wc =
>> 0x00000000`00000000, struct _ib_grh * in_grh =
>> 0x00000000`00000000, struct ib_mad * in_mad =
>> 0xe0000165`3721def9, struct ib_mad * out_mad =
>> 0xe0000165`3721def4)+0x710
>> [d:\openib-windows-svn\769\gen1\trunk\hw\mthca\kernel\mthca_ma
>> d.c @ 229]
>>
>> mthca!mlnx_local_mad(struct _ib_ca * h_ca =
>> 0xe0000165`389dfd70, unsigned char port_num = 0x01 '', struct
>> _ib_av_attr * p_av_attr = 0x00000000`00000000, struct _ib_mad
>> * p_mad_in = 0xe0000165`3721def9, struct _ib_mad * p_mad_out =
>> 0xe0000165`3721def4)+0x760
>> [d:\openib-windows-svn\769\gen1\trunk\hw\mthca\kernel\hca_verb
>> s.c @ 1541]
>>
>> ibbus!al_local_mad(struct _ib_ca * h_ca =
>> 0xe0000165`373ecf40, unsigned char port_num = 0x01 '', struct
>> _ib_av_attr * p_src_av_attr = 0x00000000`00000000, void *
>> p_mad_in = 0xe0000165`3721def9, void * p_mad_out =
>> 0xe0000165`3721def4)+0x16c0
>> [d:\openib-windows-svn\769\gen1\trunk\core\al\al_mad.c @ 3229]
>>
>> ibbus!ib_local_mad(struct _ib_ca * h_ca =
>> 0xe0000165`373ecf40, unsigned char port_num = 0x01 '', void *
>> p_mad_in = 0xe0000165`3721def9, void * p_mad_out =
>> 0xe0000165`3721def4)+0xf80
>> [d:\openib-windows-svn\769\gen1\trunk\core\al\al_mad.c @ 3188]
>>
>> ibbus!proxy_local_mad(void * p_open_context =
>> 0xe0000165`38811140, struct _IRP * h_ioctl =
>> 0xe0000165`373c0cc0, unsigned int64 * p_ret_bytes =
>> 0xe0000165`24ef73a8)+0xbb0
>> [d:\openib-windows-svn\769\gen1\trunk\core\al\kernel\al_proxy_
>> subnet.c @ 1077]
>>
>> ibbus!subnet_ioctl(struct _IRP * h_ioctl =
>> 0xe0000165`373c0cc0, unsigned
>> int64 * p_ret_bytes = 0xe0000165`24ef73a8)+0xc50
>> [d:\openib-windows-svn\769\gen1\trunk\core\al\kernel\al_proxy_
>> subnet.c @ 1150]
>>
>> ibbus!al_dev_ioctl(struct _IRP * h_ioctl =
>> 0xe0000165`373c0cc0)+0xcd0
>> [d:\openib-windows-svn\769\gen1\trunk\core\al\kernel\al_dev.c @ 460]
>>
>> ibbus!bus_drv_ioctl(struct _DEVICE_OBJECT * p_dev_obj =
>> 0xe0000165`386d38a0, struct _IRP * p_irp =
>> 0xe0000165`373c0cc0)+0x8b0
>> [d:\openib-windows-svn\769\gen1\trunk\core\bus\kernel\bus_driv
>> er.c @ 402]
>>
>> nt!IofCallDriver+0x120
>> nt!IopSynchronousServiceTail+0x230
>> nt!IopXxxControlFile+0x1140
>> nt!NtDeviceIoControlFile+0x80
>> nt!KiSystemServiceExit
More information about the ofw
mailing list