[ofw] RE: Possible IPoIB DMA problems

Tzachi Dar tzachid at mellanox.co.il
Thu Aug 21 09:07:27 PDT 2008


As far as I know, none of the low-level drivers in OpenFabrics use DMA
addresses. Leonid will be able to say more once he is back from
vacation.
 
In any case, I wonder if you saw these problems happening on a real
machine, or is this something that you concluded by reading the code?
 
Thanks
Tzachi


________________________________

	From: Anh Duong [mailto:anh.duong at qlogic.com] 
	Sent: Thursday, August 21, 2008 5:58 PM
	To: ofw at lists.openfabrics.org
	Cc: Stan C. Smith; John Russo; ftillier at windows.microsoft.com;
Tzachi Dar; Alex Estrin; Usha Srinivasan
	Subject: Possible IPoIB DMA problems
	
	

	Hi All,

	 

	The IPoIB code does not use DMA buffers when transmitting the
ARP or DHCP packets.  Both of these packets are passed in DS [1] of the
WR in the Post Send.  The IPoIB header in DS [0] is not a DMA buffer
either.

	 

	Because of this, I believe that the existing IPoIB code won't work
in these two situations:

	-        On systems that are equipped with IOMMUs.

	-        On normal systems (without an IOMMU) that have more than
4GB of RAM, IPoIB may also show the problem if the HCA is 32-bit.

	 

	If you agree with me, then someone needs to change these buffers
to DMA buffers.

	Here are the functions that are affected.  Only partial code is
displayed for each of the following functions.  Pay attention to the
lines highlighted in red, which are the non-DMA buffers that I mentioned.

	 

	static NDIS_STATUS

	__send_mgr_filter_arp(

	            IN
ipoib_port_t* const                              p_port,

	            IN                    const   eth_hdr_t* const
p_eth_hdr,

	            IN
NDIS_BUFFER*                                             p_buf,

	            IN                                            size_t
buf_len,

	            IN        OUT
ipoib_send_desc_t* const       p_desc )

	{

	            arp_pkt_t                                 *p_arp;

	            ipoib_arp_pkt_t                       *p_ib_arp;

	            NDIS_STATUS
status;

	            mac_addr_t                             null_hw =
{0};

	 

	            if( !buf_len )

	            {

	                        NdisGetNextBuffer( p_buf, &p_buf );

	                        NdisQueryBufferSafe( p_buf, &p_arp,
&buf_len, NormalPagePriority );

	            }

	            else

	            {

	                        p_arp = (arp_pkt_t*)(p_eth_hdr + 1);

	            }

	 

	 

	            /* Allocate our scratch buffer. */

	            p_desc->p_buf = (send_buf_t*)

	                        ExAllocateFromNPagedLookasideList(
&p_port->buf_mgr.send_buf_list );

	 

	            p_ib_arp = (ipoib_arp_pkt_t*)p_desc->p_buf;

	 

	            /* Convert the ARP payload. */

	            p_ib_arp->hw_type = ARP_HW_TYPE_IB;

	            p_ib_arp->prot_type = p_arp->prot_type;

	            p_ib_arp->hw_size = sizeof(ipoib_hw_addr_t);

	            p_ib_arp->prot_size = p_arp->prot_size;

	            p_ib_arp->op = p_arp->op;

	            p_ib_arp->src_hw.flags_qpn = p_port->ib_mgr.qpn;

	            ib_gid_set_default( &p_ib_arp->src_hw.gid,

	                        p_port->p_adapter->guids.port_guid.guid
);

	            p_ib_arp->src_ip = p_arp->src_ip;

	            p_ib_arp->dst_ip = p_arp->dst_ip;

	 

	            p_desc->local_ds[1].vaddr = cl_get_physaddr(
p_ib_arp );

	            p_desc->local_ds[1].length =
sizeof(ipoib_arp_pkt_t);

	            p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;

	            p_desc->wr.num_ds = 2;

	 

	            return NDIS_STATUS_SUCCESS;

	}

	 

	static NDIS_STATUS

	__send_mgr_filter_udp(

	            IN
ipoib_port_t* const                              p_port,

	            IN                    const   ip_hdr_t* const
p_ip_hdr,

	            IN
NDIS_BUFFER*                                             p_buf,

	            IN                                            size_t
buf_len,

	            IN        OUT
ipoib_send_desc_t* const       p_desc )

	{

	            ib_api_status_t                        status;

	            udp_hdr_t                                *p_udp_hdr;

	            if( !buf_len )

	            {

	                        NdisGetNextBuffer( p_buf, &p_buf );

	                        NdisQueryBufferSafe( p_buf, &p_udp_hdr,
&buf_len, NormalPagePriority );

	            }

	            else

	            {

	                        p_udp_hdr =
(udp_hdr_t*)GetIpPayloadPtr(p_ip_hdr);

	            }

	 

	            if( (p_udp_hdr->src_port != DHCP_PORT_CLIENT ||

	                        p_udp_hdr->dst_port != DHCP_PORT_SERVER)
&&

	                        (p_udp_hdr->src_port != DHCP_PORT_SERVER
||

	                        p_udp_hdr->dst_port != DHCP_PORT_CLIENT)
)

	            {

	                        /* Not a DHCP packet. */

	                        status = __send_gen( p_port, p_desc );

	                        return status;

	            }

	 

	            buf_len -= sizeof(udp_hdr_t);

	 

	            /* Allocate our scratch buffer. */

	            p_desc->p_buf = (send_buf_t*)

	                        ExAllocateFromNPagedLookasideList(
&p_port->buf_mgr.send_buf_list );

	 

	            /* Copy the IP and UDP headers. */

	            cl_memcpy( &p_desc->p_buf->ip.hdr, p_ip_hdr ,
sizeof(ip_hdr_t) );

	            cl_memcpy(

	                        &p_desc->p_buf->ip.prot.udp.hdr,
p_udp_hdr, sizeof(udp_hdr_t) );

	 

	            status = __send_mgr_filter_dhcp(

	                        p_port, p_udp_hdr, p_buf, buf_len,
p_desc );

	            return status;

	}

	 

	static NDIS_STATUS

	__build_send_desc(

	            IN
ipoib_port_t* const                              p_port,

	            IN
eth_hdr_t* const                                  p_eth_hdr,

	            IN
NDIS_BUFFER* const                                   p_buf,

	            IN                    const   size_t
buf_len,

	            IN        OUT
ipoib_send_desc_t* const       p_desc )

	{

	            NDIS_STATUS
status;

	            int32_t                                     hdr_idx;

	            PNDIS_PACKET_EXTENSION
PktExt;

	            PNDIS_TCP_IP_CHECKSUM_PACKET_INFO
pChecksumPktInfo; //NDIS 5.1

	 

	            status = __send_mgr_filter(

	                        p_port, p_eth_hdr, p_buf, buf_len,
p_desc );

	 

	            /* Format the send descriptor. */

	            hdr_idx = cl_atomic_inc( &p_port->hdr_idx );

	            hdr_idx &= (p_port->p_adapter->params.sq_depth - 1);

	            ASSERT( hdr_idx < p_port->p_adapter->params.sq_depth
);

	            p_port->hdr[hdr_idx].type = p_eth_hdr->type;

	            p_port->hdr[hdr_idx].resv = 0;

	 

	            /* Setup the first local data segment (used for the
IPoIB header). */

	            p_desc->local_ds[0].vaddr = cl_get_physaddr(
&p_port->hdr[hdr_idx] );

	            p_desc->local_ds[0].length = sizeof(ipoib_hdr_t);

	            p_desc->local_ds[0].lkey = p_port->ib_mgr.lkey;

	 

	            /* Setup the work request. */

	            p_desc->wr.p_next = NULL;

	            p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;

	            p_desc->wr.wr_type = WR_SEND;

	            p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED;

	            

	            

	            p_desc->wr.ds_array = p_desc->local_ds;

	 

	            p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;

	            p_desc->wr.dgrm.ud.remote_qkey =
p_port->ib_mgr.bcast_rec.qkey;

	            p_desc->wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;

	            p_desc->wr.dgrm.ud.pkey_index = p_port->pkey_index;

	            p_desc->wr.dgrm.ud.rsvd = NULL;

	 

	            return NDIS_STATUS_SUCCESS;

	}

	 

	 

	 

	 

	 

	 

	 

	 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080821/66a0d25c/attachment.html>


More information about the ofw mailing list