[ofw] RE: Possible IPoIB DMA problems
Tzachi Dar
tzachid at mellanox.co.il
Thu Aug 21 09:07:27 PDT 2008
As far as I know, none of the low-level drivers in OpenFabrics use DMA
addresses. Leonid will be able to say more once he is back from
vacation.
In any case, I wonder if you saw these problems happening on a real
machine, or is this something that you have reached by reading the code?
Thanks
Tzachi
________________________________
From: Anh Duong [mailto:anh.duong at qlogic.com]
Sent: Thursday, August 21, 2008 5:58 PM
To: ofw at lists.openfabrics.org
Cc: Stan C. Smith; John Russo; ftillier at windows.microsoft.com;
Tzachi Dar; Alex Estrin; Usha Srinivasan
Subject: Possible IPoIB DMA problems
Hi All,
The IPoIB code does not use DMA buffers when transmitting the
ARP or DHCP packets. Both of these packets are passed in DS [1] of the
WR in the Post Send. The IPoIB header in DS [0] is not a DMA buffer
either.
Because of this, I believe that existing IPoIB code won't work
under these 2 situations:
- On systems that are equipped with IOMMUs.
- On normal systems (without an IOMMU) that have more than
4GB of RAM, IPoIB may also show the problem if the HCA is 32-bit.
If you agree with me, then someone needs to change these buffers
to DMA buffers.
Here are the functions that are affected. Only partial code is
displayed in the following functions. Pay attention to the parts
highlighted in red, which are the non-DMA buffers that I mentioned.
/*
 * __send_mgr_filter_arp
 *
 * Builds an IPoIB ARP packet from an Ethernet ARP payload.  The converted
 * packet is written into a scratch buffer taken from
 * p_port->buf_mgr.send_buf_list, and the second data segment of the send
 * descriptor (local_ds[1]) is pointed at that buffer.
 *
 * p_port    - port supplying the lookaside list, QPN, lkey and port GUID.
 * p_eth_hdr - Ethernet header; when buf_len is non-zero the ARP payload is
 *             read from the bytes immediately following it.
 * p_buf     - NDIS buffer chain; when buf_len is zero the next buffer in the
 *             chain is queried to locate the ARP payload.
 * buf_len   - when zero, selects the "payload is in the next buffer" path and
 *             is overwritten with that buffer's length.
 * p_desc    - send descriptor; p_buf, local_ds[1] and wr.num_ds are written.
 *
 * Returns NDIS_STATUS_SUCCESS on the only return path visible here.
 *
 * NOTE(review): this is a partial excerpt (the surrounding message says only
 * partial code is displayed), so error handling may have been elided.  As
 * shown, neither the NdisQueryBufferSafe result nor the lookaside allocation
 * is checked for NULL before use, and `status` and `null_hw` are never
 * referenced -- confirm against the full source.
 */
static NDIS_STATUS
__send_mgr_filter_arp(
IN
ipoib_port_t* const p_port,
IN const eth_hdr_t* const
p_eth_hdr,
IN
NDIS_BUFFER* p_buf,
IN size_t
buf_len,
IN OUT
ipoib_send_desc_t* const p_desc )
{
arp_pkt_t *p_arp;
ipoib_arp_pkt_t *p_ib_arp;
NDIS_STATUS
status;
mac_addr_t null_hw =
{0};
/* Locate the Ethernet ARP payload: either in the next NDIS buffer or
 * directly after the Ethernet header. */
if( !buf_len )
{
NdisGetNextBuffer( p_buf, &p_buf );
NdisQueryBufferSafe( p_buf, &p_arp,
&buf_len, NormalPagePriority );
}
else
{
p_arp = (arp_pkt_t*)(p_eth_hdr + 1);
}
/* Allocate our scratch buffer. */
p_desc->p_buf = (send_buf_t*)
ExAllocateFromNPagedLookasideList(
&p_port->buf_mgr.send_buf_list );
/* The scratch buffer is used in place as the IPoIB ARP packet. */
p_ib_arp = (ipoib_arp_pkt_t*)p_desc->p_buf;
/* Convert the ARP payload. */
p_ib_arp->hw_type = ARP_HW_TYPE_IB;
p_ib_arp->prot_type = p_arp->prot_type;
p_ib_arp->hw_size = sizeof(ipoib_hw_addr_t);
p_ib_arp->prot_size = p_arp->prot_size;
p_ib_arp->op = p_arp->op;
/* Source hardware address is this port's QPN plus a GID built from the
 * port GUID. */
p_ib_arp->src_hw.flags_qpn = p_port->ib_mgr.qpn;
ib_gid_set_default( &p_ib_arp->src_hw.gid,
p_port->p_adapter->guids.port_guid.guid
);
p_ib_arp->src_ip = p_arp->src_ip;
p_ib_arp->dst_ip = p_arp->dst_ip;
/* Describe the converted packet in the second data segment.
 * NOTE(review): the address comes from cl_get_physaddr() on a lookaside
 * allocation; the surrounding message questions whether this is usable
 * for DMA on IOMMU systems or with a 32-bit HCA and more than 4GB of RAM. */
p_desc->local_ds[1].vaddr = cl_get_physaddr(
p_ib_arp );
p_desc->local_ds[1].length =
sizeof(ipoib_arp_pkt_t);
p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;
/* Two segments are posted: local_ds[0] and the ARP packet in local_ds[1]. */
p_desc->wr.num_ds = 2;
return NDIS_STATUS_SUCCESS;
}
/*
 * __send_mgr_filter_udp
 *
 * Inspects the UDP header of an outgoing IP packet.  Packets whose port pair
 * is not DHCP client->server or DHCP server->client are passed to
 * __send_gen().  For DHCP packets, a scratch buffer is taken from
 * p_port->buf_mgr.send_buf_list, the IP and UDP headers are copied into it,
 * and the rest is handed to __send_mgr_filter_dhcp().
 *
 * p_port   - port supplying the lookaside list; forwarded to the helpers.
 * p_ip_hdr - IP header; when buf_len is non-zero the UDP header is obtained
 *            via GetIpPayloadPtr(p_ip_hdr).
 * p_buf    - NDIS buffer chain; when buf_len is zero the next buffer in the
 *            chain is queried to locate the UDP header.
 * buf_len  - when zero, selects the "header is in the next buffer" path and
 *            is overwritten with that buffer's length.
 * p_desc   - send descriptor; p_buf is written on the DHCP path.
 *
 * Returns the status produced by __send_gen() or __send_mgr_filter_dhcp().
 *
 * NOTE(review): this is a partial excerpt (the surrounding message says only
 * partial code is displayed).  As shown, neither the NdisQueryBufferSafe
 * result nor the lookaside allocation is checked for NULL, and `status` is
 * declared ib_api_status_t although the function returns NDIS_STATUS --
 * confirm against the full source.
 */
static NDIS_STATUS
__send_mgr_filter_udp(
IN
ipoib_port_t* const p_port,
IN const ip_hdr_t* const
p_ip_hdr,
IN
NDIS_BUFFER* p_buf,
IN size_t
buf_len,
IN OUT
ipoib_send_desc_t* const p_desc )
{
ib_api_status_t status;
udp_hdr_t *p_udp_hdr;
/* Locate the UDP header: either in the next NDIS buffer or as the IP
 * payload of the current one. */
if( !buf_len )
{
NdisGetNextBuffer( p_buf, &p_buf );
NdisQueryBufferSafe( p_buf, &p_udp_hdr,
&buf_len, NormalPagePriority );
}
else
{
p_udp_hdr =
(udp_hdr_t*)GetIpPayloadPtr(p_ip_hdr);
}
/* Anything that is neither client->server nor server->client DHCP traffic
 * takes the generic send path. */
if( (p_udp_hdr->src_port != DHCP_PORT_CLIENT ||
p_udp_hdr->dst_port != DHCP_PORT_SERVER)
&&
(p_udp_hdr->src_port != DHCP_PORT_SERVER
||
p_udp_hdr->dst_port != DHCP_PORT_CLIENT)
)
{
/* Not a DHCP packet. */
status = __send_gen( p_port, p_desc );
return status;
}
/* Exclude the UDP header from the length passed on to the DHCP filter. */
buf_len -= sizeof(udp_hdr_t);
/* Allocate our scratch buffer. */
p_desc->p_buf = (send_buf_t*)
ExAllocateFromNPagedLookasideList(
&p_port->buf_mgr.send_buf_list );
/* Copy the IP and UDP headers. */
cl_memcpy( &p_desc->p_buf->ip.hdr, p_ip_hdr ,
sizeof(ip_hdr_t) );
cl_memcpy(
&p_desc->p_buf->ip.prot.udp.hdr,
p_udp_hdr, sizeof(udp_hdr_t) );
/* Hand the remaining payload to the DHCP filter. */
status = __send_mgr_filter_dhcp(
p_port, p_udp_hdr, p_buf, buf_len,
p_desc );
return status;
}
/*
 * __build_send_desc
 *
 * Prepares a send descriptor for posting: runs the packet through
 * __send_mgr_filter(), fills a slot in p_port->hdr[] with the IPoIB header,
 * points local_ds[0] at that header, and populates the UD send work request.
 *
 * p_port    - port supplying the header array, lkey, pkey index and qkey.
 * p_eth_hdr - Ethernet header; its type field is copied into the IPoIB
 *             header and it is forwarded to __send_mgr_filter().
 * p_buf     - NDIS buffer forwarded to __send_mgr_filter().
 * buf_len   - length forwarded to __send_mgr_filter().
 * p_desc    - send descriptor; local_ds[0] and the wr fields are written.
 *             p_desc->p_pkt and p_desc->p_endpt are read.
 *
 * Returns NDIS_STATUS_SUCCESS on the only return path visible here.
 *
 * NOTE(review): this is a partial excerpt (the surrounding message says only
 * partial code is displayed).  As shown, the status returned by
 * __send_mgr_filter() is stored but never inspected, and `PktExt` and
 * `pChecksumPktInfo` are never referenced -- confirm against the full source.
 */
static NDIS_STATUS
__build_send_desc(
IN
ipoib_port_t* const p_port,
IN
eth_hdr_t* const p_eth_hdr,
IN
NDIS_BUFFER* const p_buf,
IN const size_t
buf_len,
IN OUT
ipoib_send_desc_t* const p_desc )
{
NDIS_STATUS
status;
int32_t hdr_idx;
PNDIS_PACKET_EXTENSION
PktExt;
PNDIS_TCP_IP_CHECKSUM_PACKET_INFO
pChecksumPktInfo; //NDIS 5.1
status = __send_mgr_filter(
p_port, p_eth_hdr, p_buf, buf_len,
p_desc );
/* Format the send descriptor. */
/* Pick the next header slot.  The mask keeps the index below sq_depth --
 * presumably sq_depth is a power of two; TODO confirm. */
hdr_idx = cl_atomic_inc( &p_port->hdr_idx );
hdr_idx &= (p_port->p_adapter->params.sq_depth - 1);
ASSERT( hdr_idx < p_port->p_adapter->params.sq_depth
);
p_port->hdr[hdr_idx].type = p_eth_hdr->type;
p_port->hdr[hdr_idx].resv = 0;
/* Setup the first local data segment (used for the
IPoIB header). */
/* NOTE(review): the address comes from cl_get_physaddr() on an entry of
 * p_port->hdr[]; the surrounding message questions whether this is usable
 * for DMA on IOMMU systems or with a 32-bit HCA and more than 4GB of RAM. */
p_desc->local_ds[0].vaddr = cl_get_physaddr(
&p_port->hdr[hdr_idx] );
p_desc->local_ds[0].length = sizeof(ipoib_hdr_t);
p_desc->local_ds[0].lkey = p_port->ib_mgr.lkey;
/* Setup the work request. */
p_desc->wr.p_next = NULL;
/* The packet pointer is stored as the work request ID. */
p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;
p_desc->wr.wr_type = WR_SEND;
p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED;
p_desc->wr.ds_array = p_desc->local_ds;
/* UD addressing: destination QPN and address vector come from the
 * endpoint; the qkey comes from the port's broadcast record. */
p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;
p_desc->wr.dgrm.ud.remote_qkey =
p_port->ib_mgr.bcast_rec.qkey;
p_desc->wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;
p_desc->wr.dgrm.ud.pkey_index = p_port->pkey_index;
p_desc->wr.dgrm.ud.rsvd = NULL;
return NDIS_STATUS_SUCCESS;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080821/66a0d25c/attachment.html>
More information about the ofw
mailing list