[ofw] Possible IPoIB DMA problems
Anh Duong
anh.duong at qlogic.com
Thu Aug 21 07:57:34 PDT 2008
Hi All,
The IPoIB code does not use DMA buffers when transmitting the ARP or
DHCP packets. Both of these packets are passed in DS [1] of the WR in
the Post Send. The IPoIB header in DS [0] is not a DMA buffer
either.
Because of this, I believe that the existing IPoIB code won't work in
these two situations:
- On systems that are equipped with IOMMUs.
- On normal systems (without an IOMMU) that have more than 4GB of
RAM, IPoIB may also show the problem if the HCA is 32-bit.
If you agree with me, then someone needs to change these buffers to DMA
buffers.
Here are the functions that are affected. Only partial code is
displayed in the following functions. Pay attention to the parts
highlighted in red, which are the non-DMA buffers that I mentioned.
/*
 * Rewrites an outgoing Ethernet ARP payload as an IPoIB ARP packet held in a
 * scratch buffer, and describes that buffer in data segment 1 of the send
 * descriptor.
 *
 * p_port    - port supplying the send-buffer lookaside list, QPN, lkey and
 *             port GUID.
 * p_eth_hdr - Ethernet header; when buf_len is non-zero the ARP payload is
 *             read from immediately after it.
 * p_buf     - current NDIS buffer; when buf_len is zero the ARP payload is
 *             taken from the next buffer in the chain.
 * buf_len   - zero means "payload lives in the next buffer"; otherwise it is
 *             presumably the number of bytes available behind p_eth_hdr
 *             (TODO confirm against the caller).
 * p_desc    - send descriptor; p_buf, local_ds[1] and wr.num_ds are written.
 *
 * Returns NDIS_STATUS_SUCCESS, NDIS_STATUS_FAILURE when the buffer chain
 * ends early, NDIS_STATUS_BUFFER_TOO_SHORT when the payload is smaller than
 * an ARP packet, or NDIS_STATUS_RESOURCES when the payload cannot be mapped
 * or the scratch buffer cannot be allocated.
 */
static NDIS_STATUS
__send_mgr_filter_arp(
    IN              ipoib_port_t* const         p_port,
    IN      const   eth_hdr_t* const            p_eth_hdr,
    IN              NDIS_BUFFER*                p_buf,
    IN              size_t                      buf_len,
    IN  OUT         ipoib_send_desc_t* const    p_desc )
{
    arp_pkt_t           *p_arp;
    ipoib_arp_pkt_t     *p_ib_arp;

    if( !buf_len )
    {
        /* Nothing follows the Ethernet header here; move to the next buffer. */
        NdisGetNextBuffer( p_buf, &p_buf );
        if( !p_buf )
        {
            return NDIS_STATUS_FAILURE;
        }

        NdisQueryBufferSafe( p_buf, &p_arp, &buf_len, NormalPagePriority );
        if( !p_arp )
        {
            /* The buffer could not be mapped. */
            return NDIS_STATUS_RESOURCES;
        }
    }
    else
    {
        p_arp = (arp_pkt_t*)(p_eth_hdr + 1);
    }

    /* Do not read ARP fields past the end of the buffer. */
    if( buf_len < sizeof(arp_pkt_t) )
    {
        return NDIS_STATUS_BUFFER_TOO_SHORT;
    }

    /*
     * Allocate our scratch buffer.  It comes from a non-paged lookaside
     * list, not from a DMA allocation.
     */
    p_desc->p_buf = (send_buf_t*)
        ExAllocateFromNPagedLookasideList( &p_port->buf_mgr.send_buf_list );
    if( !p_desc->p_buf )
    {
        return NDIS_STATUS_RESOURCES;
    }
    p_ib_arp = (ipoib_arp_pkt_t*)p_desc->p_buf;

    /* Convert the ARP payload. */
    p_ib_arp->hw_type = ARP_HW_TYPE_IB;
    p_ib_arp->prot_type = p_arp->prot_type;
    p_ib_arp->hw_size = sizeof(ipoib_hw_addr_t);
    p_ib_arp->prot_size = p_arp->prot_size;
    p_ib_arp->op = p_arp->op;
    p_ib_arp->src_hw.flags_qpn = p_port->ib_mgr.qpn;
    ib_gid_set_default( &p_ib_arp->src_hw.gid,
        p_port->p_adapter->guids.port_guid.guid );
    p_ib_arp->src_ip = p_arp->src_ip;
    p_ib_arp->dst_ip = p_arp->dst_ip;

    /*
     * Second data segment: the converted ARP packet.  The address handed to
     * the HCA is whatever cl_get_physaddr() returns for the scratch buffer;
     * no DMA mapping is performed here.
     */
    p_desc->local_ds[1].vaddr = cl_get_physaddr( p_ib_arp );
    p_desc->local_ds[1].length = sizeof(ipoib_arp_pkt_t);
    p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;
    p_desc->wr.num_ds = 2;

    return NDIS_STATUS_SUCCESS;
}
/*
 * Inspects an outgoing UDP datagram.  Anything that is not DHCP
 * client<->server traffic is passed to __send_gen() unchanged; DHCP
 * packets get a scratch buffer with the IP and UDP headers copied in and are
 * then passed to __send_mgr_filter_dhcp().
 *
 * p_port   - port supplying the send-buffer lookaside list.
 * p_ip_hdr - IP header of the packet; when buf_len is non-zero the UDP
 *            header is located with GetIpPayloadPtr().
 * p_buf    - current NDIS buffer; when buf_len is zero the UDP header is
 *            taken from the next buffer in the chain.
 * buf_len  - zero means "UDP header lives in the next buffer"; otherwise it
 *            is presumably the number of bytes available at the UDP header
 *            (TODO confirm against the caller).
 * p_desc   - send descriptor; p_buf is written for DHCP packets.
 *
 * Returns the status of __send_gen() or __send_mgr_filter_dhcp(), or
 * NDIS_STATUS_FAILURE when the buffer chain ends early,
 * NDIS_STATUS_BUFFER_TOO_SHORT when there is no room for a UDP header, or
 * NDIS_STATUS_RESOURCES when the header cannot be mapped or the scratch
 * buffer cannot be allocated.
 */
static NDIS_STATUS
__send_mgr_filter_udp(
    IN              ipoib_port_t* const         p_port,
    IN      const   ip_hdr_t* const             p_ip_hdr,
    IN              NDIS_BUFFER*                p_buf,
    IN              size_t                      buf_len,
    IN  OUT         ipoib_send_desc_t* const    p_desc )
{
    udp_hdr_t           *p_udp_hdr;

    if( !buf_len )
    {
        /* Nothing follows the IP header here; move to the next buffer. */
        NdisGetNextBuffer( p_buf, &p_buf );
        if( !p_buf )
        {
            return NDIS_STATUS_FAILURE;
        }

        NdisQueryBufferSafe( p_buf, &p_udp_hdr, &buf_len, NormalPagePriority );
        if( !p_udp_hdr )
        {
            /* The buffer could not be mapped. */
            return NDIS_STATUS_RESOURCES;
        }
    }
    else
    {
        p_udp_hdr = (udp_hdr_t*)GetIpPayloadPtr(p_ip_hdr);
    }

    /*
     * Guard both the port reads below and the subtraction further down,
     * which would otherwise wrap around for an undersized buffer.
     */
    if( buf_len < sizeof(udp_hdr_t) )
    {
        return NDIS_STATUS_BUFFER_TOO_SHORT;
    }

    if( (p_udp_hdr->src_port != DHCP_PORT_CLIENT ||
        p_udp_hdr->dst_port != DHCP_PORT_SERVER) &&
        (p_udp_hdr->src_port != DHCP_PORT_SERVER ||
        p_udp_hdr->dst_port != DHCP_PORT_CLIENT) )
    {
        /* Not a DHCP packet. */
        return __send_gen( p_port, p_desc );
    }

    buf_len -= sizeof(udp_hdr_t);

    /*
     * Allocate our scratch buffer.  It comes from a non-paged lookaside
     * list, not from a DMA allocation.
     */
    p_desc->p_buf = (send_buf_t*)
        ExAllocateFromNPagedLookasideList( &p_port->buf_mgr.send_buf_list );
    if( !p_desc->p_buf )
    {
        return NDIS_STATUS_RESOURCES;
    }

    /* Copy the IP and UDP headers. */
    cl_memcpy( &p_desc->p_buf->ip.hdr, p_ip_hdr, sizeof(ip_hdr_t) );
    cl_memcpy(
        &p_desc->p_buf->ip.prot.udp.hdr, p_udp_hdr, sizeof(udp_hdr_t) );

    return __send_mgr_filter_dhcp(
        p_port, p_udp_hdr, p_buf, buf_len, p_desc );
}
/*
 * Fills in a send descriptor for one outgoing packet: runs the packet
 * through __send_mgr_filter(), claims a slot in the port's IPoIB header
 * array for data segment 0, and formats the UD send work request.
 *
 * p_port    - port supplying the header array, lkey, broadcast qkey and
 *             pkey index.
 * p_eth_hdr - Ethernet header of the packet; its type field is copied into
 *             the IPoIB header.
 * p_buf     - NDIS buffer forwarded to __send_mgr_filter().
 * buf_len   - length forwarded to __send_mgr_filter().
 * p_desc    - send descriptor to fill; p_pkt and p_endpt are read from it.
 *
 * Returns NDIS_STATUS_SUCCESS, or the status reported by
 * __send_mgr_filter() when filtering fails (the work request is then left
 * unformatted).
 */
static NDIS_STATUS
__build_send_desc(
    IN              ipoib_port_t* const         p_port,
    IN              eth_hdr_t* const            p_eth_hdr,
    IN              NDIS_BUFFER* const          p_buf,
    IN      const   size_t                      buf_len,
    IN  OUT         ipoib_send_desc_t* const    p_desc )
{
    NDIS_STATUS         status;
    int32_t             hdr_idx;

    status = __send_mgr_filter(
        p_port, p_eth_hdr, p_buf, buf_len, p_desc );
    if( status != NDIS_STATUS_SUCCESS )
    {
        /* Do not report success for a packet the filter rejected. */
        return status;
    }

    /*
     * Format the send descriptor.  The mask assumes sq_depth is a power of
     * two -- TODO confirm where params.sq_depth is validated.
     */
    hdr_idx = cl_atomic_inc( &p_port->hdr_idx );
    hdr_idx &= (p_port->p_adapter->params.sq_depth - 1);
    ASSERT( hdr_idx < p_port->p_adapter->params.sq_depth );
    p_port->hdr[hdr_idx].type = p_eth_hdr->type;
    p_port->hdr[hdr_idx].resv = 0;

    /*
     * Setup the first local data segment (used for the IPoIB header).  The
     * address handed to the HCA is whatever cl_get_physaddr() returns for
     * the header slot; no DMA mapping is performed here.
     */
    p_desc->local_ds[0].vaddr = cl_get_physaddr( &p_port->hdr[hdr_idx] );
    p_desc->local_ds[0].length = sizeof(ipoib_hdr_t);
    p_desc->local_ds[0].lkey = p_port->ib_mgr.lkey;

    /* Setup the work request. */
    p_desc->wr.p_next = NULL;
    p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;
    p_desc->wr.wr_type = WR_SEND;
    p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED;
    p_desc->wr.ds_array = p_desc->local_ds;

    /* Unreliable-datagram addressing for the destination endpoint. */
    p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;
    p_desc->wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;
    p_desc->wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;
    p_desc->wr.dgrm.ud.pkey_index = p_port->pkey_index;
    p_desc->wr.dgrm.ud.rsvd = NULL;

    return NDIS_STATUS_SUCCESS;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080821/875423cc/attachment.html>
More information about the ofw
mailing list