[ofw] RE: Possible IPoIB DMA problems
Alex Estrin
alex.estrin at qlogic.com
Thu Aug 21 09:27:55 PDT 2008
Anh,
From your examples ... these are the places where the appropriate physical
address is obtained from the virtual one.
preparing send WR.
....
p_desc->local_ds[1].vaddr = cl_get_physaddr( p_ib_arp );
.....
/* Setup the first local data segment (used for the IPoIB header). */
p_desc->local_ds[0].vaddr = cl_get_physaddr( &p_port->hdr[hdr_idx] );
Thanks,
Alex
________________________________
From: Anh Duong
Sent: Thursday, August 21, 2008 10:58 AM
To: ofw at lists.openfabrics.org
Cc: Stan C. Smith; John Russo; ftillier at windows.microsoft.com;
Tzachi Dar; Alex Estrin; Usha Srinivasan
Subject: Possible IPoIB DMA problems
Hi All,
The IPoIB code does not use DMA buffers when transmitting
ARP or DHCP packets. Both of these packets are passed in DS [1] of the
WR in the Post Send. The IPoIB header in DS [0] is not a DMA buffer
either.
Because of this, I believe that the existing IPoIB code won't work
in these 2 situations:
- On systems that are equipped with IOMMUs.
- On normal systems (without an IOMMU) that have more than
4GB of RAM, IPoIB may also show the problem if the HCA is 32-bit.
If you agree with me, then someone needs to change these buffers
to DMA buffers.
Here are the functions that are affected. Only partial code is
displayed in the following functions. Pay attention to the lines highlighted
in red, which use the non-DMA buffers that I mentioned.
/*
 * Converts an outgoing Ethernet ARP payload into an IPoIB ARP packet held in
 * a scratch buffer, and describes that buffer in data segment 1 of the send
 * descriptor.
 *
 * p_port    - port supplying the scratch-buffer lookaside list, the QPN, the
 *             port GUID and the lkey.
 * p_eth_hdr - Ethernet header; when buf_len is non-zero the ARP payload is
 *             read from the bytes immediately following it.
 * p_buf     - current NDIS buffer; when buf_len is zero the ARP payload is
 *             taken from the next buffer in the chain.
 * buf_len   - zero selects the next-buffer path described above.
 * p_desc    - send descriptor; p_buf, local_ds[1] and wr.num_ds are written.
 *
 * Returns NDIS_STATUS_SUCCESS on success, NDIS_STATUS_FAILURE when there is
 * no following buffer to read the ARP payload from, and NDIS_STATUS_RESOURCES
 * when the buffer cannot be mapped or no scratch buffer can be allocated.
 */
static NDIS_STATUS
__send_mgr_filter_arp(
	IN				ipoib_port_t* const			p_port,
	IN		const	eth_hdr_t* const			p_eth_hdr,
	IN				NDIS_BUFFER*				p_buf,
	IN				size_t						buf_len,
	IN	OUT			ipoib_send_desc_t* const	p_desc )
{
	arp_pkt_t			*p_arp;
	ipoib_arp_pkt_t		*p_ib_arp;

	if( !buf_len )
	{
		/* Nothing follows the Ethernet header here; use the next buffer. */
		NdisGetNextBuffer( p_buf, &p_buf );
		if( !p_buf )
		{
			return NDIS_STATUS_FAILURE;
		}

		NdisQueryBufferSafe( p_buf, &p_arp, &buf_len, NormalPagePriority );
		if( !p_arp )
		{
			/* The "Safe" query reports a failed mapping as a NULL address. */
			return NDIS_STATUS_RESOURCES;
		}
	}
	else
	{
		p_arp = (arp_pkt_t*)(p_eth_hdr + 1);
	}

	/* Allocate our scratch buffer. */
	p_desc->p_buf = (send_buf_t*)ExAllocateFromNPagedLookasideList(
		&p_port->buf_mgr.send_buf_list );
	if( !p_desc->p_buf )
	{
		return NDIS_STATUS_RESOURCES;
	}
	p_ib_arp = (ipoib_arp_pkt_t*)p_desc->p_buf;

	/*
	 * Convert the ARP payload.
	 * NOTE(review): only the fields assigned below are populated; any other
	 * field of ipoib_arp_pkt_t presumably keeps whatever the scratch buffer
	 * held - confirm against the full function.
	 */
	p_ib_arp->hw_type = ARP_HW_TYPE_IB;
	p_ib_arp->prot_type = p_arp->prot_type;
	p_ib_arp->hw_size = sizeof(ipoib_hw_addr_t);
	p_ib_arp->prot_size = p_arp->prot_size;
	p_ib_arp->op = p_arp->op;
	p_ib_arp->src_hw.flags_qpn = p_port->ib_mgr.qpn;
	ib_gid_set_default( &p_ib_arp->src_hw.gid,
		p_port->p_adapter->guids.port_guid.guid );
	p_ib_arp->src_ip = p_arp->src_ip;
	p_ib_arp->dst_ip = p_arp->dst_ip;

	/*
	 * Describe the converted packet in the second data segment.
	 * NOTE(review): the address comes from cl_get_physaddr() applied to a
	 * lookaside-list allocation; whether that is valid for the HCA on IOMMU
	 * or >4GB systems needs confirming.
	 */
	p_desc->local_ds[1].vaddr = cl_get_physaddr( p_ib_arp );
	p_desc->local_ds[1].length = sizeof(ipoib_arp_pkt_t);
	p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;
	p_desc->wr.num_ds = 2;

	return NDIS_STATUS_SUCCESS;
}
/*
 * Inspects an outgoing UDP datagram. Anything that is not DHCP traffic
 * (client port to server port, or server port to client port) is handed to
 * __send_gen() unchanged. For DHCP traffic the IP and UDP headers are copied
 * into a scratch buffer and the rest is delegated to
 * __send_mgr_filter_dhcp().
 *
 * p_port   - port supplying the scratch-buffer lookaside list.
 * p_ip_hdr - IP header of the packet; when buf_len is non-zero the UDP header
 *            is located through GetIpPayloadPtr().
 * p_buf    - current NDIS buffer; when buf_len is zero the UDP header is
 *            taken from the next buffer in the chain.
 * buf_len  - zero selects the next-buffer path described above; otherwise it
 *            is treated as a length that starts at the UDP header.
 * p_desc   - send descriptor; p_buf is set on the DHCP path.
 *
 * Returns the status of __send_gen() or __send_mgr_filter_dhcp(), or
 * NDIS_STATUS_FAILURE when there is no following buffer,
 * NDIS_STATUS_BUFFER_TOO_SHORT when buf_len cannot hold a UDP header, and
 * NDIS_STATUS_RESOURCES when mapping or allocation fails.
 */
static NDIS_STATUS
__send_mgr_filter_udp(
	IN				ipoib_port_t* const			p_port,
	IN		const	ip_hdr_t* const				p_ip_hdr,
	IN				NDIS_BUFFER*				p_buf,
	IN				size_t						buf_len,
	IN	OUT			ipoib_send_desc_t* const	p_desc )
{
	/* Matches the function's return type rather than ib_api_status_t. */
	NDIS_STATUS			status;
	udp_hdr_t			*p_udp_hdr;

	if( !buf_len )
	{
		/* Nothing follows the IP header here; use the next buffer. */
		NdisGetNextBuffer( p_buf, &p_buf );
		if( !p_buf )
		{
			return NDIS_STATUS_FAILURE;
		}

		NdisQueryBufferSafe( p_buf, &p_udp_hdr, &buf_len, NormalPagePriority );
		if( !p_udp_hdr )
		{
			/* The "Safe" query reports a failed mapping as a NULL address. */
			return NDIS_STATUS_RESOURCES;
		}
	}
	else
	{
		p_udp_hdr = (udp_hdr_t*)GetIpPayloadPtr(p_ip_hdr);
	}

	if( (p_udp_hdr->src_port != DHCP_PORT_CLIENT ||
		p_udp_hdr->dst_port != DHCP_PORT_SERVER) &&
		(p_udp_hdr->src_port != DHCP_PORT_SERVER ||
		p_udp_hdr->dst_port != DHCP_PORT_CLIENT) )
	{
		/* Not a DHCP packet. */
		return __send_gen( p_port, p_desc );
	}

	/* Guard the subtraction below: buf_len is unsigned and would wrap. */
	if( buf_len < sizeof(udp_hdr_t) )
	{
		return NDIS_STATUS_BUFFER_TOO_SHORT;
	}
	buf_len -= sizeof(udp_hdr_t);

	/* Allocate our scratch buffer. */
	p_desc->p_buf = (send_buf_t*)ExAllocateFromNPagedLookasideList(
		&p_port->buf_mgr.send_buf_list );
	if( !p_desc->p_buf )
	{
		return NDIS_STATUS_RESOURCES;
	}

	/* Copy the IP and UDP headers. */
	cl_memcpy( &p_desc->p_buf->ip.hdr, p_ip_hdr, sizeof(ip_hdr_t) );
	cl_memcpy(
		&p_desc->p_buf->ip.prot.udp.hdr, p_udp_hdr, sizeof(udp_hdr_t) );

	status = __send_mgr_filter_dhcp(
		p_port, p_udp_hdr, p_buf, buf_len, p_desc );
	return status;
}
/*
 * Builds the send work request for one outgoing packet: runs the packet
 * through __send_mgr_filter(), fills the next IPoIB header slot, points data
 * segment 0 at that header and initialises the UD work request fields.
 *
 * p_port    - port supplying the header ring, lkey, pkey index and qkey.
 * p_eth_hdr - Ethernet header of the packet; its type field is copied into
 *             the IPoIB header.
 * p_buf     - NDIS buffer passed through to __send_mgr_filter().
 * buf_len   - length passed through to __send_mgr_filter().
 * p_desc    - send descriptor; local_ds[0] and wr are written here, and
 *             p_pkt and p_endpt must already be set by the caller.
 *
 * Returns NDIS_STATUS_SUCCESS, or the failure status reported by
 * __send_mgr_filter(), in which case the work request is not built.
 */
static NDIS_STATUS
__build_send_desc(
	IN				ipoib_port_t* const			p_port,
	IN				eth_hdr_t* const			p_eth_hdr,
	IN				NDIS_BUFFER* const			p_buf,
	IN		const	size_t						buf_len,
	IN	OUT			ipoib_send_desc_t* const	p_desc )
{
	NDIS_STATUS			status;
	int32_t				hdr_idx;

	status = __send_mgr_filter( p_port, p_eth_hdr, p_buf, buf_len, p_desc );
	if( status != NDIS_STATUS_SUCCESS )
	{
		/* Do not post a descriptor whose payload segments were never set. */
		return status;
	}

	/*
	 * Format the send descriptor.
	 * The mask only keeps hdr_idx below sq_depth when sq_depth is a power of
	 * two - assumed here, TODO confirm where the parameter is validated.
	 */
	hdr_idx = cl_atomic_inc( &p_port->hdr_idx );
	hdr_idx &= (p_port->p_adapter->params.sq_depth - 1);
	ASSERT( hdr_idx < p_port->p_adapter->params.sq_depth );
	p_port->hdr[hdr_idx].type = p_eth_hdr->type;
	p_port->hdr[hdr_idx].resv = 0;

	/*
	 * Setup the first local data segment (used for the IPoIB header).
	 * NOTE(review): the address comes from cl_get_physaddr() applied to the
	 * port's header array; whether that is valid for the HCA on IOMMU or
	 * >4GB systems needs confirming.
	 */
	p_desc->local_ds[0].vaddr = cl_get_physaddr( &p_port->hdr[hdr_idx] );
	p_desc->local_ds[0].length = sizeof(ipoib_hdr_t);
	p_desc->local_ds[0].lkey = p_port->ib_mgr.lkey;

	/* Setup the work request. */
	p_desc->wr.p_next = NULL;
	p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;
	p_desc->wr.wr_type = WR_SEND;
	p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED;
	p_desc->wr.ds_array = p_desc->local_ds;

	p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;
	p_desc->wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;
	p_desc->wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;
	p_desc->wr.dgrm.ud.pkey_index = p_port->pkey_index;
	p_desc->wr.dgrm.ud.rsvd = NULL;

	return NDIS_STATUS_SUCCESS;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080821/c6d7c36b/attachment.html>
More information about the ofw
mailing list