[ofw] Possible IPoIB DMA problems

Anh Duong anh.duong at qlogic.com
Thu Aug 21 07:57:34 PDT 2008


Hi All,

 

The IPoIB code does not use DMA buffers when transmitting ARP or
DHCP packets.  Both of these packets are passed in DS [1] of the WR in
the Post Send.  The IPoIB header in DS [0] is not a DMA buffer
either.

 

Because of this, I believe that the existing IPoIB code won't work in
these 2 situations:

-        On systems that are equipped with IOMMUs.

-        On normal systems (without an IOMMU) that have more than 4GB of
RAM, IPoIB may also show the problem if the HCA is 32-bit.

 

If you agree with me, then someone needs to change these buffers to DMA
buffers.

Here are the functions that are affected.  Only partial code is
displayed in the following functions.  Pay attention to the parts
highlighted in red, which are the non-DMA buffers that I mentioned.

 

/*
 * __send_mgr_filter_arp (partial excerpt)
 *
 * Converts an outgoing Ethernet ARP payload into an IPoIB ARP packet held in
 * a scratch buffer and describes that buffer in the second data segment
 * (local_ds[1]) of the send descriptor.
 *
 * p_port    - port whose lookaside list, QPN, port GUID and lkey are used.
 * p_eth_hdr - Ethernet header; when buf_len is non-zero the ARP payload is
 *             taken to start immediately after it.
 * p_buf     - current NDIS buffer; advanced to the next buffer when buf_len
 *             is zero.
 * buf_len   - bytes remaining in the current buffer.
 * p_desc    - send descriptor to fill in (p_buf, local_ds[1], wr.num_ds).
 *
 * Returns NDIS_STATUS_SUCCESS on the only path shown here.
 *
 * NOTE(review): 'status' and 'null_hw' are declared but unused in this
 * excerpt, and dst_hw of the IPoIB ARP packet is never filled in;
 * presumably the corresponding statements were trimmed from the posting.
 */
static NDIS_STATUS

__send_mgr_filter_arp(

            IN                                            ipoib_port_t*
const                              p_port,

            IN                    const   eth_hdr_t* const
p_eth_hdr,

            IN                                            NDIS_BUFFER*
p_buf,

            IN                                            size_t
buf_len,

            IN        OUT                            ipoib_send_desc_t*
const       p_desc )

{

            arp_pkt_t                                 *p_arp;

            ipoib_arp_pkt_t                       *p_ib_arp;

            NDIS_STATUS                                   status;

            mac_addr_t                             null_hw = {0};

 

            /* Locate the ARP payload: either in the next NDIS buffer (nothing
             * left in the current one) or right behind the Ethernet header. */
            if( !buf_len )

            {

                        NdisGetNextBuffer( p_buf, &p_buf );

                        /* NOTE(review): p_arp is not checked for NULL after
                         * this query in the excerpt. */
                        NdisQueryBufferSafe( p_buf, &p_arp, &buf_len,
NormalPagePriority );

            }

            else

            {

                        p_arp = (arp_pkt_t*)(p_eth_hdr + 1);

            }

 

 

            /* Allocate our scratch buffer. */
            /* This buffer comes from a nonpaged lookaside list; it is one of
             * the buffers the posting flags as not being a DMA buffer.
             * NOTE(review): the result is used without a NULL check in this
             * excerpt. */

            p_desc->p_buf = (send_buf_t*)

                        ExAllocateFromNPagedLookasideList(
&p_port->buf_mgr.send_buf_list );

 

            p_ib_arp = (ipoib_arp_pkt_t*)p_desc->p_buf;

 

            /* Convert the ARP payload. */
            /* Protocol fields, opcode and IP addresses are copied from the
             * Ethernet ARP packet; the hardware fields are replaced with the
             * IB hardware type/size and this port's QPN and default GID. */

            p_ib_arp->hw_type = ARP_HW_TYPE_IB;

            p_ib_arp->prot_type = p_arp->prot_type;

            p_ib_arp->hw_size = sizeof(ipoib_hw_addr_t);

            p_ib_arp->prot_size = p_arp->prot_size;

            p_ib_arp->op = p_arp->op;

            p_ib_arp->src_hw.flags_qpn = p_port->ib_mgr.qpn;

            ib_gid_set_default( &p_ib_arp->src_hw.gid,

                        p_port->p_adapter->guids.port_guid.guid );

            p_ib_arp->src_ip = p_arp->src_ip;

            p_ib_arp->dst_ip = p_arp->dst_ip;

 

            /* Describe the scratch buffer in DS[1] using the address returned
             * by cl_get_physaddr().
             * NOTE(review): presumably a CPU physical address rather than a
             * DMA-mapped bus address, which is the concern raised in the
             * posting -- confirm. */
            p_desc->local_ds[1].vaddr = cl_get_physaddr( p_ib_arp );

            p_desc->local_ds[1].length = sizeof(ipoib_arp_pkt_t);

            p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;

            /* Two segments in total: DS[0] is filled in by
             * __build_send_desc, DS[1] is the ARP packet set up above. */
            p_desc->wr.num_ds = 2;

 

            return NDIS_STATUS_SUCCESS;

}

 

/*
 * __send_mgr_filter_udp (partial excerpt)
 *
 * Inspects the UDP header of an outgoing IP packet. Packets whose port pair
 * is not DHCP client->server or server->client are handed to __send_gen().
 * For DHCP packets a scratch buffer is allocated, the IP and UDP headers are
 * copied into it, and processing continues in __send_mgr_filter_dhcp().
 *
 * p_port   - port whose send-buffer lookaside list is used.
 * p_ip_hdr - IP header of the packet being sent.
 * p_buf    - current NDIS buffer; advanced to the next buffer when buf_len
 *            is zero.
 * buf_len  - bytes remaining in the current buffer.
 * p_desc   - send descriptor; p_desc->p_buf receives the scratch buffer.
 *
 * Returns the status produced by __send_gen() or __send_mgr_filter_dhcp().
 *
 * NOTE(review): 'status' is declared as ib_api_status_t although the
 * function's return type is NDIS_STATUS.
 */
static NDIS_STATUS

__send_mgr_filter_udp(

            IN                                            ipoib_port_t*
const                              p_port,

            IN                    const   ip_hdr_t* const
p_ip_hdr,

            IN                                            NDIS_BUFFER*
p_buf,

            IN                                            size_t
buf_len,

            IN        OUT                            ipoib_send_desc_t*
const       p_desc )

{

            ib_api_status_t                        status;

            udp_hdr_t                                *p_udp_hdr;

            /* Locate the UDP header: either in the next NDIS buffer (nothing
             * left in the current one) or at the IP payload pointer. */
            if( !buf_len )

            {

                        NdisGetNextBuffer( p_buf, &p_buf );

                        /* NOTE(review): p_udp_hdr is not checked for NULL
                         * after this query in the excerpt. */
                        NdisQueryBufferSafe( p_buf, &p_udp_hdr,
&buf_len, NormalPagePriority );

            }

            else

            {

                        p_udp_hdr =
(udp_hdr_t*)GetIpPayloadPtr(p_ip_hdr);

            }

 

            /* Only the client->server and server->client DHCP port pairs take
             * the DHCP path below; everything else is sent as-is. */
            if( (p_udp_hdr->src_port != DHCP_PORT_CLIENT ||

                        p_udp_hdr->dst_port != DHCP_PORT_SERVER) &&

                        (p_udp_hdr->src_port != DHCP_PORT_SERVER ||

                        p_udp_hdr->dst_port != DHCP_PORT_CLIENT) )

            {

                        /* Not a DHCP packet. */

                        status = __send_gen( p_port, p_desc );

                        return status;

            }

 

            /* Account for the UDP header so buf_len covers only what follows
             * it. */
            buf_len -= sizeof(udp_hdr_t);

 

            /* Allocate our scratch buffer. */
            /* This buffer comes from a nonpaged lookaside list; it is one of
             * the buffers the posting flags as not being a DMA buffer.
             * NOTE(review): the result is used without a NULL check in this
             * excerpt. */

            p_desc->p_buf = (send_buf_t*)

                        ExAllocateFromNPagedLookasideList(
&p_port->buf_mgr.send_buf_list );

 

            /* Copy the IP and UDP headers. */

            cl_memcpy( &p_desc->p_buf->ip.hdr, p_ip_hdr ,
sizeof(ip_hdr_t) );

            cl_memcpy(

                        &p_desc->p_buf->ip.prot.udp.hdr, p_udp_hdr,
sizeof(udp_hdr_t) );

 

            /* Hand the remainder of the packet to the DHCP filter. */
            status = __send_mgr_filter_dhcp(

                        p_port, p_udp_hdr, p_buf, buf_len, p_desc );

            return status;

}

 

/*
 * __build_send_desc (partial excerpt)
 *
 * Runs the packet through __send_mgr_filter(), picks the next slot of the
 * per-port IPoIB header array, points the first data segment (local_ds[0])
 * at that header, and fills in the UD send work request in p_desc->wr.
 *
 * p_port    - port supplying the header array, lkey, qkey and pkey index.
 * p_eth_hdr - Ethernet header of the packet; its type field is copied into
 *             the IPoIB header.
 * p_buf     - first NDIS buffer of the packet, passed to the filter.
 * buf_len   - length passed to the filter.
 * p_desc    - send descriptor to fill in.
 *
 * Returns NDIS_STATUS_SUCCESS on the only path shown here.
 *
 * NOTE(review): 'PktExt' and 'pChecksumPktInfo' are declared but unused, and
 * the status returned by __send_mgr_filter() is never examined; presumably
 * the corresponding statements were trimmed from the posting.
 */
static NDIS_STATUS

__build_send_desc(

            IN                                            ipoib_port_t*
const                              p_port,

            IN                                            eth_hdr_t*
const                                  p_eth_hdr,

            IN                                            NDIS_BUFFER*
const                                   p_buf,

            IN                    const   size_t
buf_len,

            IN        OUT                            ipoib_send_desc_t*
const       p_desc )

{

            NDIS_STATUS                                   status;

            int32_t                                     hdr_idx;

            PNDIS_PACKET_EXTENSION
PktExt;

            PNDIS_TCP_IP_CHECKSUM_PACKET_INFO
pChecksumPktInfo; //NDIS 5.1

 

            status = __send_mgr_filter(

                        p_port, p_eth_hdr, p_buf, buf_len, p_desc );

 

            /* Format the send descriptor. */
            /* Take the next header slot; masking with (sq_depth - 1) wraps
             * the index.
             * NOTE(review): this assumes sq_depth is a power of two --
             * confirm. */

            hdr_idx = cl_atomic_inc( &p_port->hdr_idx );

            hdr_idx &= (p_port->p_adapter->params.sq_depth - 1);

            ASSERT( hdr_idx < p_port->p_adapter->params.sq_depth );

            p_port->hdr[hdr_idx].type = p_eth_hdr->type;

            p_port->hdr[hdr_idx].resv = 0;

 

            /* Setup the first local data segment (used for the IPoIB
header). */
            /* The header lives in the p_port->hdr array and is addressed via
             * cl_get_physaddr(); the posting flags it as not being a DMA
             * buffer. */

            p_desc->local_ds[0].vaddr = cl_get_physaddr(
&p_port->hdr[hdr_idx] );

            p_desc->local_ds[0].length = sizeof(ipoib_hdr_t);

            p_desc->local_ds[0].lkey = p_port->ib_mgr.lkey;

 

            /* Setup the work request. */
            /* The packet pointer is stored as wr_id. */

            p_desc->wr.p_next = NULL;

            p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;

            p_desc->wr.wr_type = WR_SEND;

            p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED;

            

            

            p_desc->wr.ds_array = p_desc->local_ds;

 

            /* UD addressing comes from the destination endpoint (QPN and
             * address vector) and the port (broadcast-record qkey and pkey
             * index). */
            p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;

            p_desc->wr.dgrm.ud.remote_qkey =
p_port->ib_mgr.bcast_rec.qkey;

            p_desc->wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;

            p_desc->wr.dgrm.ud.pkey_index = p_port->pkey_index;

            p_desc->wr.dgrm.ud.rsvd = NULL;

 

            return NDIS_STATUS_SUCCESS;

}

 

 

 

 

 

 

 

 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080821/875423cc/attachment.html>


More information about the ofw mailing list