[ofw] [PATCH 1/4] Avoid the SM
Fab Tillier
ftillier at windows.microsoft.com
Thu Aug 21 09:49:20 PDT 2008
This patch removes the path query from IPoIB when trying to send a unicast packet. Instead it uses fields form the broadcast group and the work completion to form an address vector. This speeds ARP responses.
The limitation of this patch is that the rate for the address vector will be that of the broadcast group, which may be suboptimal in mixed SDR/DDR/QDR fabrics.
Signed-off-by: Fab Tillier <ftillier at microsoft.com>
diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\ulp\ipoib\kernel\ipoib_endpoint.c trunk\ulp\ipoib\kernel\ipoib_endpoint.c
--- old\ulp\ipoib\kernel\ipoib_endpoint.c Mon Jun 30 11:09:26 2008
+++ trunk\ulp\ipoib\kernel\ipoib_endpoint.c Thu Aug 21 08:41:29 2008
@@ -183,6 +183,7 @@ ipoib_endpt_set_mcast(
return status;
}
p_endpt->h_mcast = p_mcast_rec->h_mcast;
+ p_endpt->dlid = p_mcast_rec->p_member_rec->mlid;
IPOIB_EXIT( IPOIB_DBG_ENDPT );
return IB_SUCCESS;
@@ -279,9 +280,8 @@ ipoib_endpt_queue(
{
ib_api_status_t status;
ipoib_port_t *p_port;
- ib_query_req_t query;
- ib_user_query_t info;
- ib_path_rec_t path;
+ ib_av_attr_t av_attr;
+ net32_t flow_lbl;
IPOIB_ENTER( IPOIB_DBG_ENDPT );
@@ -291,108 +291,34 @@ ipoib_endpt_queue(
return NDIS_STATUS_SUCCESS;
}
- if( p_endpt->h_query ||
- p_endpt->qpn == CL_HTON32(0x00FFFFFF) )
+ if( p_endpt->qpn == CL_HTON32(0x00FFFFFF) )
{
+ /*
+ * Handle a race between the mcast callback and a receive/send. The QP
+ * is joined to the MC group before the MC callback is received, so it
+ * can receive packets, and NDIS can try to respond. We need to delay
+ * a response until the MC callback runs and sets the AV.
+ */
ipoib_endpt_deref( p_endpt );
IPOIB_EXIT( IPOIB_DBG_ENDPT );
return NDIS_STATUS_PENDING;
}
- /* This is the first packet for this endpoint. Query the SA. */
- p_port = __endpt_parent( p_endpt );
-
- IPOIB_ENTER( IPOIB_DBG_ENDPT );
-
- info.method = IB_MAD_METHOD_GETTABLE;
- info.attr_id = IB_MAD_ATTR_PATH_RECORD;
- info.attr_size = sizeof(ib_path_rec_t);
- info.comp_mask = IB_PR_COMPMASK_DGID | IB_PR_COMPMASK_SGID |
- IB_PR_COMPMASK_REVERSIBLE | IB_PR_COMPMASK_NUM_PATH;
- info.p_attr = &path;
-
- cl_memclr( &path, sizeof(ib_path_rec_t) );
- path.dgid = p_endpt->dgid;
- ib_gid_set_default( &path.sgid, p_port->p_adapter->guids.port_guid.guid );
- path.num_path = 0x1;
-
- cl_memclr( &query, sizeof(ib_query_req_t) );
- query.query_type = IB_QUERY_USER_DEFINED;
- query.p_query_input = &info;
- query.port_guid = p_port->p_adapter->guids.port_guid.guid;
- query.timeout_ms = p_port->p_adapter->params.sa_timeout;
- query.retry_cnt = p_port->p_adapter->params.sa_retry_cnt;
-
- query.query_context = p_endpt;
- query.pfn_query_cb = __path_query_cb;
-
- status = p_port->p_adapter->p_ifc->query(
- p_port->p_adapter->h_al, &query, &p_endpt->h_query );
- if( status != IB_SUCCESS )
- {
- IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ENDPT,
- ("ib_query for path returned %s\n",
- p_port->p_adapter->p_ifc->get_err_str( status )) );
- ipoib_endpt_deref( p_endpt );
- /* Flag the adapter as hung. */
- p_port->p_adapter->hung = TRUE;
- }
-
- IPOIB_EXIT( IPOIB_DBG_ENDPT );
- return NDIS_STATUS_PENDING;
-}
-
-
-static void
-__path_query_cb(
- IN ib_query_rec_t *p_query_rec )
-{
- ib_api_status_t status;
- ipoib_endpt_t *p_endpt;
- ipoib_port_t *p_port;
- ib_av_attr_t av_attr;
- ib_path_rec_t *p_path;
- net32_t flow_lbl;
-
- IPOIB_ENTER( IPOIB_DBG_ENDPT );
-
- p_endpt = (ipoib_endpt_t*)p_query_rec->query_context;
+ /* This is the first packet for this endpoint. Create the AV. */
p_port = __endpt_parent( p_endpt );
- cl_obj_lock( &p_endpt->obj );
- p_endpt->h_query = NULL;
- if( p_endpt->obj.state == CL_DESTROYING )
- {
- cl_obj_unlock( &p_endpt->obj );
- ipoib_endpt_deref( p_endpt );
- if( p_query_rec->p_result_mad )
- p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad );
- IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
- ("Endpoint destroying, aborting.\n") );
- return;
- }
- cl_obj_unlock( &p_endpt->obj );
-
- if( p_query_rec->status != IB_SUCCESS || !p_query_rec->result_cnt )
- {
- p_port->p_adapter->hung = TRUE;
- ipoib_endpt_deref( p_endpt );
- if( p_query_rec->p_result_mad )
- p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad );
- IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
- ("Path query failed with %s\n",
- p_port->p_adapter->p_ifc->get_err_str( p_query_rec->status )) );
- return;
- }
-
- p_path = ib_get_query_path_rec( p_query_rec->p_result_mad, 0 );
-
cl_memclr( &av_attr, sizeof(ib_av_attr_t) );
av_attr.port_num = p_port->port_num;
- av_attr.sl = ib_path_rec_sl( p_path );
- av_attr.dlid = p_path->dlid;
+ ib_member_get_sl_flow_hop(
+ p_port->ib_mgr.bcast_rec.sl_flow_hop,
+ &av_attr.sl,
+ &flow_lbl,
+ &av_attr.grh.hop_limit
+ );
+
+ av_attr.dlid = p_endpt->dlid;
/*
* We always send the GRH so that we preferably lookup endpoints
@@ -402,38 +328,17 @@ __path_query_cb(
* for which there is no match, something that doesn't work when
* using LIDs only.
*/
- flow_lbl = ib_path_rec_flow_lbl( p_path );
av_attr.grh_valid = TRUE;
av_attr.grh.ver_class_flow = ib_grh_set_ver_class_flow(
- 6, p_path->tclass, flow_lbl );
+ 6, p_port->ib_mgr.bcast_rec.tclass, flow_lbl );
av_attr.grh.resv1 = 0;
av_attr.grh.resv2 = 0;
- av_attr.grh.hop_limit = ib_path_rec_hop_limit( p_path );
- av_attr.grh.src_gid = p_path->sgid;
- av_attr.grh.dest_gid = p_path->dgid;
-
- cl_obj_lock( &p_port->obj );
- if( !p_endpt->dlid )
- {
- cl_map_item_t *p_qitem;
+ ib_gid_set_default( &av_attr.grh.src_gid, p_port->p_adapter->guids.port_guid.guid );
+ av_attr.grh.dest_gid = p_endpt->dgid;
- /* This is a subnet local endpoint that does not have its LID set. */
- p_endpt->dlid = p_path->dlid;
- /*
- * Insert the item in the LID map so that locally routed unicast
- * traffic will resolve it properly.
- */
- p_qitem = cl_qmap_insert( &p_port->endpt_mgr.lid_endpts,
- p_endpt->dlid, &p_endpt->lid_item );
- CL_ASSERT( p_qitem == &p_endpt->lid_item );
- }
- cl_obj_unlock( &p_port->obj );
- av_attr.static_rate = ib_path_rec_rate( p_path );
+ av_attr.static_rate = p_port->ib_mgr.bcast_rec.rate;
av_attr.path_bits = 0;
- /* Done with the path record. Release the MAD. */
- p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad );
-
/* Create the AV. */
status = p_port->p_adapter->p_ifc->create_av(
p_port->ib_mgr.h_pd, &av_attr, &p_endpt->h_av );
@@ -445,13 +350,9 @@ __path_query_cb(
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("ib_create_av failed with %s\n",
p_port->p_adapter->p_ifc->get_err_str( status )) );
- return;
+ return NDIS_STATUS_FAILURE;
}
- /* Try to send all pending sends. */
- ipoib_port_resume( p_port );
-
- /* Release the reference taken for the SA query. */
- ipoib_endpt_deref( p_endpt );
IPOIB_EXIT( IPOIB_DBG_ENDPT );
+ return NDIS_STATUS_SUCCESS;
}
diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\ulp\ipoib\kernel\ipoib_port.c trunk\ulp\ipoib\kernel\ipoib_port.c
--- old\ulp\ipoib\kernel\ipoib_port.c Thu Aug 21 08:40:18 2008
+++ trunk\ulp\ipoib\kernel\ipoib_port.c Thu Aug 21 08:41:29 2008
@@ -1833,7 +1833,7 @@ __recv_get_endpts(
#else /* IPOIB_INLINE_RECV */
*pp_src = ipoib_endpt_create( &p_desc->p_buf->ib.grh.src_gid,
#endif /* IPOIB_INLINE_RECV */
- 0, p_wc->recv.ud.remote_qp );
+ p_wc->recv.ud.remote_lid, p_wc->recv.ud.remote_qp );
if( !*pp_src )
{
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
@@ -2423,13 +2423,10 @@ __recv_arp(
return status;
}
/*
- * Create the endpoint. Note that the LID is left blank and will be
- * resolved by a path query as needed. This is done because the
- * remote LID/GID from the work completion may not be the original
- * initiator.
+ * Create the endpoint.
*/
*pp_src = ipoib_endpt_create( &p_ib_arp->src_hw.gid,
- 0, (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) );
+ p_wc->recv.ud.remote_lid, (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) );
if( !*pp_src )
{
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ipoib_no_pr_query.patch
Type: application/octet-stream
Size: 7888 bytes
Desc: ipoib_no_pr_query.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080821/469bedbf/attachment.obj>
More information about the ofw
mailing list