[ofw] [PATCH 1/4] Avoid the SM
Hal Rosenstock
halr at obsidianresearch.com
Thu Aug 28 07:51:53 PDT 2008
Hi Fab,
Fab Tillier wrote:
> This patch removes the path query from IPoIB when trying to send a unicast packet. Instead it uses fields form the broadcast group and the work completion to form an address vector. This speeds ARP responses.
>
> The limitation of this patch is that the rate for the address vector will be that of the broadcast group, which may be suboptimal in mixed SDR/DDR/QDR fabrics.
>
Maybe I'm wrong about this but I think another limitation may be that
this only works intra IB subnet (when the IPoIB subnet is contained
within a single IB subnet). What I mean by that if the IPoIB subnet
(e.g. broadcast group) spans more than 1 IB subnet, I don't think this
works, right ? I know that is an "experimental" configuration currently.
Has that been tried ?
-- Hal
> Signed-off-by: Fab Tillier <ftillier at microsoft.com>
>
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\ulp\ipoib\kernel\ipoib_endpoint.c trunk\ulp\ipoib\kernel\ipoib_endpoint.c
> --- old\ulp\ipoib\kernel\ipoib_endpoint.c Mon Jun 30 11:09:26 2008
> +++ trunk\ulp\ipoib\kernel\ipoib_endpoint.c Thu Aug 21 08:41:29 2008
> @@ -183,6 +183,7 @@ ipoib_endpt_set_mcast(
> return status;
> }
> p_endpt->h_mcast = p_mcast_rec->h_mcast;
> + p_endpt->dlid = p_mcast_rec->p_member_rec->mlid;
>
> IPOIB_EXIT( IPOIB_DBG_ENDPT );
> return IB_SUCCESS;
> @@ -279,9 +280,8 @@ ipoib_endpt_queue(
> {
> ib_api_status_t status;
> ipoib_port_t *p_port;
> - ib_query_req_t query;
> - ib_user_query_t info;
> - ib_path_rec_t path;
> + ib_av_attr_t av_attr;
> + net32_t flow_lbl;
>
> IPOIB_ENTER( IPOIB_DBG_ENDPT );
>
> @@ -291,108 +291,34 @@ ipoib_endpt_queue(
> return NDIS_STATUS_SUCCESS;
> }
>
> - if( p_endpt->h_query ||
> - p_endpt->qpn == CL_HTON32(0x00FFFFFF) )
> + if( p_endpt->qpn == CL_HTON32(0x00FFFFFF) )
> {
> + /*
> + * Handle a race between the mcast callback and a receive/send. The QP
> + * is joined to the MC group before the MC callback is received, so it
> + * can receive packets, and NDIS can try to respond. We need to delay
> + * a response until the MC callback runs and sets the AV.
> + */
> ipoib_endpt_deref( p_endpt );
> IPOIB_EXIT( IPOIB_DBG_ENDPT );
> return NDIS_STATUS_PENDING;
> }
>
> - /* This is the first packet for this endpoint. Query the SA. */
> - p_port = __endpt_parent( p_endpt );
> -
> - IPOIB_ENTER( IPOIB_DBG_ENDPT );
> -
> - info.method = IB_MAD_METHOD_GETTABLE;
> - info.attr_id = IB_MAD_ATTR_PATH_RECORD;
> - info.attr_size = sizeof(ib_path_rec_t);
> - info.comp_mask = IB_PR_COMPMASK_DGID | IB_PR_COMPMASK_SGID |
> - IB_PR_COMPMASK_REVERSIBLE | IB_PR_COMPMASK_NUM_PATH;
> - info.p_attr = &path;
> -
> - cl_memclr( &path, sizeof(ib_path_rec_t) );
> - path.dgid = p_endpt->dgid;
> - ib_gid_set_default( &path.sgid, p_port->p_adapter->guids.port_guid.guid );
> - path.num_path = 0x1;
> -
> - cl_memclr( &query, sizeof(ib_query_req_t) );
> - query.query_type = IB_QUERY_USER_DEFINED;
> - query.p_query_input = &info;
> - query.port_guid = p_port->p_adapter->guids.port_guid.guid;
> - query.timeout_ms = p_port->p_adapter->params.sa_timeout;
> - query.retry_cnt = p_port->p_adapter->params.sa_retry_cnt;
> -
> - query.query_context = p_endpt;
> - query.pfn_query_cb = __path_query_cb;
> -
> - status = p_port->p_adapter->p_ifc->query(
> - p_port->p_adapter->h_al, &query, &p_endpt->h_query );
> - if( status != IB_SUCCESS )
> - {
> - IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ENDPT,
> - ("ib_query for path returned %s\n",
> - p_port->p_adapter->p_ifc->get_err_str( status )) );
> - ipoib_endpt_deref( p_endpt );
> - /* Flag the adapter as hung. */
> - p_port->p_adapter->hung = TRUE;
> - }
> -
> - IPOIB_EXIT( IPOIB_DBG_ENDPT );
> - return NDIS_STATUS_PENDING;
> -}
> -
> -
> -static void
> -__path_query_cb(
> - IN ib_query_rec_t *p_query_rec )
> -{
> - ib_api_status_t status;
> - ipoib_endpt_t *p_endpt;
> - ipoib_port_t *p_port;
> - ib_av_attr_t av_attr;
> - ib_path_rec_t *p_path;
> - net32_t flow_lbl;
> -
> - IPOIB_ENTER( IPOIB_DBG_ENDPT );
> -
> - p_endpt = (ipoib_endpt_t*)p_query_rec->query_context;
> + /* This is the first packet for this endpoint. Create the AV. */
> p_port = __endpt_parent( p_endpt );
>
> - cl_obj_lock( &p_endpt->obj );
> - p_endpt->h_query = NULL;
> - if( p_endpt->obj.state == CL_DESTROYING )
> - {
> - cl_obj_unlock( &p_endpt->obj );
> - ipoib_endpt_deref( p_endpt );
> - if( p_query_rec->p_result_mad )
> - p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad );
> - IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> - ("Endpoint destroying, aborting.\n") );
> - return;
> - }
> - cl_obj_unlock( &p_endpt->obj );
> -
> - if( p_query_rec->status != IB_SUCCESS || !p_query_rec->result_cnt )
> - {
> - p_port->p_adapter->hung = TRUE;
> - ipoib_endpt_deref( p_endpt );
> - if( p_query_rec->p_result_mad )
> - p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad );
> - IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> - ("Path query failed with %s\n",
> - p_port->p_adapter->p_ifc->get_err_str( p_query_rec->status )) );
> - return;
> - }
> -
> - p_path = ib_get_query_path_rec( p_query_rec->p_result_mad, 0 );
> -
> cl_memclr( &av_attr, sizeof(ib_av_attr_t) );
>
> av_attr.port_num = p_port->port_num;
>
> - av_attr.sl = ib_path_rec_sl( p_path );
> - av_attr.dlid = p_path->dlid;
> + ib_member_get_sl_flow_hop(
> + p_port->ib_mgr.bcast_rec.sl_flow_hop,
> + &av_attr.sl,
> + &flow_lbl,
> + &av_attr.grh.hop_limit
> + );
> +
> + av_attr.dlid = p_endpt->dlid;
>
> /*
> * We always send the GRH so that we preferably lookup endpoints
> @@ -402,38 +328,17 @@ __path_query_cb(
> * for which there is no match, something that doesn't work when
> * using LIDs only.
> */
> - flow_lbl = ib_path_rec_flow_lbl( p_path );
> av_attr.grh_valid = TRUE;
> av_attr.grh.ver_class_flow = ib_grh_set_ver_class_flow(
> - 6, p_path->tclass, flow_lbl );
> + 6, p_port->ib_mgr.bcast_rec.tclass, flow_lbl );
> av_attr.grh.resv1 = 0;
> av_attr.grh.resv2 = 0;
> - av_attr.grh.hop_limit = ib_path_rec_hop_limit( p_path );
> - av_attr.grh.src_gid = p_path->sgid;
> - av_attr.grh.dest_gid = p_path->dgid;
> -
> - cl_obj_lock( &p_port->obj );
> - if( !p_endpt->dlid )
> - {
> - cl_map_item_t *p_qitem;
> + ib_gid_set_default( &av_attr.grh.src_gid, p_port->p_adapter->guids.port_guid.guid );
> + av_attr.grh.dest_gid = p_endpt->dgid;
>
> - /* This is a subnet local endpoint that does not have its LID set. */
> - p_endpt->dlid = p_path->dlid;
> - /*
> - * Insert the item in the LID map so that locally routed unicast
> - * traffic will resolve it properly.
> - */
> - p_qitem = cl_qmap_insert( &p_port->endpt_mgr.lid_endpts,
> - p_endpt->dlid, &p_endpt->lid_item );
> - CL_ASSERT( p_qitem == &p_endpt->lid_item );
> - }
> - cl_obj_unlock( &p_port->obj );
> - av_attr.static_rate = ib_path_rec_rate( p_path );
> + av_attr.static_rate = p_port->ib_mgr.bcast_rec.rate;
> av_attr.path_bits = 0;
>
> - /* Done with the path record. Release the MAD. */
> - p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad );
> -
> /* Create the AV. */
> status = p_port->p_adapter->p_ifc->create_av(
> p_port->ib_mgr.h_pd, &av_attr, &p_endpt->h_av );
> @@ -445,13 +350,9 @@ __path_query_cb(
> IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> ("ib_create_av failed with %s\n",
> p_port->p_adapter->p_ifc->get_err_str( status )) );
> - return;
> + return NDIS_STATUS_FAILURE;
> }
>
> - /* Try to send all pending sends. */
> - ipoib_port_resume( p_port );
> -
> - /* Release the reference taken for the SA query. */
> - ipoib_endpt_deref( p_endpt );
> IPOIB_EXIT( IPOIB_DBG_ENDPT );
> + return NDIS_STATUS_SUCCESS;
> }
> diff -up -r -X trunk\docs\dontdiff.txt -I \$Id: old\ulp\ipoib\kernel\ipoib_port.c trunk\ulp\ipoib\kernel\ipoib_port.c
> --- old\ulp\ipoib\kernel\ipoib_port.c Thu Aug 21 08:40:18 2008
> +++ trunk\ulp\ipoib\kernel\ipoib_port.c Thu Aug 21 08:41:29 2008
> @@ -1833,7 +1833,7 @@ __recv_get_endpts(
> #else /* IPOIB_INLINE_RECV */
> *pp_src = ipoib_endpt_create( &p_desc->p_buf->ib.grh.src_gid,
> #endif /* IPOIB_INLINE_RECV */
> - 0, p_wc->recv.ud.remote_qp );
> + p_wc->recv.ud.remote_lid, p_wc->recv.ud.remote_qp );
> if( !*pp_src )
> {
> IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> @@ -2423,13 +2423,10 @@ __recv_arp(
> return status;
> }
> /*
> - * Create the endpoint. Note that the LID is left blank and will be
> - * resolved by a path query as needed. This is done because the
> - * remote LID/GID from the work completion may not be the original
> - * initiator.
> + * Create the endpoint.
> */
> *pp_src = ipoib_endpt_create( &p_ib_arp->src_hw.gid,
> - 0, (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) );
> + p_wc->recv.ud.remote_lid, (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) );
>
> if( !*pp_src )
> {
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> ofw mailing list
> ofw at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
More information about the ofw
mailing list