[ofa-general] [PATCH] opensm: improve multicast re-routing requests processing
Hal Rosenstock
hal.rosenstock at gmail.com
Tue Sep 8 15:23:24 PDT 2009
On Sun, Sep 6, 2009 at 1:39 PM, Sasha Khapyorsky <sashak at voltaire.com>wrote:
>
> When we have two or more changes in a same multicast group multiple
> multicast rerouting requests will be created and processed. To prevent
> this we will use array of requests indexed by mlid value minus
> IB_LID_MCAST_START_HO and for each multicast group change we will just
> mark that specific mlid requires re-routing and "duplicated" requests
> will be merged there.
>
> Also in this way we will be able to process multicast group routing
> entries deletion for already removed groups by just knowing its MLID
> and not using its content - this will let us to not delay mutlicast
> groups deletion ('to_be_deleted' flag) and will simplify many multicast
> related code flows.
>
While the delay adds complexity, it is a feature. Delayed deletion (and
join) is allowed by IBA and is needed in a fast changing subnet when there
are a lot of groups changing. This was seen quite a while ago and was how
OpenSM evolved based on field experience and other testing. Eitan is the
expert here. IMO support for this needs to be added (back in).
-- Hal
>
> Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
> ---
> opensm/include/opensm/osm_sm.h | 4 +-
> opensm/opensm/osm_mcast_mgr.c | 27 +++++++++------------
> opensm/opensm/osm_sm.c | 49
> +++++++++++++---------------------------
> 3 files changed, 30 insertions(+), 50 deletions(-)
>
> diff --git a/opensm/include/opensm/osm_sm.h
> b/opensm/include/opensm/osm_sm.h
> index 0914a95..986143a 100644
> --- a/opensm/include/opensm/osm_sm.h
> +++ b/opensm/include/opensm/osm_sm.h
> @@ -126,8 +126,8 @@ typedef struct osm_sm {
> cl_dispatcher_t *p_disp;
> cl_plock_t *p_lock;
> atomic32_t sm_trans_id;
> - cl_spinlock_t mgrp_lock;
> - cl_qlist_t mgrp_list;
> + unsigned mlids_req_max;
> + uint8_t *mlids_req;
> osm_sm_mad_ctrl_t mad_ctrl;
> osm_lid_mgr_t lid_mgr;
> osm_ucast_mgr_t ucast_mgr;
> diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c
> index d7c5ce1..dd504ef 100644
> --- a/opensm/opensm/osm_mcast_mgr.c
> +++ b/opensm/opensm/osm_mcast_mgr.c
> @@ -1116,7 +1116,6 @@ static int mcast_mgr_set_mftables(osm_sm_t * sm)
> int osm_mcast_mgr_process(osm_sm_t * sm)
> {
> cl_qmap_t *p_sw_tbl;
> - cl_qlist_t *p_list = &sm->mgrp_list;
> osm_mgrp_t *p_mgrp;
> int i, ret = 0;
>
> @@ -1150,16 +1149,14 @@ int osm_mcast_mgr_process(osm_sm_t * sm)
> mcast_mgr_process_mgrp(sm, p_mgrp);
> }
>
> + memset(sm->mlids_req, 0, sm->mlids_req_max);
> + sm->mlids_req_max = 0;
> +
> /*
> Walk the switches and download the tables for each.
> */
> ret = mcast_mgr_set_mftables(sm);
>
> - while (!cl_is_qlist_empty(p_list)) {
> - cl_list_item_t *p = cl_qlist_remove_head(p_list);
> - free(p);
> - }
> -
> exit:
> CL_PLOCK_RELEASE(sm->p_lock);
>
> @@ -1174,11 +1171,10 @@ exit:
> **********************************************************************/
> int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
> {
> - cl_qlist_t *p_list = &sm->mgrp_list;
> osm_mgrp_t *p_mgrp;
> ib_net16_t mlid;
> - osm_mcast_mgr_ctxt_t *ctx;
> int ret = 0;
> + unsigned i;
>
> OSM_LOG_ENTER(sm->p_log);
>
> @@ -1192,14 +1188,12 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
> goto exit;
> }
>
> - while (!cl_is_qlist_empty(p_list)) {
> - ctx = (osm_mcast_mgr_ctxt_t *)
> cl_qlist_remove_head(p_list);
> -
> - /* nice copy no warning on size diff */
> - memcpy(&mlid, &ctx->mlid, sizeof(mlid));
> + for (i = 0; i <= sm->mlids_req_max; i++) {
> + if (!sm->mlids_req[i])
> + continue;
> + sm->mlids_req[i] = 0;
>
> - /* we can destroy the context now */
> - free(ctx);
> + mlid = cl_hton16(i + IB_LID_MCAST_START_HO);
>
> /* since we delayed the execution we prefer to pass the
> mlid as the mgrp identifier and then find it or abort */
> @@ -1223,6 +1217,9 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
> mcast_mgr_process_mgrp(sm, p_mgrp);
> }
>
> + memset(sm->mlids_req, 0, sm->mlids_req_max);
> + sm->mlids_req_max = 0;
> +
> /*
> Walk the switches and download the tables for each.
> */
> diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c
> index 50aee91..e446c9d 100644
> --- a/opensm/opensm/osm_sm.c
> +++ b/opensm/opensm/osm_sm.c
> @@ -166,7 +166,6 @@ void osm_sm_construct(IN osm_sm_t * p_sm)
> cl_event_construct(&p_sm->subnet_up_event);
> cl_event_wheel_construct(&p_sm->trap_aging_tracker);
> cl_thread_construct(&p_sm->sweeper);
> - cl_spinlock_construct(&p_sm->mgrp_lock);
> osm_sm_mad_ctrl_construct(&p_sm->mad_ctrl);
> osm_lid_mgr_construct(&p_sm->lid_mgr);
> osm_ucast_mgr_construct(&p_sm->ucast_mgr);
> @@ -234,8 +233,8 @@ void osm_sm_destroy(IN osm_sm_t * p_sm)
> cl_event_destroy(&p_sm->signal_event);
> cl_event_destroy(&p_sm->subnet_up_event);
> cl_spinlock_destroy(&p_sm->signal_lock);
> - cl_spinlock_destroy(&p_sm->mgrp_lock);
> cl_spinlock_destroy(&p_sm->state_lock);
> + free(p_sm->mlids_req);
>
> osm_log(p_sm->p_log, OSM_LOG_SYS, "Exiting SM\n"); /* Format
> Waived */
> OSM_LOG_EXIT(p_sm->p_log);
> @@ -288,11 +287,14 @@ ib_api_status_t osm_sm_init(IN osm_sm_t * p_sm, IN
> osm_subn_t * p_subn,
> if (status != CL_SUCCESS)
> goto Exit;
>
> - cl_qlist_init(&p_sm->mgrp_list);
> -
> - status = cl_spinlock_init(&p_sm->mgrp_lock);
> - if (status != CL_SUCCESS)
> + p_sm->mlids_req_max = 0;
> + p_sm->mlids_req = malloc((IB_LID_MCAST_END_HO -
> IB_LID_MCAST_START_HO +
> + 1) * sizeof(p_sm->mlids_req[0]));
> + if (!p_sm->mlids_req)
> goto Exit;
> + memset(p_sm->mlids_req, 0,
> + (IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO +
> + 1) * sizeof(p_sm->mlids_req[0]));
>
> status = osm_sm_mad_ctrl_init(&p_sm->mad_ctrl, p_sm->p_subn,
> p_sm->p_mad_pool, p_sm->p_vl15,
> @@ -441,32 +443,15 @@ Exit:
>
> /**********************************************************************
> **********************************************************************/
> -static ib_api_status_t sm_mgrp_process(IN osm_sm_t * p_sm,
> - IN osm_mgrp_t * p_mgrp)
> +static void request_mlid(osm_sm_t * sm, uint16_t mlid)
> {
> - osm_mcast_mgr_ctxt_t *ctx;
> -
> - /*
> - * 'Schedule' all the QP0 traffic for when the state manager
> - * isn't busy trying to do something else.
> - */
> - ctx = malloc(sizeof(*ctx));
> - if (!ctx)
> - return IB_ERROR;
> - memset(ctx, 0, sizeof(*ctx));
> - ctx->mlid = p_mgrp->mlid;
> -
> - cl_spinlock_acquire(&p_sm->mgrp_lock);
> - cl_qlist_insert_tail(&p_sm->mgrp_list, &ctx->list_item);
> - cl_spinlock_release(&p_sm->mgrp_lock);
> -
> - osm_sm_signal(p_sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST);
> -
> - return IB_SUCCESS;
> + mlid -= IB_LID_MCAST_START_HO;
> + sm->mlids_req[mlid] = 1;
> + if (sm->mlids_req_max < mlid)
> + sm->mlids_req_max = mlid;
> + osm_sm_signal(sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST);
> }
>
> -/**********************************************************************
> - **********************************************************************/
> ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm, IN osm_mgrp_t *mgrp,
> IN const ib_net64_t port_guid)
> {
> @@ -519,7 +504,7 @@ ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm,
> IN osm_mgrp_t *mgrp,
> goto Exit;
> }
>
> - status = sm_mgrp_process(p_sm, mgrp);
> + request_mlid(p_sm, cl_ntoh16(mgrp->mlid));
> Exit:
> CL_PLOCK_RELEASE(p_sm->p_lock);
> OSM_LOG_EXIT(p_sm->p_log);
> @@ -527,8 +512,6 @@ Exit:
> return status;
> }
>
> -/**********************************************************************
> - **********************************************************************/
> ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm, IN osm_mgrp_t
> *mgrp,
> IN const ib_net64_t port_guid)
> {
> @@ -557,7 +540,7 @@ ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm,
> IN osm_mgrp_t *mgrp,
>
> osm_port_remove_mgrp(p_port, mgrp);
>
> - status = sm_mgrp_process(p_sm, mgrp);
> + request_mlid(p_sm, cl_hton16(mgrp->mlid));
> Exit:
> CL_PLOCK_RELEASE(p_sm->p_lock);
> OSM_LOG_EXIT(p_sm->p_log);
> --
> 1.6.4.2
>
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>
> To unsubscribe, please visit
> http://openib.org/mailman/listinfo/openib-general
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20090908/733054f0/attachment.html>
More information about the general
mailing list