[ofa-general] [PATCH] opensm: improve multicast re-routing requests processing

Hal Rosenstock hal.rosenstock at gmail.com
Tue Sep 8 15:23:24 PDT 2009


On Sun, Sep 6, 2009 at 1:39 PM, Sasha Khapyorsky <sashak at voltaire.com>wrote:

>
> When we have two or more changes in a same multicast group multiple
> multicast rerouting requests will be created and processed. To prevent
> this we will use array of requests indexed by mlid value minus
> IB_LID_MCAST_START_HO and for each multicast group change we will just
> mark that specific mlid requires re-routing and "duplicated" requests
> will be merged there.
>
> Also in this way we will be able to process multicast group routing
> entries deletion for already removed groups by just knowing its MLID
> and not using its content - this will let us to not delay mutlicast
> groups deletion ('to_be_deleted' flag) and will simplify many multicast
> related code flows.
>

While the delay adds complexity, it is a feature. Delayed deletion (and
join) is allowed by IBA and is needed in a fast changing subnet when there
are a lot of groups changing. This was seen quite a while ago and was how
OpenSM evolved based on field experience and other testing. Eitan is the
expert here. IMO support for this needs to be added (back in).

-- Hal



>
> Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
> ---
>  opensm/include/opensm/osm_sm.h |    4 +-
>  opensm/opensm/osm_mcast_mgr.c  |   27 +++++++++------------
>  opensm/opensm/osm_sm.c         |   49
> +++++++++++++---------------------------
>  3 files changed, 30 insertions(+), 50 deletions(-)
>
> diff --git a/opensm/include/opensm/osm_sm.h
> b/opensm/include/opensm/osm_sm.h
> index 0914a95..986143a 100644
> --- a/opensm/include/opensm/osm_sm.h
> +++ b/opensm/include/opensm/osm_sm.h
> @@ -126,8 +126,8 @@ typedef struct osm_sm {
>        cl_dispatcher_t *p_disp;
>        cl_plock_t *p_lock;
>        atomic32_t sm_trans_id;
> -       cl_spinlock_t mgrp_lock;
> -       cl_qlist_t mgrp_list;
> +       unsigned mlids_req_max;
> +       uint8_t *mlids_req;
>        osm_sm_mad_ctrl_t mad_ctrl;
>        osm_lid_mgr_t lid_mgr;
>        osm_ucast_mgr_t ucast_mgr;
> diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c
> index d7c5ce1..dd504ef 100644
> --- a/opensm/opensm/osm_mcast_mgr.c
> +++ b/opensm/opensm/osm_mcast_mgr.c
> @@ -1116,7 +1116,6 @@ static int mcast_mgr_set_mftables(osm_sm_t * sm)
>  int osm_mcast_mgr_process(osm_sm_t * sm)
>  {
>        cl_qmap_t *p_sw_tbl;
> -       cl_qlist_t *p_list = &sm->mgrp_list;
>        osm_mgrp_t *p_mgrp;
>        int i, ret = 0;
>
> @@ -1150,16 +1149,14 @@ int osm_mcast_mgr_process(osm_sm_t * sm)
>                        mcast_mgr_process_mgrp(sm, p_mgrp);
>        }
>
> +       memset(sm->mlids_req, 0, sm->mlids_req_max);
> +       sm->mlids_req_max = 0;
> +
>        /*
>           Walk the switches and download the tables for each.
>         */
>        ret = mcast_mgr_set_mftables(sm);
>
> -       while (!cl_is_qlist_empty(p_list)) {
> -               cl_list_item_t *p = cl_qlist_remove_head(p_list);
> -               free(p);
> -       }
> -
>  exit:
>        CL_PLOCK_RELEASE(sm->p_lock);
>
> @@ -1174,11 +1171,10 @@ exit:
>  **********************************************************************/
>  int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
>  {
> -       cl_qlist_t *p_list = &sm->mgrp_list;
>        osm_mgrp_t *p_mgrp;
>        ib_net16_t mlid;
> -       osm_mcast_mgr_ctxt_t *ctx;
>        int ret = 0;
> +       unsigned i;
>
>        OSM_LOG_ENTER(sm->p_log);
>
> @@ -1192,14 +1188,12 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
>                goto exit;
>        }
>
> -       while (!cl_is_qlist_empty(p_list)) {
> -               ctx = (osm_mcast_mgr_ctxt_t *)
> cl_qlist_remove_head(p_list);
> -
> -               /* nice copy no warning on size diff */
> -               memcpy(&mlid, &ctx->mlid, sizeof(mlid));
> +       for (i = 0; i <= sm->mlids_req_max; i++) {
> +               if (!sm->mlids_req[i])
> +                       continue;
> +               sm->mlids_req[i] = 0;
>
> -               /* we can destroy the context now */
> -               free(ctx);
> +               mlid = cl_hton16(i + IB_LID_MCAST_START_HO);
>
>                /* since we delayed the execution we prefer to pass the
>                   mlid as the mgrp identifier and then find it or abort */
> @@ -1223,6 +1217,9 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
>                mcast_mgr_process_mgrp(sm, p_mgrp);
>        }
>
> +       memset(sm->mlids_req, 0, sm->mlids_req_max);
> +       sm->mlids_req_max = 0;
> +
>        /*
>           Walk the switches and download the tables for each.
>         */
> diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c
> index 50aee91..e446c9d 100644
> --- a/opensm/opensm/osm_sm.c
> +++ b/opensm/opensm/osm_sm.c
> @@ -166,7 +166,6 @@ void osm_sm_construct(IN osm_sm_t * p_sm)
>        cl_event_construct(&p_sm->subnet_up_event);
>        cl_event_wheel_construct(&p_sm->trap_aging_tracker);
>        cl_thread_construct(&p_sm->sweeper);
> -       cl_spinlock_construct(&p_sm->mgrp_lock);
>        osm_sm_mad_ctrl_construct(&p_sm->mad_ctrl);
>        osm_lid_mgr_construct(&p_sm->lid_mgr);
>        osm_ucast_mgr_construct(&p_sm->ucast_mgr);
> @@ -234,8 +233,8 @@ void osm_sm_destroy(IN osm_sm_t * p_sm)
>        cl_event_destroy(&p_sm->signal_event);
>        cl_event_destroy(&p_sm->subnet_up_event);
>        cl_spinlock_destroy(&p_sm->signal_lock);
> -       cl_spinlock_destroy(&p_sm->mgrp_lock);
>        cl_spinlock_destroy(&p_sm->state_lock);
> +       free(p_sm->mlids_req);
>
>        osm_log(p_sm->p_log, OSM_LOG_SYS, "Exiting SM\n");      /* Format
> Waived */
>        OSM_LOG_EXIT(p_sm->p_log);
> @@ -288,11 +287,14 @@ ib_api_status_t osm_sm_init(IN osm_sm_t * p_sm, IN
> osm_subn_t * p_subn,
>        if (status != CL_SUCCESS)
>                goto Exit;
>
> -       cl_qlist_init(&p_sm->mgrp_list);
> -
> -       status = cl_spinlock_init(&p_sm->mgrp_lock);
> -       if (status != CL_SUCCESS)
> +       p_sm->mlids_req_max = 0;
> +       p_sm->mlids_req = malloc((IB_LID_MCAST_END_HO -
> IB_LID_MCAST_START_HO +
> +                                 1) * sizeof(p_sm->mlids_req[0]));
> +       if (!p_sm->mlids_req)
>                goto Exit;
> +       memset(p_sm->mlids_req, 0,
> +              (IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO +
> +               1) * sizeof(p_sm->mlids_req[0]));
>
>        status = osm_sm_mad_ctrl_init(&p_sm->mad_ctrl, p_sm->p_subn,
>                                      p_sm->p_mad_pool, p_sm->p_vl15,
> @@ -441,32 +443,15 @@ Exit:
>
>  /**********************************************************************
>  **********************************************************************/
> -static ib_api_status_t sm_mgrp_process(IN osm_sm_t * p_sm,
> -                                      IN osm_mgrp_t * p_mgrp)
> +static void request_mlid(osm_sm_t * sm, uint16_t mlid)
>  {
> -       osm_mcast_mgr_ctxt_t *ctx;
> -
> -       /*
> -        * 'Schedule' all the QP0 traffic for when the state manager
> -        * isn't busy trying to do something else.
> -        */
> -       ctx = malloc(sizeof(*ctx));
> -       if (!ctx)
> -               return IB_ERROR;
> -       memset(ctx, 0, sizeof(*ctx));
> -       ctx->mlid = p_mgrp->mlid;
> -
> -       cl_spinlock_acquire(&p_sm->mgrp_lock);
> -       cl_qlist_insert_tail(&p_sm->mgrp_list, &ctx->list_item);
> -       cl_spinlock_release(&p_sm->mgrp_lock);
> -
> -       osm_sm_signal(p_sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST);
> -
> -       return IB_SUCCESS;
> +       mlid -= IB_LID_MCAST_START_HO;
> +       sm->mlids_req[mlid] = 1;
> +       if (sm->mlids_req_max < mlid)
> +               sm->mlids_req_max = mlid;
> +       osm_sm_signal(sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST);
>  }
>
> -/**********************************************************************
> - **********************************************************************/
>  ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm, IN osm_mgrp_t *mgrp,
>                                  IN const ib_net64_t port_guid)
>  {
> @@ -519,7 +504,7 @@ ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm,
> IN osm_mgrp_t *mgrp,
>                goto Exit;
>        }
>
> -       status = sm_mgrp_process(p_sm, mgrp);
> +       request_mlid(p_sm, cl_ntoh16(mgrp->mlid));
>  Exit:
>        CL_PLOCK_RELEASE(p_sm->p_lock);
>        OSM_LOG_EXIT(p_sm->p_log);
> @@ -527,8 +512,6 @@ Exit:
>        return status;
>  }
>
> -/**********************************************************************
> - **********************************************************************/
>  ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm, IN osm_mgrp_t
> *mgrp,
>                                   IN const ib_net64_t port_guid)
>  {
> @@ -557,7 +540,7 @@ ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm,
> IN osm_mgrp_t *mgrp,
>
>        osm_port_remove_mgrp(p_port, mgrp);
>
> -       status = sm_mgrp_process(p_sm, mgrp);
> +       request_mlid(p_sm, cl_hton16(mgrp->mlid));
>  Exit:
>        CL_PLOCK_RELEASE(p_sm->p_lock);
>        OSM_LOG_EXIT(p_sm->p_log);
> --
> 1.6.4.2
>
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>
> To unsubscribe, please visit
> http://openib.org/mailman/listinfo/openib-general
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20090908/733054f0/attachment.html>


More information about the general mailing list