[ofa-general] [PATCH 1/4] multicast multiplexing - definitions for new data types, and infrastructure functions
Slava Strebkov
slavas at voltaire.com
Thu Jul 16 07:39:42 PDT 2009
Please see my answers below your comments.
Slava
-----Original Message-----
From: Hal Rosenstock [mailto:hal.rosenstock at gmail.com]
Sent: Thursday, July 16, 2009 5:15 PM
To: Slava Strebkov
Cc: Sasha Khapyorsky; general at lists.openfabrics.org
Subject: Re: [ofa-general] [PATCH 1/4] multicast multiplexing - definitions for new data types, and infrastructure functions
On Thu, Jul 16, 2009 at 9:52 AM, Slava Strebkov<slavas at voltaire.com> wrote:
>
> This thread covers implementation of compression multiple MGIDs to one MLID.
> The following groups of IP addresses will have same MLID (for all pkey):
> 1. FF12401bxxxx000000000000FFFFFFFF - All Nodes
> 2. FF12401bxxxx00000000000000000001 - All hosts
> 3. FF12401bffff0000000000000000004d - all Gateways
> 4. FF12401bxxxx00000000000000000002 - all routers
> 5. FF12601bxxxx000000000001ffxxxxxx - IPv6 SNM
OK; thanks. So to recap, it compresses the list above to a single
common MGID/MLID (regardless of PKey) and also compresses IP groups
other than the above and IPv6 SNM based on MGID_MUX_MLID_MASK.
There were also some comments/questions embedded in the patch which
you may have missed.
-- Hal
> Slava
>
> -----Original Message-----
> From: Hal Rosenstock [mailto:hal.rosenstock at gmail.com]
> Sent: Thursday, July 16, 2009 4:39 PM
> To: Slava Strebkov
> Cc: Sasha Khapyorsky; general at lists.openfabrics.org
> Subject: Re: [ofa-general] [PATCH 1/4] multicast multiplexing - definitions for new data types, and infrastructure functions
>
> On Sun, Jun 28, 2009 at 6:36 AM, Slava Strebkov<slavas at voltaire.com> wrote:
>> [PATCH 1/4] Patch implements multicast multiplexing as proposed in the
>> thread entitled "IPv6 and IPoIB scalability issue".
>
> Would you be more explicit about what is covered (and what remains to
> be covered) in that lengthy thread ?
>
>> This first patch contains definitions for new data types
>> and infrastructure functions.
>> Signed-off-by: Slava Strebkov <slavas at voltaire.com>
>>
>> ---
>> opensm/include/opensm/osm_multicast.h | 94 +++++++++++++++++++++++++++++++--
>> opensm/opensm/osm_multicast.c | 82 ++++++++++++++++++++++++++++-
>> 2 files changed, 170 insertions(+), 6 deletions(-)
>>
>> diff --git a/opensm/include/opensm/osm_multicast.h b/opensm/include/opensm/osm_multicast.h
>> index a871306..02b63bd 100644
>> --- a/opensm/include/opensm/osm_multicast.h
>> +++ b/opensm/include/opensm/osm_multicast.h
>> @@ -1,5 +1,5 @@
>> /*
>> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
>> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>> * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>> * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>> *
>> @@ -45,6 +45,7 @@
>>
>> #include <iba/ib_types.h>
>> #include <complib/cl_qmap.h>
>> +#include <complib/cl_fleximap.h>
>> #include <complib/cl_qlist.h>
>> #include <complib/cl_spinlock.h>
>> #include <opensm/osm_base.h>
>> @@ -93,7 +94,7 @@ BEGIN_C_DECLS
>> *
>> * SYNOPSIS
>> */
>> -typedef struct osm_mcast_mgr_ctxt {
>> + typedef struct osm_mcast_mgr_ctxt {
>
> Is this (and other similar) formatting change(s) needed ?
> - osm_indent performed this work
> While not a problem here, it would be easier to read/review the
> patches if the formatting changes are in separate patches.
>
>> cl_list_item_t list_item;
>> ib_net16_t mlid;
>> } osm_mcast_mgr_ctxt_t;
>> @@ -107,6 +108,89 @@ typedef struct osm_mcast_mgr_ctxt {
>> * SEE ALSO
>> *********/
>>
>> +/****s* OpenSM: Multicast Group Holder/osm_mgrp_holder_t
>> +* NAME
>> +* osm_mgrp_holder_t
>> +*
>> +* DESCRIPTION
>> +* Holder for mgroups.
>> +*
>> +* The osm_mgrp_t object should be treated as opaque and should
>> +* be manipulated only through the provided functions.
>> +*
>> +* SYNOPSIS
>> +*/
>> +
>> +typedef struct osm_mgrp_holder {
>> + cl_fmap_t mgrp_map;
>> + cl_qmap_t mgrp_port_map;
>> + ib_gid_t common_mgid;
>> + osm_mtree_node_t *p_root;
>> + boolean_t to_be_deleted;
>> + uint32_t last_tree_id;
>> + uint32_t last_change_id;
>> + ib_net16_t mlid;
>> +} osm_mgrp_holder_t;
>> +
>> +/*
>> +* FIELDS
>> +* mgrp_map
>> +* Map for mgroups. Must be first element!!
>> +*
>> +* mgrp_port_map
>> +* Map of all ports joined same mlid
>> +*
>> +* common_mgid
>> +* mgid of mgroup, ANDed with bitmask.
>> +* mgid of each mgroup in mgrp_map, ANDed with bitmask,
>> +* see osm_mgrp_holder_prepare_common_mgid
>> +*
>> +* p_root
>> +* Pointer to the root "tree node" in the single spanning tree
>> +* for this multicast group holder. The nodes of the tree represent
>> +* switches. Member ports are not represented in the tree.
>> +*
>> +* to_be_deleted
>> +* Since holders are deleted when there are no mgroups in.
>> +*
>> +* last_change_id
>> +* a counter for the number of changes applied to the group in this holder.
>> +* This counter shuold be incremented on any modification
>> +* to the group: joining or leaving of ports.
>> +*
>> +* last_tree_id
>> +* the last change id used for building the current tree.
>> +*
>> +* mlid
>> +* mlid of current group holder
>> +*/
>> + /****s* OpenSM: Multicast group Port /osm_mgrp_port _t
>> +* NAME
>> +* osm_mgrp_port _t
>> +*
>> +* DESCRIPTION
>> +* Holder for pointers to mgroups and port guid.
>> +*
>> +*
>> +* SYNOPSIS
>> +*/
>> +typedef struct _osm_mgrp_port {
>> + cl_map_item_t guid_item;
>> + cl_qlist_t mgroups;
>> + ib_net64_t port_guid;
>> +} osm_mgrp_port_t;
>> +/*
>> +* FIELDS
>> +* guid_item
>> +* Map for ports. Must be first element
>> +*
>> +* mgroups
>> +* Map for mgroups opened by this port.
>> +*
>> +* portguid
>> +* guid of port representing current structure
>> +*/
>> +
>> /****s* OpenSM: Multicast Group/osm_mgrp_t
>> * NAME
>> * osm_mgrp_t
>> @@ -355,7 +439,7 @@ static inline ib_net16_t osm_mgrp_get_mlid(IN const osm_mgrp_t * const p_mgrp)
>> *
>> * SYNOPSIS
>> */
>> -osm_mcm_port_t *osm_mgrp_add_port(osm_subn_t *subn, osm_log_t *log,
>> +osm_mcm_port_t *osm_mgrp_add_port(osm_subn_t * subn, osm_log_t * log,
>> IN osm_mgrp_t * const p_mgrp,
>> IN const ib_gid_t * const p_port_gid,
>> IN const uint8_t join_state,
>> @@ -452,8 +536,8 @@ osm_mgrp_delete_port(IN osm_subn_t * const p_subn,
>> * SEE ALSO
>> *********/
>>
>> -int osm_mgrp_remove_port(osm_subn_t *subn, osm_log_t *log, osm_mgrp_t *mgrp,
>> - osm_mcm_port_t *mcm, uint8_t join_state);
>> +int osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
>> + osm_mcm_port_t * mcm, uint8_t join_state);
>>
>> /****f* OpenSM: Multicast Group/osm_mgrp_apply_func
>> * NAME
>> diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c
>> index d2733c4..ae0a818 100644
>> --- a/opensm/opensm/osm_multicast.c
>> +++ b/opensm/opensm/osm_multicast.c
>> @@ -1,5 +1,5 @@
>> /*
>> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
>> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>> * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>> * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>> *
>> @@ -48,6 +48,7 @@
>> #include <opensm/osm_mcm_port.h>
>> #include <opensm/osm_mtree.h>
>> #include <opensm/osm_inform.h>
>> +#include <arpa/inet.h>
>>
>> /**********************************************************************
>> **********************************************************************/
>> @@ -298,3 +299,82 @@ void osm_mgrp_apply_func(const osm_mgrp_t * p_mgrp, osm_mgrp_func_t p_func,
>> if (p_mtn)
>> mgrp_apply_func_sub(p_mgrp, p_mtn, p_func, context);
>> }
>> +
>> +/**********************************************************************
>> + **********************************************************************/
>> +#define PREFIX_MASK_IP CL_HTON64(0xff10ffff0000ffffULL)
>> +#define PREFIX_SIGNATURE_IPV4 CL_HTON64(0xff10401b00000000ULL)
>> +#define INTERFACE_ID_IPV4 CL_HTON64(0x0000000fffffffffULL)
>> +#define PREFIX_SIGNATURE_IPV6 CL_HTON64(0xff10601b00000000ULL)
>> +#define INTERFACE_ID_ALL_NODES CL_HTON64(0x00000000ffffffffULL)
>> +#define INTERFACE_ID_ALL_HOSTS CL_HTON64(0x0000000000000001ULL)
>> +#define INTERFACE_ID_ALL_GATEWAYS CL_HTON64(0x000000000000004dULL)
>> +#define INTERFACE_ID_ALL_ROUTERS CL_HTON64(0x0000000000000002ULL)
>> +#define PREFIX_PKEY_MASK_OFF CL_HTON64(0xffffffff0000ffffULL)
>> +#define PREFIX_MASK CL_HTON64(0xff10ffff0000ffffULL)
>> +#define PREFIX_SIGNATURE CL_HTON64(0xff10601b00000000ULL)
>> +#define INT_ID_MASK CL_HTON64(0xfffffff1ff000000ULL)
>> +#define INT_ID_SIGNATURE CL_HTON64(0x00000001ff000000ULL)
>> +
>> +/**********************************************************************
>> + **********************************************************************/
>> + /* mask used for multiplexing mgids to one mlid. Here default value (0xff) means that 8 lsb bits
>> + of mgid will be masked off .
>> + */
>> +static uint64_t MGID_MUX_MLID_MASK = CL_HTON64(0x00000000000000ffULL);
>
> Is this only changed by source change and recompilation ?
> MGID_MUX_MLID_MASK will be a parameter in opensm.conf file.
Here I placed it as an example.
>> +void osm_mgrp_holder_prepare_common_mgid(IN const ib_gid_t * const p_mgid,
>> + OUT ib_gid_t * p_common_mgid)
>> +{
>> + memcpy(p_common_mgid, p_mgid, sizeof(ib_gid_t));
>> + /* Don't mux non IPoIB mgids. When mux mask is zero, no multiplexing occures */
>> + if ((((p_common_mgid->unicast.prefix & PREFIX_MASK_IP) !=
>> + PREFIX_SIGNATURE_IPV4)
>> + && ((p_common_mgid->unicast.prefix & PREFIX_MASK_IP) !=
>> + PREFIX_SIGNATURE_IPV6)) || (!MGID_MUX_MLID_MASK))
>> + return;
>> +
>> + if (((p_common_mgid->unicast.prefix & PREFIX_MASK_IP) ==
>> + PREFIX_SIGNATURE_IPV4)
>> + && ((p_common_mgid->unicast.interface_id == INTERFACE_ID_ALL_NODES)
>> + || (p_common_mgid->unicast.interface_id ==
>> + INTERFACE_ID_ALL_HOSTS)
>> + || (p_common_mgid->unicast.interface_id ==
>> + INTERFACE_ID_ALL_GATEWAYS)
>> + || (p_common_mgid->unicast.interface_id ==
>> + INTERFACE_ID_ALL_ROUTERS))) {
>> + /* zero pkey bits - special groups will have same mlid for all PKEYs */
>
> Is it a good idea to use the same MLID regardless of PKey ?
> - This is not so much groups, even for all PKeys. Providing different MLID
for each special group we'll not achieve any compression.
>> + p_common_mgid->unicast.prefix &= PREFIX_PKEY_MASK_OFF;
>> + } else if ((p_common_mgid->unicast.prefix & PREFIX_MASK_IP) ==
>> + PREFIX_SIGNATURE_IPV6
>> + && (p_common_mgid->unicast.interface_id & INT_ID_MASK) ==
>> + INT_ID_SIGNATURE) {
>> + /* Special Case IPv6 Solicited Node Multicast (SNM) addresses */
>> + /* 0xff1Z601bXXXX0000 : 0x00000001ffYYYYYY */
>> + /* Where Z is the scope, XXXX is the P_Key, and
>> + * YYYYYY is the last 24 bits of the port guid */
>> + p_common_mgid->unicast.prefix &= PREFIX_MASK_IP;
>> + p_common_mgid->unicast.interface_id &= INT_ID_MASK;
>> + } else {
>> + /* zero mask bits */
>> + p_common_mgid->unicast.interface_id &= (~MGID_MUX_MLID_MASK);
>> + }
>> +}
>> +
>> +static int64_t
>> +__mgid_cmp(IN const void *const p_gid1, IN const void *const p_gid2)
>> +{
>> + return memcmp(p_gid1, p_gid2, sizeof(ib_gid_t));
>> +}
>> +
>> +static osm_mgrp_port_t *osm_mgrp_port_new(ib_net64_t port_guid)
>> +{
>> + osm_mgrp_port_t *p_mgrp_port =
>> + (osm_mgrp_port_t *) malloc(sizeof(osm_mgrp_port_t));
>> + if (!p_mgrp_port) {
>> + return NULL;
>> + }
>> + memset(p_mgrp_port, 0, sizeof(*p_mgrp_port));
>
> Minor - could use calloc rather than malloc/memset here.
>
> -- Hal
>
>> + p_mgrp_port->port_guid = port_guid;
>> + cl_qlist_init(&p_mgrp_port->mgroups);
>> + return p_mgrp_port;
>> +}
>> --
>> 1.5.5
>>
>> _______________________________________________
>> general mailing list
>> general at lists.openfabrics.org
>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
>>
>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
>>
>
More information about the general
mailing list