[ofw][patch] igmp v2
Slava Strebkov
slavas at voltaire.com
Thu Jun 26 07:23:49 PDT 2008
Please review these changes to IPoIB MGID creation. They are one part of the
work needed for garbage collector support.
Index: inc/kernel/ip_packet.h
===================================================================
--- inc/kernel/ip_packet.h (revision 1299)
+++ inc/kernel/ip_packet.h (working copy)
@@ -196,6 +196,7 @@
#define IP_PROT_IP 4
#define IP_PROT_TCP 6
#define IP_PROT_UDP 17
+#define IP_PROT_IGMP 2
#include <complib/cl_packon.h>
@@ -355,6 +356,55 @@
*********/
#include <complib/cl_packoff.h>
+#define IGMP_V2_MEMBERSHIP_QUERY 0x11
+#define IGMP_V2_MEMBERSHIP_REPORT 0x16
+#define IGMP_V1_MEMBERSHIP_REPORT 0x12 // for backward
compatibility with IGMPv1
+#define IGMP_V2_LEAVE_GROUP 0x17
+#include <complib/cl_packon.h>
+/****s* IB Network Drivers/igmp_v2_hdr_t
+* NAME
+* igmp_v2_hdr_t
+*
+* DESCRIPTION
+* Defines the IGMPv2 header for IP packets.
+*
+* SYNOPSIS
+*/
+typedef struct _igmp_v2_hdr
+{
+ uint8_t type;
+ uint8_t max_resp_time;
+ net16_t chksum;
+ net32_t group_address;
+} PACK_SUFFIX igmp_v2_hdr_t;
+/*
+* FIELDS
+* type
+* type of IGMPv2 message: query/report/leave
+*
+* max_resp_time
+* The Max Response Time field is meaningful only in
Membership Query
+* messages, and specifies the maximum allowed time
before sending a
+* responding report in units of 1/10 second. In
all other messages, it
+* is set to zero by the sender and ignored by
receivers.
+*
+* chksum
+* The checksum is the 16-bit one's complement of
the one's complement
+* sum of the whole IGMP message (the entire IP payload).
+*
+* group_address
+* In a Membership Query message, the group address
field is set to zero
+* when sending a General Query, and set to the group address
being
+* queried when sending a Group-Specific Query.
+*
+* In a Membership Report or Leave Group message, the group
address
+* field holds the IP multicast group address of the group being
+* reported or left.
+*
+* SEE ALSO
+* IB Network Drivers, eth_hdr_t, arp_pkt_t, ip_hdr_t, tcp_hdr_t
+*********/
+#include <complib/cl_packoff.h>
#define DHCP_PORT_SERVER CL_HTON16(67)
#define DHCP_PORT_CLIENT CL_HTON16(68)
Index: ulp/ipoib/kernel/ipoib_adapter.c
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.c (revision 1299)
+++ ulp/ipoib/kernel/ipoib_adapter.c (working copy)
@@ -762,7 +762,14 @@
if( j != p_adapter->mcast_array_size
)
continue;
- ipoib_port_join_mcast( p_port,
p_mac_array[i] ,IB_MC_REC_STATE_FULL_MEMBER);
+ // Join to "All hosts mc group" for
IP and to NON-IP MC groups
+ if ( ( p_mac_array[i].addr[0] == 1 &&
p_mac_array[i].addr[1] == 0 && p_mac_array[i].addr[2] == 0x5e &&
+ p_mac_array[i].addr[3]
== 0 && p_mac_array[i].addr[4] == 0 && p_mac_array[i].addr[5] == 1 ) ||
+ !(
p_mac_array[i].addr[0] == 1 && p_mac_array[i].addr[1] == 0 &&
p_mac_array[i].addr[2] == 0x5e )
+ )
+ {
+ ipoib_port_join_mcast(
p_port, p_mac_array[i], IB_MC_REC_STATE_FULL_MEMBER );
+ }
}
}
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1299)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -290,6 +290,14 @@
IN OUT
ipoib_send_desc_t* const p_desc );
static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+ IN ipoib_port_t*
const p_port,
+ IN const ip_hdr_t* const
p_ip_hdr,
+ IN size_t
iph_options_size,
+ IN NDIS_BUFFER*
p_buf,
+ IN size_t
buf_len );
+
+static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t*
const p_port,
IN const ip_hdr_t* const
p_ip_hdr,
@@ -489,6 +497,13 @@
#endif
}
+/*
+* Returns a pointer to the first byte that follows the IP header.
+* The header length is read from the low nibble of ver_hl, which counts
+* 32-bit words, so any IP options are skipped as well.
+* Assumes the payload and the IP header are in the same buffer.
+*/
+static void* GetIpPayloadPtr(const ip_hdr_t* const p_ip_hdr)
+{
+	uint8_t *p_hdr_start = (uint8_t*)p_ip_hdr;
+	int hdr_len_bytes = 4*(p_ip_hdr->ver_hl & 0xf);
+
+	return (void*)(p_hdr_start + hdr_len_bytes);
+}
/***********************************************************************
*******
*
@@ -2131,6 +2146,13 @@
p_eth->hdr.src = p_src->mac;
p_eth->hdr.dst = p_dst->mac;
+ if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) &&
+ p_eth->hdr.type == ETH_PROT_TYPE_IP &&
+ !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) )
+ {
+ p_eth->hdr.dst.addr[1] = 0;
+ p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
0x7f;
+ }
IPOIB_EXIT( IPOIB_DBG_RECV );
return IB_SUCCESS;
}
@@ -3110,6 +3132,27 @@
if( p_ip_hdr->offset ||
p_ip_hdr->prot != IP_PROT_UDP )
{
+ /* Check if this packet is IGMP */
+ if ( p_ip_hdr->prot == IP_PROT_IGMP )
+ {
+ /*
+ In igmp packet I saw that iph
arrive in 2 NDIS_BUFFERs:
+ 1. iph
+ 2. ip options
+ So to get the IGMP packet
we need to skip the ip options NDIS_BUFFER
+ */
+ size_t iph_size_in_bytes =
(p_ip_hdr->ver_hl & 0xf) * 4;
+ size_t iph_options_size =
iph_size_in_bytes - buf_len;
+ buf_len -= sizeof(ip_hdr_t);
+
+ /*
+ Could be a case that arrived igmp
packet not from type IGMPv2 ,
+ but IGMPv1 or IGMPv3.
+ We anyway pass it to
__send_mgr_filter_igmp_v2().
+ */
+ /* p_buf is not changed because we'll
need it to obtain next buffer */
+ __send_mgr_filter_igmp_v2(p_port,
p_ip_hdr, iph_options_size, p_buf, buf_len);
+ }
/* Not a UDP packet. */
cl_perf_start( SendTcp );
status = __send_gen( p_port, p_desc );
@@ -3129,7 +3172,121 @@
return status;
}
+/*
+* Inspects an outgoing IGMP message and looks up the multicast endpoint of
+* the group it names.
+*
+* The IGMP header is located either in the buffer that holds the IP header
+* (buf_len != 0) or in a later NDIS_BUFFER of the chain (buf_len == 0), after
+* skipping iph_options_size bytes of IP options. A multicast MAC address is
+* then built from the group address and, for Membership Report and Leave
+* Group messages, the matching endpoint is referenced and dereferenced.
+*
+* PARAMETERS
+*	p_port
+*		Port whose endpoint manager is searched.
+*	p_ip_hdr
+*		IP header of the packet being sent.
+*	iph_options_size
+*		Number of IP option bytes held in the buffers that follow p_buf.
+*		Only used when buf_len is zero.
+*	p_buf
+*		NDIS_BUFFER that contains the IP header.
+*	buf_len
+*		Number of bytes in p_buf after the fixed ip_hdr_t (the caller
+*		subtracts sizeof(ip_hdr_t) before calling).
+*
+* RETURN VALUES
+*	NDIS_STATUS_SUCCESS
+*		The message was examined (whatever its type).
+*	NDIS_STATUS_FAILURE
+*		A buffer could not be obtained or mapped, or the IP header length
+*		is smaller than the fixed header.
+*	NDIS_STATUS_BUFFER_TOO_SHORT
+*		The buffer does not hold a complete IGMPv2 header.
+*/
+static NDIS_STATUS
+__send_mgr_filter_igmp_v2(
+	IN				ipoib_port_t* const			p_port,
+	IN		const	ip_hdr_t* const				p_ip_hdr,
+	IN				size_t						iph_options_size,
+	IN				NDIS_BUFFER*				p_buf,
+	IN				size_t						buf_len )
+{
+	igmp_v2_hdr_t		*p_igmp_v2_hdr = NULL;
+	uint8_t				*p_data = NULL;
+	NDIS_STATUS			endpt_status;
+	ipoib_endpt_t*		p_endpt = NULL;
+	mac_addr_t			fake_mcast_mac;
+
+	IPOIB_ENTER( IPOIB_DBG_SEND );
+
+	if( !buf_len )
+	{
+		// To get the IGMP packet we need to skip the ip options NDIS_BUFFER (if exists)
+		while( iph_options_size )
+		{
+			NdisGetNextBuffer( p_buf, &p_buf );
+			if( !p_buf )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to get IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+			NdisQueryBufferSafe( p_buf, &p_data, &buf_len, NormalPagePriority );
+			if( !p_data )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to query IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+
+			if( buf_len > iph_options_size )
+			{
+				/*
+				 * The options end inside this buffer, so the IGMP header
+				 * starts right after them. Subtracting buf_len here would
+				 * wrap the unsigned counter and keep the loop running.
+				 */
+				p_igmp_v2_hdr = (igmp_v2_hdr_t*)(p_data + iph_options_size);
+				buf_len -= iph_options_size;
+				break;
+			}
+
+			iph_options_size -= buf_len;
+		}
+
+		if( !p_igmp_v2_hdr )
+		{
+			/* Options (if any) filled whole buffers: IGMP is in the next one. */
+			NdisGetNextBuffer( p_buf, &p_buf );
+			if( !p_buf )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to get IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+			NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr, &buf_len, NormalPagePriority );
+			if( !p_igmp_v2_hdr )
+			{
+				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed to query IGMPv2 header buffer.\n") );
+				return NDIS_STATUS_FAILURE;
+			}
+		}
+	}
+	else
+	{
+		/*
+		 * IP header, options and payload are assumed to share one buffer.
+		 * buf_len counts the bytes after the fixed header, so the option
+		 * bytes must be taken off before it describes the IGMP payload.
+		 */
+		const size_t	ip_hdr_len = 4 * (size_t)(p_ip_hdr->ver_hl & 0xf);
+		size_t			opts_len;
+
+		if( ip_hdr_len < sizeof(ip_hdr_t) )
+		{
+			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Invalid IP header length.\n") );
+			return NDIS_STATUS_FAILURE;
+		}
+
+		opts_len = ip_hdr_len - sizeof(ip_hdr_t);
+		if( buf_len < opts_len )
+		{
+			IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Buffer not large enough for IGMPv2 packet.\n") );
+			return NDIS_STATUS_BUFFER_TOO_SHORT;
+		}
+
+		buf_len -= opts_len;
+		p_igmp_v2_hdr = GetIpPayloadPtr( p_ip_hdr );
+	}
+
+	/* Make sure a complete IGMPv2 header is available before reading it. */
+	if( buf_len < sizeof(igmp_v2_hdr_t) )
+	{
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("Buffer not large enough for IGMPv2 packet.\n") );
+		return NDIS_STATUS_BUFFER_TOO_SHORT;
+	}
+
+	/*
+	 * Build a fake MAC from the IGMP group address: 01:xx:5E followed by the
+	 * last three bytes of the address, where xx carries the low 4 bits of the
+	 * first address byte.
+	 */
+	fake_mcast_mac.addr[0] = 1;
+	fake_mcast_mac.addr[1] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;
+	fake_mcast_mac.addr[2] = 0x5E;
+	fake_mcast_mac.addr[3] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[1];
+	fake_mcast_mac.addr[4] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[2];
+	fake_mcast_mac.addr[5] = ((unsigned char*)&p_igmp_v2_hdr->group_address)[3];
+
+	switch( p_igmp_v2_hdr->type )
+	{
+	case IGMP_V2_MEMBERSHIP_REPORT:
+		/*
+		 * Somebody opened a listener on this group. The intent is to turn
+		 * the mcast endpoint into a SEND_RECV endpoint so that the mcast
+		 * garbage collector will not delete it.
+		 * NOTE(review): the locked section below is empty; presumably the
+		 * endpoint update arrives with the garbage collector change - confirm.
+		 */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Catched IGMP_V2_MEMBERSHIP_REPORT message\n") );
+		endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac, &p_endpt );
+		if( p_endpt )
+		{
+			cl_obj_lock( &p_port->obj );
+			cl_obj_unlock( &p_port->obj );
+			ipoib_endpt_deref( p_endpt );
+		}
+		break;
+
+	case IGMP_V2_LEAVE_GROUP:
+		/* NOTE(review): same empty locked section as the report case. */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Catched IGMP_V2_LEAVE_GROUP message\n") );
+		endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac, &p_endpt );
+		if( p_endpt )
+		{
+			cl_obj_lock( &p_port->obj );
+			cl_obj_unlock( &p_port->obj );
+			ipoib_endpt_deref( p_endpt );
+		}
+		break;
+
+	default:
+		/* Queries, IGMPv1/v3 messages and anything else are only traced. */
+		IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
+			("Send Unknown IGMP message: 0x%x \n", p_igmp_v2_hdr->type ) );
+		break;
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_SEND );
+	return NDIS_STATUS_SUCCESS;
+}
+
static NDIS_STATUS
__send_mgr_filter_udp(
IN ipoib_port_t*
const p_port,
@@ -3167,7 +3324,7 @@
}
else
{
- p_udp_hdr = (udp_hdr_t*)(p_ip_hdr + 1);
+ p_udp_hdr =
(udp_hdr_t*)GetIpPayloadPtr(p_ip_hdr);
}
/* Get the UDP header and check the destination port
numbers. */
if( buf_len < sizeof(udp_hdr_t) )
@@ -3736,6 +3893,44 @@
}
cl_perf_start( SendMgrQueue );
+
+ if ( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) &&
+ p_eth_hdr->type == ETH_PROT_TYPE_IP
&&
+ !ETH_IS_BROADCAST(
p_eth_hdr->dst.addr ) )
+ {
+ ip_hdr_t *p_ip_hdr;
+ NDIS_BUFFER
*p_ip_hdr_buf;
+ UINT
ip_hdr_buf_len;
+
+ // Extract the ip hdr
+ NdisGetNextBuffer( p_buf, &p_ip_hdr_buf );
+ if( !p_ip_hdr_buf )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
get IP header buffer.\n") );
+ goto h_end;
+ }
+
+ NdisQueryBufferSafe( p_ip_hdr_buf,
&p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority );
+ if( !p_ip_hdr )
+ {
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to
query IP header buffer.\n") );
+ goto h_end;
+ }
+
+ if( ip_hdr_buf_len < sizeof(ip_hdr_t)
)
+ {
+ /* This buffer is done
for. Get the next buffer. */
+ IPOIB_PRINT_EXIT(
TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Buffer too
small for IP packet.\n") );
+ goto h_end;
+ }
+
+ p_eth_hdr->dst.addr[1] = ((unsigned
char*)&p_ip_hdr->dst_ip)[0] & 0x0f;
+ p_eth_hdr->dst.addr[3] = ((unsigned
char*)&p_ip_hdr->dst_ip)[1];
+ }
+h_end:
status = __send_mgr_queue( p_port, p_eth_hdr,
&desc.p_endpt );
cl_perf_stop( &p_port->p_adapter->perf,
SendMgrQueue );
if( status == NDIS_STATUS_PENDING )
@@ -5490,15 +5685,15 @@
mcast_req.member_rec.mlid = 0;
ib_member_set_state(
&mcast_req.member_rec.scope_state,state);
- if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] ==
0x5E )
+ if( (mac.addr[0] == 1) && (mac.addr[2] == 0x5E ))
{
/*
* Update the address portion of the MGID with
the 28 lower bits of the
- * IP address. Since we're given a MAC address,
we end up using only
- * the 24 lower bits of that network-byte-ordered
value (assuming MSb
- * is zero).
+ * IP address. Since we're given a MAC address,
we are using
+ * 24 lower bits of that network-byte-ordered
value (assuming MSb
+ * is zero) and 4 lsb bits of the first byte of IP
address.
*/
- mcast_req.member_rec.mgid.raw[12] = 0;
+ mcast_req.member_rec.mgid.raw[12] = mac.addr[1];
mcast_req.member_rec.mgid.raw[13] = mac.addr[3];
mcast_req.member_rec.mgid.raw[14] = mac.addr[4];
mcast_req.member_rec.mgid.raw[15] = mac.addr[5];
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080626/efee2a22/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: igmp_v2_support.diff
Type: application/octet-stream
Size: 11879 bytes
Desc: igmp_v2_support.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20080626/efee2a22/attachment.obj>
More information about the ofw
mailing list