[ofw][patch] igmp v2
Fab Tillier
ftillier at windows.microsoft.com
Wed Jun 25 14:23:40 PDT 2008
Hi Slava,
>Please review changes in IPoIB mgid creation. This is a part, needed for
>garbage collector support.
>
>Index: ulp/ipoib/kernel/ipoib_port.c
>===================================================================
>--- ulp/ipoib/kernel/ipoib_port.c (revision 1299)
>+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
>@@ -290,6 +290,14 @@
> IN OUT
>ipoib_send_desc_t* const p_desc );
>
> static NDIS_STATUS
>+__send_mgr_filter_igmp_v2(
>+ IN ipoib_port_t*
>const p_port,
>+ IN const ip_hdr_t* const
>p_ip_hdr,
>+ IN size_t
>iph_options_size,
>+ IN NDIS_BUFFER*
>p_buf,
>+ IN size_t
>buf_len );
>+
>+static NDIS_STATUS
> __send_mgr_filter_udp(
> IN ipoib_port_t*
>const p_port,
> IN const ip_hdr_t* const
>p_ip_hdr,
>@@ -2131,6 +2139,13 @@
> p_eth->hdr.src = p_src->mac;
> p_eth->hdr.dst = p_dst->mac;
>
>+ if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) &&
>+ p_eth->hdr.type == ETH_PROT_TYPE_IP &&
>+ !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) )
>+ {
>+ p_eth->hdr.dst.addr[1] = 0;
>+ p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
>0x7f;
>+ }
> IPOIB_EXIT( IPOIB_DBG_RECV );
> return IB_SUCCESS;
> }
>@@ -3110,6 +3125,26 @@
> if( p_ip_hdr->offset ||
> p_ip_hdr->prot != IP_PROT_UDP )
> {
>+ /* Check if this packet is IGMP */
>+ if ( p_ip_hdr->prot == IP_PROT_IGMP )
>+ {
>+ /*
>+ In igmp packet I saw that iph
>arrive in 2 NDIS_BUFFERs:
>+ 1. iph
>+ 2. ip options
>+ So to get the IGMP packet
>we need to skip the ip options NDIS_BUFFER
>+ */
>+ size_t iph_size_in_bytes =
>(p_ip_hdr->ver_hl & 0xf) * 4;
>+ size_t iph_options_size =
>iph_size_in_bytes - buf_len;
>+ buf_len -= sizeof(ip_hdr_t);
>+
>+ /*
>+ Could be a case that arrived igmp
>packet not from type IGMPv2 ,
>+ but IGMPv1 or IGMPv3.
>+ We anyway pass it to
>__send_mgr_filter_igmp_v2().
>+ */
>+ __send_mgr_filter_igmp_v2(p_port,
>p_ip_hdr, iph_options_size, p_buf, buf_len);
I gave this comment last time you sent this (6/12/2008), but I guess it was ignored. :(
Here it is again, hopefully with better results:
You adjusted buf_len, but not p_buf - is that then intent?
>+ }
> /* Not a UDP packet. */
> cl_perf_start( SendTcp );
> status = __send_gen( p_port, p_desc );
>@@ -3129,7 +3164,120 @@
> return status;
> }
>
>+static NDIS_STATUS
>+__send_mgr_filter_igmp_v2(
>+ IN ipoib_port_t*
>const p_port,
>+ IN const ip_hdr_t* const
>p_ip_hdr,
>+ IN size_t
>iph_options_size,
>+ IN NDIS_BUFFER*
>p_buf,
>+ IN size_t
>buf_len )
>+{
>+ igmp_v2_hdr_t *p_igmp_v2_hdr = NULL;
>+ NDIS_STATUS endpt_status;
>+ ipoib_endpt_t* p_endpt = NULL;
>+ mac_addr_t fake_mcast_mac;
>
>+ IPOIB_ENTER( IPOIB_DBG_SEND );
>+
>+ if( !buf_len )
>+ {
>+ // To get the IGMP packet we need to skip the ip
>options NDIS_BUFFER (if exists)
>+ while ( iph_options_size )
>+ {
>+ NdisGetNextBuffer( p_buf, &p_buf );
>+ if( !p_buf )
>+ {
>+ IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+ ("Failed to
>get IGMPv2 header buffer.\n") );
>+ return
>NDIS_STATUS_FAILURE;
>+ }
>+ NdisQueryBufferSafe( p_buf,
>&p_igmp_v2_hdr, &buf_len, NormalPagePriority );
>+ if( !p_igmp_v2_hdr )
>+ {
>+ IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+ ("Failed to
>query IGMPv2 header buffer.\n") );
>+ return
>NDIS_STATUS_FAILURE;
>+ }
>+
>+ iph_options_size-=buf_len;
>+ }
>+
>+ NdisGetNextBuffer( p_buf, &p_buf );
>+ if( !p_buf )
>+ {
>+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
>+ ("Failed to get IGMPv2
>header buffer.\n") );
>+ return NDIS_STATUS_FAILURE;
>+ }
>+ NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr,
>&buf_len, NormalPagePriority );
>+ if( !p_igmp_v2_hdr )
>+ {
>+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
>+ ("Failed to query IGMPv2
>header buffer.\n") );
>+ return NDIS_STATUS_FAILURE;
>+ }
>+ }
>+ else
>+ {
>+ p_igmp_v2_hdr = (igmp_v2_hdr_t*)(p_ip_hdr + 1);
I gave this comment last time you sent this (6/12/2008), but I guess it was ignored. :(
Here it is again, hopefully with better results:
Ok, this is confusing now. If buf_len was not zero, you set the IGMP header to start right after the IP header, but you don't account for options like you did when you calculated buf_len. If you adjusted p_buf when calling, you could just set p_igmp_vs_hdr = p_buf and be safe.
>+ }
>+ /* Get the IGMP header length. */
>+ if( buf_len < sizeof(igmp_v2_hdr_t) )
>+ {
>+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
>+ ("Buffer not large enough for IGMPv2
>packet.\n") );
>+ return NDIS_STATUS_BUFFER_TOO_SHORT;
>+ }
>+
>+ // build fake mac from igmp packet group address
>+ fake_mcast_mac.addr[0] = 1;
>+ fake_mcast_mac.addr[1] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;
>+ fake_mcast_mac.addr[2] = 0x5E;
>+ fake_mcast_mac.addr[3] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[1];
>+ fake_mcast_mac.addr[4] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[2];
>+ fake_mcast_mac.addr[5] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[3];
>+
>+ switch ( p_igmp_v2_hdr->type )
>+ {
>+ case IGMP_V2_MEMBERSHIP_REPORT:
>+ /*
>+ This mean that some body open
>listener on this group
>+ Change type of mcast endpt to SEND_RECV
>endpt. So mcast garbage collector
>+ will not delete this mcast endpt.
>+ */
>+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
>+ ("Catched IGMP_V2_MEMBERSHIP_REPORT
>message\n") );
>+ endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
>&p_endpt );
>+ if ( p_endpt )
>+ {
>+ cl_obj_lock( &p_port->obj );
>+ cl_obj_unlock( &p_port->obj );
>+ ipoib_endpt_deref( p_endpt );
>+ }
>+ break;
>+
>+ case IGMP_V2_LEAVE_GROUP:
>+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
>+ ("Catched IGMP_V2_LEAVE_GROUP
>message\n") );
>+ endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
>&p_endpt );
>+ if ( p_endpt )
>+ {
>+ cl_obj_lock( &p_port->obj );
>+ cl_obj_unlock( &p_port->obj );
>+ ipoib_endpt_deref( p_endpt );
>+ }
>+ break;
>+
>+ default:
>+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
>+ ("Send Unknown IGMP message:
>0x%x \n", p_igmp_v2_hdr->type ) );
>+ break;
>+ }
>+
>+ IPOIB_EXIT( IPOIB_DBG_SEND );
>+ return NDIS_STATUS_SUCCESS;
>+}
>+
> static NDIS_STATUS
> __send_mgr_filter_udp(
> IN ipoib_port_t*
>const p_port,
>@@ -3736,6 +3884,47 @@
> }
>
> cl_perf_start( SendMgrQueue );
>+#pragma warning(disable:4127)
>+ do{
Just use a goto here, it would be clearer, plus you wouldn't need the pragma.
>+ if ( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) &&
>+ p_eth_hdr->type == ETH_PROT_TYPE_IP
>&&
>+ !ETH_IS_BROADCAST(
>p_eth_hdr->dst.addr ) )
>+ {
>+ ip_hdr_t *p_ip_hdr;
>+ NDIS_BUFFER
>*p_ip_hdr_buf;
>+ UINT
>ip_hdr_buf_len;
>+
>+ // Extract the ip hdr
>+ NdisGetNextBuffer( p_buf, &p_ip_hdr_buf );
>+ if( !p_ip_hdr_buf )
>+ {
>+ IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+ ("Failed to
>get IP header buffer.\n") );
>+ break;
>+ }
>+
>+ NdisQueryBufferSafe( p_ip_hdr_buf,
>&p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority );
>+ if( !p_ip_hdr )
>+ {
>+ IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+ ("Failed to
>query IP header buffer.\n") );
>+ break;
>+ }
>+
>+ if( ip_hdr_buf_len < sizeof(ip_hdr_t)
>)
>+ {
>+ /* This buffer is done
>for. Get the next buffer. */
>+ IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+ ("Buffer too
>small for IP packet.\n") );
>+ //return
>NDIS_STATUS_BUFFER_TOO_SHORT;
>+ break;
>+ }
>+
>+ p_eth_hdr->dst.addr[1] = ((unsigned
>char*)&p_ip_hdr->dst_ip)[0] & 0x0f;
>+ p_eth_hdr->dst.addr[3] = ((unsigned
>char*)&p_ip_hdr->dst_ip)[1];
>+ }
>+ }while(FALSE);
>+#pragma warning(default:4127)
> status = __send_mgr_queue( p_port, p_eth_hdr,
>&desc.p_endpt );
> cl_perf_stop( &p_port->p_adapter->perf,
>SendMgrQueue );
> if( status == NDIS_STATUS_PENDING )
>@@ -4706,6 +4895,7 @@
> ib_query_req_t query;
> ib_user_query_t info;
> ib_portinfo_record_t port_rec;
>+ BOOLEAN success = TRUE;
>
> IPOIB_ENTER( IPOIB_DBG_INIT );
>
>@@ -4740,17 +4930,28 @@
> /* reference the object for the multicast query. */
> ipoib_port_ref( p_port, ref_port_up );
>
>+ __try
>+ {
> status = p_port->p_adapter->p_ifc->query(
> p_port->p_adapter->h_al, &query,
>&p_port->ib_mgr.h_query );
> if( status != IB_SUCCESS )
> {
>- KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
>FALSE );
>- ipoib_set_inactive( p_port->p_adapter );
>- ipoib_port_deref( p_port, ref_port_up );
>+ success = FALSE;
> IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
> ("ib_query returned %s\n",
>
>p_port->p_adapter->p_ifc->get_err_str( status )) );
>+ __leave;
>+ }
> return;
>+ }
>+ __finally
>+ {
>+ if (!success)
>+ {
>+ KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
>+ ipoib_set_inactive( p_port->p_adapter );
>+ ipoib_port_deref( p_port, ref_port_up );
>+ }
What is this for? The original code did this:
1. Check status
2. if failure, set sa_event, set_inactive, port_deref
3. return
The new code does this:
1. declare new local variable
2. set success to true.
3. Check status
4. if failure, set success to false, __leave
5. execute __finally, check success
6. if false, set sa_event, set_inactive, port_deref
7. return
So you've added a try/except block, a new flag, an extra conditional, but to what end?
I understand that you're splitting up the IGMP v2 changes from the garbage collection, but you didn't address any of my comments from the first round. For example, this __try block had more logic in it (probably will come back as part of the garbage collection), and my comment on 6/12/2008 was:
Your indentation of the whole __try block is wrong. In any case, I don't like the use of the __try here, as you're just using it instead of a goto. Just use a goto and be done with it.
So for now, leave the code as it was, and when you add the garbage collection add a goto. Don't use do/while(FALSE) and __try/__leave/__finally just to avoid gotos.
> }
>
> IPOIB_EXIT( IPOIB_DBG_INIT );
>@@ -5490,7 +5691,7 @@
> mcast_req.member_rec.mlid = 0;
> ib_member_set_state(
>&mcast_req.member_rec.scope_state,state);
>
>- if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] ==
>0x5E )
>+ if( (mac.addr[0] == 1) && (mac.addr[2] == 0x5E ))
> {
> /*
> * Update the address portion of the MGID with
>the 28 lower bits of the
>@@ -5498,7 +5699,7 @@
> * the 24 lower bits of that
>network-byte-ordered value (assuming MSb
> * is zero).
> */
>- mcast_req.member_rec.mgid.raw[12] = 0;
>+ mcast_req.member_rec.mgid.raw[12] = mac.addr[1];
The comment above about the 24 lower bits should be corrected to match the code.
> mcast_req.member_rec.mgid.raw[13] = mac.addr[3];
> mcast_req.member_rec.mgid.raw[14] = mac.addr[4];
> mcast_req.member_rec.mgid.raw[15] = mac.addr[5];
-Fab
More information about the ofw
mailing list