[ofw][patch] igmp v2

Fab Tillier ftillier at windows.microsoft.com
Wed Jun 25 14:23:40 PDT 2008


Hi Slava,

>Please review changes in IPoIB mgid creation. This is a part, needed for
>garbage collector support.
>
>Index: ulp/ipoib/kernel/ipoib_port.c
>===================================================================
>--- ulp/ipoib/kernel/ipoib_port.c   (revision 1299)
>+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
>@@ -290,6 +290,14 @@
>            IN         OUT
>ipoib_send_desc_t* const           p_desc );
>
> static NDIS_STATUS
>+__send_mgr_filter_igmp_v2(
>+          IN                                             ipoib_port_t*
>const                                 p_port,
>+    IN               const    ip_hdr_t* const
>p_ip_hdr,
>+          IN                                             size_t
>iph_options_size,
>+          IN                                             NDIS_BUFFER*
>p_buf,
>+          IN                                             size_t
>buf_len );
>+
>+static NDIS_STATUS
> __send_mgr_filter_udp(
>            IN                                             ipoib_port_t*
>const                                 p_port,
>            IN                     const    ip_hdr_t* const
>p_ip_hdr,
>@@ -2131,6 +2139,13 @@
>            p_eth->hdr.src = p_src->mac;
>            p_eth->hdr.dst = p_dst->mac;
>
>+          if ( ETH_IS_MULTICAST(p_eth->hdr.dst.addr) &&
>+                      p_eth->hdr.type == ETH_PROT_TYPE_IP &&
>+                      !ETH_IS_BROADCAST(p_eth->hdr.dst.addr) )
>+          {
>+                      p_eth->hdr.dst.addr[1] = 0;
>+                      p_eth->hdr.dst.addr[3] = p_eth->hdr.dst.addr[3] &
>0x7f;
>+          }
>            IPOIB_EXIT( IPOIB_DBG_RECV );
>            return IB_SUCCESS;
> }
>@@ -3110,6 +3125,26 @@
>            if( p_ip_hdr->offset ||
>                        p_ip_hdr->prot != IP_PROT_UDP )
>            {
>+                      /* Check if this packet is IGMP */
>+                      if ( p_ip_hdr->prot == IP_PROT_IGMP )
>+                      {
>+                                  /*
>+                                      In igmp packet I saw that iph
>arrive in 2 NDIS_BUFFERs:
>+                                              1. iph
>+                                              2. ip options
>+                                              So to get the IGMP packet
>we need to skip the ip options NDIS_BUFFER
>+                                  */
>+                                  size_t iph_size_in_bytes =
>(p_ip_hdr->ver_hl & 0xf) * 4;
>+                                  size_t iph_options_size =
>iph_size_in_bytes - buf_len;
>+                                  buf_len -= sizeof(ip_hdr_t);
>+
>+                                  /*
>+                                      Could be a case that arrived igmp
>packet not from type IGMPv2 ,
>+                                              but IGMPv1 or IGMPv3.
>+                                              We anyway pass it to
>__send_mgr_filter_igmp_v2().
>+                                  */
>+                                  __send_mgr_filter_igmp_v2(p_port,
>p_ip_hdr, iph_options_size, p_buf, buf_len);

I gave this comment last time you sent this (6/12/2008), but I guess it was ignored. :(
Here it is again, hopefully with better results:

You adjusted buf_len, but not p_buf - is that then intent?

>+                      }
>                        /* Not a UDP packet. */
>                        cl_perf_start( SendTcp );
>                        status = __send_gen( p_port, p_desc );
>@@ -3129,7 +3164,120 @@
>            return status;
> }
>
>+static NDIS_STATUS
>+__send_mgr_filter_igmp_v2(
>+          IN                                             ipoib_port_t*
>const                                 p_port,
>+    IN               const    ip_hdr_t* const
>p_ip_hdr,
>+          IN                                             size_t
>iph_options_size,
>+          IN                                             NDIS_BUFFER*
>p_buf,
>+          IN                                             size_t
>buf_len )
>+{
>+    igmp_v2_hdr_t                     *p_igmp_v2_hdr = NULL;
>+          NDIS_STATUS                          endpt_status;
>+          ipoib_endpt_t*               p_endpt = NULL;
>+          mac_addr_t                               fake_mcast_mac;
>
>+          IPOIB_ENTER( IPOIB_DBG_SEND );
>+
>+    if( !buf_len )
>+          {
>+                      // To get the IGMP packet we need to skip the ip
>options NDIS_BUFFER (if exists)
>+                      while ( iph_options_size )
>+                      {
>+                                  NdisGetNextBuffer( p_buf, &p_buf );
>+                                  if( !p_buf )
>+                                  {
>+                                              IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                                          ("Failed to
>get IGMPv2 header buffer.\n") );
>+                                              return
>NDIS_STATUS_FAILURE;
>+                                  }
>+                                  NdisQueryBufferSafe( p_buf,
>&p_igmp_v2_hdr, &buf_len, NormalPagePriority );
>+                                  if( !p_igmp_v2_hdr )
>+                                  {
>+                                              IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                                          ("Failed to
>query IGMPv2 header buffer.\n") );
>+                                              return
>NDIS_STATUS_FAILURE;
>+                                  }
>+
>+                                  iph_options_size-=buf_len;
>+                      }
>+
>+                      NdisGetNextBuffer( p_buf, &p_buf );
>+                      if( !p_buf )
>+                      {
>+                                  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
>+                                              ("Failed to get IGMPv2
>header buffer.\n") );
>+                                  return NDIS_STATUS_FAILURE;
>+                      }
>+                      NdisQueryBufferSafe( p_buf, &p_igmp_v2_hdr,
>&buf_len, NormalPagePriority );
>+                      if( !p_igmp_v2_hdr )
>+                      {
>+                                  IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
>+                                              ("Failed to query IGMPv2
>header buffer.\n") );
>+                                  return NDIS_STATUS_FAILURE;
>+                      }
>+          }
>+          else
>+          {
>+                      p_igmp_v2_hdr = (igmp_v2_hdr_t*)(p_ip_hdr + 1);

I gave this comment last time you sent this (6/12/2008), but I guess it was ignored. :(
Here it is again, hopefully with better results:

Ok, this is confusing now.  If buf_len was not zero, you set the IGMP header to start right after the IP header, but you don't account for options like you did when you calculated buf_len.  If you adjusted p_buf when calling, you could just set p_igmp_vs_hdr = p_buf and be safe.

>+          }
>+          /* Get the IGMP header length. */
>+          if( buf_len < sizeof(igmp_v2_hdr_t) )
>+          {
>+                      IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
>+                                  ("Buffer not large enough for IGMPv2
>packet.\n") );
>+                      return NDIS_STATUS_BUFFER_TOO_SHORT;
>+          }
>+
>+          // build fake mac from igmp packet group address
>+          fake_mcast_mac.addr[0] = 1;
>+    fake_mcast_mac.addr[1] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[0] & 0x0f;
>+    fake_mcast_mac.addr[2] = 0x5E;
>+          fake_mcast_mac.addr[3] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[1];
>+          fake_mcast_mac.addr[4] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[2];
>+          fake_mcast_mac.addr[5] = ((unsigned
>char*)&p_igmp_v2_hdr->group_address)[3];
>+
>+          switch ( p_igmp_v2_hdr->type )
>+          {
>+          case IGMP_V2_MEMBERSHIP_REPORT:
>+                      /*
>+                                  This mean that some body open
>listener on this group
>+                          Change type of mcast endpt to SEND_RECV
>endpt. So mcast garbage collector
>+                                  will not delete this mcast endpt.
>+                      */
>+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
>+                                  ("Catched IGMP_V2_MEMBERSHIP_REPORT
>message\n") );
>+        endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
>&p_endpt );
>+                      if ( p_endpt )
>+                      {
>+                                  cl_obj_lock( &p_port->obj );
>+                                  cl_obj_unlock( &p_port->obj );
>+            ipoib_endpt_deref( p_endpt );
>+                      }
>+                      break;
>+
>+          case IGMP_V2_LEAVE_GROUP:
>+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
>+                                       ("Catched IGMP_V2_LEAVE_GROUP
>message\n") );
>+        endpt_status = __endpt_mgr_ref( p_port, fake_mcast_mac,
>&p_endpt );
>+                      if ( p_endpt )
>+                      {
>+                                  cl_obj_lock( &p_port->obj );
>+                                  cl_obj_unlock( &p_port->obj );
>+                                  ipoib_endpt_deref( p_endpt );
>+                      }
>+                      break;
>+
>+          default:
>+        IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_MCAST,
>+                                       ("Send Unknown IGMP message:
>0x%x \n", p_igmp_v2_hdr->type ) );
>+                      break;
>+          }
>+
>+          IPOIB_EXIT( IPOIB_DBG_SEND );
>+          return NDIS_STATUS_SUCCESS;
>+}
>+
> static NDIS_STATUS
> __send_mgr_filter_udp(
>            IN                                             ipoib_port_t*
>const                                 p_port,
>@@ -3736,6 +3884,47 @@
>                        }
>
>                        cl_perf_start( SendMgrQueue );
>+#pragma warning(disable:4127)
>+                      do{

Just use a goto here, it would be clearer, plus you wouldn't need the pragma.

>+        if ( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) &&
>+                                  p_eth_hdr->type == ETH_PROT_TYPE_IP
>&&
>+                                  !ETH_IS_BROADCAST(
>p_eth_hdr->dst.addr ) )
>+                      {
>+            ip_hdr_t                                  *p_ip_hdr;
>+                                  NDIS_BUFFER
>*p_ip_hdr_buf;
>+                                  UINT
>ip_hdr_buf_len;
>+
>+                                  // Extract the ip hdr
>+            NdisGetNextBuffer( p_buf, &p_ip_hdr_buf );
>+                                  if( !p_ip_hdr_buf )
>+                                  {
>+                                              IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                                          ("Failed to
>get IP header buffer.\n") );
>+                                              break;
>+                                  }
>+
>+                                  NdisQueryBufferSafe( p_ip_hdr_buf,
>&p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority );
>+                                  if( !p_ip_hdr )
>+                                  {
>+                                              IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                                          ("Failed to
>query IP header buffer.\n") );
>+                                              break;
>+                                  }
>+
>+                                  if( ip_hdr_buf_len < sizeof(ip_hdr_t)
>)
>+                                  {
>+                                              /* This buffer is done
>for.  Get the next buffer. */
>+                                              IPOIB_PRINT_EXIT(
>TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
>+                                                          ("Buffer too
>small for IP packet.\n") );
>+                                              //return
>NDIS_STATUS_BUFFER_TOO_SHORT;
>+                                              break;
>+                                  }
>+
>+            p_eth_hdr->dst.addr[1] = ((unsigned
>char*)&p_ip_hdr->dst_ip)[0] & 0x0f;
>+                                  p_eth_hdr->dst.addr[3] = ((unsigned
>char*)&p_ip_hdr->dst_ip)[1];
>+                      }
>+                      }while(FALSE);
>+#pragma warning(default:4127)
>                        status = __send_mgr_queue( p_port, p_eth_hdr,
>&desc.p_endpt );
>                        cl_perf_stop( &p_port->p_adapter->perf,
>SendMgrQueue );
>                        if( status == NDIS_STATUS_PENDING )
>@@ -4706,6 +4895,7 @@
>            ib_query_req_t                          query;
>            ib_user_query_t                         info;
>            ib_portinfo_record_t       port_rec;
>+    BOOLEAN                 success = TRUE;
>
>            IPOIB_ENTER( IPOIB_DBG_INIT );
>
>@@ -4740,17 +4930,28 @@
>            /* reference the object for the multicast query. */
>            ipoib_port_ref( p_port, ref_port_up );
>
>+    __try
>+    {
>            status = p_port->p_adapter->p_ifc->query(
>                        p_port->p_adapter->h_al, &query,
>&p_port->ib_mgr.h_query );
>            if( status != IB_SUCCESS )
>            {
>-                       KeSetEvent( &p_port->sa_event, EVENT_INCREMENT,
>FALSE );
>-                       ipoib_set_inactive( p_port->p_adapter );
>-                       ipoib_port_deref( p_port, ref_port_up );
>+            success = FALSE;
>                        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
>IPOIB_DBG_ERROR,
>                                    ("ib_query returned %s\n",
>
>p_port->p_adapter->p_ifc->get_err_str( status )) );
>+            __leave;
>+        }
>                        return;
>+    }
>+    __finally
>+    {
>+        if (!success)
>+        {
>+            KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE );
>+            ipoib_set_inactive( p_port->p_adapter );
>+            ipoib_port_deref( p_port, ref_port_up );
>+        }

What is this for?  The original code did this:

1. Check status
2. if failure, set sa_event, set_inactive, port_deref
3. return

The new code does this:

1. declare new local variable
2. set success to true.
3. Check status
4. if failure, set success to false, __leave
5. execute __finally, check success
6. if false, set sa_event, set_inactive, port_deref
7. return

So you've added a try/except block, a new flag, an extra conditional, but to what end?

I understand that you're splitting up the IGMP v2 changes from the garbage collection, but you didn't address any of my comments from the first round.  For example, this __try block had more logic in it (probably will come back as part of the garbage collection), and my comment on 6/12/2008 was:

Your indentation of the whole __try block is wrong.  In any case, I don't like the use of the __try here, as you're just using it instead of a goto.  Just use a goto and be done with it.

So for now, leave the code as it was, and when you add the garbage collection add a goto.  Don't use do/while(FALSE) and __try/__leave/__finally just to avoid gotos.

>            }
>
>            IPOIB_EXIT( IPOIB_DBG_INIT );
>@@ -5490,7 +5691,7 @@
>            mcast_req.member_rec.mlid = 0;
>            ib_member_set_state(
>&mcast_req.member_rec.scope_state,state);
>
>-           if( mac.addr[0] == 1 && mac.addr[1] == 0 && mac.addr[2] ==
>0x5E )
>+          if( (mac.addr[0] == 1) && (mac.addr[2] == 0x5E ))
>            {
>                        /*
>                         * Update the address portion of the MGID with
>the 28 lower bits of the
>@@ -5498,7 +5699,7 @@
>                         * the 24 lower bits of that
>network-byte-ordered value (assuming MSb
>                         * is zero).
>                         */
>-                       mcast_req.member_rec.mgid.raw[12] = 0;
>+                      mcast_req.member_rec.mgid.raw[12] = mac.addr[1];

The comment above about the 24 lower bits should be corrected to match the code.

>                        mcast_req.member_rec.mgid.raw[13] = mac.addr[3];
>                        mcast_req.member_rec.mgid.raw[14] = mac.addr[4];
>                        mcast_req.member_rec.mgid.raw[15] = mac.addr[5];

-Fab



More information about the ofw mailing list