[Openib-windows] Adding DHCP support for virtualization

Tzachi Dar tzachid at mellanox.co.il
Wed Mar 8 08:39:15 PST 2006


Hi Fab,
 
As part of the virtualization support, there is also a need to support
DHCP for the guest OSes.
 
Today I studied the DHCP code that exists in IPoIB and made some
changes to it, so it now works.
 
In general, the old code was based on replacing the client identifier
with a string that contained the GID + QP number. This combination
guarantees uniqueness as long as there is only one IP per QP. (This is
also the process that is described in
http://www.ietf.org/internet-drafts/draft-ietf-ipoib-dhcp-over-infiniband-10.txt)
 
With the new virtual server, there is now more than one IP address
per QP, so I had to make a change: I have extended the client identifier
to be GID + QP + MAC address. Assuming that the MAC addresses are unique
at least within one physical machine, this should work fine.
 
The next point is about what the receive side sees. In the current
implementation, the sender (actually Windows) puts its MAC (6 bytes) as
its identifier. Later IPoIB changes that to GID+QP, and the receiver
changes that to MAC again (* see below).  As there are no limitations
on what the DHCP server can receive, I believe that it is more correct
to pass the entire string as is (22 bytes go to the server).
 
* Please note that the current implementation was wrong whenever the
GUID and the MAC did not match, because a new MAC was generated for
each packet.
 
One more issue in the same area is what happens if a packet arrives and
its format is not recognized by us, for example a wrong magic number.
Currently the code drops it, and I believe that it would be better to
pass the packet to Windows and hope for the best (this will allow other
clients to work with us).
 
Please note that neither the old code nor the new code handles the case
where the client identifier is not what we expected (for example,
because of some configuration in the registry).
 
The last thing to note is that in the current code, when an IP packet is
passed to us by Windows, we enlarge the client identifier but don't
update the IP header size and UDP header size; later we shrink the
packet again without changing anything, so we are fine. However, I
believe that we should fix the headers each time.
 
Attached is the partial patch so you can look at it. Based on your
comments I'll create the complete patch so that you can submit it.
 
I'll also be happy to get comments on the last patch.
 
Thanks
Tzachi
 
 
Index: Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_debug.h
===================================================================
--- Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_debug.h (revision 226)
+++ Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_debug.h (working copy)
@@ -60,7 +60,9 @@
 #define IPOIB_DBG_ALLOC (1 << 8)
 #define IPOIB_DBG_OID (1 << 9)
 #define IPOIB_DBG_IOCTL (1 << 10)
+#define IPOIB_DBG_VM (1 << 11)
 
+
 #define IPOIB_DBG_FUNC (1 << 28) /* For function entry/exit */
 #define IPOIB_DBG_INFO (1 << 29) /* For verbose information */
 #define IPOIB_DBG_WARN (1 << 30) /* For warnings. */
Index: Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.c (revision 226)
+++ Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -237,6 +237,7 @@
 
 static ib_api_status_t
 __recv_gen(
+ IN  const   ipoib_port_t* const   p_port,
  IN  const ipoib_pkt_t* const   p_ipoib,
   OUT   eth_pkt_t* const   p_eth,
  IN    ipoib_endpt_t* const  p_src,
@@ -461,7 +462,34 @@
  return cl_memcmp( p_key1, p_key2, sizeof(ib_gid_t) );
 }
 
+/**********************************************************************
********
+*
+* Virtual server ip to mac translations
+*
+***********************************************************************
*******/
+static void
+__init_vs_ip_to_mac_translation(
+ IN    VS_ip_mac_manager *   p_manager);
 
+static void
+__shutdown_vs_ip_to_mac_translation(
+ IN    VS_ip_mac_manager *   p_manager);
+
+static ib_api_status_t
+__get_mac_from_ip(
+ IN const  VS_ip_mac_manager * const p_manager,
+ IN    net32_t      dst_ip,
+ OUT    mac_addr_t *    p_dst_mac);
+
+static ib_api_status_t
+__put_mac_ip_pair(
+ IN    VS_ip_mac_manager *   p_manager,
+ IN    net32_t      dst_ip,
+ IN    mac_addr_t     dst_mac);
+
+
+
+
 
/***********************************************************************
*******
 *
 * Implementation
@@ -548,6 +576,8 @@
 
  __endpt_mgr_construct( p_port );
 
+ __init_vs_ip_to_mac_translation(  &p_port->vs_manager );
+
  IPOIB_EXIT( IPOIB_DBG_INIT );
 }
 
@@ -702,6 +732,8 @@
 
  cl_obj_deinit( p_obj );
 
+ __shutdown_vs_ip_to_mac_translation( &p_port->vs_manager );
+
  cl_free( p_port );
 
  IPOIB_EXIT( IPOIB_DBG_INIT );
@@ -1780,7 +1812,7 @@
    continue;
   }
 
-  len = p_wc->length - sizeof(ib_grh_t);
+  len = p_wc->length - sizeof(ib_grh_t); //????? Can there be a buffer
overrun here ????
 
   if( len < sizeof(ipoib_hdr_t) )
   {
@@ -1838,7 +1870,7 @@
    {
     /* Unfiltered.  Setup the ethernet header and report. */
     cl_perf_start( RecvTcp );
-    status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+    status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
     cl_perf_stop( &p_port->p_adapter->perf, RecvTcp );
     break;
    }
@@ -1876,7 +1908,7 @@
    {
     /* Unfiltered.  Setup the ethernet header and report. */
     cl_perf_start( RecvUdp );
-    status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+    status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
     cl_perf_stop( &p_port->p_adapter->perf, RecvUdp );
    }
    break;
@@ -1898,7 +1930,7 @@
   default:
    /* Unfiltered.  Setup the ethernet header and report. */
    cl_perf_start( RecvGen );
-   status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+   status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
    cl_perf_stop( &p_port->p_adapter->perf, RecvGen );
   }
 
@@ -1943,11 +1975,15 @@
 
 static ib_api_status_t
 __recv_gen(
+ IN  const   ipoib_port_t* const   p_port,
  IN  const ipoib_pkt_t* const   p_ipoib,
   OUT   eth_pkt_t* const   p_eth,
  IN    ipoib_endpt_t* const  p_src,
  IN    ipoib_endpt_t* const  p_dst )
 {
+ net16_t  OriginalType;
+ ib_api_status_t  status;
+ mac_addr_t dst_mac;
  IPOIB_ENTER( IPOIB_DBG_RECV );
 
  if( !p_src || !p_dst )
@@ -1957,6 +1993,8 @@
   return IB_NOT_DONE;
  }
 
+ OriginalType = p_ipoib->hdr.type;
+
  /*
   * Fill in the ethernet header.  Note that doing so will overwrite
   * the IPoIB header, so start by moving the information from the IPoIB
@@ -1966,6 +2004,14 @@
  p_eth->hdr.src = p_src->mac;
  p_eth->hdr.dst = p_dst->mac;
 
+ if (OriginalType == ETH_PROT_TYPE_IP) 
+ {
+  status = __get_mac_from_ip(&p_port->vs_manager,
p_ipoib->type.ip.hdr.dst_ip, &dst_mac);
+  if ( status == IB_SUCCESS) 
+  {
+   p_eth->hdr.dst = dst_mac;
+  }
+ }
  IPOIB_EXIT( IPOIB_DBG_RECV );
  return IB_SUCCESS;
 }
@@ -1991,7 +2037,7 @@
  UNUSED_PARAM( p_port );
 
  /* Create the ethernet header. */
- status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+ status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
  if( status != IB_SUCCESS )
  {
   IPOIB_TRACE_EXIT( IPOIB_DBG_ERROR,
@@ -2128,6 +2174,7 @@
  const ipoib_arp_pkt_t *p_ib_arp;
  ib_gid_t    gid;
  mac_addr_t    mac;
+ mac_addr_t dst_mac;
  ipoib_hw_addr_t   null_hw = {0};
 
  IPOIB_ENTER( IPOIB_DBG_RECV );
@@ -2279,7 +2326,7 @@
   * Create the ethernet header.  Note that this is done last so that
   * we have a chance to create a new endpoint.
   */
- status = __recv_gen( p_ipoib, p_eth, *pp_src, p_dst );
+ status = __recv_gen( p_port, p_ipoib, p_eth, *pp_src, p_dst );
  if( status != IB_SUCCESS )
  {
   IPOIB_TRACE_EXIT( IPOIB_DBG_ERROR,
@@ -2288,6 +2335,20 @@
   return status;
  }
 
+    if (p_eth->hdr.type == ETH_PROT_TYPE_ARP) {
+  if ((p_eth->type.arp.op == ARP_OP_REP)) 
+  {
+   status = __get_mac_from_ip(&p_port->vs_manager,
p_eth->type.arp.dst_ip, &dst_mac);
+   if ( status == IB_SUCCESS) 
+   {
+    p_eth->hdr.dst = dst_mac;
+    p_eth->type.arp.dst_hw = dst_mac;
+   }
+  } 
+
+
+    }
+
  IPOIB_EXIT( IPOIB_DBG_RECV );
  return IB_SUCCESS;
 }
@@ -3133,7 +3194,7 @@
    p_cid[1] = 21;
   }
 
-  CL_ASSERT( p_cid[1] == 21 );
+//??????  CL_ASSERT( p_cid[1] == 21 ); // This asserts seems to bounce,
nothing happens if ignored ???
   p_cid[23]= DHCP_OPT_END;
   ib_gid_set_default( &gid, p_port->p_adapter->guids.port_guid );
   cl_memcpy( &p_cid[7], &gid, sizeof(ib_gid_t) );
@@ -3219,6 +3280,11 @@
   return NDIS_STATUS_INVALID_DATA;
  }
 
+ if ((p_arp->op == ARP_OP_REQ)) 
+ {
+  __put_mac_ip_pair(&p_port->vs_manager,p_arp->src_ip, p_arp->src_hw );
+ }
+
  /* Allocate our scratch buffer. */
  p_desc->p_buf = (send_buf_t*)
   ExAllocateFromNPagedLookasideList( &p_port->buf_mgr.send_buf_list );
@@ -5211,3 +5277,124 @@
 
  IPOIB_EXIT( IPOIB_DBG_MCAST );
 }
+
+static void
+__init_vs_ip_to_mac_translation(
+ IN    VS_ip_mac_manager *   p_manager)
+{
+ p_manager->p_pairs = NULL;
+ p_manager->data_size = 0;
+ p_manager->array_size = 0;
+}
+
+static void
+__shutdown_vs_ip_to_mac_translation(
+ IN    VS_ip_mac_manager *   p_manager)
+{
+ if ( p_manager->p_pairs != NULL ) 
+ {
+  cl_free( p_manager->p_pairs );
+ }
+}
+
+static ib_api_status_t
+__get_mac_from_ip(
+ IN const  VS_ip_mac_manager * const p_manager,
+ IN    net32_t      dst_ip,
+ OUT    mac_addr_t *    p_dst_mac)
+{
+ uint32_t i;
+ for (i = 0 ; i < p_manager->data_size; i++ ) 
+ {
+  if (p_manager->p_pairs[i].dst_ip == dst_ip) 
+  {
+   // We have found the IP that we are looking for
+   *p_dst_mac = p_manager->p_pairs[i].mac;
+   IPOIB_TRACE( IPOIB_DBG_VM,("__get_mac_from_ip dst_ip = %d.%d.%d.%d
found in table\n", 
+    ((dst_ip & 0xff      )       ),
+    ((dst_ip & 0xff00    ) >> 8  ),
+    ((dst_ip & 0xff0000  ) >> 16 ),
+    ((dst_ip & 0xff000000) >> 24 )));
+   
+   return IB_SUCCESS;
+  }
+   
+ }
+ // Not found
+ IPOIB_TRACE( IPOIB_DBG_VM,("__get_mac_from_ip dst_ip = %d.%d.%d.%d not
found \n", 
+   ((dst_ip & 0xff      )       ),
+   ((dst_ip & 0xff00    ) >> 8  ),
+   ((dst_ip & 0xff0000  ) >> 16 ),
+   ((dst_ip & 0xff000000) >> 24 )));
+
+ 
+ return IB_NOT_FOUND;
+}
+
+static ib_api_status_t
+__put_mac_ip_pair(
+ IN    VS_ip_mac_manager *   p_manager,
+ IN    net32_t      dst_ip,
+ IN    mac_addr_t     dst_mac)
+{
+ uint32_t i;
+ uint32_t new_size = 0;
+ VS_ip_mac_pair *new_array;
+ IPOIB_ENTER( IPOIB_DBG_VM );
+
+ IPOIB_TRACE( IPOIB_DBG_VM,("__put_mac_ip_pair dst_ip = %d.%d.%d.%d
\n", 
+    ((dst_ip & 0xff      )       ),
+    ((dst_ip & 0xff00    ) >> 8  ),
+    ((dst_ip & 0xff0000  ) >> 16 ),
+    ((dst_ip & 0xff000000) >> 24 )));
+
+ // First step is to look if this is actually an update and not adding
+ for (i = 0 ; i < p_manager->data_size; i++ ) 
+ {
+  if (p_manager->p_pairs[i].dst_ip == dst_ip) 
+  {
+   // We have found the IP that we are looking for, update it
+   p_manager->p_pairs[i].mac = dst_mac;
+   return IB_SUCCESS;
+  }
+   
+ }
+ // Not found, let see if we need to increase the table
+ if ( p_manager->array_size <= p_manager->data_size )
+ {
+  // Need to increase the array
+  if (p_manager->array_size < 4) 
+  {
+   new_size = 4;
+  } else {
+   new_size = p_manager->array_size * 2;
+  }
+  new_array = cl_zalloc(new_size * sizeof (VS_ip_mac_pair));
+  if ( new_array == NULL )
+  {
+   IPOIB_TRACE_EXIT( IPOIB_DBG_ERROR,
+    ("Failed to allocate new_array.\n") );
+   return CL_INSUFFICIENT_MEMORY;
+  }
+  // copy the data to the new array
+  if ( p_manager->array_size > 0 ) 
+  {
+   cl_memcpy ( new_array, p_manager->p_pairs, p_manager->data_size *
sizeof (VS_ip_mac_pair));
+  }
+  if ( p_manager->p_pairs != NULL ) 
+  {
+   cl_free( p_manager->p_pairs );
+  }
+  p_manager->p_pairs = new_array;
+  new_array = NULL;
+  p_manager->array_size = new_size;  
+ }
+
+ p_manager->p_pairs[p_manager->data_size].dst_ip = dst_ip;
+ p_manager->p_pairs[p_manager->data_size].mac = dst_mac;
+ p_manager->data_size++;
+
+ return IB_SUCCESS;
+
+}
+
Index: Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.h (revision 226)
+++ Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -468,7 +468,20 @@
 *  are inserted in the LID map.
 *********/
 
+typedef struct _VS_ip_mac_pair
+{
+ mac_addr_t    mac;
+ net32_t     dst_ip;
+} VS_ip_mac_pair;
 
+typedef struct _VS_ip_mac_manager
+{
+ VS_ip_mac_pair   *p_pairs;
+ uint32_t    array_size;
+ uint32_t    data_size;
+} VS_ip_mac_manager;
+
+
 typedef struct _ipoib_port
 {
  cl_obj_t    obj;
@@ -496,8 +509,12 @@
  atomic32_t    endpt_rdr;
 
  atomic32_t    hdr_idx;
- ipoib_hdr_t    hdr[1];
 
+ VS_ip_mac_manager  vs_manager;
+
+ // Must be last
+ ipoib_hdr_t    hdr[1]; 
+
 } ipoib_port_t;
 /*
 * FIELDS
@@ -536,7 +553,6 @@
 *  Endpoint manager.
 *********/
 
-
 ib_api_status_t
 ipoib_create_port(
  IN    struct _ipoib_adapter* const p_adapter,

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060308/25a9141b/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: vmdhcp.patch
Type: application/octet-stream
Size: 10895 bytes
Desc: vmdhcp.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060308/25a9141b/attachment.obj>


More information about the ofw mailing list