[Openib-windows] Adding DHCP support for virtualization
Tzachi Dar
tzachid at mellanox.co.il
Wed Mar 8 08:39:15 PST 2006
Hi Fab,
As part of the virtualization support, there is also a need to support
DHCP for the guest OSes.
Today I studied the DHCP code that exists in IPoIB and made some
changes to it, so it now works.
In general, the old code was based on replacing the client identifier
with a string that contained the GID + QP number. This combination
guarantees uniqueness as long as there is one IP per QP. (This is also
the process described in
http://www.ietf.org/internet-drafts/draft-ietf-ipoib-dhcp-over-infiniband-10.txt)
With the new virtual server, there is now more than one IP address
per QP, so I had to make a change: the client identifier is now
GID + QP + MAC address. Assuming that MAC addresses are unique
at least within one physical machine, this should work fine.
The next point is about what the receive side sees. In the current
implementation, the sender (actually Windows) puts its MAC (6 bytes) as
its identifier. Later IPoIB changes that to GID+QP, and the receiver
changes that to MAC again (* see below). As there are no limitations
on what the DHCP server can receive, I believe that it is more correct
to pass the entire string as is (22 bytes go to the server).
* Please note that the current implementation was wrong whenever the GUID
and the MAC did not match, because a new MAC was generated for each packet.
One more issue in the same area is what happens if a packet arrives and
its format is not recognized, for example because of a wrong magic number.
Currently the code drops it, and I believe that it would be better to
pass the packet to Windows and hope for the best (this will allow other
clients to work with us).
Please note that neither the old code nor the new code handles the case
where the client identifier is not what we expected (for example, due to
some configuration in the registry).
The last thing to note is that in the current code, when an IP packet is
passed to us by Windows, we enlarge the client identifier but don't change
the IP header size and UDP header size; later we shrink the packet again,
also without changing anything, so we are fine. However, I believe that we
should fix the headers each time.
Attached is the partial patch so you can look at it. Based on your
comments, I'll create the complete patch so that you can submit it.
I would also be happy to get comments on the last patch.
Thanks
Tzachi
Index: Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_debug.h
===================================================================
--- Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_debug.h (revision 226)
+++ Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_debug.h (working copy)
@@ -60,7 +60,9 @@
#define IPOIB_DBG_ALLOC (1 << 8)
#define IPOIB_DBG_OID (1 << 9)
#define IPOIB_DBG_IOCTL (1 << 10)
+#define IPOIB_DBG_VM (1 << 11)
+
#define IPOIB_DBG_FUNC (1 << 28) /* For function entry/exit */
#define IPOIB_DBG_INFO (1 << 29) /* For verbose information */
#define IPOIB_DBG_WARN (1 << 30) /* For warnings. */
Index: Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.c (revision 226)
+++ Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -237,6 +237,7 @@
static ib_api_status_t
__recv_gen(
+ IN const ipoib_port_t* const p_port,
IN const ipoib_pkt_t* const p_ipoib,
OUT eth_pkt_t* const p_eth,
IN ipoib_endpt_t* const p_src,
@@ -461,7 +462,34 @@
return cl_memcmp( p_key1, p_key2, sizeof(ib_gid_t) );
}
+/**********************************************************************
********
+*
+* Virtual server ip to mac translations
+*
+***********************************************************************
*******/
+static void
+__init_vs_ip_to_mac_translation(
+ IN VS_ip_mac_manager * p_manager);
+static void
+__shutdown_vs_ip_to_mac_translation(
+ IN VS_ip_mac_manager * p_manager);
+
+static ib_api_status_t
+__get_mac_from_ip(
+ IN const VS_ip_mac_manager * const p_manager,
+ IN net32_t dst_ip,
+ OUT mac_addr_t * p_dst_mac);
+
+static ib_api_status_t
+__put_mac_ip_pair(
+ IN VS_ip_mac_manager * p_manager,
+ IN net32_t dst_ip,
+ IN mac_addr_t dst_mac);
+
+
+
+
/***********************************************************************
*******
*
* Implementation
@@ -548,6 +576,8 @@
__endpt_mgr_construct( p_port );
+ __init_vs_ip_to_mac_translation( &p_port->vs_manager );
+
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -702,6 +732,8 @@
cl_obj_deinit( p_obj );
+ __shutdown_vs_ip_to_mac_translation( &p_port->vs_manager );
+
cl_free( p_port );
IPOIB_EXIT( IPOIB_DBG_INIT );
@@ -1780,7 +1812,7 @@
continue;
}
- len = p_wc->length - sizeof(ib_grh_t);
+ len = p_wc->length - sizeof(ib_grh_t); //????? Can there be a buffer
overrun here ????
if( len < sizeof(ipoib_hdr_t) )
{
@@ -1838,7 +1870,7 @@
{
/* Unfiltered. Setup the ethernet header and report. */
cl_perf_start( RecvTcp );
- status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+ status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
cl_perf_stop( &p_port->p_adapter->perf, RecvTcp );
break;
}
@@ -1876,7 +1908,7 @@
{
/* Unfiltered. Setup the ethernet header and report. */
cl_perf_start( RecvUdp );
- status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+ status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
cl_perf_stop( &p_port->p_adapter->perf, RecvUdp );
}
break;
@@ -1898,7 +1930,7 @@
default:
/* Unfiltered. Setup the ethernet header and report. */
cl_perf_start( RecvGen );
- status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+ status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
cl_perf_stop( &p_port->p_adapter->perf, RecvGen );
}
@@ -1943,11 +1975,15 @@
static ib_api_status_t
__recv_gen(
+ IN const ipoib_port_t* const p_port,
IN const ipoib_pkt_t* const p_ipoib,
OUT eth_pkt_t* const p_eth,
IN ipoib_endpt_t* const p_src,
IN ipoib_endpt_t* const p_dst )
{
+ net16_t OriginalType;
+ ib_api_status_t status;
+ mac_addr_t dst_mac;
IPOIB_ENTER( IPOIB_DBG_RECV );
if( !p_src || !p_dst )
@@ -1957,6 +1993,8 @@
return IB_NOT_DONE;
}
+ OriginalType = p_ipoib->hdr.type;
+
/*
* Fill in the ethernet header. Note that doing so will overwrite
* the IPoIB header, so start by moving the information from the IPoIB
@@ -1966,6 +2004,14 @@
p_eth->hdr.src = p_src->mac;
p_eth->hdr.dst = p_dst->mac;
+ if (OriginalType == ETH_PROT_TYPE_IP)
+ {
+ status = __get_mac_from_ip(&p_port->vs_manager,
p_ipoib->type.ip.hdr.dst_ip, &dst_mac);
+ if ( status == IB_SUCCESS)
+ {
+ p_eth->hdr.dst = dst_mac;
+ }
+ }
IPOIB_EXIT( IPOIB_DBG_RECV );
return IB_SUCCESS;
}
@@ -1991,7 +2037,7 @@
UNUSED_PARAM( p_port );
/* Create the ethernet header. */
- status = __recv_gen( p_ipoib, p_eth, p_src, p_dst );
+ status = __recv_gen( p_port, p_ipoib, p_eth, p_src, p_dst );
if( status != IB_SUCCESS )
{
IPOIB_TRACE_EXIT( IPOIB_DBG_ERROR,
@@ -2128,6 +2174,7 @@
const ipoib_arp_pkt_t *p_ib_arp;
ib_gid_t gid;
mac_addr_t mac;
+ mac_addr_t dst_mac;
ipoib_hw_addr_t null_hw = {0};
IPOIB_ENTER( IPOIB_DBG_RECV );
@@ -2279,7 +2326,7 @@
* Create the ethernet header. Note that this is done last so that
* we have a chance to create a new endpoint.
*/
- status = __recv_gen( p_ipoib, p_eth, *pp_src, p_dst );
+ status = __recv_gen( p_port, p_ipoib, p_eth, *pp_src, p_dst );
if( status != IB_SUCCESS )
{
IPOIB_TRACE_EXIT( IPOIB_DBG_ERROR,
@@ -2288,6 +2335,20 @@
return status;
}
+ if (p_eth->hdr.type == ETH_PROT_TYPE_ARP) {
+ if ((p_eth->type.arp.op == ARP_OP_REP))
+ {
+ status = __get_mac_from_ip(&p_port->vs_manager,
p_eth->type.arp.dst_ip, &dst_mac);
+ if ( status == IB_SUCCESS)
+ {
+ p_eth->hdr.dst = dst_mac;
+ p_eth->type.arp.dst_hw = dst_mac;
+ }
+ }
+
+
+ }
+
IPOIB_EXIT( IPOIB_DBG_RECV );
return IB_SUCCESS;
}
@@ -3133,7 +3194,7 @@
p_cid[1] = 21;
}
- CL_ASSERT( p_cid[1] == 21 );
+//?????? CL_ASSERT( p_cid[1] == 21 ); // This asserts seems to bounce,
nothing happens if ignored ???
p_cid[23]= DHCP_OPT_END;
ib_gid_set_default( &gid, p_port->p_adapter->guids.port_guid );
cl_memcpy( &p_cid[7], &gid, sizeof(ib_gid_t) );
@@ -3219,6 +3280,11 @@
return NDIS_STATUS_INVALID_DATA;
}
+ if ((p_arp->op == ARP_OP_REQ))
+ {
+ __put_mac_ip_pair(&p_port->vs_manager,p_arp->src_ip, p_arp->src_hw );
+ }
+
/* Allocate our scratch buffer. */
p_desc->p_buf = (send_buf_t*)
ExAllocateFromNPagedLookasideList( &p_port->buf_mgr.send_buf_list );
@@ -5211,3 +5277,124 @@
IPOIB_EXIT( IPOIB_DBG_MCAST );
}
+
+static void
+__init_vs_ip_to_mac_translation(
+ IN VS_ip_mac_manager * p_manager)
+{
+ p_manager->p_pairs = NULL;
+ p_manager->data_size = 0;
+ p_manager->array_size = 0;
+}
+
+static void
+__shutdown_vs_ip_to_mac_translation(
+ IN VS_ip_mac_manager * p_manager)
+{
+ if ( p_manager->p_pairs != NULL )
+ {
+ cl_free( p_manager->p_pairs );
+ }
+}
+
+static ib_api_status_t
+__get_mac_from_ip(
+ IN const VS_ip_mac_manager * const p_manager,
+ IN net32_t dst_ip,
+ OUT mac_addr_t * p_dst_mac)
+{
+ uint32_t i;
+ for (i = 0 ; i < p_manager->data_size; i++ )
+ {
+ if (p_manager->p_pairs[i].dst_ip == dst_ip)
+ {
+ // We have found the IP that we are looking for
+ *p_dst_mac = p_manager->p_pairs[i].mac;
+ IPOIB_TRACE( IPOIB_DBG_VM,("__get_mac_from_ip dst_ip = %d.%d.%d.%d
found in table\n",
+ ((dst_ip & 0xff ) ),
+ ((dst_ip & 0xff00 ) >> 8 ),
+ ((dst_ip & 0xff0000 ) >> 16 ),
+ ((dst_ip & 0xff000000) >> 24 )));
+
+ return IB_SUCCESS;
+ }
+
+ }
+ // Not found
+ IPOIB_TRACE( IPOIB_DBG_VM,("__get_mac_from_ip dst_ip = %d.%d.%d.%d not
found \n",
+ ((dst_ip & 0xff ) ),
+ ((dst_ip & 0xff00 ) >> 8 ),
+ ((dst_ip & 0xff0000 ) >> 16 ),
+ ((dst_ip & 0xff000000) >> 24 )));
+
+
+ return IB_NOT_FOUND;
+}
+
+static ib_api_status_t
+__put_mac_ip_pair(
+ IN VS_ip_mac_manager * p_manager,
+ IN net32_t dst_ip,
+ IN mac_addr_t dst_mac)
+{
+ uint32_t i;
+ uint32_t new_size = 0;
+ VS_ip_mac_pair *new_array;
+ IPOIB_ENTER( IPOIB_DBG_VM );
+
+ IPOIB_TRACE( IPOIB_DBG_VM,("__put_mac_ip_pair dst_ip = %d.%d.%d.%d
\n",
+ ((dst_ip & 0xff ) ),
+ ((dst_ip & 0xff00 ) >> 8 ),
+ ((dst_ip & 0xff0000 ) >> 16 ),
+ ((dst_ip & 0xff000000) >> 24 )));
+
+ // First step is to look if this is actually an update and not adding
+ for (i = 0 ; i < p_manager->data_size; i++ )
+ {
+ if (p_manager->p_pairs[i].dst_ip == dst_ip)
+ {
+ // We have found the IP that we are looking for, update it
+ p_manager->p_pairs[i].mac = dst_mac;
+ return IB_SUCCESS;
+ }
+
+ }
+ // Not found, let see if we need to increase the table
+ if ( p_manager->array_size <= p_manager->data_size )
+ {
+ // Need to increase the array
+ if (p_manager->array_size < 4)
+ {
+ new_size = 4;
+ } else {
+ new_size = p_manager->array_size * 2;
+ }
+ new_array = cl_zalloc(new_size * sizeof (VS_ip_mac_pair));
+ if ( new_array == NULL )
+ {
+ IPOIB_TRACE_EXIT( IPOIB_DBG_ERROR,
+ ("Failed to allocate new_array.\n") );
+ return CL_INSUFFICIENT_MEMORY;
+ }
+ // copy the data to the new array
+ if ( p_manager->array_size > 0 )
+ {
+ cl_memcpy ( new_array, p_manager->p_pairs, p_manager->data_size *
sizeof (VS_ip_mac_pair));
+ }
+ if ( p_manager->p_pairs != NULL )
+ {
+ cl_free( p_manager->p_pairs );
+ }
+ p_manager->p_pairs = new_array;
+ new_array = NULL;
+ p_manager->array_size = new_size;
+ }
+
+ p_manager->p_pairs[p_manager->data_size].dst_ip = dst_ip;
+ p_manager->p_pairs[p_manager->data_size].mac = dst_mac;
+ p_manager->data_size++;
+
+ return IB_SUCCESS;
+
+}
+
Index: Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.h (revision 226)
+++ Q:/OpenIb/gen1/trunk/ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -468,7 +468,20 @@
* are inserted in the LID map.
*********/
+typedef struct _VS_ip_mac_pair
+{
+ mac_addr_t mac;
+ net32_t dst_ip;
+} VS_ip_mac_pair;
+typedef struct _VS_ip_mac_manager
+{
+ VS_ip_mac_pair *p_pairs;
+ uint32_t array_size;
+ uint32_t data_size;
+} VS_ip_mac_manager;
+
+
typedef struct _ipoib_port
{
cl_obj_t obj;
@@ -496,8 +509,12 @@
atomic32_t endpt_rdr;
atomic32_t hdr_idx;
- ipoib_hdr_t hdr[1];
+ VS_ip_mac_manager vs_manager;
+
+ // Must be last
+ ipoib_hdr_t hdr[1];
+
} ipoib_port_t;
/*
* FIELDS
@@ -536,7 +553,6 @@
* Endpoint manager.
*********/
-
ib_api_status_t
ipoib_create_port(
IN struct _ipoib_adapter* const p_adapter,
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060308/25a9141b/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: vmdhcp.patch
Type: application/octet-stream
Size: 10895 bytes
Desc: vmdhcp.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060308/25a9141b/attachment.obj>
More information about the ofw
mailing list