[ofw] [PATCH 3/7 v2] ib/cm: export CM only interface

Sean Hefty sean.hefty at intel.com
Tue Jan 20 17:07:22 PST 2009


To support WinVerbs and a future OFED libibcm compatibility library (requires
a new library and driver), export a new IB CM interface.  The interface sits
above the existing communication endpoint (CEP) manager.  This allows the
interface to only support IB CM functionality.

Rather than exposing low-level CM related MADs as done in the initial posting
of the code, a cleaner, more self-contained interface was defined.
The interface is based on the requirements of the CM as defined by the
IB architecture.  The interface does not assume that the underlying CM has
direct access to QP data structures or user event queues.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
diffs from v1:

* rnr_nak_timeout was added as a separate parameter to kal_cep_pre_req and
  kal_cep_pre_rep.  The QP attribute parameter was made optional to both calls.

* eliminated status variable in kal_alloc_cep.
* changed failure status to invalid handle, versus invalid parameter in
  kal_config_cep

* used sizeof(*p_cm_rep) in __ndi_fill_cm_rep.  The compiler did like it okay.
* Do not deref QP if kal_config_cep succeeds, but kal_cep_pre_rep fails in
  __ndi_send_rep

Not done:
- Rework other CM kernel functions to use iba_cm_* data types instead of
  ib_cm_*_t.  I will submit this as a separate patch.

- Convert CM functions to return NTSTATUS versus ib_status_t.  This needs to
  be separate to ensure that all users of those calls convert.

- Any race between destroying a CEP and a QP still exists.  I do not have a
  simple solution to this yet, but I don't think the proposed patch is worse
  than what's there today.

--- trunk\inc\kernel\iba\ib_cm_ifc.h	1969-12-31 16:00:00.000000000 -0800
+++ branches\winverbs\inc\kernel\iba\ib_cm_ifc.h	2008-11-20 23:21:48.890625000 -0800
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2008 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _ib_cm_ifc_h_
+#define _ib_cm_ifc_h_
+
+#include <initguid.h>
+#include <iba/ib_al_ifc.h>
+#include <iba/ib_types.h>
+#include <iba/ib_al.h>
+
+struct _iba_cm_event;
+
+typedef struct _iba_cm_id	/* CM identifier returned through create_id's pp_id */
+{
+	void		*context;	/* caller's create_id context, or the listening id for ids created from a listen */
+	NTSTATUS	(*callback)(struct _iba_cm_id *p_id, struct _iba_cm_event *p_event);
+	net32_t		cid;		/* id of the CEP backing this CM id */
+/* A failing NTSTATUS returned from callback makes the CM destroy the CEP and free this id. */
+}	iba_cm_id;
+
+typedef struct _iba_cm_req	/* REQ parameters taken by send_req; also embedded in iba_cm_req_event */
+{
+	ib_net64_t					service_id;
+
+	ib_path_rec_t				*p_primary_path;
+	ib_path_rec_t				*p_alt_path;
+
+	net32_t						qpn;			/* cached by the CEP as the local QPN */
+	ib_qp_type_t				qp_type;
+	net32_t						starting_psn;	/* written to the REQ as the starting PSN */
+
+	void						*p_pdata;		/* private data copied into the REQ */
+	uint8_t						pdata_len;		/* length of p_pdata */
+
+	uint8_t						max_cm_retries;
+	uint8_t						resp_res;
+	uint8_t						init_depth;
+	uint8_t						remote_resp_timeout;
+	uint8_t						flow_ctrl;
+	uint8_t						local_resp_timeout;
+	uint8_t						rnr_retry_cnt;
+	uint8_t						retry_cnt;
+	uint8_t						srq;
+
+}	iba_cm_req;
+
+typedef struct _iba_cm_req_event	/* type of iba_cm_event.data.req */
+{
+	iba_cm_req					req;	/* same layout that send_req accepts */
+	net64_t						local_ca_guid;
+	net64_t						remote_ca_guid;
+	uint16_t					pkey_index;
+	uint8_t						port_num;
+
+}	iba_cm_req_event;
+
+typedef struct _iba_cm_rep	/* REP parameters taken by send_rep; also embedded in iba_cm_rep_event */
+{
+	net32_t						qpn;
+	net32_t						starting_psn;
+
+	void						*p_pdata;		/* private data buffer */
+	uint8_t						pdata_len;		/* length of p_pdata */
+
+	ib_cm_failover_t			failover_accepted;
+	uint8_t						resp_res;
+	uint8_t						init_depth;
+	uint8_t						flow_ctrl;
+	uint8_t						rnr_retry_cnt;
+	uint8_t						srq;
+
+}	iba_cm_rep;
+
+typedef struct _iba_cm_rep_event	/* type of iba_cm_event.data.rep */
+{
+	iba_cm_rep					rep;	/* same layout that send_rep accepts */
+	net64_t						ca_guid;
+	uint8_t						target_ack_delay;
+
+}	iba_cm_rep_event;
+
+typedef struct _iba_cm_pdata	/* private-data-only payload shared by the three aliases below */
+{
+	void						*p_pdata;
+	uint8_t						pdata_len;	/* length of p_pdata */
+
+}	iba_cm_pdata;
+typedef iba_cm_pdata			iba_cm_rtu_event;	/* type of iba_cm_event.data.rtu */
+typedef iba_cm_pdata			iba_cm_dreq_event;	/* type of iba_cm_event.data.dreq */
+typedef iba_cm_pdata			iba_cm_drep_event;	/* type of iba_cm_event.data.drep */
+
+typedef struct _iba_cm_rej_event	/* type of iba_cm_event.data.rej */
+{
+	void						*ari;		/* counterpart of send_rej's p_ari */
+	void						*p_pdata;
+	ib_rej_status_t				reason;		/* counterpart of send_rej's status */
+	uint8_t						ari_length;
+	uint8_t						pdata_len;
+	
+}	iba_cm_rej_event;
+
+typedef struct _iba_cm_mra_event	/* type of iba_cm_event.data.mra */
+{
+	void						*p_pdata;
+	uint8_t						pdata_len;
+	uint8_t						service_timeout;	/* counterpart of send_mra's service_timeout */
+
+}	iba_cm_mra_event;
+
+typedef struct _iba_cm_lap	/* LAP parameters taken by send_lap */
+{
+	ib_path_rec_t				*p_alt_path;
+	void						*p_pdata;
+	uint8_t						pdata_len;	/* length of p_pdata */
+	uint8_t						remote_resp_timeout;
+
+}	iba_cm_lap;
+typedef iba_cm_lap iba_cm_lap_event;	/* type of iba_cm_event.data.lap */
+
+typedef struct _iba_cm_apr	/* APR parameters taken by send_apr */
+{
+	void						*p_pdata;
+	ib_apr_info_t				*p_info;
+	uint8_t						pdata_len;		/* length of p_pdata */
+	uint8_t						info_length;	/* length of p_info */
+	ib_apr_status_t				status;
+
+}	iba_cm_apr;
+typedef iba_cm_apr iba_cm_apr_event;	/* type of iba_cm_event.data.apr */
+
+typedef struct _iba_cm_sidr_req	/* SIDR REQ parameters taken by send_sidr_req; embedded in iba_cm_sidr_req_event */
+{
+	ib_net64_t					service_id;
+	ib_path_rec_t				*p_path;
+	void						*p_pdata;
+	uint8_t						pdata_len;	/* length of p_pdata */
+	uint8_t						max_cm_retries;
+	uint32_t					timeout;	/* NOTE(review): units are not stated anywhere in this header */
+
+}	iba_cm_sidr_req;
+
+typedef struct _iba_cm_sidr_req_event	/* type of iba_cm_event.data.sidr_req */
+{
+	iba_cm_sidr_req				sidr_req;	/* same layout that send_sidr_req accepts */
+	net64_t						ca_guid;
+	uint16_t					pkey_index;
+	uint8_t						port_num;
+
+}	iba_cm_sidr_req_event;
+
+typedef struct _iba_cm_sidr_rep	/* SIDR REP parameters taken by send_sidr_rep */
+{
+	net32_t						qpn;
+	net32_t						qkey;
+	void						*p_pdata;
+	void						*p_info;
+	uint8_t						pdata_len;	/* length of p_pdata */
+	uint8_t						info_len;	/* length of p_info */
+	ib_sidr_status_t			status;
+
+}	iba_cm_sidr_rep;
+typedef iba_cm_sidr_rep iba_cm_sidr_rep_event;	/* type of iba_cm_event.data.sidr_rep */
+
+typedef enum _iba_cm_event_type	/* value stored in iba_cm_event.type */
+{
+	iba_cm_req_error,
+	iba_cm_req_received,
+	iba_cm_rep_error,
+	iba_cm_rep_received,
+	iba_cm_rtu_received,
+	iba_cm_dreq_error,
+	iba_cm_dreq_received,
+	iba_cm_drep_received,
+	iba_cm_rej_received,
+	iba_cm_mra_received,
+	iba_cm_lap_error,
+	iba_cm_lap_received,
+	iba_cm_apr_received,
+	iba_cm_sidr_req_error,
+	iba_cm_sidr_req_received,
+	iba_cm_sidr_rep_received
+/* NOTE(review): which data member accompanies each *_error value is not shown here - confirm in kal_cep_format_event. */
+}	iba_cm_event_type;
+
+typedef struct _iba_cm_event	/* event handed to an iba_cm_id callback */
+{
+	iba_cm_event_type			type;	/* presumably selects the valid member of data - TODO confirm */
+	union
+	{
+		iba_cm_req_event		req;
+		iba_cm_rep_event		rep;
+		iba_cm_rtu_event		rtu;
+		iba_cm_dreq_event		dreq;
+		iba_cm_drep_event		drep;
+		iba_cm_rej_event		rej;
+		iba_cm_mra_event		mra;
+		iba_cm_lap_event		lap;
+		iba_cm_apr_event		apr;
+		iba_cm_sidr_req_event	sidr_req;
+		iba_cm_sidr_rep_event	sidr_rep;
+
+	}	data;
+
+}	iba_cm_event;
+
+typedef struct _iba_cm_interface	/* CM dispatch table; cm_get_interface fills in every entry */
+{
+	NTSTATUS		(*create_id)(NTSTATUS (*callback)(iba_cm_id *p_id,
+													  iba_cm_event *p_event),
+								 void *context, iba_cm_id **pp_id);
+	void			(*destroy_id)(iba_cm_id *p_id);	/* implementation waits for the CEP destroy callback */
+
+	NTSTATUS		(*listen)(iba_cm_id *p_id, net64_t service_id, void *p_compare_buf,
+							  uint8_t compare_len, uint8_t compare_offset);
+
+	NTSTATUS		(*send_req)(iba_cm_id *p_id, iba_cm_req *p_req);
+	NTSTATUS		(*send_rep)(iba_cm_id *p_id, iba_cm_rep *p_rep);
+	NTSTATUS		(*send_rtu)(iba_cm_id *p_id, void *p_pdata, uint8_t pdata_len);
+
+	NTSTATUS		(*send_dreq)(iba_cm_id *p_id, void *p_pdata, uint8_t pdata_len);
+	NTSTATUS		(*send_drep)(iba_cm_id *p_id, void *p_pdata, uint8_t pdata_len);
+
+	NTSTATUS		(*send_rej)(iba_cm_id *p_id, ib_rej_status_t status,
+								void *p_ari, uint8_t ari_len,
+								void *p_pdata, uint8_t pdata_len);
+	NTSTATUS		(*send_mra)(iba_cm_id *p_id, uint8_t service_timeout,
+								void *p_pdata, uint8_t pdata_len);
+
+	NTSTATUS		(*send_lap)(iba_cm_id *p_id, iba_cm_lap *p_lap);
+	NTSTATUS		(*send_apr)(iba_cm_id *p_id, iba_cm_apr *p_apr);
+	/* The two SIDR entries are stubs in al_cm.c and return STATUS_NOT_SUPPORTED. */
+	NTSTATUS		(*send_sidr_req)(iba_cm_id *p_id, iba_cm_sidr_req *p_req);
+	NTSTATUS		(*send_sidr_rep)(iba_cm_id *p_id, iba_cm_sidr_rep *p_rep);
+
+	NTSTATUS		(*get_qp_attr)(iba_cm_id *p_id, ib_qp_state_t state,	/* INIT, RTR or RTS only */
+								   ib_qp_mod_t *p_attr);
+
+	NTSTATUS		(*migrate)(iba_cm_id *p_id);
+	NTSTATUS		(*established)(iba_cm_id *p_id);
+
+}	iba_cm_interface;
+
+static inline USHORT IbaCmVersion(UINT8 Major, UINT8 Minor)
+{	/* Pack the version as 0xMMmm: major in the high byte, minor in the low byte. */
+	return (USHORT) (((USHORT) Major << 8) | Minor);
+}
+
+static inline UINT8 IbaCmVersionMajor(USHORT Version)
+{	/* The major number is the high byte of the packed version. */
+	return (UINT8) ((Version >> 8) & 0xFF);
+}
+
+static inline UINT8 IbaCmVersionMinor(USHORT Version)
+{	/* The minor number is the low byte of the packed version. */
+	return (UINT8) (Version & 0xFF);
+}
+
+// {EACC1466-BB2D-4478-B5BE-40EDF7EE08AB}
+DEFINE_GUID(GUID_INFINIBAND_INTERFACE_CM, 0xeacc1466, 0xbb2d, 0x4478,
+			0xb5, 0xbe, 0x40, 0xed, 0xf7, 0xee, 0x8, 0xab);
+
+typedef struct _INFINIBAND_INTERFACE_CM	/* presumably the structure queried via GUID_INFINIBAND_INTERFACE_CM - confirm against the exporting driver */
+{
+	INTERFACE			InterfaceHeader;
+	iba_cm_interface	CM;	/* CM dispatch table */
+
+}	INFINIBAND_INTERFACE_CM;
+
+#endif // _ib_cm_ifc_h_
--- trunk\core\al\kernel\al_cm.c	1969-12-31 16:00:00.000000000 -0800
+++ branches\winverbs\core\al\kernel\al_cm.c	2009-01-20 16:01:24.451979200 -0800
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2008 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <iba/ib_cm_ifc.h>
+#include "al_cm_cep.h"
+#include "al_mgr.h"
+#include "al_proxy.h"
+#include "al_cm_conn.h"
+#include "al_cm_sidr.h"
+
+typedef struct _iba_cm_id_priv	/* private wrapper allocated around every iba_cm_id */
+{
+	iba_cm_id	id;				/* public part; CONTAINING_RECORD recovers the wrapper from it */
+	KEVENT		destroy_event;	/* set by cm_destroy_handler, awaited in cm_destroy_id */
+/* id must stay the first member: cm_destroy_handler casts the CEP context (an iba_cm_id *) to this type. */
+}	iba_cm_id_priv;
+
+/* Allocate an id wrapper from non-paged pool and record the user's callback
+ * and context.  Returns NULL on allocation failure; id.cid is left unset. */
+static iba_cm_id*
+cm_alloc_id(NTSTATUS (*callback)(iba_cm_id *p_id, iba_cm_event *p_event),
+			void *context)
+{
+	iba_cm_id_priv	*priv;
+
+	priv = ExAllocatePoolWithTag(NonPagedPool, sizeof(iba_cm_id_priv), 'mcbi');
+	if (priv != NULL) {
+		KeInitializeEvent(&priv->destroy_event, NotificationEvent, FALSE);
+		priv->id.callback = callback;
+		priv->id.context = context;
+	}
+	return (priv != NULL) ? &priv->id : NULL;
+}
+
+static void cm_free_id(iba_cm_id *id)	/* release an id obtained from cm_alloc_id */
+{
+	iba_cm_id_priv	*priv = CONTAINING_RECORD(id, iba_cm_id_priv, id);
+	ExFreePool(priv);
+}
+
+/* CEP destroy callback: signal the event that cm_destroy_id waits on. */
+static void
+cm_destroy_handler(void *context)
+{
+	KeSetEvent(&((iba_cm_id_priv *) context)->destroy_event, 0, FALSE);
+}
+
+/* CEP event callback: drain every MAD queued on 'cid', convert each one to an
+ * iba_cm_event and deliver it to the owning id's callback. */
+static void
+cm_cep_handler(const ib_al_handle_t h_al, const net32_t cid)
+{
+	void				*context;
+	net32_t				new_cid;
+	ib_mad_element_t	*mad;
+	iba_cm_id			*id, *listen_id;
+	iba_cm_event		event;
+	NTSTATUS			status;
+
+	while (al_cep_poll(h_al, cid, &context, &new_cid, &mad) == IB_SUCCESS) {
+		if (new_cid == AL_INVALID_CID) {
+			id = (iba_cm_id *) context;
+		} else {
+			/* The poll produced a new CEP; context is the listening id. */
+			listen_id = (iba_cm_id *) context;
+			id = cm_alloc_id(listen_id->callback, listen_id);
+			if (id == NULL) {
+				kal_destroy_cep(h_al, new_cid);
+				ib_put_mad(mad);
+				continue;
+			}
+			kal_config_cep(h_al, new_cid, cm_cep_handler, id, cm_destroy_handler);
+			id->cid = new_cid;
+		}
+		kal_cep_format_event(h_al, id->cid, mad, &event);
+		status = id->callback(id, &event);
+		if (!NT_SUCCESS(status)) {
+			/* Detach callbacks (new_cid is invalid for existing ids) before freeing id. */
+			kal_config_cep(h_al, id->cid, NULL, NULL, NULL);
+			kal_destroy_cep(h_al, id->cid);
+			cm_free_id(id);
+		}
+		ib_put_mad(mad);
+	}
+}
+
+/* Create a CM id backed by a newly allocated CEP whose event and destroy
+ * callbacks point at this module.  *pp_id is written only on success. */
+static NTSTATUS
+cm_create_id(NTSTATUS (*callback)(iba_cm_id *p_id, iba_cm_event *p_event),
+			 void *context, iba_cm_id **pp_id)
+{
+	ib_api_status_t	alloc_status;
+	iba_cm_id		*new_id = cm_alloc_id(callback, context);
+
+	if (new_id == NULL)
+		return STATUS_NO_MEMORY;
+
+	alloc_status = kal_alloc_cep(gh_al, &new_id->cid);
+	if (alloc_status == IB_SUCCESS) {
+		kal_config_cep(gh_al, new_id->cid, cm_cep_handler, new_id, cm_destroy_handler);
+		*pp_id = new_id;
+		return STATUS_SUCCESS;
+	}
+
+	cm_free_id(new_id);
+	return ib_to_ntstatus(alloc_status);
+}
+
+/* Destroy the id's CEP, block until cm_destroy_handler signals, then free. */
+static void
+cm_destroy_id(iba_cm_id *p_id)
+{
+	iba_cm_id_priv	*priv = CONTAINING_RECORD(p_id, iba_cm_id_priv, id);
+
+	kal_destroy_cep(gh_al, p_id->cid);
+	KeWaitForSingleObject(&priv->destroy_event, Executive, KernelMode, FALSE, NULL);
+	cm_free_id(&priv->id);
+}
+
+/* Start listening on service_id on all ports; the compare buffer arguments
+ * are handed to al_cep_listen unchanged. */
+static NTSTATUS
+cm_listen(iba_cm_id *p_id, net64_t service_id, void *p_compare_buf,
+		  uint8_t compare_len, uint8_t compare_offset)
+{
+	ib_cep_listen_t listen_info;
+
+	listen_info.cmp_offset = compare_offset;
+	listen_info.cmp_len = compare_len;
+	listen_info.p_cmp_buf = p_compare_buf;
+	listen_info.port_guid = IB_ALL_PORTS;
+	listen_info.svc_id = service_id;
+
+	return ib_to_ntstatus(al_cep_listen(gh_al, p_id->cid, &listen_info));
+}
+
+/* Prepare a REQ from p_req on the id's CEP and, if that succeeds, send it.
+ * rnr_nak_timeout is passed as 0 and no QP attributes are requested. */
+static NTSTATUS
+cm_send_req(iba_cm_id *p_id, iba_cm_req *p_req)
+{
+	ib_api_status_t ib_status;
+
+	ib_status = kal_cep_pre_req(gh_al, p_id->cid, p_req, 0, NULL);
+	if (ib_status == IB_SUCCESS) {
+		ib_status = al_cep_send_req(gh_al, p_id->cid);
+	}
+	return ib_to_ntstatus(ib_status);
+}
+
+/* Run kal_cep_pre_rep with p_rep on the id's CEP and, if it succeeds,
+ * al_cep_send_rep.  rnr_nak_timeout is 0 and no QP attributes are requested. */
+static NTSTATUS
+cm_send_rep(iba_cm_id *p_id, iba_cm_rep *p_rep)
+{
+	ib_api_status_t ib_status;
+
+	ib_status = kal_cep_pre_rep(gh_al, p_id->cid, p_rep, 0, NULL);
+	if (ib_status == IB_SUCCESS) {
+		ib_status = al_cep_send_rep(gh_al, p_id->cid);
+	}
+	return ib_to_ntstatus(ib_status);
+}
+
+/* Pass an RTU request with the given private data to al_cep_rtu. */
+static NTSTATUS
+cm_send_rtu(iba_cm_id *p_id, void *p_pdata, uint8_t pdata_len)
+{
+	/* The AL status is translated to an NTSTATUS for the caller. */
+	return ib_to_ntstatus(
+		al_cep_rtu(gh_al, p_id->cid, p_pdata, pdata_len));
+}
+
+/* Pass a DREQ request with the given private data to al_cep_dreq. */
+static NTSTATUS
+cm_send_dreq(iba_cm_id *p_id, void *p_pdata, uint8_t pdata_len)
+{
+	/* The AL status is translated to an NTSTATUS for the caller. */
+	return ib_to_ntstatus(
+		al_cep_dreq(gh_al, p_id->cid, p_pdata, pdata_len));
+}
+
+/* Pass a DREP request with the given private data to al_cep_drep. */
+static NTSTATUS
+cm_send_drep(iba_cm_id *p_id, void *p_pdata, uint8_t pdata_len)
+{
+	/* The AL status is translated to an NTSTATUS for the caller. */
+	return ib_to_ntstatus(
+		al_cep_drep(gh_al, p_id->cid, p_pdata, pdata_len));
+}
+
+/* Pass a REJ request (reason, ARI and private data) to al_cep_rej. */
+static NTSTATUS
+cm_send_rej(iba_cm_id *p_id, ib_rej_status_t status,
+			void *p_ari, uint8_t ari_len,
+			void *p_pdata, uint8_t pdata_len)
+{
+	const net32_t cep_cid = p_id->cid;
+
+	return ib_to_ntstatus(al_cep_rej(gh_al, cep_cid, status,
+									 p_ari, ari_len, p_pdata, pdata_len));
+}
+
+/* Send an MRA: package the service timeout and private data into an
+ * ib_cm_mra_t and pass it to al_cep_mra. */
+static NTSTATUS
+cm_send_mra(iba_cm_id *p_id, uint8_t service_timeout,
+			void *p_pdata, uint8_t pdata_len)
+{
+	ib_cm_mra_t		mra_args;
+
+	mra_args.mra_length = pdata_len;
+	mra_args.p_mra_pdata = p_pdata;
+	mra_args.svc_timeout = service_timeout;
+
+	return ib_to_ntstatus(al_cep_mra(gh_al, p_id->cid, &mra_args));
+}
+
+/* Send a LAP: translate the iba_cm_lap into a zeroed ib_cm_lap_t (fields not
+ * copied here stay zero) and pass it to al_cep_lap. */
+static NTSTATUS
+cm_send_lap(iba_cm_id *p_id, iba_cm_lap *p_lap)
+{
+	ib_cm_lap_t		al_lap;
+
+	RtlZeroMemory(&al_lap, sizeof al_lap);
+	al_lap.p_alt_path = p_lap->p_alt_path;
+	al_lap.remote_resp_timeout = p_lap->remote_resp_timeout;
+	al_lap.lap_length = p_lap->pdata_len;
+	al_lap.p_lap_pdata = p_lap->p_pdata;
+
+	return ib_to_ntstatus(al_cep_lap(gh_al, p_id->cid, &al_lap));
+}
+
+/* Send an APR: build a zeroed ib_cm_apr_t from p_apr, run al_cep_pre_apr and,
+ * if it succeeds, al_cep_send_apr. */
+static NTSTATUS
+cm_send_apr(iba_cm_id *p_id, iba_cm_apr *p_apr)
+{
+	ib_qp_mod_t		unused_attr;	/* filled by al_cep_pre_apr; not used here */
+	ib_cm_apr_t		al_apr;
+	ib_api_status_t	ib_status;
+
+	RtlZeroMemory(&al_apr, sizeof al_apr);
+	al_apr.p_info = p_apr->p_info;
+	al_apr.info_length = p_apr->info_length;
+	al_apr.apr_status = p_apr->status;
+	al_apr.apr_length = p_apr->pdata_len;
+	al_apr.p_apr_pdata = p_apr->p_pdata;
+
+	ib_status = al_cep_pre_apr(gh_al, p_id->cid, &al_apr, &unused_attr);
+	if (ib_status == IB_SUCCESS) {
+		ib_status = al_cep_send_apr(gh_al, p_id->cid);
+	}
+	return ib_to_ntstatus(ib_status);
+}
+
+static NTSTATUS
+cm_send_sidr_req(iba_cm_id *p_id, iba_cm_sidr_req *p_req)
+{
+	/* Stub: sending a SIDR REQ is not implemented; both arguments are ignored. */
+	UNUSED_PARAM(p_req);
+	UNUSED_PARAM(p_id);
+	return STATUS_NOT_SUPPORTED;
+}
+
+static NTSTATUS
+cm_send_sidr_rep(iba_cm_id *p_id, iba_cm_sidr_rep *p_rep)
+{
+	/* Stub: sending a SIDR REP is not implemented; both arguments are ignored. */
+	UNUSED_PARAM(p_rep);
+	UNUSED_PARAM(p_id);
+	return STATUS_NOT_SUPPORTED;
+}
+
+/* Fetch the QP modify attributes for the requested state from the id's CEP.
+ * INIT, RTR and RTS map to al_cep_get_init_attr, al_cep_get_rtr_attr and
+ * al_cep_get_rts_attr; any other state yields STATUS_INVALID_PARAMETER. */
+static NTSTATUS
+cm_get_qp_attr(iba_cm_id *p_id, ib_qp_state_t state, ib_qp_mod_t *p_attr)
+{
+	ib_api_status_t	ib_status;
+
+	if (state == IB_QPS_INIT) {
+		ib_status = al_cep_get_init_attr(gh_al, p_id->cid, p_attr);
+	} else if (state == IB_QPS_RTR) {
+		ib_status = al_cep_get_rtr_attr(gh_al, p_id->cid, p_attr);
+	} else if (state == IB_QPS_RTS) {
+		ib_status = al_cep_get_rts_attr(gh_al, p_id->cid, p_attr);
+	} else {
+		/* Only the three states above are handled. */
+		return STATUS_INVALID_PARAMETER;
+	}
+
+	return ib_to_ntstatus(ib_status);
+}
+
+/* Forward a migrate request for the id's CEP to al_cep_migrate. */
+static NTSTATUS
+cm_migrate(iba_cm_id *p_id)
+{
+	/* The AL status is translated to an NTSTATUS for the caller. */
+	return ib_to_ntstatus(
+		al_cep_migrate(gh_al, p_id->cid));
+}
+
+/* Forward an "established" notification for the id's CEP to al_cep_established. */
+static NTSTATUS
+cm_establish(iba_cm_id *p_id)
+{
+	/* The AL status is translated to an NTSTATUS for the caller. */
+	return ib_to_ntstatus(
+		al_cep_established(gh_al, p_id->cid));
+}
+
+void cm_get_interface(iba_cm_interface *p_ifc)
+{	/* Populate every entry of the caller's CM dispatch table. */
+	p_ifc->established = cm_establish;
+	p_ifc->migrate = cm_migrate;
+	p_ifc->get_qp_attr = cm_get_qp_attr;
+	p_ifc->send_sidr_rep = cm_send_sidr_rep;	/* stub: STATUS_NOT_SUPPORTED */
+	p_ifc->send_sidr_req = cm_send_sidr_req;	/* stub: STATUS_NOT_SUPPORTED */
+	p_ifc->send_apr = cm_send_apr;
+	p_ifc->send_lap = cm_send_lap;
+	p_ifc->send_mra = cm_send_mra;
+	p_ifc->send_rej = cm_send_rej;
+	p_ifc->send_drep = cm_send_drep;
+	p_ifc->send_dreq = cm_send_dreq;
+	p_ifc->send_rtu = cm_send_rtu;
+	p_ifc->send_rep = cm_send_rep;
+	p_ifc->send_req = cm_send_req;
+	p_ifc->listen = cm_listen;
+	p_ifc->destroy_id = cm_destroy_id;
+	p_ifc->create_id = cm_create_id;
+}
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk/core/al/al_cm_cep.h branches\winverbs/core/al/al_cm_cep.h
--- trunk/core/al/al_cm_cep.h	2008-07-16 08:53:58.172250000 -0700
+++ branches\winverbs/core/al/al_cm_cep.h	2009-01-20 15:57:06.531643700 -0800
@@ -36,10 +36,12 @@
 #ifndef _AL_CM_CEP_H_
 #define _AL_CM_CEP_H_
 
-
 #include <iba/ib_al.h>
 #include "al_common.h"
 
+#ifdef CL_KERNEL
+#include <iba/ib_cm_ifc.h>
+#endif
 
 #define CEP_EVENT_TIMEOUT	0x80000000
 #define CEP_EVENT_RECV		0x40000000
@@ -108,6 +110,21 @@ al_create_cep(
 * event notification.
 *********/
 
+#ifdef CL_KERNEL
+ib_api_status_t
+kal_alloc_cep(
+	IN				ib_al_handle_t				h_al,
+	IN	OUT			net32_t* const				p_cid );
+
+ib_api_status_t
+kal_config_cep(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+	IN				al_pfn_cep_cb_t				pfn_cb,
+	IN				void*						context,
+	IN				ib_pfn_destroy_cb_t			pfn_destroy_cb );
+#endif
+
 /* Destruction is asynchronous. */
 void
 al_destroy_cep(
@@ -124,6 +141,16 @@ al_cep_listen(
 	IN				ib_cep_listen_t* const		p_listen_info );
 
 
+#ifdef CL_KERNEL
+ib_api_status_t
+kal_cep_pre_req(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+	IN		const	iba_cm_req* const			p_cm_req,
+	IN				uint8_t						rnr_nak_timeout,
+	IN	OUT			ib_qp_mod_t* const			p_init OPTIONAL );
+#endif
+
 ib_api_status_t
 al_cep_pre_req(
 	IN				ib_al_handle_t				h_al,
@@ -148,22 +175,40 @@ al_cep_pre_rep(
 	IN	OUT			net32_t* const				p_cid,
 		OUT			ib_qp_mod_t* const			p_init );
 
+#ifdef CL_KERNEL
 ib_api_status_t
-al_cep_pre_rep_ex(
+kal_cep_pre_rep(
 	IN				ib_al_handle_t				h_al,
 	IN				net32_t						cid,
-	IN				al_pfn_cep_cb_t				pfn_cb,
-	IN				void*						context,
-	IN				ib_pfn_destroy_cb_t			pfn_destroy_cb OPTIONAL,
-	IN		const	ib_cm_rep_t* const			p_cm_rep,
-	IN	OUT			net32_t* const				p_cid,
-		OUT			ib_qp_mod_t* const			p_init );
+	IN		const	iba_cm_rep* const			p_cm_rep,
+	IN				uint8_t						rnr_nak_timeout,
+	IN	OUT			ib_qp_mod_t* const			p_init OPTIONAL );
+
+void
+kal_destroy_cep(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid );
+#endif
 
 ib_api_status_t
 al_cep_send_rep(
 	IN				ib_al_handle_t				h_al,
 	IN				net32_t						cid );
 
+#ifdef CL_KERNEL
+void
+kal_cep_format_event(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+	IN				ib_mad_element_t			*p_mad,
+	IN	OUT			iba_cm_event				*p_event);
+
+ib_api_status_t
+al_cep_get_init_attr(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+		OUT			ib_qp_mod_t* const			p_init );
+#endif
 
 ib_api_status_t
 al_cep_get_rtr_attr(
@@ -271,6 +316,7 @@ al_cep_poll(
 
 
 #ifdef CL_KERNEL
+void cm_get_interface(iba_cm_interface *p_ifc);
 
 NTSTATUS
 al_cep_queue_irp(
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk/core/al/kernel/al_cm_cep.c branches\winverbs/core/al/kernel/al_cm_cep.c
--- trunk/core/al/kernel/al_cm_cep.c	2008-07-16 08:53:56.469125000 -0700
+++ branches\winverbs/core/al/kernel/al_cm_cep.c	2009-01-20 16:10:50.374156000 -0800
@@ -36,6 +36,7 @@
 #include <complib/cl_rbmap.h>
 #include <complib/cl_qmap.h>
 #include <complib/cl_spinlock.h>
+#include <iba/ib_cm_ifc.h>
 #include "al_common.h"
 #include "al_cm_cep.h"
 #include "al_cm_conn.h"
@@ -333,7 +334,6 @@ typedef struct _al_kcep
 	ib_mad_svc_handle_t			h_mad_svc;
 	ib_mad_element_t			*p_send_mad;
 
-	/* Number of outstanding MADs.  Delays destruction of CEP destruction. */
 	atomic32_t					ref_cnt;
 
 	/* MAD transaction ID to use when sending MADs. */
@@ -367,7 +367,7 @@ typedef struct _al_kcep
 	}	mads;
 
 	/*
-	 * NDI stuff
+	 * NDI stuff - TODO: manage above core kernel CM code
 	 */
 
 	/* IRP list head */
@@ -1064,7 +1064,8 @@ __req_handler(
 
 	KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl );
 
-	if( conn_req_get_qp_type( p_req ) > IB_QPT_UNRELIABLE_CONN )
+	if( conn_req_get_qp_type( p_req ) > IB_QPT_UNRELIABLE_CONN ||
+		conn_req_get_lcl_qpn( p_req ) == 0 )
 	{
 		/* Reserved value.  Reject. */
 		AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("Invalid transport type received.\n") );
@@ -3410,6 +3411,7 @@ __cep_queue_mad(
 		return IB_INVALID_STATE;
 	}
 
+	// TODO: Remove - manage above core kernel CM code
 	/* NDI connection request case */
 	if ( p_cep->state == CEP_STATE_LISTEN &&
 		(p_cep->sid & ~0x0ffffffI64) == IB_REQ_CM_RDMA_SID_PREFIX )
@@ -3650,6 +3652,54 @@ __bind_cep(
 	cl_spinlock_release( &h_al->obj.lock );
 }
 
+ib_api_status_t
+kal_alloc_cep(
+	IN				ib_al_handle_t				h_al,
+	IN	OUT			net32_t* const				p_cid )	/* written only on success */
+{
+	kcep_t				*p_cep;
+	KLOCK_QUEUE_HANDLE	hdl;
+	/* Create the CEP while holding the CEP manager lock. */
+	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
+	p_cep = __create_cep();
+	KeReleaseInStackQueuedSpinLock( &hdl );
+	/* NOTE(review): the old al_create_cep called __bind_cep with this lock still held - confirm binding after release is safe. */
+	if( !p_cep )
+		return IB_INSUFFICIENT_MEMORY;
+
+	__bind_cep(p_cep, h_al, NULL, NULL);	/* no callback or context yet; kal_config_cep sets them */
+	*p_cid = p_cep->cid;
+	return IB_SUCCESS;
+}
+
+/* Set the event callback, context and destroy callback of the CEP named by
+ * cid.  Returns IB_INVALID_HANDLE if the lookup for (h_al, cid) fails. */
+ib_api_status_t
+kal_config_cep(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+	IN				al_pfn_cep_cb_t				pfn_cb,
+	IN				void*						context,
+	IN				ib_pfn_destroy_cb_t			pfn_destroy_cb )
+{
+	kcep_t				*p_cep;
+	KLOCK_QUEUE_HANDLE	hdl;
+	ib_api_status_t		status = IB_INVALID_HANDLE;
+
+	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
+	p_cep = __lookup_cep( h_al, cid );
+	/* On a failed lookup status keeps its IB_INVALID_HANDLE default. */
+	if( p_cep )
+	{
+		p_cep->pfn_cb = pfn_cb;
+		p_cep->context = context;
+		p_cep->pfn_destroy_cb = pfn_destroy_cb;
+		status = IB_SUCCESS;
+	}
+	KeReleaseInStackQueuedSpinLock( &hdl );
+
+	return status;
+}
 
 static inline void
 __unbind_cep(
@@ -3967,10 +4017,12 @@ __cleanup_cep(
 			sizeof(p_cep->local_ca_guid), NULL, 0 );
 		break;
 
-	case CEP_STATE_ESTABLISHED:
-	case CEP_STATE_LAP_RCVD:
 	case CEP_STATE_LAP_SENT:
 	case CEP_STATE_LAP_MRA_RCVD:
+		ib_cancel_mad( p_cep->h_mad_svc, p_cep->p_send_mad );
+		/* fall through */
+	case CEP_STATE_ESTABLISHED:
+	case CEP_STATE_LAP_RCVD:
 	case CEP_STATE_LAP_MRA_SENT:
 	case CEP_STATE_PRE_APR:
 	case CEP_STATE_PRE_APR_MRA_SENT:
@@ -3980,9 +4032,9 @@ __cleanup_cep(
 		/* Fall through. */
 
 	case CEP_STATE_DREQ_SENT:
+		ib_cancel_mad( p_cep->h_mad_svc, p_cep->p_send_mad );
 		p_cep->state = CEP_STATE_DREQ_DESTROY;
-		AL_EXIT( AL_DBG_CM );
-		return cl_atomic_dec( &p_cep->ref_cnt );
+		goto out;
 
 	case CEP_STATE_DREQ_RCVD:
 		/* Send the DREP. */
@@ -4017,13 +4069,13 @@ __cleanup_cep(
 	case CEP_STATE_TIMEWAIT:
 		/* Already in timewait - so all is good. */
 		p_cep->state = CEP_STATE_DESTROY;
-		AL_EXIT( AL_DBG_CM );
-		return cl_atomic_dec( &p_cep->ref_cnt );
+		goto out;
 	}
 
 	p_cep->state = CEP_STATE_DESTROY;
 	__insert_timewait( p_cep );
 
+out:
 	AL_EXIT( AL_DBG_CM );
 	return cl_atomic_dec( &p_cep->ref_cnt );
 }
@@ -4064,40 +4116,19 @@ al_create_cep(
 	IN				ib_pfn_destroy_cb_t			pfn_destroy_cb,
 	IN	OUT			net32_t* const				p_cid )
 {
-	kcep_t				*p_cep;
-	KLOCK_QUEUE_HANDLE	hdl;
+	ib_api_status_t	status;
 
 	AL_ENTER( AL_DBG_CM );
+	CL_ASSERT( h_al );
 
-	CL_ASSERT( p_cid );
-
-	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
-	if( *p_cid != AL_INVALID_CID )
-	{
-		KeReleaseInStackQueuedSpinLock( &hdl );
-		return IB_RESOURCE_BUSY;
-	}
-	p_cep = __create_cep();
-	if( !p_cep )
+	status = kal_alloc_cep(h_al, p_cid);
+	if ( status == IB_SUCCESS )
 	{
-		KeReleaseInStackQueuedSpinLock( &hdl );
-		AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("Failed to allocate CEP.\n") );
-		return IB_INSUFFICIENT_MEMORY;
+		kal_config_cep(h_al, *p_cid, pfn_cb, context, pfn_destroy_cb);
 	}
 
-	__bind_cep( p_cep, h_al, pfn_cb, context );
-
-	p_cep->pfn_destroy_cb = pfn_destroy_cb;
-	*p_cid = p_cep->cid;
-
-	KeReleaseInStackQueuedSpinLock( &hdl );
-
-	AL_PRINT(TRACE_LEVEL_INFORMATION ,AL_DBG_CM ,
-		("Created CEP with cid %d, h_al %p, context %p \n", 
-		p_cep->cid, h_al, p_cep->context ));
-
 	AL_EXIT( AL_DBG_CM );
-	return IB_SUCCESS;
+	return status;
 }
 
 
@@ -4169,10 +4200,6 @@ al_destroy_cep(
 
 	KeReleaseInStackQueuedSpinLock( &hdl );
 
-	/*
-	 * Done waiting.  Release the reference so the timewait timer callback
-	 * can finish cleaning up.
-	 */
 	if( !ref_cnt && pfn_destroy_cb )
 		pfn_destroy_cb( context );
 
@@ -4183,6 +4210,14 @@ al_destroy_cep(
 	AL_EXIT( AL_DBG_CM );
 }
 
+void
+kal_destroy_cep(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid )	/* by value, unlike al_destroy_cep */
+{
+	al_destroy_cep(h_al, &cid, FALSE);	/* al_destroy_cep takes the cid by pointer, so pass the local copy's address */
+}
+
 
 ib_api_status_t
 al_cep_listen(
@@ -4433,8 +4468,9 @@ __format_req_path(
 
 static ib_api_status_t
 __format_req(
+	IN				cep_agent_t* const			p_port_cep,
 	IN				kcep_t* const				p_cep,
-	IN		const	ib_cm_req_t* const			p_cm_req )
+	IN		const	iba_cm_req* const			p_cm_req )
 {
 	ib_api_status_t	status;
 	mad_cm_req_t*	p_req;
@@ -4453,14 +4489,14 @@ __format_req(
 
 	p_req = (mad_cm_req_t*)p_cep->p_mad->p_mad_buf;
 
-	ci_ca_lock_attr( p_cm_req->h_qp->obj.p_ci_ca );
+	ci_ca_lock_attr( p_port_cep->h_ca->obj.p_ci_ca );
 	/*
 	 * Store the local CA's ack timeout for use when computing
 	 * the local ACK timeout.
 	 */
 	p_cep->local_ack_delay =
-		p_cm_req->h_qp->obj.p_ci_ca->p_pnp_attr->local_ack_delay;
-	ci_ca_unlock_attr( p_cm_req->h_qp->obj.p_ci_ca );
+		p_port_cep->h_ca->obj.p_ci_ca->p_pnp_attr->local_ack_delay;
+	ci_ca_unlock_attr( p_port_cep->h_ca->obj.p_ci_ca );
 
 	/* Format the primary path. */
 	__format_req_path( p_cm_req->p_primary_path,
@@ -4479,16 +4515,16 @@ __format_req(
 
 	/* Set the local communication in the REQ. */
 	p_req->local_comm_id = p_cep->local_comm_id;
-	p_req->sid = p_cm_req->svc_id;
-	p_req->local_ca_guid = p_cm_req->h_qp->obj.p_ci_ca->verbs.guid;
+	p_req->sid = p_cm_req->service_id;
+	p_req->local_ca_guid = p_port_cep->h_ca->obj.p_ci_ca->verbs.guid;
 
 	conn_req_set_lcl_qpn( p_cep->local_qpn, p_req );
 	conn_req_set_resp_res( p_cm_req->resp_res, p_req );
 	conn_req_set_init_depth( p_cm_req->init_depth, p_req );
 	conn_req_set_remote_resp_timeout( p_cm_req->remote_resp_timeout, p_req );
-	conn_req_set_qp_type( p_cm_req->h_qp->type, p_req );
+	conn_req_set_qp_type( p_cm_req->qp_type, p_req );
 	conn_req_set_flow_ctrl( p_cm_req->flow_ctrl, p_req );
-	conn_req_set_starting_psn( p_cep->rq_psn, p_req );
+	conn_req_set_starting_psn( p_cm_req->starting_psn, p_req );
 
 	conn_req_set_lcl_resp_timeout( p_cm_req->local_resp_timeout, p_req );
 	conn_req_set_retry_cnt( p_cm_req->retry_cnt, p_req );
@@ -4500,7 +4536,7 @@ __format_req(
 
 	conn_req_set_max_cm_retries( p_cm_req->max_cm_retries, p_req );
 	status = conn_req_set_pdata(
-		p_cm_req->p_req_pdata, p_cm_req->req_length, p_req );
+		p_cm_req->p_pdata, p_cm_req->pdata_len, p_req );
 
 	conn_req_clr_rsvd_fields( p_req );
 
@@ -4512,7 +4548,8 @@ __format_req(
 static ib_api_status_t
 __save_user_req(
 	IN				kcep_t* const				p_cep,
-	IN		const	ib_cm_req_t* const			p_cm_req,
+	IN		const	iba_cm_req* const			p_cm_req,
+	IN				uint8_t						rnr_nak_timeout,
 		OUT			cep_agent_t** const			pp_port_cep )
 {
 	cep_agent_t		*p_port_cep;
@@ -4525,40 +4562,12 @@ __save_user_req(
 		return IB_INVALID_SETTING;
 	}
 
-	p_cep->sid = p_cm_req->svc_id;
-
+	p_cep->sid = p_cm_req->service_id;
 	p_cep->idx_primary = 0;
-
-	p_cep->p2p = (p_cm_req->pfn_cm_req_cb != NULL);
-
-	if( p_cm_req->p_compare_buffer )
-	{
-		if( !p_cm_req->compare_length ||
-			(p_cm_req->compare_offset + p_cm_req->compare_length) >
-			IB_REQ_PDATA_SIZE )
-		{
-			AL_EXIT( AL_DBG_CM );
-			return IB_INVALID_SETTING;
-		}
-		p_cep->p_cmp_buf = cl_malloc( p_cm_req->compare_length );
-		if( !p_cep->p_cmp_buf )
-		{
-			AL_EXIT( AL_DBG_CM );
-			return IB_INSUFFICIENT_MEMORY;
-		}
-
-		cl_memcpy( p_cep->p_cmp_buf,
-			p_cm_req->p_compare_buffer, p_cm_req->compare_length );
-
-		p_cep->cmp_len = p_cm_req->compare_length;
-		p_cep->cmp_offset = p_cm_req->compare_offset;
-	}
-	else
-	{
-		p_cep->p_cmp_buf = NULL;
-		p_cep->cmp_len = 0;
-		p_cep->cmp_offset = 0;
-	}
+	p_cep->p2p = FALSE;
+	p_cep->p_cmp_buf = NULL;
+	p_cep->cmp_len = 0;
+	p_cep->cmp_offset = 0;
 	p_cep->was_active = TRUE;
 
 	/* Validate the primary path. */
@@ -4571,15 +4580,6 @@ __save_user_req(
 
 	p_cep->av[0].attr.conn.seq_err_retry_cnt = p_cm_req->retry_cnt;
 
-	/* Make sure the paths will work on the desired QP. */
-	if( p_port_cep->h_ca->obj.p_ci_ca->verbs.guid !=
-		p_cm_req->h_qp->obj.p_ci_ca->verbs.guid )
-	{
-		AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-			("Primary path not realizable on given QP.\n") );
-		return IB_INVALID_SETTING;
-	}
-
 	p_cep->local_ca_guid = p_port_cep->h_ca->obj.p_ci_ca->verbs.guid;
 
 	*pp_port_cep = p_port_cep;
@@ -4644,7 +4644,7 @@ __save_user_req(
 	p_cep->remote_comm_id = 0;
 
 	/* Cache the local QPN. */
-	p_cep->local_qpn = p_cm_req->h_qp->num;
+	p_cep->local_qpn = p_cm_req->qpn;
 	p_cep->remote_ca_guid = 0;
 	p_cep->remote_qpn = 0;
 
@@ -4662,9 +4662,9 @@ __save_user_req(
 	 */
 	p_cep->timewait_time.QuadPart = 0;
 
-	p_cep->rq_psn = p_cep->local_qpn;
+	p_cep->rq_psn = p_cm_req->starting_psn;
 
-	p_cep->rnr_nak_timeout = p_cm_req->rnr_nak_timeout;
+	p_cep->rnr_nak_timeout = rnr_nak_timeout;
 
 	AL_EXIT( AL_DBG_CM );
 	return IB_SUCCESS;
@@ -4672,11 +4672,12 @@ __save_user_req(
 
 
 ib_api_status_t
-al_cep_pre_req(
+kal_cep_pre_req(
 	IN				ib_al_handle_t				h_al,
 	IN				net32_t						cid,
-	IN		const	ib_cm_req_t* const			p_cm_req,
-		OUT			ib_qp_mod_t* const			p_init )
+	IN		const	iba_cm_req* const			p_cm_req,
+	IN				uint8_t						rnr_nak_timeout,
+	IN	OUT			ib_qp_mod_t* const			p_init OPTIONAL )
 {
 	ib_api_status_t		status;
 	kcep_t				*p_cep;
@@ -4687,14 +4688,6 @@ al_cep_pre_req(
 
 	CL_ASSERT( h_al );
 	CL_ASSERT( p_cm_req );
-	CL_ASSERT( p_init );
-
-	/* TODO: Code P2P support. */
-	if( p_cm_req->pfn_cm_req_cb )
-	{
-		AL_EXIT( AL_DBG_CM );
-		return IB_UNSUPPORTED;
-	}
 
 	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
 	p_cep = __lookup_cep( h_al, cid );
@@ -4713,7 +4706,7 @@ al_cep_pre_req(
 		p_cep->p_mad = NULL;
 		/* Fall through. */
 	case CEP_STATE_IDLE:
-		status = __save_user_req( p_cep, p_cm_req, &p_port_cep );
+		status = __save_user_req( p_cep, p_cm_req, rnr_nak_timeout, &p_port_cep );
 		if( status != IB_SUCCESS )
 			break;
 
@@ -4722,7 +4715,7 @@ al_cep_pre_req(
 		if( status != IB_SUCCESS )
 			break;
 
-		status = __format_req( p_cep, p_cm_req );
+		status = __format_req( p_port_cep, p_cep, p_cm_req );
 		if( status != IB_SUCCESS )
 		{
 			AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("Invalid pdata length.\n") );
@@ -4732,14 +4725,16 @@ al_cep_pre_req(
 		}
 
 		/* Format the INIT qp modify attributes. */
-		p_init->req_state = IB_QPS_INIT;
-		p_init->state.init.primary_port =
-			p_cep->av[p_cep->idx_primary].attr.port_num;
-		p_init->state.init.qkey = 0;
-		p_init->state.init.pkey_index =
-			p_cep->av[p_cep->idx_primary].pkey_index;
-		p_init->state.init.access_ctrl = IB_AC_LOCAL_WRITE;
-
+		if( p_init )
+		{
+			p_init->req_state = IB_QPS_INIT;
+			p_init->state.init.primary_port =
+				p_cep->av[p_cep->idx_primary].attr.port_num;
+			p_init->state.init.qkey = 0;
+			p_init->state.init.pkey_index =
+				p_cep->av[p_cep->idx_primary].pkey_index;
+			p_init->state.init.access_ctrl = IB_AC_LOCAL_WRITE;
+		}
 		p_cep->state = CEP_STATE_PRE_REQ;
 		break;
 
@@ -4757,6 +4752,40 @@ al_cep_pre_req(
 	return status;
 }
 
+ib_api_status_t
+al_cep_pre_req(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+	IN		const	ib_cm_req_t* const			p_cm_req,
+		OUT			ib_qp_mod_t* const			p_init )
+{
+	iba_cm_req req;
+	/* Legacy entry point: repackage the ib_cm_req_t as an iba_cm_req and forward it to kal_cep_pre_req. */
+	RtlZeroMemory(&req, sizeof req);	/* members not assigned below stay zero */
+	req.service_id = p_cm_req->svc_id;
+
+	req.p_primary_path = p_cm_req->p_primary_path;
+	req.p_alt_path = p_cm_req->p_alt_path;
+
+	req.qpn = p_cm_req->h_qp->num;
+	req.qp_type = p_cm_req->qp_type;
+	req.starting_psn = req.qpn;	/* the QP number doubles as the starting PSN */
+
+	req.p_pdata = (void *) p_cm_req->p_req_pdata;
+	req.pdata_len = p_cm_req->req_length;
+
+	req.max_cm_retries = p_cm_req->max_cm_retries;
+	req.resp_res = p_cm_req->resp_res;
+	req.init_depth = p_cm_req->init_depth;
+	req.remote_resp_timeout = p_cm_req->remote_resp_timeout;
+	req.flow_ctrl = (uint8_t) p_cm_req->flow_ctrl;
+	req.local_resp_timeout = p_cm_req->local_resp_timeout;
+	req.rnr_retry_cnt = p_cm_req->rnr_retry_cnt;
+	req.retry_cnt = p_cm_req->retry_cnt;
+	req.srq = (uint8_t) (p_cm_req->h_qp->h_srq != NULL);	/* 1 when the QP's h_srq is set, else 0 */
+
+	return kal_cep_pre_req(h_al, cid, &req, p_cm_req->rnr_nak_timeout, p_init);	/* rnr_nak_timeout travels as its own argument, not inside req */
+}
 
 ib_api_status_t
 al_cep_send_req(
@@ -4817,31 +4846,32 @@ al_cep_send_req(
 
 static void
 __save_user_rep(
+	IN				cep_agent_t* const			p_port_cep,
 	IN				kcep_t* const				p_cep,
-	IN		const	ib_cm_rep_t* const			p_cm_rep )
+	IN		const	iba_cm_rep* const			p_cm_rep,
+	IN				uint8_t						rnr_nak_timeout )
 {
 	AL_ENTER( AL_DBG_CM );
 
-	/* Cache the local QPN. */
-	p_cep->local_qpn = p_cm_rep->h_qp->num;
-	p_cep->rq_psn = p_cep->local_qpn;
+	p_cep->local_qpn = p_cm_rep->qpn;
+	p_cep->rq_psn = p_cm_rep->starting_psn;
 	p_cep->init_depth = p_cm_rep->init_depth;
 
-	ci_ca_lock_attr( p_cm_rep->h_qp->obj.p_ci_ca );
+	ci_ca_lock_attr( p_port_cep->h_ca->obj.p_ci_ca );
 	/* Check the CA's responder resource max and trim if necessary. */
-	if( (p_cm_rep->h_qp->obj.p_ci_ca->p_pnp_attr->max_qp_resp_res <
-		p_cep->resp_res) )
+	if( p_port_cep->h_ca->obj.p_ci_ca->p_pnp_attr->max_qp_resp_res <
+		p_cep->resp_res )
 	{
 		/*
 		 * The CA cannot handle the requested responder resources.
 		 * Set the response to the CA's maximum.
 		 */
 		p_cep->resp_res = 
-			p_cm_rep->h_qp->obj.p_ci_ca->p_pnp_attr->max_qp_resp_res;
+			p_port_cep->h_ca->obj.p_ci_ca->p_pnp_attr->max_qp_resp_res;
 	}
-	ci_ca_unlock_attr( p_cm_rep->h_qp->obj.p_ci_ca );
+	ci_ca_unlock_attr( p_port_cep->h_ca->obj.p_ci_ca );
 
-	p_cep->rnr_nak_timeout = p_cm_rep->rnr_nak_timeout;
+	p_cep->rnr_nak_timeout = rnr_nak_timeout;
 
 	AL_EXIT( AL_DBG_CM );
 }
@@ -4849,8 +4879,9 @@ __save_user_rep(
 
 static ib_api_status_t
 __format_rep(
+	IN				cep_agent_t* const			p_port_cep,
 	IN				kcep_t* const				p_cep,
-	IN		const	ib_cm_rep_t* const			p_cm_rep )
+	IN		const	iba_cm_rep* const			p_cm_rep )
 {
 	ib_api_status_t		status;
 	mad_cm_rep_t		*p_rep;
@@ -4898,10 +4929,10 @@ __format_rep(
 
 	p_rep->resp_resources = p_cep->resp_res;
 
-	ci_ca_lock_attr( p_cm_rep->h_qp->obj.p_ci_ca );
+	ci_ca_lock_attr( p_port_cep->h_ca->obj.p_ci_ca );
 	conn_rep_set_target_ack_delay(
-		p_cm_rep->h_qp->obj.p_ci_ca->p_pnp_attr->local_ack_delay, p_rep );
-	ci_ca_unlock_attr( p_cm_rep->h_qp->obj.p_ci_ca );
+		p_port_cep->h_ca->obj.p_ci_ca->p_pnp_attr->local_ack_delay, p_rep );
+	ci_ca_unlock_attr( p_port_cep->h_ca->obj.p_ci_ca );
 
 	p_rep->initiator_depth = p_cep->init_depth;
 
@@ -4915,7 +4946,7 @@ __format_rep(
 	p_rep->local_ca_guid = p_cep->local_ca_guid;
 
 	status = conn_rep_set_pdata(
-		p_cm_rep->p_rep_pdata, p_cm_rep->rep_length, p_rep );
+		p_cm_rep->p_pdata, p_cm_rep->pdata_len, p_rep );
 
 	conn_rep_clr_rsvd_fields( p_rep );
 
@@ -4925,12 +4956,12 @@ __format_rep(
 
 
 
-ib_api_status_t
+static ib_api_status_t
 __al_cep_pre_rep(
 	IN				kcep_t						*p_cep,
-	IN				void*						context,
-	IN		const	ib_cm_rep_t* const			p_cm_rep,
-		OUT			ib_qp_mod_t* const			p_init )
+	IN		const	iba_cm_rep* const			p_cm_rep,
+	IN				uint8_t						rnr_nak_timeout,
+		OUT			ib_qp_mod_t* const			p_init OPTIONAL )
 {
 	ib_api_status_t		status;
 	cep_agent_t			*p_port_cep;
@@ -4938,7 +4969,6 @@ __al_cep_pre_rep(
 	AL_ENTER( AL_DBG_CM );
 
 	CL_ASSERT( p_cm_rep );
-	CL_ASSERT( p_init );
 
 	switch( p_cep->state )
 	{
@@ -4956,9 +4986,9 @@ __al_cep_pre_rep(
 		if( status != IB_SUCCESS )
 			break;
 
-		__save_user_rep( p_cep, p_cm_rep );
+		__save_user_rep( p_port_cep, p_cep, p_cm_rep, rnr_nak_timeout );
 
-		status = __format_rep( p_cep, p_cm_rep );
+		status = __format_rep( p_port_cep, p_cep, p_cm_rep );
 		if( status != IB_SUCCESS )
 		{
 			ib_put_mad( p_cep->p_mad );
@@ -4967,16 +4997,16 @@ __al_cep_pre_rep(
 		}
 
 		/* Format the INIT qp modify attributes. */
-		p_init->req_state = IB_QPS_INIT;
-		p_init->state.init.primary_port =
-			p_cep->av[p_cep->idx_primary].attr.port_num;
-		p_init->state.init.qkey = 0;
-		p_init->state.init.pkey_index =
-			p_cep->av[p_cep->idx_primary].pkey_index;
-		p_init->state.init.access_ctrl = IB_AC_LOCAL_WRITE;
-
-		p_cep->context = context;
-
+		if( p_init )
+		{
+			p_init->req_state = IB_QPS_INIT;
+			p_init->state.init.primary_port =
+				p_cep->av[p_cep->idx_primary].attr.port_num;
+			p_init->state.init.qkey = 0;
+			p_init->state.init.pkey_index =
+				p_cep->av[p_cep->idx_primary].pkey_index;
+			p_init->state.init.access_ctrl = IB_AC_LOCAL_WRITE;
+		}
 		/* Just OR in the PREP bit into the state. */
 		p_cep->state |= CEP_STATE_PREP;
 		break;
@@ -5001,56 +5031,54 @@ al_cep_pre_rep(
 	IN	OUT			net32_t* const				p_cid,
 		OUT			ib_qp_mod_t* const			p_init )
 {
-	ib_api_status_t		status;
 	kcep_t				*p_cep;
+	iba_cm_rep			rep;
 	KLOCK_QUEUE_HANDLE	hdl;
+	ib_api_status_t		status;
 
-	AL_ENTER( AL_DBG_CM );
-
-	CL_ASSERT( h_al );
-	CL_ASSERT( p_cm_rep );
-	CL_ASSERT( p_init );
+	RtlZeroMemory(&rep, sizeof rep);
+	rep.qpn = p_cm_rep->h_qp->num;
+	rep.starting_psn = rep.qpn;
+
+	rep.p_pdata = (void *) p_cm_rep->p_rep_pdata;
+	rep.pdata_len = p_cm_rep->rep_length;
+
+	rep.failover_accepted = p_cm_rep->failover_accepted;
+	rep.init_depth = p_cm_rep->init_depth;
+	rep.flow_ctrl = (uint8_t) p_cm_rep->flow_ctrl;
+	rep.rnr_retry_cnt = p_cm_rep->rnr_retry_cnt;
+	rep.srq = (uint8_t) (p_cm_rep->h_qp->h_srq != NULL);
 
 	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
-	if( *p_cid != AL_INVALID_CID )
-	{
-		KeReleaseInStackQueuedSpinLock( &hdl );
-		AL_EXIT( AL_DBG_CM );
-		return IB_RESOURCE_BUSY;
-	}
-
 	p_cep = __lookup_cep( h_al, cid );
-	if( !p_cep )
+	if (!p_cep )
 	{
-		KeReleaseInStackQueuedSpinLock( &hdl );
-		AL_EXIT( AL_DBG_CM );
-		return IB_INVALID_HANDLE;
+		status = IB_INVALID_HANDLE;
+		goto out;
 	}
 
-	status = __al_cep_pre_rep( p_cep, context, p_cm_rep, p_init );
-
-	if( status == IB_SUCCESS )
+	rep.resp_res = p_cep->resp_res;
+	status = __al_cep_pre_rep( p_cep, &rep, p_cm_rep->rnr_nak_timeout, p_init );
+	if ( status == IB_SUCCESS )
 	{
+		p_cep->context = context;
 		p_cep->pfn_destroy_cb = pfn_destroy_cb;
 		*p_cid = cid;
 	}
 
+out:
 	KeReleaseInStackQueuedSpinLock( &hdl );
-	AL_EXIT( AL_DBG_CM );
 	return status;
 }
 
 
 ib_api_status_t
-al_cep_pre_rep_ex(
+kal_cep_pre_rep(
 	IN				ib_al_handle_t				h_al,
 	IN				net32_t						cid,
-	IN				al_pfn_cep_cb_t				pfn_cb,
-	IN				void*						context,
-	IN				ib_pfn_destroy_cb_t			pfn_destroy_cb,
-	IN		const	ib_cm_rep_t* const			p_cm_rep,
-	IN	OUT			net32_t* const				p_cid,
-		OUT			ib_qp_mod_t* const			p_init )
+	IN		const	iba_cm_rep* const			p_cm_rep,
+	IN				uint8_t						rnr_nak_timeout,
+		OUT			ib_qp_mod_t* const			p_init OPTIONAL )
 {
 	ib_api_status_t		status;
 	kcep_t				*p_cep;
@@ -5060,33 +5088,18 @@ al_cep_pre_rep_ex(
 
 	CL_ASSERT( h_al );
 	CL_ASSERT( p_cm_rep );
-	CL_ASSERT( p_init );
 
 	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
-	if( *p_cid != AL_INVALID_CID )
-	{
-		KeReleaseInStackQueuedSpinLock( &hdl );
-		AL_EXIT( AL_DBG_CM );
-		return IB_RESOURCE_BUSY;
-	}
-
 	p_cep = __lookup_cep( h_al, cid );
 	if( !p_cep )
 	{
-		KeReleaseInStackQueuedSpinLock( &hdl );
-		AL_EXIT( AL_DBG_CM );
-		return IB_INVALID_HANDLE;
+		status = IB_INVALID_HANDLE;
+		goto out;
 	}
 
-	status = __al_cep_pre_rep( p_cep, context, p_cm_rep, p_init );
-
-	if( status == IB_SUCCESS )
-	{
-		p_cep->pfn_cb = pfn_cb;
-		p_cep->pfn_destroy_cb = pfn_destroy_cb;
-		*p_cid = cid;
-	}
+	status = __al_cep_pre_rep( p_cep, p_cm_rep, rnr_nak_timeout, p_init );
 
+out:
 	KeReleaseInStackQueuedSpinLock( &hdl );
 	AL_EXIT( AL_DBG_CM );
 	return status;
@@ -5110,9 +5123,8 @@ al_cep_send_rep(
 	p_cep = __lookup_cep( h_al, cid );
 	if( !p_cep )
 	{
-		KeReleaseInStackQueuedSpinLock( &hdl );
-		AL_EXIT( AL_DBG_CM );
-		return IB_INVALID_HANDLE;
+		status = IB_INVALID_HANDLE;
+		goto out;
 	}
 
 	switch( p_cep->state )
@@ -5123,7 +5135,9 @@ al_cep_send_rep(
 		p_port_cep = __get_cep_agent( p_cep );
 		if( !p_port_cep )
 		{
+			// Why call ib_put_mad() here but not below?
 			ib_put_mad( p_cep->p_mad );
+			// Why call __remove_cep() below but not here?
 			p_cep->state = CEP_STATE_IDLE;
 			status = IB_INSUFFICIENT_RESOURCES;
 		}
@@ -5148,6 +5162,7 @@ al_cep_send_rep(
 			("Invalid state: %d\n", p_cep->state) );
 		status = IB_INVALID_STATE;
 	}
+out:
 	KeReleaseInStackQueuedSpinLock( &hdl );
 	AL_EXIT( AL_DBG_CM );
 	return status;
@@ -5975,6 +5990,263 @@ al_cep_established(
 	return status;
 }
 
+static void
+__format_path(ib_path_rec_t *p_path, req_path_info_t *p_info,
+			  ib_net16_t pkey, uint8_t mtu)
+{	/* Fill *p_path from the REQ path info *p_info plus the caller-supplied pkey and mtu. */
+	p_path->resv0 = 0;
+	p_path->dgid = p_info->local_gid;	/* the REQ's "local" GID becomes the destination of this record... */
+	p_path->sgid = p_info->remote_gid;	/* ...and its "remote" GID the source (presumably because the REQ is written from the sender's side) */
+	p_path->dlid = p_info->local_lid;	/* LIDs are crossed the same way as the GIDs */
+	p_path->slid = p_info->remote_lid;
+	ib_path_rec_set_hop_flow_raw(p_path, p_info->hop_limit, 
+								 conn_req_path_get_flow_lbl(p_info), 0);
+	p_path->tclass = p_info->traffic_class;
+	p_path->num_path = 0;
+	p_path->pkey = pkey;
+	p_path->sl = conn_req_path_get_svc_lvl(p_info);
+	p_path->mtu = mtu;
+	p_path->rate = conn_req_path_get_pkt_rate(p_info);
+	p_path->pkt_life = conn_req_path_get_lcl_ack_timeout(p_info);	/* the REQ's local ACK timeout is stored as the packet lifetime */
+	p_path->preference = 0;
+	p_path->resv1 = 0;
+	p_path->resv2 = 0;
+}
+
+static void
+__format_event_req(kcep_t *p_cep, mad_cm_req_t *p_mad, iba_cm_req_event *p_req)
+{	/* Fill *p_req from the CEP state and the received REQ MAD; the MAD buffer is modified in place. */
+	p_req->local_ca_guid = p_cep->local_ca_guid;
+	p_req->remote_ca_guid = p_cep->remote_ca_guid;
+	p_req->pkey_index = p_cep->av[0].pkey_index;
+	p_req->port_num = p_cep->av[0].attr.port_num;
+	p_req->req.service_id = p_mad->sid;
+
+	p_req->req.qpn = conn_req_get_lcl_qpn(p_mad);
+	p_req->req.qp_type = conn_req_get_qp_type(p_mad);
+	p_req->req.starting_psn = conn_req_get_starting_psn(p_mad);
+
+	p_req->req.p_pdata = p_mad->pdata;	/* points into the MAD; not copied */
+	p_req->req.pdata_len = IB_REQ_PDATA_SIZE;
+
+	p_req->req.max_cm_retries = conn_req_get_max_cm_retries(p_mad);
+	p_req->req.resp_res = conn_req_get_init_depth(p_mad);	/* crossed: the REQ's initiator depth is reported as resp_res */
+	p_req->req.init_depth = conn_req_get_resp_res(p_mad);	/* crossed: the REQ's responder resources are reported as init_depth */
+	p_req->req.remote_resp_timeout = conn_req_get_resp_timeout(p_mad);
+	p_req->req.flow_ctrl = (uint8_t) conn_req_get_flow_ctrl(p_mad);
+	p_req->req.local_resp_timeout = conn_req_get_lcl_resp_timeout(p_mad);
+	p_req->req.rnr_retry_cnt = conn_req_get_rnr_retry_cnt(p_mad);
+	p_req->req.retry_cnt = conn_req_get_retry_cnt(p_mad);
+	p_req->req.srq = 0; // TODO: fix mad_cm_req_t
+
+	// The path records are built in place at the start of the MAD buffer, so
+	// each REQ field must be read out before the record that overwrites it is written.
+	p_req->req.p_primary_path = (ib_path_rec_t *) p_mad;
+	__format_path(p_req->req.p_primary_path, &p_mad->primary_path,
+				  p_mad->pkey, conn_req_get_mtu(p_mad));
+
+	if (p_mad->alternate_path.remote_lid != 0) {	/* a non-zero remote LID marks the alternate path as present */
+		p_req->req.p_alt_path = p_req->req.p_primary_path + 1;	/* alternate record sits right after the primary one */
+		__format_path(p_req->req.p_alt_path, &p_mad->alternate_path,
+					  p_req->req.p_primary_path->pkey,
+					  p_req->req.p_primary_path->mtu);	/* pkey and mtu are taken from the primary record just built */
+	} else {
+		p_req->req.p_alt_path = NULL;
+	}
+}
+
+static void
+__format_event_rep(mad_cm_rep_t *p_mad, iba_cm_rep_event *p_rep)
+{	/* Fill *p_rep from the fields of a received REP MAD. */
+	p_rep->ca_guid = p_mad->local_ca_guid;
+	p_rep->target_ack_delay = conn_rep_get_target_ack_delay(p_mad);
+	p_rep->rep.qpn = conn_rep_get_lcl_qpn(p_mad);
+	p_rep->rep.starting_psn = conn_rep_get_starting_psn(p_mad);
+
+	p_rep->rep.p_pdata = p_mad->pdata;	/* points into the MAD; not copied */
+	p_rep->rep.pdata_len = IB_REP_PDATA_SIZE;
+
+	p_rep->rep.failover_accepted = conn_rep_get_failover(p_mad);
+	p_rep->rep.resp_res = p_mad->initiator_depth;	/* crossed: the REP's initiator depth is reported as resp_res */
+	p_rep->rep.init_depth = p_mad->resp_resources;	/* crossed: the REP's responder resources are reported as init_depth */
+	p_rep->rep.flow_ctrl = (uint8_t) conn_rep_get_e2e_flow_ctl(p_mad);
+	p_rep->rep.rnr_retry_cnt = conn_rep_get_rnr_retry_cnt(p_mad);
+}
+
+static void
+__format_event_rej(mad_cm_rej_t *p_mad, iba_cm_rej_event *p_rej)
+{	/* Fill *p_rej from the fields of a received REJ MAD. */
+	p_rej->ari = p_mad->ari;
+	p_rej->p_pdata = p_mad->pdata;	/* points into the MAD; not copied */
+	p_rej->reason = p_mad->reason;
+	p_rej->ari_length = conn_rej_get_ari_len(p_mad);
+	p_rej->pdata_len = IB_REJ_PDATA_SIZE;	/* length of the REJ private-data field (the MRA size would overstate it) */
+}
+
+static void
+__format_event_mra(mad_cm_mra_t *p_mad, iba_cm_mra_event *p_mra)
+{	/* Fill *p_mra from the fields of a received MRA MAD. */
+	p_mra->p_pdata = p_mad->pdata;	/* points into the MAD; not copied */
+	p_mra->pdata_len = IB_MRA_PDATA_SIZE;
+	p_mra->service_timeout = conn_mra_get_svc_timeout(p_mad);
+}
+
+static void
+__format_event_pdata(kcep_t *p_cep, iba_cm_pdata *p_pdata)
+{	/* Point *p_pdata at the private data held in the CEP (pdata/psize); nothing is copied. */
+	p_pdata->p_pdata = p_cep->pdata;
+	p_pdata->pdata_len = p_cep->psize;
+}
+
+/*
+ * Called after polling a MAD from a CEP to parse the received CM message
+ * into readable event data.
+ */
+void
+kal_cep_format_event(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+	IN				ib_mad_element_t			*p_mad,
+	IN	OUT			iba_cm_event				*p_event)
+{
+	KLOCK_QUEUE_HANDLE	hdl;
+	kcep_t				*p_cep;
+	
+	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
+	p_cep = __lookup_cep( h_al, cid );
+	KeReleaseInStackQueuedSpinLock( &hdl );
+	/* NOTE(review): p_cep is not NULL-checked and is used after the lock is dropped -- confirm callers keep the CEP alive. */
+	switch (p_mad->p_mad_buf->attr_id) {	/* dispatch on the CM attribute ID of the MAD */
+	case CM_REQ_ATTR_ID:
+		if (p_mad->status == IB_SUCCESS) {
+			p_event->type = iba_cm_req_received;
+			__format_event_req(p_cep, (mad_cm_req_t*) p_mad->p_mad_buf, &p_event->data.req);
+		} else {
+			p_event->type = iba_cm_req_error;	/* any other MAD status is reported as the matching *_error event */
+		}
+		break;
+	case CM_REP_ATTR_ID:
+		if (p_mad->status == IB_SUCCESS) {
+			p_event->type = iba_cm_rep_received;
+			__format_event_rep((mad_cm_rep_t*) p_mad->p_mad_buf, &p_event->data.rep);
+		} else {
+			p_event->type = iba_cm_rep_error;
+		}
+		break;
+	case CM_RTU_ATTR_ID:
+		p_event->type = iba_cm_rtu_received;
+		__format_event_pdata(p_cep, &p_event->data.rtu);	/* RTU/DREQ/DREP private data is read from the CEP, not the MAD */
+		break;
+	case CM_DREQ_ATTR_ID:
+		if (p_mad->status == IB_SUCCESS) {
+			p_event->type = iba_cm_dreq_received;
+			__format_event_pdata(p_cep, &p_event->data.dreq);
+		} else {
+			p_event->type = iba_cm_dreq_error;
+		}
+		break;
+	case CM_DREP_ATTR_ID:
+		p_event->type = iba_cm_drep_received;
+		__format_event_pdata(p_cep, &p_event->data.drep);
+		break;
+	case CM_REJ_ATTR_ID:
+		p_event->type = iba_cm_rej_received;
+		__format_event_rej((mad_cm_rej_t*) p_mad->p_mad_buf, &p_event->data.rej);
+		break;
+	case CM_MRA_ATTR_ID:
+		p_event->type = iba_cm_mra_received;
+		__format_event_mra((mad_cm_mra_t*) p_mad->p_mad_buf, &p_event->data.mra);
+		break;
+	case CM_LAP_ATTR_ID:
+		if (p_mad->status == IB_SUCCESS) {
+			p_event->type = iba_cm_lap_received;
+			// TODO: format lap event
+		} else {
+			p_event->type = iba_cm_lap_error;
+		}
+		break;
+	case CM_APR_ATTR_ID:
+		p_event->type = iba_cm_apr_received;
+		// TODO: format apr event
+		break;
+	case CM_SIDR_REQ_ATTR_ID:
+		if (p_mad->status == IB_SUCCESS) {
+			p_event->type = iba_cm_sidr_req_received;
+			// TODO: format sidr req event
+		} else {
+			p_event->type = iba_cm_sidr_req_error;
+		}
+		break;
+	case CM_SIDR_REP_ATTR_ID:
+		p_event->type = iba_cm_sidr_rep_received;
+		// TODO: format sidr rep event
+		break;
+	default:
+		CL_ASSERT(0);	/* unrecognized attribute ID; p_event is left untouched */
+	}
+}
+
+
+ib_api_status_t
+al_cep_get_init_attr(
+	IN				ib_al_handle_t				h_al,
+	IN				net32_t						cid,
+		OUT			ib_qp_mod_t* const			p_init )
+{	/* Fill *p_init with the QP modify attributes for the INIT state of the CEP identified by cid. */
+	ib_api_status_t		status;
+	kcep_t				*p_cep;
+	KLOCK_QUEUE_HANDLE	hdl;
+
+	AL_ENTER( AL_DBG_CM );
+
+	KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );
+	p_cep = __lookup_cep( h_al, cid );
+	if( !p_cep )
+	{
+		status = IB_INVALID_HANDLE;	/* no CEP found for this h_al/cid pair */
+		goto out;
+	}
+
+	switch( p_cep->state )
+	{
+	case CEP_STATE_PRE_REQ:
+	case CEP_STATE_REQ_RCVD:
+	case CEP_STATE_PRE_REP:
+	case CEP_STATE_REQ_SENT:
+	case CEP_STATE_REQ_MRA_RCVD:
+	case CEP_STATE_REQ_MRA_SENT:
+	case CEP_STATE_PRE_REP_MRA_SENT:
+	case CEP_STATE_REP_RCVD:
+	case CEP_STATE_REP_SENT:
+	case CEP_STATE_REP_MRA_RCVD:
+	case CEP_STATE_REP_MRA_SENT:
+	case CEP_STATE_ESTABLISHED:
+		/* Format the INIT qp modify attributes. */
+		cl_memclr(p_init, sizeof(ib_qp_mod_t));	/* members not assigned below (e.g. qkey) are left zero */
+		p_init->req_state = IB_QPS_INIT;
+		p_init->state.init.primary_port =
+			p_cep->av[p_cep->idx_primary].attr.port_num;
+		p_init->state.init.pkey_index =
+			p_cep->av[p_cep->idx_primary].pkey_index;
+		p_init->state.init.access_ctrl = IB_AC_LOCAL_WRITE;
+		if ( p_cep->resp_res )	/* RDMA read/write and atomic access are added only when resp_res is non-zero */
+		{
+			p_init->state.init.access_ctrl |= IB_AC_RDMA_READ |
+											  IB_AC_RDMA_WRITE |
+											  IB_AC_ATOMIC;
+		}
+		status = IB_SUCCESS;
+		break;
+	default:
+		status = IB_INVALID_STATE;	/* any state not listed above; *p_init is not written */
+		break;
+	}
+
+out:
+	KeReleaseInStackQueuedSpinLock( &hdl );
+	AL_EXIT( AL_DBG_CM );
+	return status;
+}
 
 ib_api_status_t
 al_cep_get_rtr_attr(
@@ -6002,6 +6274,8 @@ al_cep_get_rtr_attr(
 
 	switch( p_cep->state )
 	{
+	case CEP_STATE_REQ_RCVD:
+	case CEP_STATE_REQ_MRA_SENT:
 	case CEP_STATE_PRE_REP:
 	case CEP_STATE_PRE_REP_MRA_SENT:
 	case CEP_STATE_REP_SENT:
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk/core/al/kernel/al_ndi_cm.c branches\winverbs/core/al/kernel/al_ndi_cm.c
--- trunk/core/al/kernel/al_ndi_cm.c	2008-10-29 11:23:54.390625000 -0700
+++ branches\winverbs/core/al/kernel/al_ndi_cm.c	2009-01-20 16:18:12.305413700 -0800
@@ -1469,24 +1469,20 @@ static void
 __ndi_fill_cm_rep(
 	IN		ib_qp_handle_t	const				h_qp,
 	IN		ual_ndi_rep_cm_ioctl_in_t			*p_rep,
-		OUT	ib_cm_rep_t							*p_cm_rep)
+		OUT	iba_cm_rep							*p_cm_rep)
 {
 	AL_ENTER( AL_DBG_NDI );
 
-	memset( p_cm_rep, 0, sizeof(ib_cm_rep_t) );
+	memset( p_cm_rep, 0, sizeof(*p_cm_rep) );
 
-	p_cm_rep->p_rep_pdata = p_rep->pdata;
-	p_cm_rep->rep_length = sizeof(p_rep->pdata);
+	p_cm_rep->p_pdata = p_rep->pdata;
+	p_cm_rep->pdata_len = sizeof(p_rep->pdata);
 
-	p_cm_rep->qp_type = IB_QPT_RELIABLE_CONN;
-	p_cm_rep->h_qp = h_qp;
+	p_cm_rep->qpn = h_qp->num;
 
-	p_cm_rep->access_ctrl = IB_AC_RDMA_READ | IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
 	p_cm_rep->init_depth = p_rep->init_depth;
-	p_cm_rep->target_ack_delay = 10;
 	p_cm_rep->failover_accepted = IB_FAILOVER_ACCEPT_UNSUPPORTED;
 	p_cm_rep->flow_ctrl = TRUE;	/* HCAs must support end-to-end flow control. */
-	p_cm_rep->rnr_nak_timeout = QP_ATTRIB_RNR_NAK_TIMEOUT;
 	p_cm_rep->rnr_retry_cnt = QP_ATTRIB_RNR_RETRY;
 
 	AL_EXIT( AL_DBG_NDI );
@@ -1499,7 +1495,7 @@ __ndi_send_rep(
 	IN		PIRP								p_irp )
 {
 	IO_STACK_LOCATION	*p_io_stack;
-	ib_cm_rep_t cm_rep;
+	iba_cm_rep cm_rep;
 	ib_qp_mod_t qp_mod;
 	ib_api_status_t status;
 	ual_ndi_rep_cm_ioctl_in_t *p_rep = 
@@ -1525,21 +1521,27 @@ __ndi_send_rep(
 	/* Format ib_cm_req_t structure */
 	__ndi_fill_cm_rep( h_qp, p_rep, &cm_rep );
 
-	ref_al_obj( &h_qp->obj ); /* Take CEP reference. */
-
 	/* prepare Passive CEP for connection */
-	status = al_cep_pre_rep_ex(
-		qp_get_al( h_qp ), p_rep->cid, __ndi_cm_handler, h_qp, deref_al_obj,
-		&cm_rep, &((al_conn_qp_t*)h_qp)->cid, &qp_mod );
+	ref_al_obj( &h_qp->obj ); /* Take CEP reference. */
+	status = kal_config_cep(qp_get_al( h_qp ), p_rep->cid, __ndi_cm_handler,
+							h_qp, deref_al_obj);
+	if ( status == IB_SUCCESS )
+	{
+		status = kal_cep_pre_rep(qp_get_al( h_qp ), p_rep->cid,
+								 &cm_rep, QP_ATTRIB_RNR_NAK_TIMEOUT, &qp_mod );
+	}
+	else
+	{
+		deref_al_obj( &h_qp->obj ); /* Release CEP reference. */
+	}
 	if( status != IB_SUCCESS )
 	{
 		IoFreeWorkItem( p_irp->Tail.Overlay.DriverContext[1] );
 		p_irp->Tail.Overlay.DriverContext[1] = NULL;
 		deref_al_obj( &h_qp->obj ); /* Release work item reference. */
 		al_destroy_cep( qp_get_al( h_qp ), &p_rep->cid, FALSE );
-		deref_al_obj( &h_qp->obj ); /* Release CEP reference. */
 		AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-			("al_cep_pre_rep_ex returned %s.\n", ib_get_err_str( status )) );
+			("kal_cep_pre_rep returned %s.\n", ib_get_err_str( status )) );
 		switch (status)
 		{
 			case IB_INVALID_HANDLE:
@@ -1552,6 +1554,10 @@ __ndi_send_rep(
 				return ib_to_ntstatus( status );
 		}
 	}
+	else
+	{
+		((al_conn_qp_t*)h_qp)->cid = p_rep->cid;
+	}
 
 	AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
 		("Prepared Passive CEP with cid %d, h_al %p, context %p\n",
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk/core/al/kernel/SOURCES branches\winverbs/core/al/kernel/SOURCES
--- trunk/core/al/kernel/SOURCES	2008-07-23 12:34:49.203500000 -0700
+++ branches\winverbs/core/al/kernel/SOURCES	2008-11-12 14:37:18.966875000 -0800
@@ -13,6 +13,7 @@ ENABLE_EVENT_TRACING=1
 
 SOURCES= ibal.rc			\
 	al_ci_ca.c				\
+	al_cm.c					\
 	al_cm_cep.c				\
 	al_dev.c				\
 	al_ioc_pnp.c			\
diff -up -r -X trunk\docs\dontdiff.txt -I '\$Id:' trunk/core/bus/kernel/bus_pnp.c branches\winverbs/core/bus/kernel/bus_pnp.c
--- trunk/core/bus/kernel/bus_pnp.c	2008-12-17 11:06:45.664250000 -0800
+++ branches\winverbs/core/bus/kernel/bus_pnp.c	2009-01-14 00:08:23.171875000 -0800
@@ -45,11 +45,14 @@
 #include "complib/cl_memory.h"
 #include <initguid.h>
 #include "iba/ib_ci_ifc.h"
+#include "iba/ib_cm_ifc.h"
+#include "al_cm_cep.h"
 
 
 /* Interface names are generated by IoRegisterDeviceInterface. */
 static UNICODE_STRING	al_ifc_name;
 static UNICODE_STRING	ci_ifc_name;
+static UNICODE_STRING	cm_ifc_name;
 
 KEVENT					g_ControlEvent;
 ULONG					g_bfi_InstanceCount;
@@ -102,6 +105,11 @@ __query_ci_ifc(
 	IN				IO_STACK_LOCATION* const	p_io_stack );
 
 static NTSTATUS
+__query_cm_ifc(
+	IN				DEVICE_OBJECT* const		p_dev_obj,
+	IN				IO_STACK_LOCATION* const	p_io_stack );
+
+static NTSTATUS
 fdo_query_interface(
 	IN				DEVICE_OBJECT* const		p_dev_obj,
 	IN				IRP* const					p_irp, 
@@ -130,6 +138,7 @@ __fdo_set_power(
 #pragma alloc_text (PAGE, fdo_query_bus_relations)
 #pragma alloc_text (PAGE, __query_al_ifc)
 #pragma alloc_text (PAGE, __query_ci_ifc)
+#pragma alloc_text (PAGE, __query_cm_ifc)
 #pragma alloc_text (PAGE, __get_relations)
 #pragma alloc_text (PAGE, fdo_query_interface)
 #pragma alloc_text (PAGE_PNP, __fdo_query_power)
@@ -211,16 +220,15 @@ bus_add_device(
 		{
 			BUS_PRINT( BUS_DBG_ERROR, 
 				("Failed to create ControlDeviceObject, status %x.\n",status) );
-			goto bail;
+			goto err1;
 		}
 		IoDeleteSymbolicLink( &dos_name );
 		status = IoCreateSymbolicLink( &dos_name, &dev_name );
 		if( !NT_SUCCESS(status) )
 		{
-			IoDeleteDevice( p_dev_obj );
 			BUS_PRINT( BUS_DBG_ERROR,
 				("Failed to create symlink for dos name.\n") );
-			goto bail;
+			goto err2;
 		}
 	}
 	else {
@@ -231,7 +239,7 @@ bus_add_device(
 		{
 			BUS_PRINT( BUS_DBG_ERROR, 
 				("Failed to create bus root FDO device.\n") );
-			goto bail;
+			goto err1;
 		}
 	}
 
@@ -242,10 +250,9 @@ bus_add_device(
 	p_next_do = IoAttachDeviceToDeviceStack( p_dev_obj, p_pdo );
 	if( !p_next_do )
 	{
-		IoDeleteDevice( p_dev_obj );
 		BUS_PRINT( BUS_DBG_ERROR, ("IoAttachToDeviceStack failed.\n") );
 		status = STATUS_NO_SUCH_DEVICE;
-		goto bail;
+		goto err2;
 	}
 
 	cl_init_pnp_po_ext( p_dev_obj, p_next_do, p_pdo, bus_globals.dbg_lvl,
@@ -265,13 +272,11 @@ bus_add_device(
 										&al_ifc_name );
 	if( !NT_SUCCESS( status ) )
 	{
-		IoDetachDevice( p_ext->cl_ext.p_next_do );
-		IoDeleteDevice( p_dev_obj );
 		BUS_PRINT( BUS_DBG_ERROR, 
 			("IoRegisterDeviceInterface for upper interface returned %08x\n",
 			status) );
 		status = STATUS_NO_SUCH_DEVICE;
-		goto bail;
+		goto err3;
 	}
 
 	/* Register the lower (CI) interface (the one used by HCA VPDs). */
@@ -279,13 +284,22 @@ bus_add_device(
 										&ci_ifc_name );
 	if( !NT_SUCCESS( status ) )
 	{
-		IoDetachDevice( p_ext->cl_ext.p_next_do );
-		IoDeleteDevice( p_dev_obj );
 		BUS_PRINT( BUS_DBG_ERROR, 
 			("IoRegisterDeviceInterface for lower interface returned %08x\n",
 			status) );
 		status = STATUS_NO_SUCH_DEVICE;
-		goto bail;
+		goto err3;
+	}
+
+	status = IoRegisterDeviceInterface( p_pdo, &GUID_INFINIBAND_INTERFACE_CM, NULL,
+										&cm_ifc_name );
+	if( !NT_SUCCESS( status ) )
+	{
+		BUS_PRINT( BUS_DBG_ERROR, 
+			("IoRegisterDeviceInterface for cm interface returned %08x\n",
+			status) );
+		status = STATUS_NO_SUCH_DEVICE;
+		goto err3;
 	}
 
 adxit:
@@ -294,7 +308,11 @@ adxit:
 	BUS_EXIT( BUS_DBG_PNP );
 	return STATUS_SUCCESS;
 
-bail:
+err3:
+	IoDetachDevice( p_ext->cl_ext.p_next_do );
+err2:
+	IoDeleteDevice( p_dev_obj );
+err1:
 	BUS_PRINT( BUS_DBG_PNP, ("%s exit status 0x%x\n", p_bfi->whoami,status) );
 	ic = free_bfi(p_bfi);
 	/* if last Bus filter, then cleanup */
@@ -374,6 +392,9 @@ fdo_start(
 
 		status = IoSetDeviceInterfaceState( &ci_ifc_name, TRUE );
 		ASSERT( NT_SUCCESS( status ) );
+
+		status = IoSetDeviceInterfaceState( &cm_ifc_name, TRUE );
+		ASSERT( NT_SUCCESS( status ) );
 	}
 
 	BUS_PRINT(BUS_DBG_PNP,
@@ -490,8 +511,11 @@ fdo_release_resources(
 	ASSERT( NT_SUCCESS( status ) );
 	status = IoSetDeviceInterfaceState( &ci_ifc_name, FALSE );
 	ASSERT( NT_SUCCESS( status ) );
+	status = IoSetDeviceInterfaceState( &cm_ifc_name, FALSE );
+	ASSERT( NT_SUCCESS( status ) );
 
 	/* Release the memory allocated for the interface symbolic names. */
+	RtlFreeUnicodeString( &cm_ifc_name );
 	RtlFreeUnicodeString( &ci_ifc_name );
 	RtlFreeUnicodeString( &al_ifc_name );
 
@@ -909,6 +933,47 @@ __query_ci_ifc(
 
 
 static NTSTATUS
+__query_cm_ifc(
+	IN					DEVICE_OBJECT* const		p_dev_obj,
+	IN					IO_STACK_LOCATION* const	p_io_stack )
+{	/* Answer a query-interface request for the CM interface by filling in the caller's INFINIBAND_INTERFACE_CM. */
+	INFINIBAND_INTERFACE_CM	*p_ifc;
+
+	BUS_ENTER( BUS_DBG_PNP );
+
+	if( p_io_stack->Parameters.QueryInterface.Version != IbaCmVersion(1, 0) )	/* only version 1.0 is accepted */
+	{
+		BUS_TRACE_EXIT( BUS_DBG_PNP, ("Incorrect interface version (%d)\n",
+			p_io_stack->Parameters.QueryInterface.Version ) );
+		return STATUS_NOT_SUPPORTED;
+	}
+
+	if( p_io_stack->Parameters.QueryInterface.Size < sizeof(INFINIBAND_INTERFACE_CM) )	/* caller's buffer must hold the whole structure */
+	{
+		BUS_TRACE_EXIT( BUS_DBG_PNP, 
+			("Buffer too small (%d given, %d required).\n",
+			p_io_stack->Parameters.QueryInterface.Size, sizeof(INFINIBAND_INTERFACE_CM)) );	/* NOTE(review): sizeof yields size_t but is printed with %d -- confirm on 64-bit builds */
+		return STATUS_BUFFER_TOO_SMALL;
+	}
+
+	/* Copy the interface. */
+	p_ifc = (INFINIBAND_INTERFACE_CM*)p_io_stack->Parameters.QueryInterface.Interface;
+
+	p_ifc->InterfaceHeader.Size = sizeof(INFINIBAND_INTERFACE_CM);
+	p_ifc->InterfaceHeader.Version = IbaCmVersion(1, 0);
+	p_ifc->InterfaceHeader.Context = p_dev_obj;
+	p_ifc->InterfaceHeader.InterfaceReference = al_ref_ifc;
+	p_ifc->InterfaceHeader.InterfaceDereference = al_deref_ifc;
+	cm_get_interface(&p_ifc->CM);	/* presumably fills in the CM entry points; defined outside this view */
+
+	/* take the reference before returning. */
+	al_ref_ifc( p_dev_obj );
+	BUS_EXIT( BUS_DBG_PNP );
+	return STATUS_SUCCESS;
+}
+
+
+static NTSTATUS
 fdo_query_interface(
 	IN					DEVICE_OBJECT* const	p_dev_obj,
 	IN					IRP* const				p_irp, 
@@ -934,6 +999,11 @@ fdo_query_interface(
 	{
 		status = __query_ci_ifc( p_dev_obj, p_io_stack );
 	}
+	else if( IsEqualGUID( p_io_stack->Parameters.QueryInterface.InterfaceType,
+		&GUID_INFINIBAND_INTERFACE_CM ) )
+	{
+		status = __query_cm_ifc( p_dev_obj, p_io_stack );
+	}
 	else
 	{
 		status = p_irp->IoStatus.Status;





More information about the ofw mailing list