[openib-general][PATCH][RFC]: CMA IB implementation
Guy German
guyg at voltaire.com
Mon Sep 19 07:56:39 PDT 2005
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
*
* This Software is licensed under one of the following licenses:
*
* 1) under the terms of the "Common Public License 1.0" a copy of which is
* available from the Open Source Initiative, see
* http://www.opensource.org/licenses/cpl.php.
*
* 2) under the terms of the "The BSD License" a copy of which is
* available from the Open Source Initiative, see
* http://www.opensource.org/licenses/bsd-license.php.
*
* 3) under the terms of the "GNU General Public License (GPL) Version 2" a
* copy of which is available from the Open Source Initiative, see
* http://www.opensource.org/licenses/gpl-license.php.
*
* Licensee has the right to choose one of the above licenses.
*
* Redistributions of source code must retain the above copyright
* notice and one of the license notices.
*
* Redistributions in binary form must reproduce both the above copyright
* notice, one of the license notices in the documentation
* and/or other materials provided with the distribution.
*
*/
#include "ib_cma.h" // move to <rdma/ib_cma.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_at.h>
MODULE_AUTHOR("Guy German");
MODULE_DESCRIPTION("Generic RDMA CM - Infiniband implementation");
MODULE_LICENSE("Dual BSD/GPL");
#define PFX "ib_cma: "
#define CMA_TARGET_MAX 4
#define CMA_INITIATOR_DEPTH 4
#define CMA_RC_RETRY_COUNT 7
#define CMA_RNR_RETRY_COUNT 6
#define CMA_CM_RESPONSE_TIMEOUT 20 /* 4 sec */
#define CMA_MAX_CM_RETRIES 0
enum cma_close_flags {
CMA_CLOSE_ABRUPT = 0,
CMA_CLOSE_GRACEFUL
};
struct accept_callback {
ib_cma_ac_handler func;
void *context;
};
struct listen_callback {
ib_cma_listen_handler func;
void *context;
};
struct cma_context {
struct ib_cm_id *cm_id;
struct ib_cma_conn cma_conn;
struct ib_cm_req_param cma_param;
struct ib_at_ib_route cma_route;
struct ib_sa_path_rec cma_path;
struct ib_at_completion ibat_comp;
struct accept_callback accept_cb;
struct listen_callback listen_cb;
struct cma_context *creq_cma_ctx;
spinlock_t lock;
unsigned long flags;
int in_callback;
int destroy;
};
/* Static functions */
/*
* approximately transforms microseconds to 4.096us*2^x*
* 63(+8) is max return
*/
static inline u8 us_to_cmt(u32 us) {
u32 ms = us, converged = 2;
u8 i;
do_div(ms, 1000UL);
if (ms < 2)
return 8;
for (i = 1; i < 63; i++) {
if (converged >= ms)
break;
converged *= 2;
}
return i+8;
}
static int cma_modify_qp_state(struct ib_cm_id *cm_id, struct ib_qp *qp,
enum ib_qp_state qp_state,
int qp_attr_mask)
{
struct ib_qp_attr qp_attr;
int status = 0;
printk(KERN_DEBUG PFX "%s: enter >>> modify to %d\n",
__func__, qp_state);
if (qp == NULL)
return -EINVAL;
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.qp_state = qp_state;
if (cm_id && !qp_attr_mask)
status = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
if (!status)
status = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
if (status)
printk(KERN_ERR PFX "%s: qp_state=%d status%d\n",
__func__, qp_state, status);
return status;
}
static int destroy_cma_ctx(struct cma_context *cma_ctx)
{
if(!IS_ERR(cma_ctx->cm_id))
ib_destroy_cm_id(cma_ctx->cm_id);
if (cma_ctx->cma_param.private_data)
kfree(cma_ctx->cma_param.private_data);
if (cma_ctx)
kfree(cma_ctx);
cma_ctx = NULL;
return 0;
}
static int cma_disconnect(struct ib_qp *qp,
struct cma_context *cma_ctx,
enum cma_close_flags cflags)
{
int status;
if (cma_ctx == NULL)
goto modqp;
if (cflags == CMA_CLOSE_ABRUPT)
status = destroy_cma_ctx(cma_ctx);
else if (cflags == CMA_CLOSE_GRACEFUL){
status = ib_send_cm_dreq(cma_ctx->cm_id, NULL, 0);
}
modqp:
status = cma_modify_qp_state(0, qp, IB_QPS_ERR, IB_QP_STATE);
return status;
}
void cma_connection_callback(struct cma_context *cma_ctx,
const enum ib_cma_event event,
const void *private_data)
{
ib_cma_event_handler conn_cb;
struct ib_qp *qp = cma_ctx->cma_conn.qp;
int status;
conn_cb = cma_ctx->cma_conn.cma_event_handler;
switch (event) {
case IB_CMA_EVENT_ESTABLISHED:
break;
case IB_CMA_EVENT_DISCONNECTED:
case IB_CMA_EVENT_REJECTED:
case IB_CMA_EVENT_UNREACHABLE:
case IB_CMA_EVENT_NON_PEER_REJECTED:
status = cma_disconnect(qp, cma_ctx, CMA_CLOSE_ABRUPT);
break;
default:
printk(KERN_ERR PFX "%s: unknown event !!\n", __func__);
}
printk(KERN_DEBUG PFX "%s: event=%d\n", __func__, event);
conn_cb(event, cma_ctx->cma_conn.context, private_data);
}
static inline int cma_rep_recv(struct cma_context *cma_ctx,
struct ib_cm_event *rep_cm_event)
{
int status;
status = cma_modify_qp_state(cma_ctx->cm_id, cma_ctx->cma_conn.qp,
IB_QPS_RTR, 0);
if (status) {
printk(KERN_ERR PFX "%s: fail to modify QPS_RTR %d\n",
__func__, status);
return status;
}
status = cma_modify_qp_state(cma_ctx->cm_id, cma_ctx->cma_conn.qp,
IB_QPS_RTS, 0);
if (status) {
printk(KERN_ERR PFX "%s: fail to modify QPS_RTR %d\n",
__func__, status);
return status;
}
status = ib_send_cm_rtu(cma_ctx->cm_id, NULL, 0);
if (status) {
printk(KERN_ERR PFX "%s: fail to send cm rtu %d\n",
__func__, status);
return status;
}
return 0;
}
int cma_active_cb_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
int status = 0;
enum ib_cma_event cma_event = 0;
struct cma_context *cma_ctx = cm_id->context;
printk(KERN_DEBUG PFX "%s: enter >>> cm_id=%p cma_ctx=%p\n",__func__,
cm_id, cma_ctx);
switch (event->event) {
case IB_CM_REQ_ERROR:
cma_event = IB_CMA_EVENT_UNREACHABLE;
break;
case IB_CM_REJ_RECEIVED:
cma_event = IB_CMA_EVENT_NON_PEER_REJECTED;
break;
case IB_CM_DREP_RECEIVED:
case IB_CM_TIMEWAIT_EXIT:
cma_event = IB_CMA_EVENT_DISCONNECTED;
break;
case IB_CM_REP_RECEIVED:
status = cma_rep_recv(cma_ctx, event);
if (!status)
cma_event = IB_CMA_EVENT_ESTABLISHED;
else
cma_event = IB_CMA_EVENT_DISCONNECTED;
break;
case IB_CM_DREQ_RECEIVED:
ib_send_cm_drep(cm_id, NULL, 0);
cma_event = IB_CMA_EVENT_DISCONNECTED;
break;
case IB_CM_DREQ_ERROR:
break;
default:
printk(KERN_WARNING PFX "%s: cm event (%d) not handled\n",
__func__, event->event);
break;
}
printk(KERN_WARNING PFX "%s: cm_event=%d cma_event=%d\n",
__func__, event->event, cma_event);
if (cma_event)
cma_connection_callback(cma_ctx, cma_event,
event->private_data);
return status;
}
static struct cma_context *get_cma_ctx(struct ib_cm_id *cm_id,
struct ib_cm_event *event)
{
struct cma_context *new_cma_ctx;
int status;
if (event->event != IB_CM_REQ_RECEIVED)
return cm_id->context;
if (((struct cma_context *)cm_id->context)->cm_id != cm_id)
printk(KERN_DEBUG PFX "%s: old_cm_id=%p new_cm_id=%p\n",
__func__, ((struct cma_context *)cm_id->context)->cm_id,
cm_id);
new_cma_ctx = kmalloc(sizeof *new_cma_ctx, GFP_KERNEL);
if (!new_cma_ctx) {
status = ib_send_cm_rej(cm_id,
IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return NULL;
}
memset(new_cma_ctx, 0, sizeof *new_cma_ctx);
new_cma_ctx->cm_id = cm_id;
new_cma_ctx->creq_cma_ctx = cm_id->context;
cm_id->context = new_cma_ctx;
return new_cma_ctx;
}
static int cma_passive_cb_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event)
{
struct cma_context *cma_ctx;
ib_cma_listen_handler crcb;
ib_cma_ac_handler accb;
void *cr_ctx, *ac_ctx;
int status = 0;
printk(KERN_DEBUG PFX "%s: enter >>> cm_id=%p\n",__func__, cm_id);
cma_ctx = get_cma_ctx(cm_id, event);
if (!cma_ctx)
return -EINVAL;
accb = cma_ctx->accept_cb.func;
ac_ctx = cma_ctx->accept_cb.context;
switch (event->event) {
case IB_CM_REQ_RECEIVED:
crcb = cma_ctx->creq_cma_ctx->listen_cb.func;
cr_ctx = cma_ctx->creq_cma_ctx->listen_cb.context;
memcpy(&cma_ctx->cma_path,
((struct ib_cm_req_event_param *)
&event->param)->primary_path,
sizeof cma_ctx->cma_path);
crcb(cma_ctx, cm_id->device, event->private_data, cr_ctx);
break;
case IB_CM_REP_ERROR:
accb(IB_CMA_EVENT_UNREACHABLE, ac_ctx);
break;
case IB_CM_REJ_RECEIVED:
accb(IB_CMA_EVENT_REJECTED, ac_ctx);
break;
case IB_CM_RTU_RECEIVED:
status = cma_modify_qp_state(cma_ctx->cm_id,
cma_ctx->cma_conn.qp,
IB_QPS_RTS, 0);
if (!status)
accb(IB_CMA_EVENT_ESTABLISHED, ac_ctx);
else {
accb(IB_CMA_EVENT_DISCONNECTED, ac_ctx);
status = cma_disconnect(cma_ctx->cma_conn.qp, cma_ctx,
CMA_CLOSE_ABRUPT);
}
break;
case IB_CM_DREQ_RECEIVED:
ib_send_cm_drep(cm_id, NULL, 0);
break;
case IB_CM_DREQ_ERROR:
break;
case IB_CM_DREP_RECEIVED:
case IB_CM_TIMEWAIT_EXIT:
accb(IB_CMA_EVENT_DISCONNECTED, ac_ctx);
status = cma_disconnect(cma_ctx->cma_conn.qp, cma_ctx,
CMA_CLOSE_ABRUPT);
break;
default:
break;
}
destroy_cma_ctx(cma_ctx);
return status;
}
static void cma_path_handler(u64 req_id, void *context, int rec_num)
{
struct cma_context *cma_ctx = context;
enum ib_cma_event event;
int status = 0;
if (!cma_ctx) {
printk(KERN_ERR PFX "%s: context received null\n",__func__);
return;
}
if (rec_num <= 0) {
event = IB_CMA_EVENT_UNREACHABLE;
goto error;
}
cma_ctx->cma_param.primary_path = &cma_ctx->cma_path;
cma_ctx->cma_param.alternate_path = NULL;
printk(KERN_DEBUG PFX "%s: dlid=%d slid=%d pkey=%d mtu=%d sid=%llx "
"qpn=%d qpt=%d psn=%d prd=%s respres=%d rcm=%d flc=%d "
"cmt=%d rtrc=%d rntrtr=%d maxcm=%d \n",__func__,
cma_ctx->cma_param.primary_path->dlid ,
cma_ctx->cma_param.primary_path->slid ,
cma_ctx->cma_param.primary_path->pkey ,
cma_ctx->cma_param.primary_path->mtu ,
cma_ctx->cma_param.service_id,
cma_ctx->cma_param.qp_num,
cma_ctx->cma_param.qp_type,
cma_ctx->cma_param.starting_psn,
(char *)cma_ctx->cma_param.private_data,
cma_ctx->cma_param.responder_resources,
cma_ctx->cma_param.remote_cm_response_timeout,
cma_ctx->cma_param.flow_control,
cma_ctx->cma_param.local_cm_response_timeout,
cma_ctx->cma_param.retry_count,
cma_ctx->cma_param.rnr_retry_count,
cma_ctx->cma_param.max_cm_retries);
status = ib_send_cm_req(cma_ctx->cm_id, &cma_ctx->cma_param);
if (status) {
printk(KERN_ERR PFX "%s: cm_req failed %d\n",__func__, status);
event = IB_CMA_EVENT_REJECTED;
goto error;
}
return;
error:
printk(KERN_ERR PFX "%s: return error %d \n",__func__, status);
cma_connection_callback(cma_ctx, event, NULL);
}
static void cma_route_handler(u64 req_id, void *context, int rec_num)
{
struct cma_context *cma_ctx = context;
enum ib_cma_event event;
int status = 0;
if (rec_num <= 0) {
event = IB_CMA_EVENT_UNREACHABLE;
goto error;
}
cma_ctx->ibat_comp.fn = &cma_path_handler;
cma_ctx->ibat_comp.context = cma_ctx;
status = ib_at_paths_by_route(&cma_ctx->cma_route, 0,
&cma_ctx->cma_path, 1,
&cma_ctx->ibat_comp);
if (status) {
event = IB_CMA_EVENT_DISCONNECTED;
goto error;
}
return;
error:
printk(KERN_ERR PFX "%s: return error %d \n",__func__, status);
cma_connection_callback(cma_ctx, event ,NULL);
}
/* API functions */
int ib_cma_create_qp(struct ib_pd *pd, u8 port, struct ib_qp **qp_in,
struct ib_qp_init_attr *init_attr)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask;
struct ib_qp *qp;
qp = ib_create_qp(pd, init_attr);
if (IS_ERR(qp))
return IS_ERR(qp);
*qp_in = qp;
printk(KERN_DEBUG PFX "%s: qp created (%p)\n",__func__, qp);
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX | IB_QP_PORT;
qp_attr.qp_access_flags = IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
qp_attr.qp_state = IB_QPS_INIT;
qp_attr.pkey_index = 0;
qp_attr.port_num = port;
printk(KERN_DEBUG PFX "%s: call ib_modify_qp (qp_attr_mask=%x)\n",
__func__, qp_attr_mask);
return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
};
EXPORT_SYMBOL(ib_cma_create_qp);
int ib_cma_connect(struct ib_cma_conn *cma_conn, void **cma_id)
{
struct cma_context *cma_ctx;
int status;
u32 timeout;
u32 dst_ip;
dst_ip = (((struct sockaddr_in *)(cma_conn->dst_ip))->sin_addr).s_addr;
printk(KERN_DEBUG PFX "%s: enter >>> dst_ip=%d.%d.%d.%d\n",__func__,
(dst_ip & 0x000000ff),
(dst_ip & 0x0000ff00) >> 8,
(dst_ip & 0x00ff0000) >> 16,
(dst_ip & 0xff000000) >> 24);
cma_ctx = kmalloc(sizeof *cma_ctx, GFP_KERNEL);
if (!cma_ctx)
return -ENOMEM;
memset(cma_ctx, 0, sizeof *cma_ctx);
timeout = us_to_cmt(cma_conn->qp_attr->timeout);
cma_ctx->cm_id = ib_create_cm_id(cma_conn->device,
cma_active_cb_handler,
(void *)cma_ctx);
if (IS_ERR(cma_ctx->cm_id)) {
printk(KERN_ERR PFX "%s: cm_id creation failed\n", __func__);
destroy_cma_ctx(cma_ctx);
return -EAGAIN;
}
else
printk(KERN_DEBUG PFX "%s: cm_id created %p\n", __func__,
cma_ctx->cm_id);
printk(KERN_DEBUG PFX "%s: cma_event_handler=%p\n", __func__,
cma_conn->cma_event_handler );
memcpy(&cma_ctx->cma_conn, cma_conn, sizeof *cma_conn);
cma_ctx->cma_param.service_id = cma_conn->service_id;
cma_ctx->cma_param.qp_num = cma_conn->qp->qp_num;
cma_ctx->cma_param.qp_type = IB_QPT_RC;
cma_ctx->cma_param.private_data = kmalloc(cma_conn->private_data_len,
GFP_KERNEL);
memcpy((u8 *)cma_ctx->cma_param.private_data,
(u8 *)cma_conn->private_data, cma_conn->private_data_len);
cma_ctx->cma_param.private_data_len = cma_conn->private_data_len;
cma_ctx->cma_param.responder_resources = CMA_TARGET_MAX;
cma_ctx->cma_param.initiator_depth = CMA_INITIATOR_DEPTH;
cma_ctx->cma_param.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; //timeout;
cma_ctx->cma_param.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
cma_ctx->cma_param.retry_count = CMA_RC_RETRY_COUNT;
cma_ctx->cma_param.rnr_retry_count = CMA_RNR_RETRY_COUNT;
cma_ctx->cma_param.max_cm_retries = CMA_MAX_CM_RETRIES;
cma_ctx->ibat_comp.fn = &cma_route_handler;
cma_ctx->ibat_comp.context = cma_ctx;
status = ib_at_route_by_ip(dst_ip, 0, 0, 0, &cma_ctx->cma_route,
&cma_ctx->ibat_comp);
if (status < 0) {
printk(KERN_ERR PFX " ib_at_route_by_ip failed (%d)\n",
status);
destroy_cma_ctx(cma_ctx);
return -EAGAIN;
}
if (status == 1) {
printk(KERN_DEBUG PFX "%s: immidiate route - call "
"route_handler\n",__func__);
cma_route_handler(cma_ctx->ibat_comp.req_id, cma_ctx, 1);
}
*cma_id = (void *)cma_ctx;
return 0;
};
EXPORT_SYMBOL(ib_cma_connect);
int ib_cma_disconnect(struct ib_qp *qp, void *cma_id)
{
struct cma_context *cma_ctx = cma_id;
int status;
status = cma_disconnect(qp, cma_ctx, CMA_CLOSE_ABRUPT);
return status;
};
EXPORT_SYMBOL(ib_cma_disconnect);
int ib_cma_listen(struct ib_device *device, struct sockaddr *address,
__be64 service_id, void *context,
ib_cma_listen_handler cm_listen_handler,
void **cma_id)
{
struct cma_context *cma_ctx;
int status;
printk(KERN_DEBUG PFX "%s: enter >> \n",__func__);
cma_ctx = kmalloc(sizeof *cma_ctx, GFP_KERNEL);
if (!cma_ctx)
return -ENOMEM;
memset(cma_ctx, 0, sizeof *cma_ctx);
cma_ctx->listen_cb.func = cm_listen_handler;
cma_ctx->listen_cb.context = context;
cma_ctx->cm_id = ib_create_cm_id(device, cma_passive_cb_handler,
(void *)cma_ctx);
if (IS_ERR(cma_ctx->cm_id)) {
printk(KERN_ERR PFX "%s: cm_id creation failed\n", __func__);
destroy_cma_ctx(cma_ctx);
return -EAGAIN;
}
else
printk(KERN_DEBUG PFX "%s: cm_id created %p\n", __func__,
cma_ctx->cm_id);
/* `address` is ignored at the moment ... */
status = ib_cm_listen(cma_ctx->cm_id, service_id, 0);
if (status) {
printk(KERN_ERR PFX "%s: cm_listen failed %d\n", __func__,
status);
destroy_cma_ctx(cma_ctx);
return status;
}
printk(KERN_INFO PFX "%s:cm_id=%p cma_id=%p\n", __func__,
cma_ctx->cm_id, cma_ctx);
*cma_id = (void *)cma_ctx;
return 0;
};
EXPORT_SYMBOL(ib_cma_listen);
int ib_cma_destroy(void *cma_id)
{
return destroy_cma_ctx((struct cma_context *)cma_id);
};
EXPORT_SYMBOL(ib_cma_destroy);
int ib_cma_accept(void *cma_id, struct ib_qp *qp,
const void *private_data, u8 private_data_len,
void *context, ib_cma_ac_handler cm_accept_handler)
{
struct cma_context *cma_ctx = cma_id;
struct ib_cm_rep_param passive_params;
int status;
printk(KERN_DEBUG PFX "%s: enter >> private_data = %s (len=%d)\n",
__func__, (char *)private_data, private_data_len);
if (private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) {
status = -EINVAL;
goto reject;
}
memset(&passive_params, 0, sizeof passive_params);
passive_params.private_data = private_data;
passive_params.private_data_len = private_data_len;
passive_params.qp_num = qp->qp_num;
passive_params.responder_resources = CMA_TARGET_MAX;
passive_params.initiator_depth = CMA_INITIATOR_DEPTH;
passive_params.rnr_retry_count = CMA_RNR_RETRY_COUNT;
status = cma_modify_qp_state(cma_ctx->cm_id, qp, IB_QPS_RTR, 0);
if (status)
goto reject;
cma_ctx->accept_cb.func = cm_accept_handler;
cma_ctx->accept_cb.context = context;
status = ib_send_cm_rep(cma_ctx->cm_id, &passive_params);
if (status)
goto reject;
printk(KERN_DEBUG PFX "%s: return success\n", __func__);
return 0;
reject:
printk(KERN_ERR PFX "%s: error status %d\n", __func__, status);
ib_send_cm_rej(cma_ctx->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
destroy_cma_ctx(cma_ctx);
return status;
};
EXPORT_SYMBOL(ib_cma_accept);
int ib_cma_reject(void *cma_id, const void *private_data,
u8 private_data_len)
{
struct cma_context *cma_ctx = cma_id;
int status;
if (cma_ctx == NULL)
return -EINVAL;
status = ib_send_cm_rej(cma_ctx->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, private_data, private_data_len);
destroy_cma_ctx(cma_ctx);
return status;
};
EXPORT_SYMBOL(ib_cma_reject);
int ib_cma_get_device(struct sockaddr *remote_address,
enum ib_qos qos, struct ib_device **device, u8 *port)
{
return 0;
};
EXPORT_SYMBOL(ib_cma_get_device);
int ib_cma_get_src_ip(void *cma_id, ib_cma_addr_handler addr_handler,
void *context)
{
return 0;
};
EXPORT_SYMBOL(ib_cma_get_src_ip);
static int cma_init(void)
{
printk(KERN_WARNING PFX "START generic CM module\n");
return 0;
}
static void cma_cleanup(void)
{
printk(KERN_WARNING PFX "EXIT generic CM module\n");
}
module_init(cma_init);
module_exit(cma_cleanup);
More information about the general
mailing list