[openib-general][PATCH][RFC]: CMA IB implementation
Sean Hefty
mshefty at ichips.intel.com
Mon Sep 19 10:59:16 PDT 2005
Guy German wrote:
> #define CMA_TARGET_MAX 4
> #define CMA_INITIATOR_DEPTH 4
> #define CMA_RC_RETRY_COUNT 7
> #define CMA_RNR_RETRY_COUNT 6
> #define CMA_CM_RESPONSE_TIMEOUT 20 /* 4 sec */
> #define CMA_MAX_CM_RETRIES 0
Are these values hard-coded just for the initial implementation? How would
these change?
> enum cma_close_flags {
> CMA_CLOSE_ABRUPT = 0,
> CMA_CLOSE_GRACEFUL
> };
Not sure what these are for. Why not have the user always destroy the cma_id?
If it hasn't yet been destroyed when a disconnect comes in, callback the user.
If a connection hasn't been disconnected when it is destroyed, automatically
send a disconnect message.
> struct accept_callback {
> ib_cma_ac_handler func;
> void *context;
> };
>
> struct listen_callback {
> ib_cma_listen_handler func;
> void *context;
> };
These could be eliminated if we just associated a context with a cma_id and left
it at that. I think we're asking for race conditions if we try to update the
context with every callback.
> static int cma_modify_qp_state(struct ib_cm_id *cm_id, struct ib_qp *qp,
> enum ib_qp_state qp_state,
> int qp_attr_mask)
> {
> struct ib_qp_attr qp_attr;
> int status = 0;
> printk(KERN_DEBUG PFX "%s: enter >>> modify to %d\n",
> __func__, qp_state);
>
> if (qp == NULL)
> return -EINVAL;
We shouldn't need checks like this in the kernel.
> memset(&qp_attr, 0, sizeof qp_attr);
> qp_attr.qp_state = qp_state;
>
> if (cm_id && !qp_attr_mask)
Or this check...
> static int destroy_cma_ctx(struct cma_context *cma_ctx)
> {
> if(!IS_ERR(cma_ctx->cm_id))
> ib_destroy_cm_id(cma_ctx->cm_id);
> if (cma_ctx->cma_param.private_data)
> kfree(cma_ctx->cma_param.private_data);
Is this the outbound private data or inbound? Why not tie the private data to
an event and avoid storing it with the cma_ctx?
> if (cma_ctx)
> kfree(cma_ctx);
>
> cma_ctx = NULL;
Is there a reason to set this to NULL?
> return 0;
> }
> static int cma_disconnect(struct ib_qp *qp,
> struct cma_context *cma_ctx,
> enum cma_close_flags cflags)
> {
> int status;
>
> if (cma_ctx == NULL)
> goto modqp;
We shouldn't need this check.
> if (cflags == CMA_CLOSE_ABRUPT)
> status = destroy_cma_ctx(cma_ctx);
Why would this call fail? What would the user do if it does?
> else if (cflags == CMA_CLOSE_GRACEFUL){
> status = ib_send_cm_dreq(cma_ctx->cm_id, NULL, 0);
> }
See comments above. Eliminate the GRACEFUL/ABRUPT flags and just let the user
either issue the disconnect or just destroy the cma_ctx.
> modqp:
> status = cma_modify_qp_state(0, qp, IB_QPS_ERR, IB_QP_STATE);
> return status;
> }
>
> void cma_connection_callback(struct cma_context *cma_ctx,
> const enum ib_cma_event event,
> const void *private_data)
> {
> ib_cma_event_handler conn_cb;
> struct ib_qp *qp = cma_ctx->cma_conn.qp;
> int status;
>
> conn_cb = cma_ctx->cma_conn.cma_event_handler;
>
> switch (event) {
> case IB_CMA_EVENT_ESTABLISHED:
> break;
> case IB_CMA_EVENT_DISCONNECTED:
> case IB_CMA_EVENT_REJECTED:
> case IB_CMA_EVENT_UNREACHABLE:
> case IB_CMA_EVENT_NON_PEER_REJECTED:
> status = cma_disconnect(qp, cma_ctx, CMA_CLOSE_ABRUPT);
This is destroying the cma_ctx without the user knowing it. The dereference to
cma_ctx below will crash. We shouldn't take any action on behalf of the user.
Simply report the error and let the user destroy the cma_id.
> break;
> default:
> printk(KERN_ERR PFX "%s: unknown event !!\n", __func__);
> }
>
> printk(KERN_DEBUG PFX "%s: event=%d\n", __func__, event);
>
> conn_cb(event, cma_ctx->cma_conn.context, private_data);
> }
>
> static inline int cma_rep_recv(struct cma_context *cma_ctx,
> struct ib_cm_event *rep_cm_event)
> {
> int status;
>
> status = cma_modify_qp_state(cma_ctx->cm_id, cma_ctx->cma_conn.qp,
> IB_QPS_RTR, 0);
> if (status) {
> printk(KERN_ERR PFX "%s: fail to modify QPS_RTR %d\n",
> __func__, status);
> return status;
> }
>
> status = cma_modify_qp_state(cma_ctx->cm_id, cma_ctx->cma_conn.qp,
> IB_QPS_RTS, 0);
> if (status) {
> printk(KERN_ERR PFX "%s: fail to modify QPS_RTR %d\n",
> __func__, status);
> return status;
> }
>
> status = ib_send_cm_rtu(cma_ctx->cm_id, NULL, 0);
> if (status) {
> printk(KERN_ERR PFX "%s: fail to send cm rtu %d\n",
> __func__, status);
> return status;
> }
>
> return 0;
> }
>
> int cma_active_cb_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
> {
> int status = 0;
> enum ib_cma_event cma_event = 0;
> struct cma_context *cma_ctx = cm_id->context;
>
> printk(KERN_DEBUG PFX "%s: enter >>> cm_id=%p cma_ctx=%p\n",__func__,
> cm_id, cma_ctx);
>
> switch (event->event) {
> case IB_CM_REQ_ERROR:
> cma_event = IB_CMA_EVENT_UNREACHABLE;
> break;
> case IB_CM_REJ_RECEIVED:
> cma_event = IB_CMA_EVENT_NON_PEER_REJECTED;
> break;
> case IB_CM_DREP_RECEIVED:
> case IB_CM_TIMEWAIT_EXIT:
> cma_event = IB_CMA_EVENT_DISCONNECTED;
> break;
> case IB_CM_REP_RECEIVED:
> status = cma_rep_recv(cma_ctx, event);
> if (!status)
> cma_event = IB_CMA_EVENT_ESTABLISHED;
> else
> cma_event = IB_CMA_EVENT_DISCONNECTED;
> break;
> case IB_CM_DREQ_RECEIVED:
> ib_send_cm_drep(cm_id, NULL, 0);
> cma_event = IB_CMA_EVENT_DISCONNECTED;
> break;
> case IB_CM_DREQ_ERROR:
> break;
> default:
> printk(KERN_WARNING PFX "%s: cm event (%d) not handled\n",
> __func__, event->event);
> break;
> }
>
> printk(KERN_WARNING PFX "%s: cm_event=%d cma_event=%d\n",
> __func__, event->event, cma_event);
>
> if (cma_event)
This check isn't needed.
> cma_connection_callback(cma_ctx, cma_event,
> event->private_data);
>
> return status;
Returning non-zero will destroy the underlying cm_id. We can avoid some
synchronization by letting it exist until the user destroys the corresponding
cma_id. Otherwise, there's the potential of the user trying to destroy it
twice. Once from the cma_connection_callback reporting an error, and then again
here.
> }
>
> static struct cma_context *get_cma_ctx(struct ib_cm_id *cm_id,
> struct ib_cm_event *event)
> {
> struct cma_context *new_cma_ctx;
> int status;
>
>
> if (event->event != IB_CM_REQ_RECEIVED)
> return cm_id->context;
>
> if (((struct cma_context *)cm_id->context)->cm_id != cm_id)
> printk(KERN_DEBUG PFX "%s: old_cm_id=%p new_cm_id=%p\n",
> __func__, ((struct cma_context *)cm_id->context)->cm_id,
> cm_id);
This check shouldn't be needed.
> new_cma_ctx = kmalloc(sizeof *new_cma_ctx, GFP_KERNEL);
> if (!new_cma_ctx) {
> status = ib_send_cm_rej(cm_id,
> IB_CM_REJ_CONSUMER_DEFINED,
> NULL, 0, NULL, 0);
> return NULL;
> }
>
> memset(new_cma_ctx, 0, sizeof *new_cma_ctx);
> new_cma_ctx->cm_id = cm_id;
> new_cma_ctx->creq_cma_ctx = cm_id->context;
> cm_id->context = new_cma_ctx;
>
> return new_cma_ctx;
> }
>
> static void cma_path_handler(u64 req_id, void *context, int rec_num)
> {
> struct cma_context *cma_ctx = context;
> enum ib_cma_event event;
> int status = 0;
>
> if (!cma_ctx) {
This check isn't needed.
> printk(KERN_ERR PFX "%s: context received null\n",__func__);
> return;
> }
>
> if (rec_num <= 0) {
> event = IB_CMA_EVENT_UNREACHABLE;
> goto error;
> }
>
> cma_ctx->cma_param.primary_path = &cma_ctx->cma_path;
> cma_ctx->cma_param.alternate_path = NULL;
>
> printk(KERN_DEBUG PFX "%s: dlid=%d slid=%d pkey=%d mtu=%d sid=%llx "
> "qpn=%d qpt=%d psn=%d prd=%s respres=%d rcm=%d flc=%d "
> "cmt=%d rtrc=%d rntrtr=%d maxcm=%d \n",__func__,
> cma_ctx->cma_param.primary_path->dlid ,
> cma_ctx->cma_param.primary_path->slid ,
> cma_ctx->cma_param.primary_path->pkey ,
> cma_ctx->cma_param.primary_path->mtu ,
> cma_ctx->cma_param.service_id,
> cma_ctx->cma_param.qp_num,
> cma_ctx->cma_param.qp_type,
> cma_ctx->cma_param.starting_psn,
> (char *)cma_ctx->cma_param.private_data,
> cma_ctx->cma_param.responder_resources,
> cma_ctx->cma_param.remote_cm_response_timeout,
> cma_ctx->cma_param.flow_control,
> cma_ctx->cma_param.local_cm_response_timeout,
> cma_ctx->cma_param.retry_count,
> cma_ctx->cma_param.rnr_retry_count,
> cma_ctx->cma_param.max_cm_retries);
>
> status = ib_send_cm_req(cma_ctx->cm_id, &cma_ctx->cma_param);
> if (status) {
> printk(KERN_ERR PFX "%s: cm_req failed %d\n",__func__, status);
> event = IB_CMA_EVENT_REJECTED;
> goto error;
> }
>
> return;
>
> error:
> printk(KERN_ERR PFX "%s: return error %d \n",__func__, status);
> cma_connection_callback(cma_ctx, event, NULL);
> }
>
> static void cma_route_handler(u64 req_id, void *context, int rec_num)
> {
> struct cma_context *cma_ctx = context;
> enum ib_cma_event event;
> int status = 0;
>
> if (rec_num <= 0) {
> event = IB_CMA_EVENT_UNREACHABLE;
> goto error;
> }
> cma_ctx->ibat_comp.fn = &cma_path_handler;
> cma_ctx->ibat_comp.context = cma_ctx;
>
> status = ib_at_paths_by_route(&cma_ctx->cma_route, 0,
> &cma_ctx->cma_path, 1,
> &cma_ctx->ibat_comp);
>
> if (status) {
> event = IB_CMA_EVENT_DISCONNECTED;
> goto error;
> }
> return;
>
> error:
> printk(KERN_ERR PFX "%s: return error %d \n",__func__, status);
> cma_connection_callback(cma_ctx, event ,NULL);
> }
>
> /* API functions */
>
> int ib_cma_create_qp(struct ib_pd *pd, u8 port, struct ib_qp **qp_in,
> struct ib_qp_init_attr *init_attr)
> {
Why not return struct ib_qp* similar to how the other APIs operate?
> struct ib_qp_attr qp_attr;
> int qp_attr_mask;
> struct ib_qp *qp;
>
> qp = ib_create_qp(pd, init_attr);
> if (IS_ERR(qp))
> return IS_ERR(qp);
> *qp_in = qp;
> printk(KERN_DEBUG PFX "%s: qp created (%p)\n",__func__, qp);
> memset(&qp_attr, 0, sizeof qp_attr);
>
> qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
> IB_QP_PKEY_INDEX | IB_QP_PORT;
>
> qp_attr.qp_access_flags = IB_ACCESS_REMOTE_READ |
> IB_ACCESS_REMOTE_WRITE;
> qp_attr.qp_state = IB_QPS_INIT;
> qp_attr.pkey_index = 0;
> qp_attr.port_num = port;
> printk(KERN_DEBUG PFX "%s: call ib_modify_qp (qp_attr_mask=%x)\n",
> __func__, qp_attr_mask);
>
> return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
If modify fails, the QP should be destroyed.
> };
> EXPORT_SYMBOL(ib_cma_create_qp);
>
> int ib_cma_connect(struct ib_cma_conn *cma_conn, void **cma_id)
> {
> struct cma_context *cma_ctx;
> int status;
> u32 timeout;
> u32 dst_ip;
> dst_ip = (((struct sockaddr_in *)(cma_conn->dst_ip))->sin_addr).s_addr;
>
> printk(KERN_DEBUG PFX "%s: enter >>> dst_ip=%d.%d.%d.%d\n",__func__,
> (dst_ip & 0x000000ff),
> (dst_ip & 0x0000ff00) >> 8,
> (dst_ip & 0x00ff0000) >> 16,
> (dst_ip & 0xff000000) >> 24);
> cma_ctx = kmalloc(sizeof *cma_ctx, GFP_KERNEL);
> if (!cma_ctx)
> return -ENOMEM;
> memset(cma_ctx, 0, sizeof *cma_ctx);
>
> timeout = us_to_cmt(cma_conn->qp_attr->timeout);
>
> cma_ctx->cm_id = ib_create_cm_id(cma_conn->device,
> cma_active_cb_handler,
> (void *)cma_ctx);
> if (IS_ERR(cma_ctx->cm_id)) {
> printk(KERN_ERR PFX "%s: cm_id creation failed\n", __func__);
> destroy_cma_ctx(cma_ctx);
> return -EAGAIN;
> }
> else
> printk(KERN_DEBUG PFX "%s: cm_id created %p\n", __func__,
> cma_ctx->cm_id);
>
> printk(KERN_DEBUG PFX "%s: cma_event_handler=%p\n", __func__,
> cma_conn->cma_event_handler );
> memcpy(&cma_ctx->cma_conn, cma_conn, sizeof *cma_conn);
> cma_ctx->cma_param.service_id = cma_conn->service_id;
> cma_ctx->cma_param.qp_num = cma_conn->qp->qp_num;
> cma_ctx->cma_param.qp_type = IB_QPT_RC;
> cma_ctx->cma_param.private_data = kmalloc(cma_conn->private_data_len,
> GFP_KERNEL);
> memcpy((u8 *)cma_ctx->cma_param.private_data,
> (u8 *)cma_conn->private_data, cma_conn->private_data_len);
> cma_ctx->cma_param.private_data_len = cma_conn->private_data_len;
> cma_ctx->cma_param.responder_resources = CMA_TARGET_MAX;
> cma_ctx->cma_param.initiator_depth = CMA_INITIATOR_DEPTH;
> cma_ctx->cma_param.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; //timeout;
> cma_ctx->cma_param.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
> cma_ctx->cma_param.retry_count = CMA_RC_RETRY_COUNT;
> cma_ctx->cma_param.rnr_retry_count = CMA_RNR_RETRY_COUNT;
> cma_ctx->cma_param.max_cm_retries = CMA_MAX_CM_RETRIES;
> cma_ctx->ibat_comp.fn = &cma_route_handler;
> cma_ctx->ibat_comp.context = cma_ctx;
>
> status = ib_at_route_by_ip(dst_ip, 0, 0, 0, &cma_ctx->cma_route,
> &cma_ctx->ibat_comp);
> if (status < 0) {
> printk(KERN_ERR PFX " ib_at_route_by_ip failed (%d)\n",
> status);
> destroy_cma_ctx(cma_ctx);
> return -EAGAIN;
> }
>
> if (status == 1) {
> printk(KERN_DEBUG PFX "%s: immidiate route - call "
> "route_handler\n",__func__);
> cma_route_handler(cma_ctx->ibat_comp.req_id, cma_ctx, 1);
> }
> *cma_id = (void *)cma_ctx;
> return 0;
> };
> EXPORT_SYMBOL(ib_cma_connect);
>
> int ib_cma_disconnect(struct ib_qp *qp, void *cma_id)
> {
> struct cma_context *cma_ctx = cma_id;
> int status;
>
> status = cma_disconnect(qp, cma_ctx, CMA_CLOSE_ABRUPT);
>
> return status;
> };
> EXPORT_SYMBOL(ib_cma_disconnect);
>
> int ib_cma_listen(struct ib_device *device, struct sockaddr *address,
> __be64 service_id, void *context,
> ib_cma_listen_handler cm_listen_handler,
> void **cma_id)
> {
> struct cma_context *cma_ctx;
> int status;
>
> printk(KERN_DEBUG PFX "%s: enter >> \n",__func__);
> cma_ctx = kmalloc(sizeof *cma_ctx, GFP_KERNEL);
> if (!cma_ctx)
> return -ENOMEM;
> memset(cma_ctx, 0, sizeof *cma_ctx);
> cma_ctx->listen_cb.func = cm_listen_handler;
> cma_ctx->listen_cb.context = context;
> cma_ctx->cm_id = ib_create_cm_id(device, cma_passive_cb_handler,
> (void *)cma_ctx);
> if (IS_ERR(cma_ctx->cm_id)) {
> printk(KERN_ERR PFX "%s: cm_id creation failed\n", __func__);
> destroy_cma_ctx(cma_ctx);
> return -EAGAIN;
> }
> else
> printk(KERN_DEBUG PFX "%s: cm_id created %p\n", __func__,
> cma_ctx->cm_id);
> /* `address` is ignored at the moment ... */
> status = ib_cm_listen(cma_ctx->cm_id, service_id, 0);
> if (status) {
> printk(KERN_ERR PFX "%s: cm_listen failed %d\n", __func__,
> status);
> destroy_cma_ctx(cma_ctx);
> return status;
> }
> printk(KERN_INFO PFX "%s:cm_id=%p cma_id=%p\n", __func__,
> cma_ctx->cm_id, cma_ctx);
>
> *cma_id = (void *)cma_ctx;
> return 0;
> };
> EXPORT_SYMBOL(ib_cma_listen);
>
> int ib_cma_destroy(void *cma_id)
> {
> return destroy_cma_ctx((struct cma_context *)cma_id);
> };
> EXPORT_SYMBOL(ib_cma_destroy);
>
> int ib_cma_accept(void *cma_id, struct ib_qp *qp,
> const void *private_data, u8 private_data_len,
> void *context, ib_cma_ac_handler cm_accept_handler)
> {
> struct cma_context *cma_ctx = cma_id;
> struct ib_cm_rep_param passive_params;
> int status;
>
> printk(KERN_DEBUG PFX "%s: enter >> private_data = %s (len=%d)\n",
> __func__, (char *)private_data, private_data_len);
>
> if (private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) {
> status = -EINVAL;
> goto reject;
> }
>
> memset(&passive_params, 0, sizeof passive_params);
> passive_params.private_data = private_data;
> passive_params.private_data_len = private_data_len;
> passive_params.qp_num = qp->qp_num;
> passive_params.responder_resources = CMA_TARGET_MAX;
> passive_params.initiator_depth = CMA_INITIATOR_DEPTH;
> passive_params.rnr_retry_count = CMA_RNR_RETRY_COUNT;
>
> status = cma_modify_qp_state(cma_ctx->cm_id, qp, IB_QPS_RTR, 0);
> if (status)
> goto reject;
>
> cma_ctx->accept_cb.func = cm_accept_handler;
> cma_ctx->accept_cb.context = context;
> status = ib_send_cm_rep(cma_ctx->cm_id, &passive_params);
> if (status)
> goto reject;
>
> printk(KERN_DEBUG PFX "%s: return success\n", __func__);
> return 0;
>
> reject:
> printk(KERN_ERR PFX "%s: error status %d\n", __func__, status);
> ib_send_cm_rej(cma_ctx->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
> NULL, 0, NULL, 0);
> destroy_cma_ctx(cma_ctx);
> return status;
> };
> EXPORT_SYMBOL(ib_cma_accept);
>
> int ib_cma_reject(void *cma_id, const void *private_data,
> u8 private_data_len)
> {
> struct cma_context *cma_ctx = cma_id;
> int status;
>
> if (cma_ctx == NULL)
This check isn't needed.
> return -EINVAL;
>
> status = ib_send_cm_rej(cma_ctx->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
> NULL, 0, private_data, private_data_len);
> destroy_cma_ctx(cma_ctx);
>
> return status;
> };
> EXPORT_SYMBOL(ib_cma_reject);
As a general statement, can we reduce the number of debug print statements?
- Sean
More information about the general
mailing list