[openib-general] [PATCH 3/3] iWARP CM

Tom Tucker tom at opengridcomputing.com
Sat Mar 11 16:30:16 PST 2006


On Fri, 2006-03-10 at 16:03 -0800, Sean Hefty wrote:
> > +static struct {
> > +	struct workqueue_struct *wq;
> > +} iwcm;
> 
> Not sure why there's a wrapper around just a workqueue.  Were you able to test 
> with using the rdma_wq exposed through ib_addr.h?

This is an oversight. I'll move to the rdma_wq. 

> 
> > +struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
> > +				 iw_cm_handler cm_handler,
> > +				 void *context)
> > +{
> {snip}
> > +	return &iwcm_id_priv->id;
> > +
> > +}
> 
> nit: extra blank line after return.
> 
> > +void iw_destroy_cm_id(struct iw_cm_id *cm_id)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +
> > +	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +
> > +	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> > +	switch (cm_id->state) {
> > +	case IW_CM_STATE_LISTEN:
> > +		cm_id->state = IW_CM_STATE_IDLE;
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->destroy_listen(cm_id);
> > +		break;
> > +	case IW_CM_STATE_CONN_RECV:
> > +	case IW_CM_STATE_CONN_SENT:
> > +	case IW_CM_STATE_ESTABLISHED:
> > +		cm_id->state = IW_CM_STATE_IDLE;
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->disconnect(cm_id,1);
> > +		break;
> > +	case IW_CM_STATE_IDLE:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		break;
> > +	default:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		printk(KERN_ERR "%s:%s:%u Illegal state %d for iw_cm_id.\n", 
> > +		       __FILE__, __FUNCTION__, __LINE__, cm_id->state);
> 
> Maybe just bug check here, or toss STATE_IDLE into the default case?
> 
> > +	}

We should bug check if cm_id is not in an known state because it implies
reuse of a freed object.

> > +
> > +	atomic_dec(&iwcm_id_priv->refcount);
> > +	wait_event(iwcm_id_priv->wait, !atomic_read(&iwcm_id_priv->refcount));
> > +
> > +	kfree(iwcm_id_priv);
> > +}
> > +EXPORT_SYMBOL(iw_destroy_cm_id);
> 
> > +int iw_cm_getpeername(struct iw_cm_id *cm_id,
> > +		      struct sockaddr_in* local_addr,
> > +		      struct sockaddr_in* remote_addr)
> > +{
> > +	/* Make sure there's a connection */
> > +	if (cm_id->state != IW_CM_STATE_ESTABLISHED)
> > +		return -ENOTCONN;
> > +
> > +	return cm_id->device->iwcm->getpeername(cm_id, local_addr, remote_addr);
> > +}
> > +EXPORT_SYMBOL(iw_cm_getpeername);
> 
> Thinking about this more, is there some standard call that can be made to map a 
> remote address to a name?

Nobody uses this. The data is cached in the public portion of the cm_id
anyway. I just just get rid of it.

> 
> > +int iw_cm_disconnect(struct iw_cm_id *cm_id)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +
> > +	if (cm_id->device == NULL || cm_id->device->iwcm == NULL)
> > +		return -EINVAL;
> 
> Can these be removed?

I am concerned about someone calling disconnect when the object has
already been disconnected or on an idle cm_id.

> 
> > +	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> > +	switch (cm_id->state) {
> > +	case IW_CM_STATE_CONN_SENT:
> > +	case IW_CM_STATE_ESTABLISHED:
> > +		cm_id->state = IW_CM_STATE_CLOSING;
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->disconnect(cm_id, 1);
> > +		if (ret == 0) {
> > +			struct iw_cm_event event;
> > +			event.event = IW_CM_EVENT_LLP_DISCONNECT;
> > +			event.provider_id = cm_id->provider_id;
> > +			event.status = 0;
> > +			event.local_addr = cm_id->local_addr;
> > +			event.remote_addr = cm_id->remote_addr;
> > +			event.private_data = 0;
> > +			event.private_data_len = 0;
> > +			cm_event_handler(cm_id, &event);
> > +		}
> > +		cm_id->state = IW_CM_STATE_IDLE;
> 
> Can you remind me why the state isn't just set to IDLE above?  Or at least 
> explain how we're able to set it outside of any locks.  Are races avoided 
> because we're setting it back to IDLE, and nothing acts on it when in the 
> CLOSING state?

You're right. This is stupid. I need to set the state to idle in the
thread that actually calls the users' function. Duh.
 
> Note that we queue an event, which could execute, notifying the user that the 
> cm_id has been disconnected, before we get a chance to set the state to IDLE. 
> This leaves the user in a position where they cannot trust that the cm_id is in 
> a re-usable state
> 
> > +static void iwcm_add_one(struct ib_device *device)
> > +{
> > +}
> > +
> > +static void iwcm_remove_one(struct ib_device *device)
> > +{
> > +}
> 
> *scratches head*
> 
This is detritus. The IW CM doesn't really need to register as a client.
I'll remove it.

> > + * We never return !0 from this callback because an error on the new
> > + * child shouldn't kill the listening parent.
> > + */
> > +static int cm_conn_req_handler(struct iwcm_work *work)
> 
> Make function void.

good point.

> 
> > +static int cm_conn_est_handler(struct iwcm_work *work)
> > +{
> > +	struct iwcm_id_private *cm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +	
> > +	cm_id_priv = work->cm_id;
> > +	spin_lock_irqsave(&cm_id_priv->lock, flags);
> > +	if (cm_id_priv->id.state != IW_CM_STATE_CONN_RECV) {
> > +		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +		return 0;
> > +	}
> > +
> > +	if (work->event.status == 0) {
> > +		cm_id_priv = work->cm_id;
> > +		cm_id_priv->id.state = IW_CM_STATE_ESTABLISHED;
> > +	} else 
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> > +
> > +	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +
> > +	/* Call the client CM handler */
> > +	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event);
> > +	if (ret)
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> 
> Can you clarify why the state is set to IDLE?
> 
Hmm. I think this is broken. I need to revisit the destroy path from
callback functions. 

> > +static int cm_conn_rep_handler(struct iwcm_work *work)
> > +{
> {snip}
> > +	/* Call the client CM handler */
> > +	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event);
> > +	if (ret) 
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> Same question.
> > +
> > +	return ret;
> > +}
> > +
> > +static int cm_disconnect_handler(struct iwcm_work *work) 
> > +{
> > +	struct iwcm_id_private *cm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +	
> > +	cm_id_priv = work->cm_id;
> > +	spin_lock_irqsave(&cm_id_priv->lock, flags);
> > +	switch (cm_id_priv->id.state) {
> > +	case IW_CM_STATE_CONN_SENT:
> > +	case IW_CM_STATE_ESTABLISHED:
> > +		cm_id_priv->id.state = IW_CM_STATE_CLOSING;
> > +		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +		ret = cm_id_priv->id.cm_handler(&work->cm_id->id, &work->event);
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> 
> The handler reports that the cm_id has been disconnected, but it isn't actually 
> available for re-use until the function returns and the state has been changed. 
>   If the user does anything with the cm_id between the time that the callback 
> executes and changing the state, they could get unexpected results.
yeah -- this is busted, per the above comment. The state needs to get
set on the other side of the call to the user callback on that thread.
> 
> > +		cm_id_priv->id.qp = NULL;
> > +		cm_id_priv->id.qp_num = 0;
> 
> We may want to clear these before invoking the cm_handler.
> 
> > +EXPORT_SYMBOL(iw_cm_init_qp_attr);
> > +
> > +static int __init iw_cm_init(void)
> > +{
> > +	memset(&iwcm, 0, sizeof iwcm);
> > +	iwcm.wq = create_workqueue("iw_cm");
> > +	if (!iwcm.wq)
> > +		return -ENOMEM;
> > +
> > +	return ib_register_client(&iwcm_client);
> 
> I don't think that we need to register.

i agree.

> 
> - Sean




More information about the general mailing list