[openib-general] [PATCH 3/3] iWARP CM

Tom Tucker tom at opengridcomputing.com
Thu Mar 9 12:14:01 PST 2006


Sean:

I've made a bunch of changes per your review. I'm testing these now and
will repost. My comments are below regarding what I did based on your
suggestions.

I'll repost once my testing is complete.  Thanks for the very thorough
review.

Tom

On Wed, 2006-03-08 at 12:41 -0800, Sean Hefty wrote:
> Tom Tucker wrote:
> > +#include <rdma/ib_cache.h>
> > +#include <rdma/ib_cm.h>
> 
> Is this needed?
> 
> > +#include <rdma/iw_cm.h>
> > +
> > +#include "cm_msgs.h"
> 
> Is this needed?
> 
Copy and paste detritus

> > +struct iwcm_id_private;
> 
> I think this can be removed.

done.

> 
> > +struct iwcm_device;
> > +struct iwcm_port {
> > +	struct iwcm_device *iwcm_dev;
> > +	struct sockaddr_in local_addr;
> > +	u8 port_num;
> > +};
> 
> Can the port have more than one address?
> 
This is dead code ... I have removed it.

> > +/* Called whenever a reference added for a cm_id */
> > +static inline void iwcm_addref_id(struct iwcm_id_private *cm_id_priv)
> > +{
> > +	atomic_inc(&cm_id_priv->refcount);
> > +}
> 
> Is a wrapper around atomic_inc() needed?
> 

I find it easier to keep track of references visually if the functions
'addref' and 'remref' functions have symmetric names. Functionally, as
you point out, it is pointless.
 
> > +struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
> > +				 iw_cm_handler cm_handler,
> > +				 void *context)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +
> > +	iwcm_id_priv = kmalloc(sizeof *iwcm_id_priv, GFP_KERNEL);
> > +	if (!iwcm_id_priv)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	memset(iwcm_id_priv, 0, sizeof *iwcm_id_priv);
> 
> Use kzalloc().
> 

tx

> > +	iwcm_id_priv->id.state = IW_CM_STATE_IDLE;
> > +	iwcm_id_priv->id.device = device;
> > +	iwcm_id_priv->id.cm_handler = cm_handler;
> > +	iwcm_id_priv->id.context = context;
> > +	iwcm_id_priv->id.event_handler = cm_event_handler;
> > +
> > +	spin_lock_init(&iwcm_id_priv->lock);
> > +	init_waitqueue_head(&iwcm_id_priv->wait);
> > +	atomic_set(&iwcm_id_priv->refcount, 1);
> > +
> > +	return &iwcm_id_priv->id;
> > +
> > +}
> > +EXPORT_SYMBOL(iw_create_cm_id);
> 
> > +void iw_destroy_cm_id(struct iw_cm_id *cm_id)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +
> > +	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +
> > +	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> > +	switch (cm_id->state) {
> > +	case IW_CM_STATE_LISTEN:
> > +		cm_id->state = IW_CM_STATE_IDLE;
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->destroy_listen(cm_id);
> > +		break;
> > +
> > +	case IW_CM_STATE_CONN_RECV:
> > +	case IW_CM_STATE_CONN_SENT:
> > +	case IW_CM_STATE_ESTABLISHED:
> > +		cm_id->state = IW_CM_STATE_IDLE;
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->disconnect(cm_id,1);
> > +		break;
> > +
> > +	case IW_CM_STATE_IDLE:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		break;
> > +		
> > +	default:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		printk(KERN_ERR "%s:%s:%u Illegal state %d for iw_cm_id.\n", 
> > +		       __FILE__, __FUNCTION__, __LINE__, cm_id->state);
> > +		;
> 
> random ';'
> 
> Nit: my preference would be to remove the extra blank lines after the break 
> statements.

done.

> 
> > +	}
> > +
> > +	atomic_dec(&iwcm_id_priv->refcount);
> > +	wait_event(iwcm_id_priv->wait, !atomic_read(&iwcm_id_priv->refcount));
> > +
> > +	kfree(iwcm_id_priv);
> > +}
> > +EXPORT_SYMBOL(iw_destroy_cm_id);
> 
> > +int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +
> > +	if (cm_id->device == 0 || cm_id->device->iwcm == 0)
> > +		return -EINVAL;
> 
> Use NULL in place of '0'.  Are these checks needed?

Well, it depends on our philosophy. If kernel users are trusted -- no.
If not, then we can either check them here and fail gracefully, or bug-
check the app later. Currently, _we_ are the app, so maybe we should
dump-em. 

> > +	
> > +	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> > +	if (cm_id->state != IW_CM_STATE_IDLE) {
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		return -EBUSY;
> 
> -EINVAL seems like a better return code here.

I agree.

> 
> > +	}
> > +	cm_id->state = IW_CM_STATE_LISTEN;
> > +	spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +
> > +	ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
> > +	if (ret != 0) 
> 
> Nit: how about just if (ret)?

k

> 
> > +		cm_id->state = IW_CM_STATE_IDLE;
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL(iw_cm_listen);
> 
> > +int iw_cm_getpeername(struct iw_cm_id *cm_id,
> > +		      struct sockaddr_in* local_addr,
> > +		      struct sockaddr_in* remote_addr)
> > +{
> > +	if (cm_id->device == 0)
> > +		return -EINVAL;
> > +
> > +	if (cm_id->device->iwcm == 0)
> > +		return -EINVAL;
> 
> Use NULL - are checks needed given state check below?  Is synchronization around 
> this call needed, since it checks the state?

removed.

> > +
> > +	/* Make sure there's a connection */
> > +	if (cm_id->state != IW_CM_STATE_ESTABLISHED)
> > +		return -ENOTCONN;
> > +
> > +	return cm_id->device->iwcm->getpeername(cm_id, local_addr, remote_addr);
> > +}
> > +EXPORT_SYMBOL(iw_cm_getpeername);
> 
> > +int iw_cm_reject(struct iw_cm_id *cm_id,
> > +		 const void *private_data,
> > +		 u8 private_data_len)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +	unsigned long flags;
> > +	int ret;
> > +
> > +	if (cm_id->device == 0 || cm_id->device->iwcm == 0)
> > +		return -EINVAL;
> 
> Use NULL - are checks needed?
removed.
> 
> > +		
> > +	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +
> > +	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> > +	switch (cm_id->state) {
> > +	case IW_CM_STATE_CONN_RECV:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->reject(cm_id, private_data, 
> > +						  private_data_len);
> > +		cm_id->state = IW_CM_STATE_IDLE;
> 
> This changes the state after the lock has been released.

Good catch.
> 
> > +		break;
> > +	default:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = -EINVAL;
> > +	}
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL(iw_cm_reject);
> 
> > +int iw_cm_accept(struct iw_cm_id *cm_id,
> > +		 const void *private_data,
> > +		 u8 private_data_len)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +	unsigned long flags;
> > +	int ret;
> > +
> > +	if (cm_id->device == 0 || cm_id->device->iwcm == 0)
> > +		return -EINVAL;
> 
> ditto...
removed.
> 
> > +		
> > +	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +
> > +	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> > +	switch (cm_id->state) {
> > +	case IW_CM_STATE_CONN_RECV:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->accept(cm_id, private_data, 
> > +						  private_data_len);
> > +		if (ret == 0) {
> > +			struct iw_cm_event event;
> > +			event.event = IW_CM_EVENT_ESTABLISHED;
> > +			event.provider_id = cm_id->provider_id;
> > +			event.status = 0;
> > +			event.local_addr = cm_id->local_addr;
> > +			event.remote_addr = cm_id->remote_addr;
> 
> If the cm_id contains the provider_id, local_addr, and remote_addr, does it need 
> to be returned as part of the event?

I did this to be consistent -- i.e. the event that is delivered to the
IWCM by the provider has the same form as the event delivered to the
IWCM consumer. However, you are correct, the address has already been
cached in the cm_id by the IWCM. I don't feel strongly one way or the
other, but keeping them the same has some minor performance benefit
since in some cases, the IWCM can simply forward the provider's event to
the consumer.

I did remove a redundant copy (in cm_conn_est_handler) of the address
info _to_ the cm_id  from the event we just built _from_ the cm_id --
oof.

> 
> > +			event.private_data = 0;
> > +			event.private_data_len = 0;
> > +			cm_event_handler(cm_id, &event);
> 
> Does this result in a callback being invoked from the calling thread?  If so, 
> this behavior differs from other callbacks used throughout the stack.
> 

No, the cm_event_handler function creates a work item and enqueues it on
the IWCM work_queue thread.

> Also, the state is left as CONN_RECV.  Is this correct?
> 
The state is changed in a callback hander (cm_conn_est_handler) that gets called 
due to the work element enqueued above.

> 
> > +		}
> > +		
> > +		break;
> > +	default:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = -EINVAL;
> > +	}
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL(iw_cm_accept);
> 
> > +int iw_cm_connect(struct iw_cm_id *cm_id, 
> > +		  const void *pdata,
> > +		  u8 pdata_len)
> > +{
> > +	struct iwcm_id_private *cm_id_priv;
> > +	int ret = 0;
> > +	unsigned long flags;
> > +
> > +	if (cm_id->device == 0 || cm_id->device->iwcm == 0)
> > +		return -EINVAL;
> 
> ditto...

removed.

> 
> > +	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +	spin_lock_irqsave(&cm_id_priv->lock, flags);
> > +	if (cm_id->state != IW_CM_STATE_IDLE) {
> > +		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +		return -EBUSY;
> 
> -EINVAL seems like a better return code.
> 

agreed.

> > +	}
> > +	cm_id->state = IW_CM_STATE_CONN_SENT;
> > +	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +
> > +	ret = cm_id->device->iwcm->connect(cm_id, pdata, pdata_len);
> > +	if (ret != 0)
> 
> if (ret)?  -- just my preference
> 

consistent too...

> > +		cm_id->state = IW_CM_STATE_IDLE;
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL(iw_cm_connect);
> 
> > +int iw_cm_disconnect(struct iw_cm_id *cm_id)
> > +{
> > +	struct iwcm_id_private *iwcm_id_priv;
> > +	unsigned long flags;
> > +	int ret;
> > +
> > +	if (cm_id->device == 0 || cm_id->device->iwcm == 0 || cm_id->qp == 0)
> > +		return -EINVAL;
> 
> ditto...
> 

Changed to NULL, removed check on QP. 

> > +	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> > +	switch (cm_id->state) {
> > +	case IW_CM_STATE_ESTABLISHED:
> > +		cm_id->state = IW_CM_STATE_CLOSING;
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = cm_id->device->iwcm->disconnect(cm_id, 1);
> > +		if (ret == 0) {
> > +			struct iw_cm_event event;
> > +			event.event = IW_CM_EVENT_LLP_DISCONNECT;
> > +			event.provider_id = cm_id->provider_id;
> > +			event.status = 0;
> > +			event.local_addr = cm_id->local_addr;
> > +			event.remote_addr = cm_id->remote_addr;
> > +			event.private_data = 0;
> > +			event.private_data_len = 0;
> > +			cm_event_handler(cm_id, &event);
> 
> See comments in iw_cm_accept().
> 

> > +		}
> > +		cm_id->state = IW_CM_STATE_IDLE;
> 
> Why is state set to CLOSING above, then set it to IDLE?  There may be a valid 
> reason, I'm just trying to understand why.
> 

While in the client callback handler and through potential downcalls, I
wanted to have a separate state. IDLE means it can be reused. I thought
we should not be able to do so until after we've popped back up through
this call chain.... that was the idea anyway.

> > +		break;
> > +	default:
> > +		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> > +		ret = -EINVAL;
> > +	}
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL(iw_cm_disconnect);
> > +
> > +static void iwcm_add_one(struct ib_device *device)
> > +{
> > +	struct iwcm_device *iwcm_dev;
> > +	struct iwcm_port *port;
> > +	unsigned long flags;
> > +	u8 i;
> > +
> > +	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IWARP)
> > +		return;
> > +
> > +	iwcm_dev = kmalloc(sizeof(*iwcm_dev) + sizeof(*port) *
> > +			 device->phys_port_cnt, GFP_KERNEL);
> > +	if (!iwcm_dev)
> > +		return;
> > +
> > +	iwcm_dev->device = device;
> > +
> > +	for (i = 1; i <= device->phys_port_cnt; i++) {
> > +		port = &iwcm_dev->port[i-1];
> > +		port->iwcm_dev = iwcm_dev;
> > +		port->port_num = i;
> > +	}
> > +
> > +	ib_set_client_data(device, &iwcm_client, iwcm_dev);
> > +
> > +	write_lock_irqsave(&iwcm.device_lock, flags);
> > +	list_add_tail(&iwcm_dev->list, &iwcm.device_list);
> > +	write_unlock_irqrestore(&iwcm.device_lock, flags);
> > +	return;
> 
> Can drop 'return'.
> 
> {snip}
> 
> > +static int cm_conn_req_handler(struct iwcm_work *work)
> 
> Return code is ignored.
> 
> > +{
> > +	struct iw_cm_id *cm_id;
> > +	struct iwcm_id_private *cm_id_priv;
> > +	int rc;
> > +
> > +	/* If the status was not successful, ignore request */
> > +	if (work->event.status) {
> > +		printk(KERN_ERR "%s:%d Bad status=%d for connection request. "
> > +		       "Should be filtered by provider\n",
> > +		       __FUNCTION__, __LINE__,
> > +		       work->event.status);
> > +		return work->event.status;
> > +	}
> 
> Can status really ever be non-zero here?

Yes, this event is generated by the provider. 

> 
> > +	cm_id = iw_create_cm_id(work->cm_id->id.device,	
> > +				work->cm_id->id.cm_handler,
> > +				work->cm_id->id.context);
> > +	if (IS_ERR(cm_id)) 
> > +		return PTR_ERR(cm_id);
> 
> Do we still need to kfree(work) before returning?  Actually, see next comment.

yes, good catch -- moved kfree to cm_work_handler

> 
> > +
> > +	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +	cm_id_priv->id.local_addr = work->event.local_addr;
> > +	cm_id_priv->id.remote_addr = work->event.remote_addr;
> > +	cm_id_priv->id.provider_id = work->event.provider_id;
> > +	cm_id_priv->id.state = IW_CM_STATE_CONN_RECV;
> > +
> > +	/* Call the client CM handler */
> > +	iwcm_addref_id(cm_id_priv);
> > +	rc = cm_id->cm_handler(cm_id, &work->event);
> > +	iwcm_deref_id(cm_id_priv);
> > +	if (rc) {
> > +		cm_id->state = IW_CM_STATE_IDLE;
> > +		iw_destroy_cm_id(cm_id);
> > +	}
> > +	kfree(work);
> 
> Can we move the free into cm_work_handler()?
> 

done.

> > +	return 0;
> > +}
> > +
> > +/*
> > + * Handles the transition to established state on the passive side.
> > + */
> > +static int cm_conn_est_handler(struct iwcm_work *work)
> 
> Return code is ignored.

now used to tell cm_work_handler to destroy cm_id

> 
> > +{
> > +	struct iwcm_id_private *cm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +	
> > +	cm_id_priv = work->cm_id;
> > +	spin_lock_irqsave(&cm_id_priv->lock, flags);
> > +	if (cm_id_priv->id.state != IW_CM_STATE_CONN_RECV) {
> > +		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +		printk(KERN_ERR "%s:%d Invalid cm_id state=%d "
> > +		       "for established event\n", __FUNCTION__, __LINE__, 
> > +		       cm_id_priv->id.state);
> > +		ret = -EINVAL;
> > +		goto error_out;
> > +	}
> 
> If the user destroys the cm_id, the state could change before this callback is 
> invoked.  This shouldn't be an error.
> 

Event will be ignored if not in the CONN_RECV state.

> > +
> > +	if (work->event.status == 0) {
> > +		cm_id_priv = work->cm_id;
> > +		cm_id_priv->id.local_addr = work->event.local_addr;
> > +		cm_id_priv->id.remote_addr = work->event.remote_addr;
> > +		cm_id_priv->id.state = IW_CM_STATE_ESTABLISHED;
> > +	} else 
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> > +
> > +	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +
> > +	/* Call the client CM handler */
> > +	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event);
> > +	if (ret) {
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> > +		iw_destroy_cm_id(&cm_id_priv->id);
> 
> I think that we still have a reference on the cm_id from cm_event_handler(), 
> which will cause destroy to block forever.
> 

return code now used to tell cm_work_handler to destroy cm_id


> > +	}
> > +
> > + error_out:
> > +	kfree(work);
> 
> Move free to cm_work_handler().
> 
done.
> > +	return ret;
> > +}
> > +
> > +/*
> > + * Handles the reply to our connect request. There are three
> > + * possibilities: 
> > + * - If the cm_id is in the wrong state when the event is
> > + *   delivered, the event is ignored. [What should we do when the
> > + *   provider does something crazy?]
> 
> The cm_id can be in the wrong state if being destroyed.  Crazy providers just 
> need to be fixed.   :)
> 
> > + * - If the remote peer accepts the connection, we update the 4-tuple
> > + *   in the cm_id with the remote peer info, move the cm_id to the
> > + *   ESTABLISHED state and deliver the event to the client.
> > + * - If the remote peer rejects the connection, or there is some
> > + *   connection error, move the cm_id to the IDLE state, and deliver
> > + *   the event to the client.
> > + */
> > +static int cm_conn_rep_handler(struct iwcm_work *work)
> 
> Return code is ignored.

now used to tell cm_work_handler to destroy cm_id

> 
> > +{
> > +	struct iwcm_id_private *cm_id_priv;
> > +	unsigned long flags;
> > +	int ret = 0;
> > +	
> > +	cm_id_priv = work->cm_id;
> > +	spin_lock_irqsave(&cm_id_priv->lock, flags);
> > +	if (cm_id_priv->id.state != IW_CM_STATE_CONN_SENT) {
> > +		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +		printk(KERN_ERR "%s:%d Invalid cm_id state=%d "
> > +		       "for connect reply event\n", __FUNCTION__, __LINE__, 
> > +	 	       cm_id_priv->id.state);
> > +		ret = -EINVAL;
> > +		goto error_out;
> > +	}
> 
> See comment in cm_conn_est_handler.

done.

> 
> > +
> > +	if (work->event.status == 0) {
> > +		cm_id_priv = work->cm_id;
> > +		cm_id_priv->id.local_addr = work->event.local_addr;
> > +		cm_id_priv->id.remote_addr = work->event.remote_addr;
> > +		cm_id_priv->id.state = IW_CM_STATE_ESTABLISHED;
> > +	} else 
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> > +
> > +	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +
> > +	/* Call the client CM handler */
> > +	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event);
> > +	if (ret) {
> > +		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> > +		iw_destroy_cm_id(&cm_id_priv->id);
> 
> I think we're still holding a reference on the cm_id.
> 
see above about non-zero return code.
> > +	}
> > +
> > + error_out:
> > +	kfree(work);
> 
> Move free to cm_work_handler().
> 
done.

> > +	return ret;
> > +}
> > +
> > +static int cm_disconnect_handler(struct iwcm_work *work) 
> 
> Return value is ignored.
> 
...
> > +{
> > +	struct iwcm_id_private *cm_id_priv;
> > +	int ret = 0;
> > +	
> > +	cm_id_priv = work->cm_id;
> > +
> > +	cm_id_priv->id.state = IW_CM_STATE_IDLE;
> 
> Do we need to verify that we haven't already started to destroy the cm_id before 
> changing its state?

Yes, you're right. I need to check for CLOSING. Same for
iw_cm_disconnect.

> > +
> > +	/* Call the client CM handler */
> > +	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->event);
> > +	if (ret)
> > +		iw_destroy_cm_id(&cm_id_priv->id);
> > +
> > +	kfree(work);
> 
> Same comments as above about free and holding reference apply here.
> 
...
> > +	return ret;
> > +}
> > +
> > +static void cm_work_handler(void *arg)
> > +{
> > +	struct iwcm_work *work = (struct iwcm_work*)arg;
> > +	struct iwcm_id_private *cm_id_priv = work->cm_id;
> > +	int rc;
> > +
> > +	switch (work->event.event) {
> > +	case IW_CM_EVENT_CONNECT_REQUEST:
> > +		rc = cm_conn_req_handler(work);
> > +		break;
> > +	case IW_CM_EVENT_CONNECT_REPLY:
> > +		rc = cm_conn_rep_handler(work);
> > +		break;
> > +	case IW_CM_EVENT_ESTABLISHED:
> > +		rc = cm_conn_est_handler(work);
> > +		break;
> > +	case IW_CM_EVENT_LLP_DISCONNECT:
> > +	case IW_CM_EVENT_LLP_TIMEOUT:
> > +	case IW_CM_EVENT_LLP_RESET:
> > +	case IW_CM_EVENT_CLOSE:
> > +		rc = cm_disconnect_handler(work);
> > +		break;
> > +	}
> > +
> > +	/* The reference was done in cm_event_handler. */
> > +	iwcm_deref_id(cm_id_priv);
> 
> The cm_*_handlers need to release the reference in order to process destruction. 
>   That or use their return code to determine if the reference should be 
> released, or if cleanup needs to be invoked.
What I'm going to do is have the return value specify whether or not the
cm_id should be destroyed. Also, the kfree of the work is being moved
here.
> 
> > +/* IW CM provider event callback handler. This function is called on
> > + * interrupt context. The function builds a work queue element
> > + * and enqueues it for processing on a work queue thread. This allows
> > + * CM client callback functions to block.
> > + */
> > +static void cm_event_handler(struct iw_cm_id *cm_id,
> > +			     struct iw_cm_event *event) 
> > +{
> > +	struct iwcm_work *work;
> > +	struct iwcm_id_private *cm_id_priv;
> > +
> > +	work = kmalloc(sizeof *work, GFP_ATOMIC);
> > +	if (!work)
> > +		return;
> > +
> > +	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> > +	
> > +	/* 
> > +	 * Reference cm_id_priv until the work is completed in 
> > +	 * cm_work_handler.
> > +	 */
> > +	iwcm_addref_id(cm_id_priv);
> > +	INIT_WORK(&work->work, cm_work_handler, work);
> > +	work->cm_id = cm_id_priv;
> > +	work->event = *event;
> > +	queue_work(iwcm.wq, &work->work);
> 
> Can you use the rdma_wq instead of creating a new one?  We'll need to be careful 
> that we don't end up with a deadlock condition, but I think that it's safe to 
> use it.
> 
Sure, I'll give it a whirl...

> > +static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
> > +				  struct ib_qp_attr *qp_attr,
> > +				  int *qp_attr_mask)
> > +{
> > +	unsigned long flags;
> > +	int ret;
> > +
> > +	spin_lock_irqsave(&cm_id_priv->lock, flags);
> > +	switch (cm_id_priv->id.state) {
> > +	case IW_CM_STATE_IDLE:
> > +	case IW_CM_STATE_CONN_SENT:
> > +	case IW_CM_STATE_CONN_RECV:
> > +	case IW_CM_STATE_ESTABLISHED:
> > +		*qp_attr_mask = IB_QP_STATE;
> 
> Do you need to set what the new state should be?

I was copying what I thought you did and assumed that what was really
happening here is that we were masking off all but the state specified
by the caller in qp_attr. So user sets IB_QPS_XXX in qp_attr and a bunch
of other things, and then calls rdma_init_qp_attr. This function will
only allow them to set the state once the QP is in RTS.

> 
> > +		ret = 0;
> > +		break;
> > +	default:
> > +		ret = -EINVAL;
> > +		break;
> > +	}
> > +	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> > +	return ret;
> > +}
> 
> - Sean




More information about the general mailing list