[openib-general] Re: [PATCH 3/3] iWARP CM - iWARP Connection Manager

Tom Tucker tom at opengridcomputing.com
Fri Mar 24 07:03:18 PST 2006


On Thu, 2006-03-23 at 09:46 -0800, Sean Hefty wrote: 
> Would something more like this work?  This waits for the CLOSE event to be
> processed before releasing the last reference, which is what I think you were
> going for.
> 
> >+struct iwcm_id_private {
> >+	struct iw_cm_id	id;
> >+
> >+	unsigned long destroy_flags;
> >+	wait_queue_head_t destroy_wait;
> >+
> >+	struct list_head work_list;
> >+
> >+	spinlock_t lock;
> >+	atomic_t refcount;
> >+};
> >+#define IWCM_DESTROY_F_CALLBACK   1
> 
> Remove destroy_flags.

It's not clear to me how we avoid blocking in iw_destroy_cm_id and
hanging the event thread if we remove this flag and combine the
destruction code as you recommend.

> 
> >+static inline int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
> >+{
> >+	int ret = 0;
> >+
> >+	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
> >+		BUG_ON(!list_empty(&cm_id_priv->work_list));
> >+		if (waitqueue_active(&cm_id_priv->destroy_wait)) {
> >+			BUG_ON(cm_id_priv->id.state != IW_CM_STATE_DESTROYING);
> >+			BUG_ON(test_bit(IWCM_DESTROY_F_CALLBACK,
> >+					&cm_id_priv->destroy_flags));
> >+			ret = 1;
> >+			wake_up(&cm_id_priv->destroy_wait);
> >+		}
> >+	}
> >+
> >+	return ret;
> >+}
> 
> Change to:
> 
> 	if (atomic_dec_and_test(&cm_id_priv->refcount))
> 		wake_up(&cm_id_priv->destroy_wait);
> 
> {snip}
> 
> >+static void destroy_cm_id(struct iw_cm_id *cm_id)
> >+{
> >+	struct iwcm_id_private *iwcm_id_priv;
> >+	unsigned long flags;
> >+	struct ib_qp *qp;
> >+	int ret;
> >+
> >+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> >+
> >+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> >+	switch (cm_id->state) {
> >+	case IW_CM_STATE_LISTEN:
> >+		cm_id->state = IW_CM_STATE_DESTROYING;
> >+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> >+		/* destroy the listening endpoint */
> >+		ret = cm_id->device->iwcm->destroy_listen(cm_id);
> >+		break;
> >+	case IW_CM_STATE_CONN_RECV:
> >+		cm_id->state = IW_CM_STATE_DESTROYING;
> >+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> >+		/* Need to call reject on behalf of client */
> >+		(void)iw_cm_reject(cm_id, NULL, 0);
> >+		break;
> >+	case IW_CM_STATE_ESTABLISHED:
> >+		cm_id->state = IW_CM_STATE_DESTROYING;
> >+		qp = cm_id->qp;
> >+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> >+		/* Abrupt close of the connection */
> >+		ret = iwcm_modify_qp_err(qp);
> >+		break;
> >+	default:
> >+		cm_id->state = IW_CM_STATE_DESTROYING;
> >+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> >+	}
> >+
> >+	(void)iwcm_deref_id(iwcm_id_priv);
> >+}
> >+
> >+/*
> >+ * This function is only called by the application thread and cannot
> >+ * be called by the event thread. The function will wait for all
> >+ * references to be released on the cm_id and then kfree the cm_id
> >+ * object.
> >+ */
> >+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
> >+{
> >+	struct iwcm_id_private *iwcm_id_priv;
> >+
> >+	destroy_cm_id(cm_id);
> 
> merge functions
> 
> >+
> >+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> >+	wait_event(iwcm_id_priv->destroy_wait,
> >+		   !atomic_read(&iwcm_id_priv->refcount));
> >+
> >+	kfree(iwcm_id_priv);
> >+}
> >+EXPORT_SYMBOL(iw_destroy_cm_id);
> 
> {snip}
> 
> >+int iw_cm_accept(struct iw_cm_id *cm_id,
> >+		 const void *private_data,
> >+		 u8 private_data_len)
> >+{
> >+	struct iwcm_id_private *iwcm_id_priv;
> >+	unsigned long flags;
> >+	int ret;
> >+
> >+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> >+
> >+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
> >+	switch (cm_id->state) {
> >+	case IW_CM_STATE_CONN_RECV:
> 
> 		/* Take reference until CLOSE event. */
> 		atomic_inc(&iwcm_id_priv->refcount);
> 
> We need the reference before calling accept, to prevent racing with the event
> handling.

The problem that is making this so hard is an artifact of the AMSO
adapter implementation that had the luxury of defining it's own CM
interface. Here's what I think we need to do...make the event stream
consistent with the CM verbs return codes. By that I mean that if we
watch only the event stream or only the return codes from downcalls, we
can unambiguously know the state of the cm_id. In this particular case,
this means that if the return value from accept is 0, then the events
delivered by the provider will always be ESTABLISHED followed by CLOSE
or ESTABLISHED DISCONNECT CLOSE. This will push the event serialization
down into the provider where it belongs.

This will also get rid of the crap below where we generate an
ESTABLISHED event ourselves on a different thread and then try to match
up the current state + event + day of the week to predict what we should
do. 

> >+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> >+		ret = cm_id->device->iwcm->accept(cm_id, private_data,
> >+						  private_data_len);
> >+		if (ret == 0) {
> >+			struct iw_cm_event event;
> >+			event.event = IW_CM_EVENT_ESTABLISHED;
> >+			event.provider_id = cm_id->provider_id;
> >+			event.status = 0;
> >+			event.local_addr = cm_id->local_addr;
> >+			event.remote_addr = cm_id->remote_addr;
> >+			event.private_data = NULL;
> >+			event.private_data_len = 0;
> >+			cm_event_handler(cm_id, &event);
> 
> 		} else {
> 			atomic_dec(&iwcm_id_priv->refcount);
> 			cm_id->state = IW_CM_STATE_IDLE;
> 		}
> 
> I think that there's a race here where the ESTABLISHED event could be queued
> after some other event.  Do we need to generate this event, or can we get away
> without it?  I'm having a hard time seeing a fix for this.

Someone has to generate the event we can't tell the app we're
established. Per the above, the provider needs to generate this event. 
> >+
> >+		break;
> >+	default:
> >+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
> >+		ret = -EINVAL;
> >+	}
> >+
> >+	return ret;
> >+}
> >+EXPORT_SYMBOL(iw_cm_accept);
> 
> {snip}
> 
> >+int iw_cm_connect(struct iw_cm_id *cm_id,
> >+		  const void *pdata,
> >+		  u8 pdata_len)
> >+{
> >+	struct iwcm_id_private *cm_id_priv;
> >+	int ret = 0;
> >+	unsigned long flags;
> >+
> >+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> >+	spin_lock_irqsave(&cm_id_priv->lock, flags);
> >+	if (cm_id->state != IW_CM_STATE_IDLE) {
> >+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> >+		return -EINVAL;
> >+	}
> 
> 	/* Take reference until CLOSE event or failed CONNECT_REPLY */
> 	atomic_inc(&iwcm_id_priv->refcount);
> 
> Is it guaranteed that we will get one of these two events under all
> circumstances?  E.g. user calls destroy immediately after returning from this
> call.
> >+	cm_id->state = IW_CM_STATE_CONN_SENT;
> >+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> >+
> >+	ret = cm_id->device->iwcm->connect(cm_id, pdata, pdata_len);
> 
> 
> 	if (ret) {
> 		atomic_dec(&iwcm_id_priv->refcount);
> 		cm_id->state = IW_CM_STATE_IDLE;
> 	}
> 
> >+
> >+	return ret;
> >+}
> >+EXPORT_SYMBOL(iw_cm_connect);
> 
> 
> >+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
> >+				struct iw_cm_event *iw_event)
> >+{
> >+	unsigned long flags;
> >+	struct iw_cm_id *cm_id;
> >+	struct iwcm_id_private *cm_id_priv;
> >+	iw_cm_handler cm_handler;
> >+	void *context;
> >+	struct ib_device *device;
> >+	int ret;
> >+
> >+	/* The provider should never generate a connection request
> >+	 * event with a bad status.
> >+	 */
> >+	BUG_ON(iw_event->status);
> >+
> >+	/* We could be destroying the listening id. If so, ignore this
> >+	 * upcall. */
> >+	spin_lock_irqsave(&listen_id_priv->lock, flags);
> >+	if (listen_id_priv->id.state != IW_CM_STATE_LISTEN) {
> >+		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
> >+		return;
> >+	}
> >+	/* The listen_id can be destroyed the moment we release the
> >+	 * lock, so take the state we need to inherit before releasing
> >+	 * the lock
> >+	*/
> 
> Not exactly true.  The listen_id must remain valid throughout this callback.
> 
Yes, you are correct. There is a reference on the listen_id held by the
event handler. 

> >+	device = listen_id_priv->id.device;
> >+	cm_handler = listen_id_priv->id.cm_handler;
> >+	context = listen_id_priv->id.context;
> >+	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
> >+
> >+	cm_id = iw_create_cm_id(device,	cm_handler, context);
> >+
> >+	/* If the cm_id could not be created, ignore the request */
> >+	if (IS_ERR(cm_id))
> >+		return;
> 
> Is this safe to do, or do we need to reject the connection?  I'm wondering what
> this does to the provider.
> 
It is safe, but probably wasteful. We should reject it so the adapter
can release the resources right away.

> >+
> >+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
> >+	cm_id_priv->id.local_addr = iw_event->local_addr;
> >+	cm_id_priv->id.remote_addr = iw_event->remote_addr;
> >+	cm_id_priv->id.provider_id = iw_event->provider_id;
> >+	cm_id_priv->id.state = IW_CM_STATE_CONN_RECV;
> >+
> >+	/* Call the client CM handler */
> >+	// XXX: atomic_inc(&cm_id_priv->refcount);
> >+	ret = cm_id->cm_handler(cm_id, iw_event);
> >+	// xxx: (void)iwcm_deref_id(cm_id_priv);
> >+	if (ret) {
> >+		set_bit(IWCM_DESTROY_F_CALLBACK, &cm_id_priv->destroy_flags);
> >+		destroy_cm_id(cm_id);
> >+		if (atomic_read(&cm_id_priv->refcount)==0)
> >+			kfree(cm_id);
> >+	}
> >+}
> >+
> >+/*
> >+ * Passive Side: CM_ID <-- ESTABLISHED
> >+ *
> >+ * The provider generated an ESTABLISHED event which means that
> >+ * the MPA negotion has completed successfully and we are now in MPA
> >+ * FPDU mode.
> >+ */
> >+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
> >+			       struct iw_cm_event *iw_event)
> >+{
> >+	unsigned long flags;
> >+
> >+	/* If the cm_id is in the wrong state, ignore the
> >+	 * request. This could happen if the app called disconnect or
> >+	 * destroy.
> >+	 */
> >+	spin_lock_irqsave(&cm_id_priv->lock, flags);
> >+	if (cm_id_priv->id.state != IW_CM_STATE_CONN_RECV) {
> >+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> >+		return 0;
> >+	}
> >+
> >+	/* Take a reference on behalf of the provider. This reference
> >+	 * will be removed when the provider delivers the CLOSE event.
> >+	 */
> >+	atomic_inc(&cm_id_priv->refcount);
> 
> We should have taken this reference earlier now.
> 
> >+	cm_id_priv->id.state = IW_CM_STATE_ESTABLISHED;
> >+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> >+
> >+	/* Call the client CM handler */
> >+	return cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
> >+}
> >+
> >+/*
> >+ * Active Side: CM_ID <-- ESTABLISHED
> >+ *
> >+ * Handles the reply to our connect request:
> >+ * - If the cm_id is in the wrong state when the event is
> >+ *   delivered, the event is ignored.
> >+ * - If the remote peer accepted the connection, we update the 4-tuple
> >+ *   in the cm_id with the remote peer info, move the cm_id to the
> >+ *   ESTABLISHED state and deliver the event to the client.
> >+ * - If the remote peer rejected the connection, or there is some
> >+ *   connection error, move the cm_id to the IDLE state, and deliver
> >+ *   the bad news to the client.
> >+ */
> >+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
> >+			       struct iw_cm_event *iw_event)
> >+{
> >+	unsigned long flags;
> >+
> >+	/* If the cm_id is in the wrong state, ignore the
> >+	 * request. This could happen if the app called disconnect or
> >+	 * destroy.
> >+	 */
> >+	spin_lock_irqsave(&cm_id_priv->lock, flags);
> >+	if (cm_id_priv->id.state != IW_CM_STATE_CONN_SENT) {
> >+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> >+		return 0;
> >+	}
> >+	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
> >+		cm_id_priv->id.local_addr = iw_event->local_addr;
> >+		cm_id_priv->id.remote_addr = iw_event->remote_addr;
> >+		/* Take a reference on behalf of the provider. This reference
> >+		 * will be removed when the provider delivers the CLOSE event.
> >+		 */
> >+		atomic_inc(&cm_id_priv->refcount);
> 
> We should have taken this reference earlier now.
> 
> >+		cm_id_priv->id.state = IW_CM_STATE_ESTABLISHED;
> >+	} else
> >+		cm_id_priv->id.state = IW_CM_STATE_IDLE;
> 
> We need to release earlier reference here if CONNECT_REPLY is reporting a
> failure.  I'm assuming that this is the last event that will be reported.
> 
> >+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
> >+
> >+	/* Call the client CM handler */
> >+	return cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
> >+}
> 
> - Sean




More information about the general mailing list