[ofa-general] [PATCH] sdp: timeout when waiting for sdp_fin

Amir Vadai amirv at mellanox.co.il
Wed Oct 29 02:07:54 PDT 2008


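Start the dreq_wait timer (new SDP_FIN_WAIT_TIMEOUT, 60 * HZ) whenever a
disconnect is sent. If the timer expires while the socket is still in
FIN_WAIT1 or LAST_ACK, i.e. before the peer's FIN/DREQ has arrived, move
the socket to TIME_WAIT.

Fixes bug 1305:
https://bugs.openfabrics.org/show_bug.cgi?id=1305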

Signed-off-by: Amir Vadai <amirv at mellanox.co.il>
---
 drivers/infiniband/ulp/sdp/sdp.h       |    1 +
 drivers/infiniband/ulp/sdp/sdp_bcopy.c |    3 ++
 drivers/infiniband/ulp/sdp/sdp_cma.c   |    8 ++++-
 drivers/infiniband/ulp/sdp/sdp_main.c  |   43 ++++++++++++++++++++------------
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h
index 8638422..0e7794e 100644
--- a/drivers/infiniband/ulp/sdp/sdp.h
+++ b/drivers/infiniband/ulp/sdp/sdp.h
@@ -75,6 +75,7 @@ extern int sdp_data_debug_level;
 #define SDP_ROUTE_TIMEOUT 1000
 #define SDP_RETRY_COUNT 5
 #define SDP_KEEPALIVE_TIME (120 * 60 * HZ)
+#define SDP_FIN_WAIT_TIMEOUT (60 * HZ)
 
 #define SDP_TX_SIZE 0x40
 #define SDP_RX_SIZE 0x40
diff --git a/drivers/infiniband/ulp/sdp/sdp_bcopy.c b/drivers/infiniband/ulp/sdp/sdp_bcopy.c
index a2472e9..f1b3cb0 100644
--- a/drivers/infiniband/ulp/sdp/sdp_bcopy.c
+++ b/drivers/infiniband/ulp/sdp/sdp_bcopy.c
@@ -119,6 +119,9 @@ static void sdp_fin(struct sock *sk)
 		/* Received a reply FIN - start Infiniband tear down */
 		sdp_dbg(sk, "%s: Starting Infiniband tear down sending DREQ\n",
 				__func__);
+
+		sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
+
 		sdp_exch_state(sk, TCPF_FIN_WAIT1, TCP_TIME_WAIT);
 
 		if (sdp_sk(sk)->id) {
diff --git a/drivers/infiniband/ulp/sdp/sdp_cma.c b/drivers/infiniband/ulp/sdp/sdp_cma.c
index 6206835..64f9f38 100644
--- a/drivers/infiniband/ulp/sdp/sdp_cma.c
+++ b/drivers/infiniband/ulp/sdp/sdp_cma.c
@@ -498,8 +498,7 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");
 
 		if (sk->sk_state == TCP_LAST_ACK) {
-			if (sdp_sk(sk)->dreq_wait_timeout)
-				sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
+			sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
 
 			sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
 
@@ -510,6 +509,11 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		rdma_disconnect(id);
 
 		if (sk->sk_state != TCP_TIME_WAIT) {
+			if (sk->sk_state == TCP_CLOSE_WAIT) {
+				sdp_dbg(sk, "IB teardown while in TCP_CLOSE_WAIT "
+					    "taking reference to let close() finish the work\n");
+				sock_hold(sk, SOCK_REF_CM_TW);
+			}
 			sdp_set_error(sk, EPIPE);
 			rc = sdp_disconnected_handler(sk);
 		}
diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c
index 17e98bb..cbd1adb 100644
--- a/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -443,6 +443,10 @@ done:
 
 static void sdp_send_disconnect(struct sock *sk)
 {
+	queue_delayed_work(sdp_workqueue, &sdp_sk(sk)->dreq_wait_work,
+			   SDP_FIN_WAIT_TIMEOUT);
+	sdp_sk(sk)->dreq_wait_timeout = 1;
+
 	sdp_sk(sk)->sdp_disconnect = 1;
 	sdp_post_sends(sdp_sk(sk), 0);
 }
@@ -451,22 +455,19 @@ static void sdp_send_disconnect(struct sock *sk)
  *	State processing on a close.
  *	TCP_ESTABLISHED -> TCP_FIN_WAIT1 -> TCP_CLOSE
  */
-
 static int sdp_close_state(struct sock *sk)
 {
-	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
-		return 0;
-
-	if (sk->sk_state == TCP_ESTABLISHED)
+	switch (sk->sk_state) {
+	case TCP_ESTABLISHED:
 		sdp_exch_state(sk, TCPF_ESTABLISHED, TCP_FIN_WAIT1);
-	else if (sk->sk_state == TCP_CLOSE_WAIT) {
+		break;
+	case TCP_CLOSE_WAIT:
 		sdp_exch_state(sk, TCPF_CLOSE_WAIT, TCP_LAST_ACK);
-
-		sdp_sk(sk)->dreq_wait_timeout = 1;
-		queue_delayed_work(sdp_workqueue, &sdp_sk(sk)->dreq_wait_work,
-				   TCP_FIN_TIMEOUT);
-	} else
+		break;
+	default:
 		return 0;
+	}
+
 	return 1;
 }
 
@@ -836,6 +837,11 @@ static int sdp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
 {
+	if (!ssk->dreq_wait_timeout)
+		return;
+
+	sdp_dbg(&ssk->isk.sk, "cancelling dreq wait timeout #####\n");
+
 	ssk->dreq_wait_timeout = 0;
 	cancel_delayed_work(&ssk->dreq_wait_work);
 	atomic_dec(ssk->isk.sk.sk_prot->orphan_count);
@@ -847,8 +853,7 @@ void sdp_destroy_work(struct work_struct *work)
 	struct sock *sk = &ssk->isk.sk;
 	sdp_dbg(sk, "%s: refcnt %d\n", __func__, atomic_read(&sk->sk_refcnt));
 
-	if (ssk->dreq_wait_timeout)
-		sdp_cancel_dreq_wait_timeout(ssk);
+	sdp_cancel_dreq_wait_timeout(ssk);
 
 	if (sk->sk_state == TCP_TIME_WAIT)
 		sock_put(sk, SOCK_REF_CM_TW);
@@ -868,15 +873,21 @@ void sdp_dreq_wait_timeout_work(struct work_struct *work)
 
 	lock_sock(sk);
 
-	if (!sdp_sk(sk)->dreq_wait_timeout) {
+	if (!sdp_sk(sk)->dreq_wait_timeout ||
+	    !((1 << sk->sk_state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK))) {
 		release_sock(sk);
 		return;
 	}
 
-	sdp_dbg(sk, "%s: timed out waiting for DREQ\n", __func__);
+	sdp_warn(sk, "timed out waiting for FIN/DREQ. "
+		 "going into abortive close.\n");
 
 	sdp_sk(sk)->dreq_wait_timeout = 0;
-	sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
+
+	if (sk->sk_state == TCP_FIN_WAIT1)
+		atomic_dec(ssk->isk.sk.sk_prot->orphan_count);
+
+	sdp_exch_state(sk, TCPF_LAST_ACK | TCPF_FIN_WAIT1, TCP_TIME_WAIT);
 
 	release_sock(sk);
 
-- 
1.5.3




More information about the general mailing list