[ofa-general] [PATCH] sdp: timeout when waiting for sdp_fin
Amir Vadai
amirv at mellanox.co.il
Wed Oct 29 02:07:54 PDT 2008
Arm a delayed work item (SDP_FIN_WAIT_TIMEOUT) whenever we send a
disconnect, so a socket left waiting for the peer's SDP FIN in
TCP_FIN_WAIT1 or TCP_LAST_ACK is moved into an abortive close instead of
hanging indefinitely. The timeout is cancelled as soon as the peer's FIN
(DREQ) arrives, and sdp_cancel_dreq_wait_timeout() is now safe to call
unconditionally.

Fixes bug 1305:
https://bugs.openfabrics.org/show_bug.cgi?id=1305
Signed-off-by: Amir Vadai <amirv at mellanox.co.il>
---
drivers/infiniband/ulp/sdp/sdp.h | 1 +
drivers/infiniband/ulp/sdp/sdp_bcopy.c | 3 ++
drivers/infiniband/ulp/sdp/sdp_cma.c | 8 ++++-
drivers/infiniband/ulp/sdp/sdp_main.c | 43 ++++++++++++++++++++------------
4 files changed, 37 insertions(+), 18 deletions(-)
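
Not part of the patch: for reviewers who want the close-path logic in one
place, here is a minimal userspace sketch of what the patch implements,
written in plain C with illustrative names (a deadline field stands in for
the delayed work item, and expiry is an explicit call rather than a
workqueue). The idea: arm a FIN-wait timer whenever we send our disconnect,
cancel it when the peer's FIN (DREQ) arrives, and fall back to an abortive
close if the timer fires first.

/* Userspace sketch only; names here are illustrative, not SDP's. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

enum conn_state { ESTABLISHED, FIN_WAIT1, CLOSE_WAIT, LAST_ACK, TIME_WAIT };

#define FIN_WAIT_TIMEOUT 60     /* seconds, stands in for SDP_FIN_WAIT_TIMEOUT */

struct conn {
        enum conn_state state;
        bool fin_timer_armed;
        time_t fin_deadline;
};

/* Arm the timer every time we send our disconnect. */
static void send_disconnect(struct conn *c)
{
        c->fin_timer_armed = true;
        c->fin_deadline = time(NULL) + FIN_WAIT_TIMEOUT;
        c->state = (c->state == ESTABLISHED) ? FIN_WAIT1 : LAST_ACK;
}

/* Idempotent cancel: the guard lives here, so callers need no check. */
static void cancel_fin_timer(struct conn *c)
{
        if (!c->fin_timer_armed)
                return;
        c->fin_timer_armed = false;
}

/* Peer's FIN arrived in time: cancel the timer and enter TIME_WAIT. */
static void peer_fin(struct conn *c)
{
        cancel_fin_timer(c);
        c->state = TIME_WAIT;
}

/* Timer expiry: act only if still armed and still waiting for the FIN. */
static void fin_timer_expired(struct conn *c)
{
        if (!c->fin_timer_armed ||
            (c->state != FIN_WAIT1 && c->state != LAST_ACK))
                return;
        c->fin_timer_armed = false;
        fprintf(stderr, "timed out waiting for FIN, abortive close\n");
        c->state = TIME_WAIT;
}

int main(void)
{
        struct conn a = { .state = ESTABLISHED };
        struct conn b = { .state = CLOSE_WAIT };

        /* Normal path: we close, the peer's FIN arrives, timer is cancelled. */
        send_disconnect(&a);
        peer_fin(&a);

        /* Broken peer: we close from CLOSE_WAIT, no FIN ever arrives. */
        send_disconnect(&b);
        fin_timer_expired(&b);

        printf("a: state %d, armed %d\n", a.state, (int)a.fin_timer_armed);
        printf("b: state %d, armed %d\n", b.state, (int)b.fin_timer_armed);
        return 0;
}

In the patch the timer is the dreq_wait_work delayed work item: it is armed
in sdp_send_disconnect(), cancelled from sdp_fin(), the
RDMA_CM_EVENT_DISCONNECTED handler and sdp_destroy_work(), and handled in
sdp_dreq_wait_timeout_work().
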
diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h
index 8638422..0e7794e 100644
--- a/drivers/infiniband/ulp/sdp/sdp.h
+++ b/drivers/infiniband/ulp/sdp/sdp.h
@@ -75,6 +75,7 @@ extern int sdp_data_debug_level;
#define SDP_ROUTE_TIMEOUT 1000
#define SDP_RETRY_COUNT 5
#define SDP_KEEPALIVE_TIME (120 * 60 * HZ)
+#define SDP_FIN_WAIT_TIMEOUT (60 * HZ)
#define SDP_TX_SIZE 0x40
#define SDP_RX_SIZE 0x40
diff --git a/drivers/infiniband/ulp/sdp/sdp_bcopy.c b/drivers/infiniband/ulp/sdp/sdp_bcopy.c
index a2472e9..f1b3cb0 100644
--- a/drivers/infiniband/ulp/sdp/sdp_bcopy.c
+++ b/drivers/infiniband/ulp/sdp/sdp_bcopy.c
@@ -119,6 +119,9 @@ static void sdp_fin(struct sock *sk)
/* Received a reply FIN - start Infiniband tear down */
sdp_dbg(sk, "%s: Starting Infiniband tear down sending DREQ\n",
__func__);
+
+ sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
+
sdp_exch_state(sk, TCPF_FIN_WAIT1, TCP_TIME_WAIT);
if (sdp_sk(sk)->id) {
diff --git a/drivers/infiniband/ulp/sdp/sdp_cma.c b/drivers/infiniband/ulp/sdp/sdp_cma.c
index 6206835..64f9f38 100644
--- a/drivers/infiniband/ulp/sdp/sdp_cma.c
+++ b/drivers/infiniband/ulp/sdp/sdp_cma.c
@@ -498,8 +498,7 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");
if (sk->sk_state == TCP_LAST_ACK) {
- if (sdp_sk(sk)->dreq_wait_timeout)
- sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
+ sdp_cancel_dreq_wait_timeout(sdp_sk(sk));
sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
@@ -510,6 +509,11 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
rdma_disconnect(id);
if (sk->sk_state != TCP_TIME_WAIT) {
+ if (sk->sk_state == TCP_CLOSE_WAIT) {
+ sdp_dbg(sk, "IB teardown while in TCP_CLOSE_WAIT "
+ "taking reference to let close() finish the work\n");
+ sock_hold(sk, SOCK_REF_CM_TW);
+ }
sdp_set_error(sk, EPIPE);
rc = sdp_disconnected_handler(sk);
}
diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c
index 17e98bb..cbd1adb 100644
--- a/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -443,6 +443,10 @@ done:
static void sdp_send_disconnect(struct sock *sk)
{
+ queue_delayed_work(sdp_workqueue, &sdp_sk(sk)->dreq_wait_work,
+ SDP_FIN_WAIT_TIMEOUT);
+ sdp_sk(sk)->dreq_wait_timeout = 1;
+
sdp_sk(sk)->sdp_disconnect = 1;
sdp_post_sends(sdp_sk(sk), 0);
}
@@ -451,22 +455,19 @@ static void sdp_send_disconnect(struct sock *sk)
* State processing on a close.
* TCP_ESTABLISHED -> TCP_FIN_WAIT1 -> TCP_CLOSE
*/
-
static int sdp_close_state(struct sock *sk)
{
- if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
- return 0;
-
- if (sk->sk_state == TCP_ESTABLISHED)
+ switch (sk->sk_state) {
+ case TCP_ESTABLISHED:
sdp_exch_state(sk, TCPF_ESTABLISHED, TCP_FIN_WAIT1);
- else if (sk->sk_state == TCP_CLOSE_WAIT) {
+ break;
+ case TCP_CLOSE_WAIT:
sdp_exch_state(sk, TCPF_CLOSE_WAIT, TCP_LAST_ACK);
-
- sdp_sk(sk)->dreq_wait_timeout = 1;
- queue_delayed_work(sdp_workqueue, &sdp_sk(sk)->dreq_wait_work,
- TCP_FIN_TIMEOUT);
- } else
+ break;
+ default:
return 0;
+ }
+
return 1;
}
@@ -836,6 +837,11 @@ static int sdp_ioctl(struct sock *sk, int cmd, unsigned long arg)
void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
{
+ if (!ssk->dreq_wait_timeout)
+ return;
+
+ sdp_dbg(&ssk->isk.sk, "cancelling dreq wait timeout\n");
+
ssk->dreq_wait_timeout = 0;
cancel_delayed_work(&ssk->dreq_wait_work);
atomic_dec(ssk->isk.sk.sk_prot->orphan_count);
@@ -847,8 +853,7 @@ void sdp_destroy_work(struct work_struct *work)
struct sock *sk = &ssk->isk.sk;
sdp_dbg(sk, "%s: refcnt %d\n", __func__, atomic_read(&sk->sk_refcnt));
- if (ssk->dreq_wait_timeout)
- sdp_cancel_dreq_wait_timeout(ssk);
+ sdp_cancel_dreq_wait_timeout(ssk);
if (sk->sk_state == TCP_TIME_WAIT)
sock_put(sk, SOCK_REF_CM_TW);
@@ -868,15 +873,21 @@ void sdp_dreq_wait_timeout_work(struct work_struct *work)
lock_sock(sk);
- if (!sdp_sk(sk)->dreq_wait_timeout) {
+ if (!sdp_sk(sk)->dreq_wait_timeout ||
+ !((1 << sk->sk_state) & (TCPF_FIN_WAIT1 | TCPF_LAST_ACK))) {
release_sock(sk);
return;
}
- sdp_dbg(sk, "%s: timed out waiting for DREQ\n", __func__);
+ sdp_warn(sk, "timed out waiting for FIN/DREQ. "
+ "going into abortive close.\n");
sdp_sk(sk)->dreq_wait_timeout = 0;
- sdp_exch_state(sk, TCPF_LAST_ACK, TCP_TIME_WAIT);
+
+ if (sk->sk_state == TCP_FIN_WAIT1)
+ atomic_dec(ssk->isk.sk.sk_prot->orphan_count);
+
+ sdp_exch_state(sk, TCPF_LAST_ACK | TCPF_FIN_WAIT1, TCP_TIME_WAIT);
release_sock(sk);
--
1.5.3
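
One more note, again not part of the patch: the reason
sdp_dreq_wait_timeout_work() now re-checks both the dreq_wait_timeout flag
and the socket state after taking the socket lock is that the peer's FIN can
race in and cancel the wait between the timer firing and the work actually
running. Below is a small userspace model of that check, with a pthread
mutex standing in for lock_sock()/release_sock() and illustrative names.

/* Userspace model of the "re-check under the lock" pattern; not SDP code. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

enum conn_state { FIN_WAIT1, LAST_ACK, TIME_WAIT };

struct conn {
        pthread_mutex_t lock;
        enum conn_state state;
        bool dreq_wait_armed;
};

/* The FIN handler: runs under the lock and cancels the pending timeout. */
static void handle_peer_fin(struct conn *c)
{
        pthread_mutex_lock(&c->lock);
        c->dreq_wait_armed = false;
        c->state = TIME_WAIT;
        pthread_mutex_unlock(&c->lock);
}

/* The deferred timeout handler: act only if still armed and still waiting. */
static void dreq_wait_timeout_work(struct conn *c)
{
        pthread_mutex_lock(&c->lock);
        if (!c->dreq_wait_armed ||
            (c->state != FIN_WAIT1 && c->state != LAST_ACK)) {
                pthread_mutex_unlock(&c->lock);
                return;
        }
        c->dreq_wait_armed = false;
        fprintf(stderr, "timed out waiting for FIN/DREQ, abortive close\n");
        c->state = TIME_WAIT;
        pthread_mutex_unlock(&c->lock);
}

int main(void)
{
        struct conn c = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .state = LAST_ACK,
                .dreq_wait_armed = true,
        };

        handle_peer_fin(&c);            /* the FIN wins the race ...           */
        dreq_wait_timeout_work(&c);     /* ... so the late timeout is a no-op  */
        printf("state %d, armed %d\n", c.state, (int)c.dreq_wait_armed);
        return 0;
}

With that check in place a stale timeout is a harmless no-op rather than a
spurious abortive close.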