[ofa-general] [PATCH 4/4] RDMA/nes: Fix hang issues for large cluster dynamic connections

Faisal Latif faisal.latif at intel.com
Wed Apr 22 12:09:58 PDT 2009


Running large cluster setup, we are hanging after many hours of testing.
Fixing required going over the code and making sure the rexmit entry was
properly removed based on the cm_node's state and packet received.
Also when recieing FIN packet, making sure seq# and there were no errors
before calling handle_fin().

Following are the changes done in nes_cm.c.

* handle_ack_pkt() need to return error value, so in case of error,
  handle_fin() is not called. Some celanup done while going over the code.

* handle_rst_pkt(), handling of cm_node's NES_CM_STATE_LAST_ACK is missing.

* process_packet(), in case of FIN only packet is received, call
  check_seq() before processing.

* in handle_fin_pkt(), we are calling cleanup_retrans_entry() for all
  conditions, even if the packets needs to be dropped.

Signed-off-by: Faisal Latif <faisal.latif at intel.com>
---
 drivers/infiniband/hw/nes/nes_cm.c |   56 ++++++++++++++++--------------------
 1 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index bcfd3c6..d1577a4 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1330,18 +1330,20 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
 	nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
 		"refcnt=%d\n", cm_node, cm_node->state,
 		atomic_read(&cm_node->ref_count));
-	cm_node->tcp_cntxt.rcv_nxt++;
-	cleanup_retrans_entry(cm_node);
 	switch (cm_node->state) {
 	case NES_CM_STATE_SYN_RCVD:
 	case NES_CM_STATE_SYN_SENT:
 	case NES_CM_STATE_ESTABLISHED:
 	case NES_CM_STATE_MPAREQ_SENT:
 	case NES_CM_STATE_MPAREJ_RCVD:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_LAST_ACK;
 		send_fin(cm_node, NULL);
 		break;
 	case NES_CM_STATE_FIN_WAIT1:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_CLOSING;
 		send_ack(cm_node, NULL);
 		/* Wait for ACK as this is simultanous close..
@@ -1349,11 +1351,15 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
 		* Just rm the node.. Done.. */
 		break;
 	case NES_CM_STATE_FIN_WAIT2:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_TIME_WAIT;
 		send_ack(cm_node, NULL);
 		schedule_nes_timer(cm_node, NULL,  NES_TIMER_TYPE_CLOSE, 1, 0);
 		break;
 	case NES_CM_STATE_TIME_WAIT:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_CLOSED;
 		rem_ref_cm_node(cm_node->cm_core, cm_node);
 		break;
@@ -1389,7 +1395,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		passive_state = atomic_add_return(1, &cm_node->passive_state);
 		if (passive_state ==  NES_SEND_RESET_EVENT)
 			create_event(cm_node, NES_CM_EVENT_RESET);
-		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_CLOSED;
 		dev_kfree_skb_any(skb);
 		break;
@@ -1403,17 +1408,16 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		active_open_err(cm_node, skb, reset);
 		break;
 	case NES_CM_STATE_CLOSED:
-		cleanup_retrans_entry(cm_node);
 		drop_packet(skb);
 		break;
+	case NES_CM_STATE_LAST_ACK:
+		cm_node->cm_id->rem_ref(cm_node->cm_id);
 	case NES_CM_STATE_TIME_WAIT:
-		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_CLOSED;
 		rem_ref_cm_node(cm_node->cm_core, cm_node);
 		drop_packet(skb);
 		break;
 	case NES_CM_STATE_FIN_WAIT1:
-		cleanup_retrans_entry(cm_node);
 		nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
 	default:
 		drop_packet(skb);
@@ -1460,6 +1464,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
 				NES_PASSIVE_STATE_INDICATED);
 		break;
 	case NES_CM_STATE_MPAREQ_SENT:
+		cleanup_retrans_entry(cm_node);
 		if (res_type == NES_MPA_REQUEST_REJECT) {
 			type = NES_CM_EVENT_MPA_REJECT;
 			cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
@@ -1657,49 +1662,39 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 	}
 }
 
-static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 	struct tcphdr *tcph)
 {
 	int datasize = 0;
 	u32 inc_sequence;
 	u32 rem_seq_ack;
 	u32 rem_seq;
-	int ret;
+	int ret = 0;
 	int optionsize;
 	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
 
 	if (check_seq(cm_node, tcph, skb))
-		return;
+		return -EINVAL;
 
 	skb_pull(skb, tcph->doff << 2);
 	inc_sequence = ntohl(tcph->seq);
 	rem_seq = ntohl(tcph->seq);
 	rem_seq_ack =  ntohl(tcph->ack_seq);
 	datasize = skb->len;
-	cleanup_retrans_entry(cm_node);
 	switch (cm_node->state) {
 	case NES_CM_STATE_SYN_RCVD:
 		/* Passive OPEN */
+		cleanup_retrans_entry(cm_node);
 		ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
 		if (ret)
 			break;
 		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
-		if (cm_node->tcp_cntxt.rem_ack_num !=
-		    cm_node->tcp_cntxt.loc_seq_num) {
-			nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
-			cleanup_retrans_entry(cm_node);
-			send_reset(cm_node, skb);
-			return;
-		}
 		cm_node->state = NES_CM_STATE_ESTABLISHED;
-		cleanup_retrans_entry(cm_node);
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
 			handle_rcv_mpa(cm_node, skb);
-		} else { /* rcvd ACK only */
+		} else  /* rcvd ACK only */
 			dev_kfree_skb_any(skb);
-			cleanup_retrans_entry(cm_node);
-		 }
 		break;
 	case NES_CM_STATE_ESTABLISHED:
 		/* Passive OPEN */
@@ -1711,15 +1706,12 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 			drop_packet(skb);
 		break;
 	case NES_CM_STATE_MPAREQ_SENT:
-		cleanup_retrans_entry(cm_node);
 		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
 			handle_rcv_mpa(cm_node, skb);
-		} else { /* Could be just an ack pkt.. */
-			cleanup_retrans_entry(cm_node);
+		} else  /* Could be just an ack pkt.. */
 			dev_kfree_skb_any(skb);
-		}
 		break;
 	case NES_CM_STATE_LISTENING:
 	case NES_CM_STATE_CLOSED:
@@ -1727,11 +1719,10 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		send_reset(cm_node, skb);
 		break;
 	case NES_CM_STATE_LAST_ACK:
+	case NES_CM_STATE_CLOSING:
 		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_CLOSED;
 		cm_node->cm_id->rem_ref(cm_node->cm_id);
-	case NES_CM_STATE_CLOSING:
-		cleanup_retrans_entry(cm_node);
 		rem_ref_cm_node(cm_node->cm_core, cm_node);
 		drop_packet(skb);
 		break;
@@ -1746,9 +1737,11 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 	case NES_CM_STATE_MPAREQ_RCVD:
 	case NES_CM_STATE_UNKNOWN:
 	default:
+		cleanup_retrans_entry(cm_node);
 		drop_packet(skb);
 		break;
 	}
+	return ret;
 }
 
 
@@ -1854,6 +1847,7 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
 	enum nes_tcpip_pkt_type	pkt_type = NES_PKT_TYPE_UNKNOWN;
 	struct tcphdr *tcph = tcp_hdr(skb);
 	u32     fin_set = 0;
+	int ret = 0;
 	skb_pull(skb, ip_hdr(skb)->ihl << 2);
 
 	nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
@@ -1879,17 +1873,17 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		handle_synack_pkt(cm_node, skb, tcph);
 		break;
 	case NES_PKT_TYPE_ACK:
-		handle_ack_pkt(cm_node, skb, tcph);
-		if (fin_set)
+		ret = handle_ack_pkt(cm_node, skb, tcph);
+		if (fin_set && !ret)
 			handle_fin_pkt(cm_node);
 		break;
 	case NES_PKT_TYPE_RST:
 		handle_rst_pkt(cm_node, skb, tcph);
 		break;
 	default:
-		drop_packet(skb);
-		if (fin_set)
+		if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
 			handle_fin_pkt(cm_node);
+		drop_packet(skb);
 		break;
 	}
 }
-- 
1.5.3.3




More information about the general mailing list