[ofa-general] [PATCH 1/1] SDP - Various bzcopy bugs

Jim Mott jim at mellanox.com
Mon Dec 3 13:35:06 PST 2007


The Mellanox regression tests posted a number of failures when
multiple threads were accessing the same sockets concurrently.  In
addition to test failures, there were log messages of the form:
  sdp_sock(54386:19002): Could not reap -5 in-flight sends

This fix handles all these failures and errors.

Signed-off-by: Jim Mott <jim at mellanox.com>
---

Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp.h
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp.h
2007-12-03 11:34:45.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp.h	2007-12-03
13:44:43.000000000 -0600
@@ -173,7 +173,6 @@
 
 	/* BZCOPY data */
 	int   zcopy_thresh;
-	void *zcopy_context;
 
 	struct ib_sge ibsge[SDP_MAX_SEND_SKB_FRAGS + 1];
 	struct ib_wc  ibwc[SDP_NUM_WC];
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_bcopy.c
2007-12-03 11:34:45.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c
2007-12-03 13:44:30.000000000 -0600
@@ -242,15 +242,11 @@
 	++ssk->tx_tail;
 
 	/* TODO: AIO and real zcopy cdoe; add their context support here
*/
-	if (ssk->zcopy_context && skb->data_len) {
+	if (skb->h.raw) {
 		struct bzcopy_state *bz;
-		struct sdp_bsdh *h;
 
-		h = (struct sdp_bsdh *)skb->data;
-		if (h->mid == SDP_MID_DATA) {
-			bz = (struct bzcopy_state *)ssk->zcopy_context;
-			bz->busy--;
-		}
+		bz = (struct bzcopy_state *)skb->h.raw;
+		bz->busy--;
 	}
 
 	return skb;
@@ -751,12 +747,8 @@
 		sdp_post_recvs(ssk);
 		sdp_post_sends(ssk, 0);
 
-		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
-			if (ssk->zcopy_context)
-				sdp_bzcopy_write_space(ssk);
-			else
-				sk_stream_write_space(&ssk->isk.sk);
-		}
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			sk_stream_write_space(&ssk->isk.sk);
 	}
 
 	return ret;
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_main.c
2007-12-03 11:34:51.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c
2007-12-03 13:14:10.000000000 -0600
@@ -1203,10 +1203,24 @@
 
 static inline struct bzcopy_state *sdp_bz_cleanup(struct bzcopy_state
*bz)
 {
-	int i;
+	int i, max_retry;
 	struct sdp_sock *ssk = (struct sdp_sock *)bz->ssk;
 
-	ssk->zcopy_context = NULL;
+	/* Wait for in-flight sends; should be quick */
+	if (bz->busy) {
+		struct sock *sk = &ssk->isk.sk;
+
+		for (max_retry = 0; max_retry < 10000; max_retry++) {
+			poll_send_cq(sk);
+
+			if (!bz->busy)
+				break;
+		}
+
+		if (bz->busy)
+			sdp_warn(sk, "Could not reap %d in-flight
sends\n",
+				 bz->busy);
+	}
 
 	if (bz->pages) {
 		for (i = bz->cur_page; i < bz->page_cnt; i++)
@@ -1280,14 +1294,14 @@
 	}
 
 	up_write(&current->mm->mmap_sem);
-	ssk->zcopy_context = bz;
 
 	return bz;
 
 out_2:
 	up_write(&current->mm->mmap_sem);
+	kfree(bz->pages);
 out_1:
-	sdp_bz_cleanup(bz);
+	kfree(bz);
 
 	return NULL;
 }
@@ -1461,19 +1475,17 @@
 };
 
 /* like sk_stream_memory_free - except measures remote credits */
-static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk)
+static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk,
+					 struct bzcopy_state *bz)
 {
-	struct bzcopy_state *bz = (struct bzcopy_state
*)ssk->zcopy_context;
-
-	BUG_ON(!bz);
 	return slots_free(ssk) > bz->busy;
 }
 
 /* like sk_stream_wait_memory - except waits on remote credits */
-static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p)
+static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p,
+				  struct bzcopy_state *bz)
 {
 	struct sock *sk = &ssk->isk.sk;
-	struct bzcopy_state *bz = (struct bzcopy_state
*)ssk->zcopy_context;
 	int err = 0;
 	long vm_wait = 0;
 	long current_timeo = *timeo_p;
@@ -1481,7 +1493,7 @@
 
 	BUG_ON(!bz);
 
-	if (sdp_bzcopy_slots_avail(ssk))
+	if (sdp_bzcopy_slots_avail(ssk, bz))
 		current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
 
 	while (1) {
@@ -1506,13 +1518,13 @@
 
 		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-		if (sdp_bzcopy_slots_avail(ssk))
+		if (sdp_bzcopy_slots_avail(ssk, bz))
 			break;
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		sk->sk_write_pending++;
 		sk_wait_event(sk, &current_timeo,
-			sdp_bzcopy_slots_avail(ssk) && vm_wait);
+			sdp_bzcopy_slots_avail(ssk, bz) && vm_wait);
 		sk->sk_write_pending--;
 
 		if (vm_wait) {
@@ -1603,7 +1615,8 @@
 			skb = sk->sk_write_queue.prev;
 
 			if (!sk->sk_send_head ||
-			    (copy = size_goal - skb->len) <= 0) {
+			    (copy = size_goal - skb->len) <= 0 ||
+			    bz != (struct bzcopy_state *)skb->h.raw) {
 
 new_segment:
 				/*
@@ -1614,7 +1627,7 @@
 				 * receive credits.
 				 */
 				if (bz) {
-					if
(!sdp_bzcopy_slots_avail(ssk))
+					if (!sdp_bzcopy_slots_avail(ssk,
bz))
 						goto wait_for_sndbuf;
 				} else {
 					if (!sk_stream_memory_free(sk))
@@ -1626,6 +1639,8 @@
 				if (!skb)
 					goto wait_for_memory;
 
+				skb->h.raw = (unsigned char *)bz;
+
 				/*
 				 * Check whether we can use HW checksum.
 				 */
@@ -1691,7 +1706,7 @@
 			if (copied)
 				sdp_push(sk, ssk, flags & ~MSG_MORE,
mss_now, TCP_NAGLE_PUSH);
 
-			err = (bz) ? sdp_bzcopy_wait_memory(ssk, &timeo)
:
+			err = (bz) ? sdp_bzcopy_wait_memory(ssk, &timeo,
bz) :
 				     sk_stream_wait_memory(sk, &timeo);
 			if (err)
 				goto do_error;
@@ -1704,24 +1719,10 @@
 out:
 	if (copied) {
 		sdp_push(sk, ssk, flags, mss_now, ssk->nonagle);
-		if (bz) {
-			int max_retry;
-
-			/* Wait for in-flight sends; should be quick */
-			for (max_retry = 0; max_retry < 10000;
max_retry++) {
-				if (!bz->busy)
-					break;
-
-				poll_send_cq(sk);
-			}
-
-			if (bz->busy)
-				sdp_warn(sk,
-					 "Could not reap %d in-flight
sends\n",
-					 bz->busy);
 
+		if (bz)
 			bz = sdp_bz_cleanup(bz);
-		} else
+		else
 			if (size > send_poll_thresh)
 				poll_send_cq(sk);
 	}



More information about the general mailing list