[ofa-general] [PATCH 1/1 V2] SDP - Various bzcopy fixes
Jim Mott
jim at mellanox.com
Tue Dec 4 12:16:47 PST 2007
The Mellanox regression tests reported a number of failures when
multiple threads accessed the same socket concurrently. In addition
to the test failures, the log contained messages of the form:

sdp_sock(54386:19002): Could not reap -5 in-flight sends

This patch fixes all of these failures and errors.
V2 updates the patch for 2.6.22 and later kernels, where the sk_buff
structure has changed.
Signed-off-by: Jim Mott <jim at mellanox.com>
---
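For reference, the core of the change is to stop keeping a single
zcopy_context pointer on the socket and instead let every queued buffer
carry a pointer to its own bzcopy_state in the sk_buff control block
(skb->cb), so the completion path credits exactly the request that
buffer belongs to. Below is a minimal userspace sketch of that
bookkeeping, assuming made-up names (mock_skb, queue_skb, complete_skb)
and a plain 48-byte cb[] array standing in for sk_buff->cb; it is an
illustration only, not SDP code.

#include <stdio.h>
#include <string.h>

struct bzcopy_state {
	int busy;		/* sends still in flight for this request */
};

struct mock_skb {
	char cb[48];		/* stand-in for the sk_buff->cb scratch area */
};

/* Send path: remember which bzcopy request this buffer belongs to. */
static void queue_skb(struct mock_skb *skb, struct bzcopy_state *bz)
{
	memcpy(skb->cb, &bz, sizeof(bz));	/* store the pointer itself in cb */
	if (bz)
		bz->busy++;
}

/* Completion path: credit the owning request, if any. */
static void complete_skb(struct mock_skb *skb)
{
	struct bzcopy_state *bz;

	memcpy(&bz, skb->cb, sizeof(bz));	/* recover the pointer stored above */
	if (bz)
		bz->busy--;
}

int main(void)
{
	struct bzcopy_state bz = { 0 };
	struct mock_skb a = { { 0 } }, b = { { 0 } };

	queue_skb(&a, &bz);
	queue_skb(&b, &bz);
	complete_skb(&a);
	complete_skb(&b);
	printf("in-flight sends after completions: %d\n", bz.busy);
	return 0;
}

The sketch uses memcpy() only to sidestep alignment questions; the
patch itself casts skb->cb to a struct bzcopy_state ** and dereferences
it, and the matching bz->busy increment presumably lives on the bzcopy
send path, outside the hunks shown here.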
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp.h
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp.h	2007-12-04 12:08:38.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp.h	2007-12-04 12:08:59.000000000 -0600
@@ -173,7 +173,6 @@
/* BZCOPY data */
int zcopy_thresh;
- void *zcopy_context;
struct ib_sge ibsge[SDP_MAX_SEND_SKB_FRAGS + 1];
struct ib_wc ibwc[SDP_NUM_WC];
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_bcopy.c	2007-12-04 12:08:38.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c	2007-12-04 12:55:45.000000000 -0600
@@ -218,6 +218,7 @@
struct ib_device *dev;
struct sdp_buf *tx_req;
struct sk_buff *skb;
+ struct bzcopy_state *bz;
int i, frags;
if (unlikely(mseq != ssk->tx_tail)) {
@@ -242,16 +243,9 @@
++ssk->tx_tail;
/* TODO: AIO and real zcopy cdoe; add their context support here */
- if (ssk->zcopy_context && skb->data_len) {
- struct bzcopy_state *bz;
- struct sdp_bsdh *h;
-
- h = (struct sdp_bsdh *)skb->data;
- if (h->mid == SDP_MID_DATA) {
- bz = (struct bzcopy_state *)ssk->zcopy_context;
- bz->busy--;
- }
- }
+ bz = *(struct bzcopy_state **)skb->cb;
+ if (bz)
+ bz->busy--;
return skb;
}
@@ -751,12 +745,8 @@
sdp_post_recvs(ssk);
sdp_post_sends(ssk, 0);
- if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
- if (ssk->zcopy_context)
- sdp_bzcopy_write_space(ssk);
- else
- sk_stream_write_space(&ssk->isk.sk);
- }
+ if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+ sk_stream_write_space(&ssk->isk.sk);
}
return ret;
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_main.c	2007-12-04 12:08:38.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c	2007-12-04 12:54:34.000000000 -0600
@@ -1203,10 +1203,24 @@
static inline struct bzcopy_state *sdp_bz_cleanup(struct bzcopy_state *bz)
{
- int i;
+ int i, max_retry;
struct sdp_sock *ssk = (struct sdp_sock *)bz->ssk;
- ssk->zcopy_context = NULL;
+ /* Wait for in-flight sends; should be quick */
+ if (bz->busy) {
+ struct sock *sk = &ssk->isk.sk;
+
+ for (max_retry = 0; max_retry < 10000; max_retry++) {
+ poll_send_cq(sk);
+
+ if (!bz->busy)
+ break;
+ }
+
+ if (bz->busy)
+ sdp_warn(sk, "Could not reap %d in-flight
sends\n",
+ bz->busy);
+ }
if (bz->pages) {
for (i = bz->cur_page; i < bz->page_cnt; i++)
@@ -1280,14 +1294,14 @@
}
up_write(&current->mm->mmap_sem);
- ssk->zcopy_context = bz;
return bz;
out_2:
up_write(&current->mm->mmap_sem);
+ kfree(bz->pages);
out_1:
- sdp_bz_cleanup(bz);
+ kfree(bz);
return NULL;
}
@@ -1461,19 +1475,17 @@
};
/* like sk_stream_memory_free - except measures remote credits */
-static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk)
+static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk,
+ struct bzcopy_state *bz)
{
- struct bzcopy_state *bz = (struct bzcopy_state *)ssk->zcopy_context;
-
- BUG_ON(!bz);
return slots_free(ssk) > bz->busy;
}
/* like sk_stream_wait_memory - except waits on remote credits */
-static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p)
+static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p,
+ struct bzcopy_state *bz)
{
struct sock *sk = &ssk->isk.sk;
- struct bzcopy_state *bz = (struct bzcopy_state *)ssk->zcopy_context;
int err = 0;
long vm_wait = 0;
long current_timeo = *timeo_p;
@@ -1481,7 +1493,7 @@
BUG_ON(!bz);
- if (sdp_bzcopy_slots_avail(ssk))
+ if (sdp_bzcopy_slots_avail(ssk, bz))
current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
while (1) {
@@ -1506,13 +1518,13 @@
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- if (sdp_bzcopy_slots_avail(ssk))
+ if (sdp_bzcopy_slots_avail(ssk, bz))
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
sk_wait_event(sk, &current_timeo,
- sdp_bzcopy_slots_avail(ssk) && vm_wait);
+ sdp_bzcopy_slots_avail(ssk, bz) && vm_wait);
sk->sk_write_pending--;
if (vm_wait) {
@@ -1603,7 +1615,8 @@
skb = sk->sk_write_queue.prev;
if (!sk->sk_send_head ||
- (copy = size_goal - skb->len) <= 0) {
+ (copy = size_goal - skb->len) <= 0 ||
+ bz != *(struct bzcopy_state **)skb->cb) {
new_segment:
/*
@@ -1614,7 +1627,7 @@
* receive credits.
*/
if (bz) {
- if (!sdp_bzcopy_slots_avail(ssk))
+ if (!sdp_bzcopy_slots_avail(ssk, bz))
goto wait_for_sndbuf;
} else {
if (!sk_stream_memory_free(sk))
@@ -1626,6 +1639,8 @@
if (!skb)
goto wait_for_memory;
+ *((struct bzcopy_state **)skb->cb) = bz;
+
/*
* Check whether we can use HW checksum.
*/
@@ -1691,7 +1706,7 @@
if (copied)
sdp_push(sk, ssk, flags & ~MSG_MORE,
mss_now, TCP_NAGLE_PUSH);
- err = (bz) ? sdp_bzcopy_wait_memory(ssk, &timeo) :
+ err = (bz) ? sdp_bzcopy_wait_memory(ssk, &timeo, bz) :
sk_stream_wait_memory(sk, &timeo);
if (err)
goto do_error;
@@ -1704,24 +1719,10 @@
out:
if (copied) {
sdp_push(sk, ssk, flags, mss_now, ssk->nonagle);
- if (bz) {
- int max_retry;
-
- /* Wait for in-flight sends; should be quick */
- for (max_retry = 0; max_retry < 10000; max_retry++) {
- if (!bz->busy)
- break;
-
- poll_send_cq(sk);
- }
-
- if (bz->busy)
- sdp_warn(sk,
- "Could not reap %d in-flight
sends\n",
- bz->busy);
+ if (bz)
bz = sdp_bz_cleanup(bz);
- } else
+ else
if (size > send_poll_thresh)
poll_send_cq(sk);
}