[ofa-general] [PATCH 2/3] RDS: Fix RDS congestion monitoring
Olaf Kirch
olaf.kirch at oracle.com
Wed May 7 03:54:13 PDT 2008
commit 01b72f27721c2dccf42dd5eea2e5d5e573cd8585
Author: Olaf Kirch <olaf.kirch at oracle.com>
Date: Wed May 7 10:40:36 2008 +0200
Fix RDS congestion monitoring
The RDS congestion monitoring code tries to help applications
deal with remote congestion more efficiently. If enabled,
an application that tried to send to a congested port will
receive a notification as soon as the port becomes uncongested
again. For efficiency reasons, the application isn't given a
complete 8K congestion bitmap, but a 64bit mask that represents
the ports having changed, with port N being represented by
(1 << (port % 64))
This code had several bugs in it:
- the macro used to translate port numbers to the mask bit
shifted integer 1, not 1ULL, resulting in undefined
behavior when (port % 64) >= 32
- rds_ib_cong_recv computes the 64bit mask of all ports
that changed from congested to uncongested. It got the
bit arithmetics wrong.
Also, it used be64_to_cpu to convert the mask, which
is wrong, as the congestion map is little endian
- in the IB send completion handler, we need to check
whether there is a pending congestion map update we
need to send.
- in rds_poll, we should grab the rs_lock spinlock
when testing whether rs_cong_mask is non-zero
Signed-off-by: Olaf Kirch <olaf.kirch at oracle.com>
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 7c20dd4..2fa2d0e 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -165,8 +165,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
if (rds_cong_updated_since(&rs->rs_cong_track))
mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
} else {
+ spin_lock(&rs->rs_lock);
if (rs->rs_cong_notify)
mask |= (POLLIN | POLLRDNORM);
+ spin_unlock(&rs->rs_lock);
}
if (!list_empty(&rs->rs_recv_queue)
|| !list_empty(&rs->rs_notify_queue))
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 4ec85ce..beeb539 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -238,7 +238,7 @@ void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
if (waitqueue_active(&rds_poll_waitq))
wake_up_all(&rds_poll_waitq);
- if (!list_empty(&rds_cong_monitor)) {
+ if (portmask && !list_empty(&rds_cong_monitor)) {
unsigned long flags;
struct rds_sock *rs;
diff --git a/net/rds/ib_rds.h b/net/rds/ib_rds.h
index cea73fc..e098036 100644
--- a/net/rds/ib_rds.h
+++ b/net/rds/ib_rds.h
@@ -176,7 +176,7 @@ struct rds_info_tcp_socket {
*/
#define RDS_CONG_MONITOR_SIZE 64
#define RDS_CONG_MONITOR_BIT(port) (((unsigned int) port) % RDS_CONG_MONITOR_SIZE)
-#define RDS_CONG_MONITOR_MASK(port) (1 << RDS_CONG_MONITOR_BIT(port))
+#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))
/*
* RDMA related types
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 6c9cc9e..8ffbb0c 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -639,8 +639,9 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
src = addr + frag_off;
dst = (void *)map->m_page_addrs[map_page] + map_off;
for (k = 0; k < to_copy; k += 8) {
- /* Record ports that became uncongested */
- uncongested |= *src & (*src ^ *dst);
+ /* Record ports that became uncongested, ie
+ * bits that changed from 0 to 1. */
+ uncongested |= ~(*src) & *dst;
*dst++ = *src++;
}
kunmap_atomic(addr, KM_SOFTIRQ0);
@@ -662,7 +663,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
}
/* the congestion map is in little endian order */
- uncongested = be64_to_cpu(uncongested);
+ uncongested = le64_to_cpu(uncongested);
rds_cong_map_updated(map, uncongested);
}
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 724167c..567f62f 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -218,7 +218,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
rds_ib_ring_free(&ic->i_send_ring, completed);
- if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
+ if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
+ || test_bit(0, &conn->c_map_queued))
queue_delayed_work(rds_wq, &conn->c_send_w, 0);
/* We expect errors as the qp is drained during shutdown */
--
Olaf Kirch | --- o --- Nous sommes du soleil we love when we play
okir at lst.de | / | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax
More information about the general
mailing list