[ofa-general] [GIT PULL] 2.6.22: please pull rdma-dev.git
Sean Hefty
sean.hefty at intel.com
Thu Apr 5 14:33:35 PDT 2007
Roland, please review and pull patches from
git.openfabrics.org/~shefty/rdma-dev.git for-roland
This will pull in some patches that I would like queued for 2.6.22.
Sean Hefty (6):
rdma_ucm: simplify ucma_get_event code
ib_ucm: simplify ib_ucm_event code
ib_sa: set src_path_bits correctly in ib_init_ah_from_path
IB/cm: limit cm message timeout
IB/mad: Fix GRH handling for sent/received MADs
IB/ipoib: use ib_init_ah_from_path to initialize ah_attr
Patch details are listed below for easier review / feedback.
- Sean
commit 6042f5b86a92af4392c85949049f237396447d69
Author: Sean Hefty <sean.hefty at intel.com>
Date: Thu Apr 5 11:50:11 2007 -0700
IB/ipoib: use ib_init_ah_from_path to initialize ah_attr
To support destinations that are not on the local IB subnet,
IPoIB should include the GRH information when constructing an
address handle. Using the existing ib_init_ah_from_path call
will do this for us.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 0741c6d..5a9ff7f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -395,14 +395,10 @@ static void path_rec_completion(int status,
skb_queue_head_init(&skqueue);
if (!status) {
- struct ib_ah_attr av = {
- .dlid = be16_to_cpu(pathrec->dlid),
- .sl = pathrec->sl,
- .port_num = priv->port,
- .static_rate = pathrec->rate
- };
-
- ah = ipoib_create_ah(dev, priv->pd, &av);
+ struct ib_ah_attr av;
+
+ if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
+ ah = ipoib_create_ah(dev, priv->pd, &av);
}
spin_lock_irqsave(&priv->lock, flags);
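A note for review: the point of switching to ib_init_ah_from_path() is that it
also fills in the GRH part of the ah_attr when the path leaves the local
subnet. Roughly, the off-subnet case reduces to something like the sketch
below (this is illustrative, not a copy of the helper; the hop_limit test and
the GID-table lookup for sgid_index are assumptions on my part):
        /* sketch only: what ib_init_ah_from_path() is expected to add for an
         * off-subnet destination, i.e. a path record with hop_limit > 1 */
        if (rec->hop_limit > 1) {
                ah_attr->ah_flags          = IB_AH_GRH;
                ah_attr->grh.dgid          = rec->dgid;
                ah_attr->grh.sgid_index    = sgid_index; /* from the local GID table */
                ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
                ah_attr->grh.hop_limit     = rec->hop_limit;
                ah_attr->grh.traffic_class = rec->traffic_class;
        }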
commit 86cbcbb332b85501df98a7dccd8e2d40d1c2ffa0
Author: Sean Hefty <sean.hefty at intel.com>
Date: Thu Apr 5 11:49:21 2007 -0700
IB/mad: Fix GRH handling for sent/received MADs
We need to set the SGID index when sending routed MADs, and pass the
received GRH information up to userspace when a MAD is received.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c069ebe..7774cf5 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -231,12 +231,17 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
- /* XXX parse GRH */
- packet->mad.hdr.gid_index = 0;
- packet->mad.hdr.hop_limit = 0;
- packet->mad.hdr.traffic_class = 0;
- memset(packet->mad.hdr.gid, 0, 16);
- packet->mad.hdr.flow_label = 0;
+ struct ib_ah_attr ah_attr;
+
+ ib_init_ah_from_wc(agent->device, agent->port_num,
+ mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+
+ packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
+ packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
+ packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
+ memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
}
if (queue_packet(file, agent, packet))
@@ -473,6 +478,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
if (packet->mad.hdr.grh_present) {
ah_attr.ah_flags = IB_AH_GRH;
memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
+ ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
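For reviewers: with this change a userspace MAD consumer finally sees real GRH
data instead of zeros. A rough example of what that looks like on the read
side (illustrative only; it assumes the ib_user_mad layout exported to
userspace in <rdma/ib_user_mad.h>, and dump_grh() is just a made-up helper
name):
        #include <stdio.h>
        #include <arpa/inet.h>
        #include <rdma/ib_user_mad.h>

        /* illustrative helper, not part of the patch */
        static void dump_grh(const struct ib_user_mad *umad)
        {
                if (!umad->hdr.grh_present)
                        return;
                /* gid_index selects our local SGID; gid is the GID to reply to */
                printf("sgid_index %u hop_limit %u tclass %u flow_label 0x%x\n",
                       umad->hdr.gid_index, umad->hdr.hop_limit,
                       umad->hdr.traffic_class,
                       ntohl(umad->hdr.flow_label));
        }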
commit 3bed3bb2d0bb02ca8a590111c57fc1843624d2a4
Author: Sean Hefty <sean.hefty at intel.com>
Date: Thu Apr 5 10:51:16 2007 -0700
IB/cm: limit cm message timeout
Limit the timeout that the ib_cm waits for a response to a message, to
avoid excessively large (on the order of hours) timeout values. This
prevents tracking requests, and consuming resources for them, over
extended periods of time, and allows quicker retries.
This also works around a bug in an Engenio SRP target that sends an
excessively large value (> 1 hour) as its service timeout.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 842cd0b..706fdbf 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -54,6 +54,17 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
+#define PFX "ib_cm: "
+
+/*
+ * Limit CM message timeouts to something reasonable:
+ * 32 seconds per message, with up to 15 retries
+ */
+static int max_timeout = 23;
+module_param(max_timeout, int, 0644);
+MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
+ "(default=23, or ~32 seconds)");
+
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);
@@ -888,11 +899,23 @@ static void cm_format_req(struct cm_req_msg *req_msg,
cm_req_set_init_depth(req_msg, param->initiator_depth);
cm_req_set_remote_resp_timeout(req_msg,
param->remote_cm_response_timeout);
+ if (param->remote_cm_response_timeout > (u8) max_timeout) {
+ printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
+ "%d, decreasing\n", param->remote_cm_response_timeout,
+ max_timeout);
+ cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
+ }
cm_req_set_qp_type(req_msg, param->qp_type);
cm_req_set_flow_ctrl(req_msg, param->flow_control);
cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
cm_req_set_local_resp_timeout(req_msg,
param->local_cm_response_timeout);
+ if (param->local_cm_response_timeout > (u8) max_timeout) {
+ printk(KERN_WARNING PFX "req local_cm_response_timeout %d > "
+ "%d, decreasing\n", param->local_cm_response_timeout,
+ max_timeout);
+ cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout);
+ }
cm_req_set_retry_count(req_msg, param->retry_count);
req_msg->pkey = param->primary_path->pkey;
cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
@@ -1002,6 +1025,11 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
param->remote_cm_response_timeout);
+ if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+ printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n",
+ cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout));
+ cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+ }
cm_id_priv->max_cm_retries = param->max_cm_retries;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
@@ -1401,6 +1429,13 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->tid = req_msg->hdr.tid;
cm_id_priv->timeout_ms = cm_convert_to_ms(
cm_req_get_local_resp_timeout(req_msg));
+ if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) {
+ printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, "
+ "decreasing used timeout_ms\n",
+ cm_req_get_local_resp_timeout(req_msg), max_timeout);
+ cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+ }
+
cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
@@ -2304,6 +2339,12 @@ static int cm_mra_handler(struct cm_work *work)
cm_mra_get_service_timeout(mra_msg);
timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
cm_convert_to_ms(cm_id_priv->av.packet_life_time);
+ if (timeout > cm_convert_to_ms(max_timeout)) {
+ printk(KERN_WARNING PFX "calculated mra timeout %d > %d, "
+ "decreasing used timeout_ms\n", timeout,
+ cm_convert_to_ms(max_timeout));
+ timeout = cm_convert_to_ms(max_timeout);
+ }
spin_lock_irqsave(&cm_id_priv->lock, flags);
switch (cm_id_priv->id.state) {
@@ -2707,6 +2748,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
cm_id->service_id = param->service_id;
cm_id->service_mask = __constant_cpu_to_be64(~0ULL);
cm_id_priv->timeout_ms = param->timeout_ms;
+ if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
+ printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, "
+ "decreasing used timeout_ms\n", param->timeout_ms,
+ cm_convert_to_ms(max_timeout));
+ cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
+ }
cm_id_priv->max_cm_retries = param->max_cm_retries;
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret)
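For reviewers, the arithmetic behind the 23: the CM timeout fields are 5-bit
exponents, and the real timeout is roughly 4.096us * 2^t. Approximating
4.096us as 4us gives about (1 << (t - 8)) ms, which appears to be what
cm_convert_to_ms() computes. A sketch (not code from the patch):
        /* illustrative helper: approximate IBA time exponent -> milliseconds */
        static inline int approx_iba_time_to_ms(int t)
        {
                return 1 << (t > 8 ? t - 8 : 0);
        }

        /* t = 23  ->  1 << 15 = 32768 ms, i.e. the "~32 seconds" in the
         * parameter description; an uncapped t = 31 would be 1 << 23 ms,
         * well over two hours per message before any retries. */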
commit e847d67ea97caabb6aaa5b9e8a1c47bba9bc3824
Author: Sean Hefty <sean.hefty at intel.com>
Date: Thu Apr 5 10:51:10 2007 -0700
ib_sa: set src_path_bits correctly in ib_init_ah_from_path
The src_path_bits value must have the base LID masked off, keeping only
the low-order path bits covered by the port's LMC.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 68db633..9a7eaad 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -57,6 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
struct ib_sa_sm_ah {
struct ib_ah *ah;
struct kref ref;
+ u8 src_path_mask;
};
struct ib_sa_port {
@@ -380,6 +381,7 @@ static void update_sm_ah(struct work_struct *work)
}
kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
memset(&ah_attr, 0, sizeof ah_attr);
ah_attr.dlid = port_attr.sm_lid;
@@ -460,6 +462,25 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
}
EXPORT_SYMBOL(ib_sa_cancel_query);
+static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+{
+ struct ib_sa_device *sa_dev;
+ struct ib_sa_port *port;
+ unsigned long flags;
+ u8 src_path_mask;
+
+ sa_dev = ib_get_client_data(device, &sa_client);
+ if (!sa_dev)
+ return 0x7f;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->ah_lock, flags);
+ src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ return src_path_mask;
+}
+
int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
@@ -469,7 +490,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
ah_attr->sl = rec->sl;
- ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
+ ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
+ get_src_path_mask(device, port_num);
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
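A quick worked example of why the fixed 0x7f mask is wrong (numbers made up
for illustration): with LMC = 2 and an SM-assigned base LID of 0x24, the port
owns LIDs 0x24 through 0x27. For a path record whose SLID is 0x26, the source
path bits should be just the low LMC bits:
        /* old:  0x26 & 0x7f            = 0x26  (base LID bits leak in)  */
        /* new:  0x26 & ((1 << 2) - 1)  = 0x02  (only the real path bits) */
        ah_attr->src_path_bits = be16_to_cpu(rec->slid) & ((1 << lmc) - 1);
Everything above the low LMC bits is the base LID and belongs to the port,
not to the path.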
commit 1e6ed3730a3d1db723e4bfccc5f1cfd1b0691aab
Author: Sean Hefty <sean.hefty at intel.com>
Date: Thu Apr 5 10:51:05 2007 -0700
ib_ucm: simplify ib_ucm_event code
Simplify the wait on event code.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index ee51d79..2586a3e 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -407,29 +407,18 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
mutex_lock(&file->file_mutex);
while (list_empty(&file->events)) {
+ mutex_unlock(&file->file_mutex);
- if (file->filp->f_flags & O_NONBLOCK) {
- result = -EAGAIN;
- break;
- }
+ if (file->filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
- if (signal_pending(current)) {
- result = -ERESTARTSYS;
- break;
- }
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->events)))
+ return -ERESTARTSYS;
- prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
-
- mutex_unlock(&file->file_mutex);
- schedule();
mutex_lock(&file->file_mutex);
-
- finish_wait(&file->poll_wait, &wait);
}
- if (result)
- goto done;
-
uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
if (ib_ucm_new_cm_id(uevent->resp.event)) {
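For review, the key point of this change (and of the identical rdma_ucm
change below) is that wait_event_interruptible() already does the
prepare_to_wait/schedule/finish_wait dance and the signal_pending() check,
and that testing the list without the mutex is safe because the while loop
re-checks it with the mutex held after waking. The resulting wait loop has
this shape (names here are illustrative, not the driver's):
        mutex_lock(&file_mutex);
        while (list_empty(&event_list)) {
                mutex_unlock(&file_mutex);

                if (nonblocking)
                        return -EAGAIN;

                /* sleeps until the condition becomes true or a signal arrives */
                if (wait_event_interruptible(poll_wait, !list_empty(&event_list)))
                        return -ERESTARTSYS;

                /* retake the lock; the loop re-checks the list in case
                 * another reader consumed the event first */
                mutex_lock(&file_mutex);
        }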
commit ed0b96bf383b3352c400e684c1b8fcb4868f68f2
Author: Sean Hefty <sean.hefty at intel.com>
Date: Thu Apr 5 10:49:51 2007 -0700
rdma_ucm: simplify ucma_get_event code
Simplify the wait on event code.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index c859134..53b4c94 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -306,26 +306,18 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
mutex_lock(&file->mut);
while (list_empty(&file->event_list)) {
- if (file->filp->f_flags & O_NONBLOCK) {
- ret = -EAGAIN;
- break;
- }
+ mutex_unlock(&file->mut);
- if (signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
+ if (file->filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(file->poll_wait,
+ !list_empty(&file->event_list)))
+ return -ERESTARTSYS;
- prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE);
- mutex_unlock(&file->mut);
- schedule();
mutex_lock(&file->mut);
- finish_wait(&file->poll_wait, &wait);
}
- if (ret)
- goto done;
-
uevent = list_entry(file->event_list.next, struct ucma_event, list);
if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {