[ofa-general] Re: [PATCH RFC/untested v0.5] IPoIB/CM: fix SRQ WR leak
Michael S. Tsirkin
mst at dev.mellanox.co.il
Thu May 17 00:30:28 PDT 2007
> Quoting Roland Dreier <rdreier at cisco.com>:
> Subject: Re: [PATCH RFC/untested v0.5] IPoIB/CM: fix SRQ WR leak
>
> > + *  - Put the QP in the Error State
> > + *  - Wait for the Affiliated Asynchronous Last WQE Reached Event;
> > + *  - either:
> > + *        drain the CQ by invoking the Poll CQ verb and either wait for CQ
> > + *        to be empty or the number of Poll CQ operations has exceeded
> > + *        CQ capacity size;
> > + *  - or
> > + *        post another WR that completes on the same CQ and wait for this
> > + *        WR to return as a WC; (NB: this is the option that we use)
> > + *  and then invoke a Destroy QP or Reset QP.
>
> I guess this last line would look better as
>
> * - invoke a Destroy QP or Reset QP.
Hmm, I would like to quote the spec *literally*. Maybe:
- and then invoke a Destroy QP or Reset QP.
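For reference, the option we use boils down to something like the following
with the kernel verbs API. This is only a sketch, not the actual driver code:
wait_for_last_wqe_event() and wait_for_drain_wc() are hypothetical stand-ins
for the async event handler and the CQ completion handling.

#include <rdma/ib_verbs.h>

void wait_for_last_wqe_event(struct ib_qp *qp);    /* hypothetical helper */
void wait_for_drain_wc(struct ib_cq *cq);          /* hypothetical helper */

/* Sketch: tear down a QP attached to an SRQ per the sequence quoted above. */
static struct ib_qp_attr err_attr = { .qp_state = IB_QPS_ERR };

static struct ib_send_wr drain_wr = {
        .wr_id  = IPOIB_CM_RX_DRAIN_WRID,
        .opcode = IB_WR_SEND
};

static void drain_and_destroy_qp(struct ib_qp *qp)
{
        struct ib_send_wr *bad_wr;

        ib_modify_qp(qp, &err_attr, IB_QP_STATE);  /* QP -> Error */
        wait_for_last_wqe_event(qp);               /* Last WQE Reached event */
        ib_post_send(qp, &drain_wr, &bad_wr);      /* completes in error on the CQ */
        wait_for_drain_wc(qp->send_cq);            /* all earlier WCs are reaped */
        ib_destroy_qp(qp);                         /* now safe to destroy */
}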
> > +static struct ib_qp_attr ipoib_cm_err_attr __read_mostly = {
> > +        .qp_state = IB_QPS_ERR
> > +};
> > +
> > +#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
> > +
> > +static struct ib_send_wr ipoib_cm_rx_drain_wr __read_mostly = {
> > +        .wr_id = IPOIB_CM_RX_DRAIN_WRID,
> > +        .opcode = IB_WR_SEND
> > +};
>
> I don't think these are hot enough to be worth marking as __read_mostly.
> (better to leave them in normal .data, so that the items that really are
> written to end up spaced out more)
OK, thanks for the suggestion.
> > +        qp_attr.qp_state = IB_QPS_INIT;
> > +        qp_attr.port_num = priv->port;
> > +        qp_attr.qkey = 0;
> > +        qp_attr.qp_access_flags = 0;
> > +        ret = ib_modify_qp(priv->cm.rx_drain_qp, &qp_attr,
> > +                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT | IB_QP_QKEY);
> > +        if (ret) {
> > +                ipoib_warn(priv, "failed to modify drain QP to INIT: %d\n", ret);
> > +                goto err_qp;
> > +        }
> > +
> > +        /* We put the QP in error state directly: this way, hardware
> > +         * will immediately generate WC for each WR we post, without
> > +         * sending anything on the wire. */
> > +        qp_attr.qp_state = IB_QPS_ERR;
> > +        ret = ib_modify_qp(priv->cm.rx_drain_qp, &qp_attr, IB_QP_STATE);
> > +        if (ret) {
> > +                ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
> > +                goto err_qp;
> > +        }
>
> This actually seems like a good motivation for the mthca RESET ->
> ERROR fix. We could avoid the transition to INIT if we fixed mthca
> and mlx4, right?
Yes. That was the motivation.
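(For comparison, once RESET -> ERROR works in the drivers, the INIT step
above disappears and the drain QP setup shrinks to a single transition;
a sketch, assuming the mthca and mlx4 fixes are in place:)

        /* A freshly created QP is in RESET; with the driver fix, it can
         * go straight to the error state in one modify call. */
        qp_attr.qp_state = IB_QPS_ERR;
        ret = ib_modify_qp(priv->cm.rx_drain_qp, &qp_attr, IB_QP_STATE);
        if (ret) {
                ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
                goto err_qp;
        }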
> (By the way, any interest in making an mlx4 patch to
> fix that too?)
Easy (I also fixed the RESET -> RESET transition along the way).
IB/mlx4: fix RESET -> ERROR and RESET -> RESET transitions
Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>
---
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5cd7069..c93daab 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -573,7 +573,7 @@ static int to_mlx4_st(enum ib_qp_type type)
 	}
 }
 
-static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr,
+static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
 				   int attr_mask)
 {
 	u8 dest_rd_atomic;
@@ -603,7 +603,7 @@ static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *att
 	return cpu_to_be32(hw_access_flags);
 }
 
-static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
+static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
 			    int attr_mask)
 {
 	if (attr_mask & IB_QP_PKEY_INDEX)
@@ -619,7 +619,7 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
 	path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
 }
 
-static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
 			 struct mlx4_qp_path *path, u8 port)
 {
 	path->grh_mylmc = ah->src_path_bits & 0x7f;
@@ -655,14 +655,14 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
 	return 0;
 }
 
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		      int attr_mask, struct ib_udata *udata)
+static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+			       const struct ib_qp_attr *attr, int attr_mask,
+			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	struct mlx4_qp_context *context;
 	enum mlx4_qp_optpar optpar = 0;
-	enum ib_qp_state cur_state, new_state;
 	int sqd_event;
 	int err = -EINVAL;
@@ -670,34 +670,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (!context)
 		return -ENOMEM;
 
-	mutex_lock(&qp->mutex);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
-		goto out;
-
-	if ((attr_mask & IB_QP_PKEY_INDEX) &&
-	    attr->pkey_index >= dev->dev->caps.pkey_table_len) {
-		goto out;
-	}
-
-	if ((attr_mask & IB_QP_PORT) &&
-	    (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
-	    attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
-		goto out;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
-	    attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
-		goto out;
-	}
-
 	context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
 				     (to_mlx4_st(ibqp->qp_type) << 16));
 	context->flags |= cpu_to_be32(1 << 8); /* DE? */
@@ -920,11 +892,83 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	}
 
 out:
-	mutex_unlock(&qp->mutex);
 	kfree(context);
 	return err;
 }
 
+static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
+static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
+	[IB_QPT_UD]  = (IB_QP_PKEY_INDEX	|
+			IB_QP_PORT		|
+			IB_QP_QKEY),
+	[IB_QPT_UC]  = (IB_QP_PKEY_INDEX	|
+			IB_QP_PORT		|
+			IB_QP_ACCESS_FLAGS),
+	[IB_QPT_RC]  = (IB_QP_PKEY_INDEX	|
+			IB_QP_PORT		|
+			IB_QP_ACCESS_FLAGS),
+	[IB_QPT_SMI] = (IB_QP_PKEY_INDEX	|
+			IB_QP_QKEY),
+	[IB_QPT_GSI] = (IB_QP_PKEY_INDEX	|
+			IB_QP_QKEY),
+};
+
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx4_ib_qp *qp = to_mqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	int err = -EINVAL;
+
+	mutex_lock(&qp->mutex);
+
+	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+		goto out;
+
+	if ((attr_mask & IB_QP_PKEY_INDEX) &&
+	    attr->pkey_index >= dev->dev->caps.pkey_table_len) {
+		goto out;
+	}
+
+	if ((attr_mask & IB_QP_PORT) &&
+	    (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+	    attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
+		goto out;
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+	    attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
+		goto out;
+	}
+
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+		err = 0;
+		goto out;
+	}
+
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
+		err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
+					  mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
+					  IB_QPS_RESET, IB_QPS_INIT);
+		if (err)
+			goto out;
+		cur_state = IB_QPS_INIT;
+	}
+
+	err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+	mutex_unlock(&qp->mutex);
+	return err;
+}
+
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 			    void *wqe)
 {
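The RESET -> ERROR case is the interesting one: there is no direct firmware
command for it, so the code above bounces through INIT internally, with the
dummy mlx4_ib_qp_attr and the per-QP-type mask table supplying the attributes
that the INIT transition requires (their values don't matter, since the QP
never sends anything before reaching the error state). From a consumer's
point of view, something like this now works on a freshly created QP (sketch):

        struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };

        /* QP is still in RESET right after creation; this used to fail
         * and now succeeds via an internal RESET -> INIT -> ERR sequence.
         * (RESET -> RESET is likewise accepted as a no-op.) */
        ret = ib_modify_qp(qp, &attr, IB_QP_STATE);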
--
MST