[ofa-general] ***SPAM*** Re: [PATCH 3/12 V2] libmlx4: XRC base implementation

Jack Morgenstein jackm at dev.mellanox.co.il
Mon Jul 14 01:24:44 PDT 2008


On Monday 14 July 2008 10:59, Ron Livne wrote:
> Hi Jack:
> 
> +#ifdef HAVE_IBV_XRC_OPS
> +       context->ibv_ctx.more_ops = &mlx4_more_ops;
> +#endif
> 
> As you know, I'm using more_ops for my new ibv_create_qp_expanded user verb.
> I need the context->ibv_ctx.more_ops to be always initialized, even if
> HAVE_IBV_XRC_OPS isn't set.
> 
> I'll release a patch for it.
> 
> Ron

Your approach is not correct.

The whole point of  
> > +AC_CHECK_MEMBER(struct ibv_context.more_ops,
> > +    [AC_DEFINE([HAVE_IBV_MORE_OPS], 1, [Define to 1 if more_ops is a member of ibv_context])],,
> > +    [#include <infiniband/verbs.h>])
is to check that the more_ops member exists in the ibv_context structure.
It may not exist if an older version of libibverbs is installed on the host,
and this possibility must be checked for (as is done by AC_CHECK_MEMBER above).

If more_ops does exist, then the HAVE_IBV_MORE_OPS macro gets defined (at libmlx4 installation
time) in config.h, and more_ops will get properly initialized.

If it does not exist, then the macro stays undefined and the initialization is
compiled out, so you will not clobber the application by writing to memory
which is outside the ibv_context structure.

- Jack

> On Thu, Jul 10, 2008 at 6:51 PM, Jack Morgenstein
> <jackm at dev.mellanox.co.il> wrote:
> > From 33c27150ed20ff34622a45abd5ff27727f4c34aa Mon Sep 17 00:00:00 2001
> > From: Jack Morgenstein <jackm at dev.mellanox.co.il>
> > Date: Wed, 9 Jul 2008 13:55:24 +0300
> > Subject: [PATCH] XRC implementation, consolidated:
> >
> > xrc ops were moved to their own structure at the end of
> > struct ibv_context (to preserve binary compatibility).
> >
> > Check for ibv_context.xrc_ops member via AC_CHECK_MEMBER
> >
> > XRC QPs have MSB set in qp number, for identification in
> > completion handling.
> >
> > jackm at mtldesk006:/local/roland/libmlx4> head -n 20 patches/xrc_consolidated.patch
> > XRC implementation, consolidated:
> >
> > xrc ops were moved to their own structure at the end of
> > struct ibv_context (to preserve binary compatibility).
> >
> > Check for ibv_context.xrc_ops member via AC_CHECK_MEMBER
> >
> > XRC QPs have MSB set in qp number, for identification in
> > completion handling.
> >
> > Various bug fixes.
> > (OFED 1.3 commit 39fe7f47e8fc07f356098df048d00740ba585fc5)
> >
> > V2:
> > 1. checkpatch.pl cleanup
> > 2. Changed xrc_ops to more ops
> > 3. Check for xrc verbs in ibv_more_ops via AC_CHECK_MEMBER
> >
> > Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
> > ---
> >  configure.in   |    3 +
> >  src/cq.c       |   35 ++++++++++++--
> >  src/mlx4-abi.h |   16 ++++++
> >  src/mlx4.c     |   20 ++++++++
> >  src/mlx4.h     |   46 ++++++++++++++++++
> >  src/qp.c       |    7 ++-
> >  src/srq.c      |   50 +++++++++++++++++++
> >  src/verbs.c    |  144 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
> >  src/wqe.h      |    2 +-
> >  9 files changed, 303 insertions(+), 20 deletions(-)
> >
> > diff --git a/configure.in b/configure.in
> > index 25f27f7..9304539 100644
> > --- a/configure.in
> > +++ b/configure.in
> > @@ -42,6 +42,12 @@ AC_CHECK_HEADER(valgrind/memcheck.h,
> >  dnl Checks for typedefs, structures, and compiler characteristics.
> >  AC_C_CONST
> >  AC_CHECK_SIZEOF(long)
> > +AC_CHECK_MEMBER(struct ibv_context.more_ops,
> > +    [AC_DEFINE([HAVE_IBV_MORE_OPS], 1, [Define to 1 if more_ops is a member of ibv_context])],,
> > +    [#include <infiniband/verbs.h>])
> > +AC_CHECK_MEMBER(struct ibv_more_ops.create_xrc_srq,
> > +    [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if have xrc ops])],,
> > +    [#include <infiniband/verbs.h>])
> >
> >  dnl Checks for library functions
> >  AC_CHECK_FUNC(ibv_read_sysfs_file, [],
> > diff --git a/src/cq.c b/src/cq.c
> > index 68e16e9..c598b87 100644
> > --- a/src/cq.c
> > +++ b/src/cq.c
> > @@ -196,10 +196,12 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
> >        struct mlx4_cqe *cqe;
> >        struct mlx4_srq *srq;
> >        uint32_t qpn;
> > +       uint32_t srqn;
> >        uint32_t g_mlpath_rqpn;
> >        uint16_t wqe_index;
> >        int is_error;
> >        int is_send;
> > +       int is_src_recv = 0;
> >
> >        cqe = next_cqe_sw(cq);
> >        if (!cqe)
> > @@ -221,20 +223,30 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
> >        is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
> >                MLX4_CQE_OPCODE_ERROR;
> >
> > -       if (!*cur_qp ||
> > -           (ntohl(cqe->my_qpn) & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
> > +       if (qpn & MLX4_XRC_QPN_BIT && !is_send) {
> > +               srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff;
> > +               /*
> > +                * We do not have to take the XRC SRQ table lock here,
> > +                * because CQs will be locked while XRC SRQs are removed
> > +                * from the table.
> > +                */
> > +               srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn);
> > +               if (!srq)
> > +                       return CQ_POLL_ERR;
> > +               is_src_recv = 1;
> > +       } else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
> >                /*
> >                 * We do not have to take the QP table lock here,
> >                 * because CQs will be locked while QPs are removed
> >                 * from the table.
> >                 */
> >                *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context),
> > -                                      ntohl(cqe->my_qpn) & 0xffffff);
> > +                                      qpn & 0xffffff);
> >                if (!*cur_qp)
> >                        return CQ_POLL_ERR;
> >        }
> >
> > -       wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
> > +       wc->qp_num = qpn & 0xffffff;
> >
> >        if (is_send) {
> >                wq = &(*cur_qp)->sq;
> > @@ -242,6 +254,10 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
> >                wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
> >                wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
> >                ++wq->tail;
> > +       } else if (is_src_recv) {
> > +               wqe_index = htons(cqe->wqe_index);
> > +               wc->wr_id = srq->wrid[wqe_index];
> > +               mlx4_free_srq_wqe(srq, wqe_index);
> >        } else if ((*cur_qp)->ibv_qp.srq) {
> >                srq = to_msrq((*cur_qp)->ibv_qp.srq);
> >                wqe_index = htons(cqe->wqe_index);
> > @@ -387,6 +403,10 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
> >        uint32_t prod_index;
> >        uint8_t owner_bit;
> >        int nfreed = 0;
> > +       int is_xrc_srq = 0;
> > +
> > +       if (srq && srq->ibv_srq.xrc_cq)
> > +               is_xrc_srq = 1;
> >
> >        /*
> >         * First we need to find the current producer index, so we
> > @@ -405,7 +425,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
> >         */
> >        while ((int) --prod_index - (int) cq->cons_index >= 0) {
> >                cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
> > -               if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
> > +               if (is_xrc_srq &&
> > +                   (ntohl(cqe->g_mlpath_rqpn & 0xffffff) == srq->srqn) &&
> > +                   !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
> > +                       mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
> > +                       ++nfreed;
> > +               } else if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
> >                        if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
> >                                mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
> >                        ++nfreed;
> > diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
> > index 20a40c9..1b1253c 100644
> > --- a/src/mlx4-abi.h
> > +++ b/src/mlx4-abi.h
> > @@ -68,6 +68,14 @@ struct mlx4_resize_cq {
> >        __u64                           buf_addr;
> >  };
> >
> > +#ifdef HAVE_IBV_XRC_OPS
> > +struct mlx4_create_xrc_srq {
> > +       struct ibv_create_xrc_srq       ibv_cmd;
> > +       __u64                           buf_addr;
> > +       __u64                           db_addr;
> > +};
> > +#endif
> > +
> >  struct mlx4_create_srq {
> >        struct ibv_create_srq           ibv_cmd;
> >        __u64                           buf_addr;
> > @@ -90,4 +98,12 @@ struct mlx4_create_qp {
> >        __u8                            reserved[5];
> >  };
> >
> > +#ifdef HAVE_IBV_XRC_OPS
> > +struct mlx4_open_xrc_domain_resp {
> > +       struct ibv_open_xrc_domain_resp ibv_resp;
> > +       __u32                           xrcdn;
> > +       __u32                           reserved;
> > +};
> > +#endif
> > +
> >  #endif /* MLX4_ABI_H */
> > diff --git a/src/mlx4.c b/src/mlx4.c
> > index 34ece39..8a46543 100644
> > --- a/src/mlx4.c
> > +++ b/src/mlx4.c
> > @@ -68,6 +68,16 @@ struct {
> >        HCA(MELLANOX, 0x673c),  /* MT25408 "Hermon" QDR PCIe gen2 */
> >  };
> >
> > +#ifdef HAVE_IBV_MORE_OPS
> > +static struct ibv_more_ops mlx4_more_ops = {
> > +#ifdef HAVE_IBV_XRC_OPS
> > +       .create_xrc_srq   = mlx4_create_xrc_srq,
> > +       .open_xrc_domain  = mlx4_open_xrc_domain,
> > +       .close_xrc_domain = mlx4_close_xrc_domain,
> > +#endif
> > +};
> > +#endif
> > +
> >  static struct ibv_context_ops mlx4_ctx_ops = {
> >        .query_device  = mlx4_query_device,
> >        .query_port    = mlx4_query_port,
> > @@ -123,6 +131,15 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
> >        for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
> >                context->qp_table[i].refcnt = 0;
> >
> > +       context->num_xrc_srqs = resp.qp_tab_size;
> > +       context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1
> > +                                      - MLX4_XRC_SRQ_TABLE_BITS;
> > +       context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1;
> > +
> > +       pthread_mutex_init(&context->xrc_srq_table_mutex, NULL);
> > +       for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
> > +               context->xrc_srq_table[i].refcnt = 0;
> > +
> >        for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
> >                context->db_list[i] = NULL;
> >
> > @@ -155,6 +172,9 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
> >        pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
> >
> >        context->ibv_ctx.ops = mlx4_ctx_ops;
> > +#ifdef HAVE_IBV_XRC_OPS
> > +       context->ibv_ctx.more_ops = &mlx4_more_ops;
> > +#endif
> >
> >        return &context->ibv_ctx;
> >
> > diff --git a/src/mlx4.h b/src/mlx4.h
> > index 827a201..28a8f9c 100644
> > --- a/src/mlx4.h
> > +++ b/src/mlx4.h
> > @@ -79,6 +79,11 @@
> >
> >  #endif
> >
> > +#ifndef HAVE_IBV_MORE_OPS
> > +#undef HAVE_IBV_XRC_OPS
> > +#undef HAVE_IBV_CREATE_QP_EXP
> > +#endif
> > +
> >  #define HIDDEN         __attribute__((visibility ("hidden")))
> >
> >  #define PFX            "mlx4: "
> > @@ -97,6 +97,16 @@ enum {
> >        MLX4_QP_TABLE_MASK              = MLX4_QP_TABLE_SIZE - 1
> >  };
> >
> > +enum {
> > +       MLX4_XRC_SRQ_TABLE_BITS         = 8,
> > +       MLX4_XRC_SRQ_TABLE_SIZE         = 1 << MLX4_XRC_SRQ_TABLE_BITS,
> > +       MLX4_XRC_SRQ_TABLE_MASK         = MLX4_XRC_SRQ_TABLE_SIZE - 1
> > +};
> > +
> > +enum {
> > +       MLX4_XRC_QPN_BIT                = (1 << 23)
> > +};
> > +
> >  enum mlx4_db_type {
> >        MLX4_DB_TYPE_CQ,
> >        MLX4_DB_TYPE_RQ,
> > @@ -157,6 +167,15 @@ struct mlx4_context {
> >        int                             qp_table_shift;
> >        int                             qp_table_mask;
> >
> > +       struct {
> > +               struct mlx4_srq       **table;
> > +               int                     refcnt;
> > +       }                               xrc_srq_table[MLX4_XRC_SRQ_TABLE_SIZE];
> > +       pthread_mutex_t                 xrc_srq_table_mutex;
> > +       int                             num_xrc_srqs;
> > +       int                             xrc_srq_table_shift;
> > +       int                             xrc_srq_table_mask;
> > +
> >        struct mlx4_db_page            *db_list[MLX4_NUM_DB_TYPE];
> >        pthread_mutex_t                 db_list_mutex;
> >  };
> > @@ -243,6 +262,11 @@ struct mlx4_ah {
> >        struct mlx4_av                  av;
> >  };
> >
> > +struct mlx4_xrc_domain {
> > +       struct ibv_xrc_domain           ibv_xrcd;
> > +       uint32_t                        xrcdn;
> > +};
> > +
> >  static inline unsigned long align(unsigned long val, unsigned long align)
> >  {
> >        return (val + align - 1) & ~(align - 1);
> > @@ -287,6 +311,13 @@ static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
> >        return to_mxxx(ah, ah);
> >  }
> >
> > +#ifdef HAVE_IBV_XRC_OPS
> > +static inline struct mlx4_xrc_domain *to_mxrcd(struct ibv_xrc_domain *ibxrcd)
> > +{
> > +       return to_mxxx(xrcd, xrc_domain);
> > +}
> > +#endif
> > +
> >  int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
> >  void mlx4_free_buf(struct mlx4_buf *buf);
> >
> > @@ -333,6 +364,10 @@ void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
> >  int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
> >                       struct ibv_recv_wr *wr,
> >                       struct ibv_recv_wr **bad_wr);
> > +struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn);
> > +int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn,
> > +                      struct mlx4_srq *srq);
> > +void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn);
> >
> >  struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
> >  int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
> > @@ -363,5 +398,16 @@ int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
> >  void mlx4_free_av(struct mlx4_ah *ah);
> >  int mlx4_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
> >  int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
> > +#ifdef HAVE_IBV_XRC_OPS
> > +struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
> > +                                   struct ibv_xrc_domain *xrc_domain,
> > +                                   struct ibv_cq *xrc_cq,
> > +                                   struct ibv_srq_init_attr *attr);
> > +struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
> > +                                           int fd, int oflag);
> > +
> > +int mlx4_close_xrc_domain(struct ibv_xrc_domain *d);
> > +#endif
> > +
> >
> >  #endif /* MLX4_H */
> > diff --git a/src/qp.c b/src/qp.c
> > index bb98c09..02c5d34 100644
> > --- a/src/qp.c
> > +++ b/src/qp.c
> > @@ -226,7 +226,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
> >                ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
> >                qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
> >
> > -               ctrl->srcrb_flags =
> > +               ctrl->xrcrb_flags =
> >                        (wr->send_flags & IBV_SEND_SIGNALED ?
> >                         htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
> >                        (wr->send_flags & IBV_SEND_SOLICITED ?
> > @@ -243,6 +243,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
> >                size = sizeof *ctrl / 16;
> >
> >                switch (ibqp->qp_type) {
> > +               case IBV_QPT_XRC:
> > +                       ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8);
> > +                       /* fall thru */
> >                case IBV_QPT_RC:
> >                case IBV_QPT_UC:
> >                        switch (wr->opcode) {
> > @@ -543,6 +546,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
> >                size += sizeof (struct mlx4_wqe_raddr_seg);
> >                break;
> >
> > +       case IBV_QPT_XRC:
> >        case IBV_QPT_RC:
> >                size += sizeof (struct mlx4_wqe_raddr_seg);
> >                /*
> > @@ -630,6 +634,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
> >
> >        case IBV_QPT_UC:
> >        case IBV_QPT_RC:
> > +       case IBV_QPT_XRC:
> >                wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
> >                break;
> >
> > diff --git a/src/srq.c b/src/srq.c
> > index ba2ceb9..1350792 100644
> > --- a/src/srq.c
> > +++ b/src/srq.c
> > @@ -167,3 +167,53 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
> >
> >        return 0;
> >  }
> > +
> > +struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn)
> > +{
> > +       int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
> > +
> > +       if (ctx->xrc_srq_table[tind].refcnt)
> > +               return ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask];
> > +       else
> > +               return NULL;
> > +}
> > +
> > +int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn,
> > +                      struct mlx4_srq *srq)
> > +{
> > +       int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
> > +       int ret = 0;
> > +
> > +       pthread_mutex_lock(&ctx->xrc_srq_table_mutex);
> > +
> > +       if (!ctx->xrc_srq_table[tind].refcnt) {
> > +               ctx->xrc_srq_table[tind].table = calloc(ctx->xrc_srq_table_mask + 1,
> > +                                                       sizeof(struct mlx4_srq *));
> > +               if (!ctx->xrc_srq_table[tind].table) {
> > +                       ret = -1;
> > +                       goto out;
> > +               }
> > +       }
> > +
> > +       ++ctx->xrc_srq_table[tind].refcnt;
> > +       ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = srq;
> > +
> > +out:
> > +       pthread_mutex_unlock(&ctx->xrc_srq_table_mutex);
> > +       return ret;
> > +}
> > +
> > +void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn)
> > +{
> > +       int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
> > +
> > +       pthread_mutex_lock(&ctx->xrc_srq_table_mutex);
> > +
> > +       if (!--ctx->xrc_srq_table[tind].refcnt)
> > +               free(ctx->xrc_srq_table[tind].table);
> > +       else
> > +               ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = NULL;
> > +
> > +       pthread_mutex_unlock(&ctx->xrc_srq_table_mutex);
> > +}
> > +
> > diff --git a/src/verbs.c b/src/verbs.c
> > index 3935ed5..b883992 100644
> > --- a/src/verbs.c
> > +++ b/src/verbs.c
> > @@ -368,18 +368,36 @@ int mlx4_query_srq(struct ibv_srq *srq,
> >        return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
> >  }
> >
> > -int mlx4_destroy_srq(struct ibv_srq *srq)
> > +int mlx4_destroy_srq(struct ibv_srq *ibsrq)
> >  {
> > +       struct mlx4_srq *srq = to_msrq(ibsrq);
> > +       struct mlx4_cq *mcq = NULL;
> >        int ret;
> >
> > -       ret = ibv_cmd_destroy_srq(srq);
> > -       if (ret)
> > +       if (ibsrq->xrc_cq) {
> > +               /* is an xrc_srq */
> > +               mcq = to_mcq(ibsrq->xrc_cq);
> > +               mlx4_cq_clean(mcq, 0, srq);
> > +               pthread_spin_lock(&mcq->lock);
> > +               mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn);
> > +               pthread_spin_unlock(&mcq->lock);
> > +       }
> > +
> > +       ret = ibv_cmd_destroy_srq(ibsrq);
> > +       if (ret) {
> > +               if (ibsrq->xrc_cq) {
> > +                       pthread_spin_lock(&mcq->lock);
> > +                       mlx4_store_xrc_srq(to_mctx(ibsrq->context),
> > +                                          srq->srqn, srq);
> > +                       pthread_spin_unlock(&mcq->lock);
> > +               }
> >                return ret;
> > +       }
> >
> > -       mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db);
> > -       mlx4_free_buf(&to_msrq(srq)->buf);
> > -       free(to_msrq(srq)->wrid);
> > -       free(to_msrq(srq));
> > +       mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db);
> > +       mlx4_free_buf(&srq->buf);
> > +       free(srq->wrid);
> > +       free(srq);
> >
> >        return 0;
> >  }
> > @@ -413,7 +431,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
> >        qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
> >        qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
> >
> > -       if (attr->srq)
> > +       if (attr->srq || attr->qp_type == IBV_QPT_XRC)
> >                attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
> >        else {
> >                if (attr->cap.max_recv_sge < 1)
> > @@ -431,7 +449,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
> >            pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
> >                goto err_free;
> >
> > -       if (!attr->srq) {
> > +       if (!attr->srq && attr->qp_type != IBV_QPT_XRC) {
> >                qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
> >                if (!qp->db)
> >                        goto err_free;
> > @@ -440,7 +458,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
> >        }
> >
> >        cmd.buf_addr        = (uintptr_t) qp->buf.buf;
> > -       if (attr->srq)
> > +       if (attr->srq || attr->qp_type == IBV_QPT_XRC)
> >                cmd.db_addr = 0;
> >        else
> >                cmd.db_addr = (uintptr_t) qp->db;
> > @@ -477,7 +495,7 @@ err_destroy:
> >        ibv_cmd_destroy_qp(&qp->ibv_qp);
> >
> >  err_rq_db:
> > -       if (!attr->srq)
> > +       if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
> >                mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
> >
> >  err_free:
> > @@ -536,7 +554,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
> >                        mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
> >
> >                mlx4_init_qp_indices(to_mqp(qp));
> > -               if (!qp->srq)
> > +               if (!qp->srq && qp->qp_type != IBV_QPT_XRC)
> >                        *to_mqp(qp)->db = 0;
> >        }
> >
> > @@ -595,7 +613,7 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
> >
> >        mlx4_unlock_cqs(ibqp);
> >
> > -       if (!ibqp->srq)
> > +       if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC)
> >                mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
> >        free(qp->sq.wrid);
> >        if (qp->rq.wqe_cnt)
> > @@ -653,3 +671,103 @@ int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
> >  {
> >        return ibv_cmd_detach_mcast(qp, gid, lid);
> >  }
> > +
> > +#ifdef HAVE_IBV_XRC_OPS
> > +struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
> > +                                   struct ibv_xrc_domain *xrc_domain,
> > +                                   struct ibv_cq *xrc_cq,
> > +                                   struct ibv_srq_init_attr *attr)
> > +{
> > +       struct mlx4_create_xrc_srq  cmd;
> > +       struct mlx4_create_srq_resp resp;
> > +       struct mlx4_srq            *srq;
> > +       int                         ret;
> > +
> > +       /* Sanity check SRQ size before proceeding */
> > +       if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
> > +               return NULL;
> > +
> > +       srq = malloc(sizeof *srq);
> > +       if (!srq)
> > +               return NULL;
> > +
> > +       if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
> > +               goto err;
> > +
> > +       srq->max     = align_queue_size(attr->attr.max_wr + 1);
> > +       srq->max_gs  = attr->attr.max_sge;
> > +       srq->counter = 0;
> > +
> > +       if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
> > +               goto err;
> > +
> > +       srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
> > +       if (!srq->db)
> > +               goto err_free;
> > +
> > +       *srq->db = 0;
> > +
> > +       cmd.buf_addr = (uintptr_t) srq->buf.buf;
> > +       cmd.db_addr  = (uintptr_t) srq->db;
> > +
> > +       ret = ibv_cmd_create_xrc_srq(pd, &srq->ibv_srq, attr,
> > +                                    xrc_domain->handle,
> > +                                    xrc_cq->handle,
> > +                                    &cmd.ibv_cmd, sizeof cmd,
> > +                                    &resp.ibv_resp, sizeof resp);
> > +       if (ret)
> > +               goto err_db;
> > +
> > +       srq->ibv_srq.xrc_srq_num = srq->srqn = resp.srqn;
> > +
> > +       ret = mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq);
> > +       if (ret)
> > +               goto err_destroy;
> > +
> > +       return &srq->ibv_srq;
> > +
> > +err_destroy:
> > +       ibv_cmd_destroy_srq(&srq->ibv_srq);
> > +
> > +err_db:
> > +       mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
> > +
> > +err_free:
> > +       free(srq->wrid);
> > +       mlx4_free_buf(&srq->buf);
> > +
> > +err:
> > +       free(srq);
> > +
> > +       return NULL;
> > +}
> > +
> > +struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
> > +                                           int fd, int oflag)
> > +{
> > +       int ret;
> > +       struct mlx4_open_xrc_domain_resp resp;
> > +       struct mlx4_xrc_domain *xrcd;
> > +
> > +       xrcd = malloc(sizeof *xrcd);
> > +       if (!xrcd)
> > +               return NULL;
> > +
> > +       ret = ibv_cmd_open_xrc_domain(context, fd, oflag, &xrcd->ibv_xrcd,
> > +                                     &resp.ibv_resp, sizeof resp);
> > +       if (ret) {
> > +               free(xrcd);
> > +               return NULL;
> > +       }
> > +
> > +       xrcd->xrcdn = resp.xrcdn;
> > +       return &xrcd->ibv_xrcd;
> > +}
> > +
> > +int mlx4_close_xrc_domain(struct ibv_xrc_domain *d)
> > +{
> > +       ibv_cmd_close_xrc_domain(d);
> > +       free(d);
> > +       return 0;
> > +}
> > +#endif
> > diff --git a/src/wqe.h b/src/wqe.h
> > index 6f7f309..fa2f8ac 100644
> > --- a/src/wqe.h
> > +++ b/src/wqe.h
> > @@ -65,7 +65,7 @@ struct mlx4_wqe_ctrl_seg {
> >         * [1]   SE (solicited event)
> >         * [0]   FL (force loopback)
> >         */
> > -       uint32_t                srcrb_flags;
> > +       uint32_t                xrcrb_flags;
> >        /*
> >         * imm is immediate data for send/RDMA write w/ immediate;
> >         * also invalidation key for send with invalidate; input
> > --
> > 1.5.1.6
> >
> >
> 



More information about the general mailing list