[openib-general] [RFC] libibverbs changes for PathScale merge
Roland Dreier
rolandd at cisco.com
Thu Oct 13 14:22:26 PDT 2005
Here are the changes to libibverbs required to support PathScale's
driver. Again, I'm happy with them and would just like to get
comments on them before I commit them to svn.
Thanks,
Roland
--- libibverbs/include/infiniband/driver.h (revision 3774)
+++ libibverbs/include/infiniband/driver.h (working copy)
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -92,6 +93,8 @@ extern int ibv_cmd_create_cq(struct ibv_
int comp_vector, struct ibv_cq *cq,
struct ibv_create_cq *cmd, size_t cmd_size,
struct ibv_create_cq_resp *resp, size_t resp_size);
+extern int ibv_cmd_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc); /* returns the number of entries written to wc, or -1 on failure */
+extern int ibv_cmd_req_notify_cq(struct ibv_cq *cq, int solicited); /* returns 0, or errno if the command is not fully written */
extern int ibv_cmd_destroy_cq(struct ibv_cq *cq);
extern int ibv_cmd_create_srq(struct ibv_pd *pd,
@@ -111,6 +114,15 @@ extern int ibv_cmd_modify_qp(struct ibv_
enum ibv_qp_attr_mask attr_mask,
struct ibv_modify_qp *cmd, size_t cmd_size);
extern int ibv_cmd_destroy_qp(struct ibv_qp *qp);
+extern int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr); /* returns 0 or errno; *bad_wr is set when the response flags a request */
+extern int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr); /* returns 0 or errno; *bad_wr is set when the response flags a request */
+extern int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr); /* returns 0 or errno; *bad_wr is set when the response flags a request */
+extern int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
+ struct ibv_ah_attr *attr); /* returns 0 and stores the new handle in ah->handle, or returns errno */
+extern int ibv_cmd_destroy_ah(struct ibv_ah *ah); /* returns 0, or errno if the command is not fully written */
extern int ibv_cmd_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
extern int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
--- libibverbs/include/infiniband/verbs.h (revision 3774)
+++ libibverbs/include/infiniband/verbs.h (working copy)
@@ -2,6 +2,7 @@
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -488,6 +489,7 @@ struct ibv_qp {
uint32_t handle;
uint32_t qp_num;
enum ibv_qp_state state;
+ enum ibv_qp_type qp_type;
pthread_mutex_t mutex;
pthread_cond_t cond;
@@ -513,6 +515,7 @@ struct ibv_cq {
struct ibv_ah {
struct ibv_context *context;
struct ibv_pd *pd;
+ uint32_t handle;
};
struct ibv_device;
--- libibverbs/include/infiniband/kern-abi.h (revision 3774)
+++ libibverbs/include/infiniband/kern-abi.h (working copy)
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -93,8 +94,11 @@ enum {
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ * - Do not use pointer types -- pass pointers in __u64 instead.
+ * - Make sure that any structure larger than 4 bytes is padded to a
+ * multiple of 8 bytes. Otherwise the structure size will be
+ * different between 32-bit and 64-bit architectures.
*/
struct ibv_kern_async_event {
@@ -298,6 +302,47 @@ struct ibv_create_cq_resp {
__u32 cqe;
};
+struct ibv_kern_wc { /* One work completion in the fixed layout carried in a POLL_CQ response. */
+ __u64 wr_id;
+ __u32 status;
+ __u32 opcode;
+ __u32 vendor_err;
+ __u32 byte_len;
+ __u32 imm_data;
+ __u32 qp_num;
+ __u32 src_qp;
+ __u32 wc_flags;
+ __u16 pkey_index;
+ __u16 slid;
+ __u8 sl;
+ __u8 dlid_path_bits;
+ __u8 port_num; /* not copied into struct ibv_wc by ibv_cmd_poll_cq in this patch */
+ __u8 reserved; /* explicit pad: brings the struct to 48 bytes, a multiple of 8 */
+};
+
+struct ibv_poll_cq { /* POLL_CQ command as written to the context's cmd_fd by ibv_cmd_poll_cq. */
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response; /* presumably the response buffer address, filled in by IBV_INIT_CMD_RESP -- confirm */
+ __u32 cq_handle; /* handle of the CQ to poll (ibv_cq.handle) */
+ __u32 ne; /* maximum number of completions requested */
+};
+
+struct ibv_poll_cq_resp { /* Response to POLL_CQ: a count followed by that many completions. */
+ __u32 count; /* number of valid entries in wc[] */
+ __u32 reserved; /* explicit pad so wc[] starts at offset 8 on every architecture */
+ struct ibv_kern_wc wc[0]; /* variable-length tail; ibv_cmd_poll_cq allocates room for ne entries */
+};
+
+struct ibv_req_notify_cq { /* REQ_NOTIFY_CQ command; it carries no response field. */
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u32 cq_handle; /* handle of the CQ to arm (ibv_cq.handle) */
+ __u32 solicited; /* derived from the solicited argument of ibv_cmd_req_notify_cq */
+};
+
struct ibv_destroy_cq {
__u32 command;
__u16 in_words;
@@ -400,6 +445,130 @@ struct ibv_destroy_qp_resp {
__u32 events_reported;
};
+struct ibv_kern_send_wr { /* One send work request as marshalled into a POST_SEND command. */
+ __u64 wr_id;
+ __u32 num_sge; /* how many entries this request owns in the command's trailing SGE area */
+ __u32 opcode;
+ __u32 send_flags;
+ __u32 imm_data;
+ union { /* ibv_cmd_post_send picks the member from the QP type and the opcode */
+ struct {
+ __u64 remote_addr;
+ __u32 rkey;
+ __u32 reserved;
+ } rdma; /* filled for the RDMA write, write-with-immediate and read opcodes */
+ struct {
+ __u64 remote_addr;
+ __u64 compare_add;
+ __u64 swap;
+ __u32 rkey;
+ __u32 reserved;
+ } atomic; /* filled for the compare-and-swap and fetch-and-add opcodes */
+ struct {
+ __u32 ah; /* address handle number (ibv_ah.handle), not a pointer */
+ __u32 remote_qpn;
+ __u32 remote_qkey;
+ __u32 reserved;
+ } ud; /* filled for every request posted to a UD QP */
+ } wr;
+};
+
+struct ibv_post_send { /* POST_SEND command header; the WRs and then the SGEs follow it in the same write. */
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response; /* presumably the response buffer address, filled in by IBV_INIT_CMD_RESP -- confirm */
+ __u32 qp_handle;
+ __u32 wr_count; /* number of entries in send_wr[] */
+ __u32 sge_count; /* total SGEs summed over all work requests */
+ __u32 wqe_size; /* sizeof(struct ibv_kern_send_wr) as compiled into the sender */
+ struct ibv_kern_send_wr send_wr[0]; /* variable-length tail */
+};
+
+struct ibv_post_send_resp { /* Response to POST_SEND. */
+ __u32 bad_wr; /* 1-based position of the flagged request in the posted list; 0 if none */
+};
+
+struct ibv_kern_recv_wr { /* One receive work request as marshalled into POST_RECV / POST_SRQ_RECV. */
+ __u64 wr_id;
+ __u32 num_sge; /* how many entries this request owns in the command's trailing SGE area */
+ __u32 reserved; /* explicit pad: brings the struct to 16 bytes */
+};
+
+struct ibv_post_recv { /* POST_RECV command header; the WRs and then the SGEs follow it in the same write. */
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response; /* presumably the response buffer address, filled in by IBV_INIT_CMD_RESP -- confirm */
+ __u32 qp_handle;
+ __u32 wr_count; /* number of entries in recv_wr[] */
+ __u32 sge_count; /* total SGEs summed over all work requests */
+ __u32 wqe_size; /* sizeof(struct ibv_kern_recv_wr) as compiled into the sender */
+ struct ibv_kern_recv_wr recv_wr[0]; /* variable-length tail */
+};
+
+struct ibv_post_recv_resp { /* Response to POST_RECV. */
+ __u32 bad_wr; /* 1-based position of the flagged request in the posted list; 0 if none */
+};
+
+struct ibv_post_srq_recv { /* POST_SRQ_RECV command header; same shape as ibv_post_recv but addressed to an SRQ. */
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response; /* presumably the response buffer address, filled in by IBV_INIT_CMD_RESP -- confirm */
+ __u32 srq_handle;
+ __u32 wr_count; /* number of entries in recv_wr[] */
+ __u32 sge_count; /* total SGEs summed over all work requests */
+ __u32 wqe_size; /* sizeof(struct ibv_kern_recv_wr) as compiled into the sender */
+ struct ibv_kern_recv_wr recv_wr[0]; /* variable-length tail */
+};
+
+struct ibv_post_srq_recv_resp { /* Response to POST_SRQ_RECV. */
+ __u32 bad_wr; /* 1-based position of the flagged request in the posted list; 0 if none */
+};
+
+struct ibv_kern_global_route { /* Global-route part of an address handle attribute, in fixed layout. */
+ __u8 dgid[16]; /* raw GID bytes; ibv_cmd_create_ah copies them from attr->grh.dgid.raw */
+ __u32 flow_label;
+ __u8 sgid_index;
+ __u8 hop_limit;
+ __u8 traffic_class;
+ __u8 reserved; /* explicit pad: brings the struct to 24 bytes */
+};
+
+struct ibv_kern_ah_attr { /* Address handle attributes in fixed layout, embedded in the CREATE_AH command. */
+ struct ibv_kern_global_route grh;
+ __u16 dlid;
+ __u8 sl;
+ __u8 src_path_bits;
+ __u8 static_rate;
+ __u8 is_global;
+ __u8 port_num;
+ __u8 reserved; /* explicit pad: brings the struct to 32 bytes */
+};
+
+struct ibv_create_ah { /* CREATE_AH command as written to cmd_fd by ibv_cmd_create_ah. */
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response; /* presumably the response buffer address, filled in by IBV_INIT_CMD_RESP -- confirm */
+ __u64 user_handle; /* the userspace struct ibv_ah pointer, passed as an integer */
+ __u32 pd_handle;
+ __u32 reserved; /* explicit pad so attr starts on an 8-byte boundary */
+ struct ibv_kern_ah_attr attr;
+};
+
+struct ibv_create_ah_resp { /* Response to CREATE_AH. */
+ __u32 handle; /* stored into ibv_ah.handle by ibv_cmd_create_ah */
+};
+
+struct ibv_destroy_ah { /* DESTROY_AH command; it carries no response field. */
+ __u32 command;
+ __u16 in_words;
+ __u16 out_words;
+ __u32 ah_handle; /* NOTE(review): struct is 12 bytes, not a multiple of 8; it has no 64-bit member, so the size should match on 32- and 64-bit -- confirm */
+};
+
struct ibv_attach_mcast {
__u32 command;
__u16 in_words;
--- libibverbs/src/libibverbs.map (revision 3774)
+++ libibverbs/src/libibverbs.map (working copy)
@@ -41,6 +41,8 @@ IBVERBS_1.0 {
ibv_cmd_reg_mr;
ibv_cmd_dereg_mr;
ibv_cmd_create_cq;
+ ibv_cmd_poll_cq;
+ ibv_cmd_req_notify_cq;
ibv_cmd_destroy_cq;
ibv_cmd_create_srq;
ibv_cmd_modify_srq;
@@ -48,6 +50,11 @@ IBVERBS_1.0 {
ibv_cmd_create_qp;
ibv_cmd_modify_qp;
ibv_cmd_destroy_qp;
+ ibv_cmd_post_send;
+ ibv_cmd_post_recv;
+ ibv_cmd_post_srq_recv;
+ ibv_cmd_create_ah;
+ ibv_cmd_destroy_ah;
ibv_cmd_attach_mcast;
ibv_cmd_detach_mcast;
local: *;
--- libibverbs/src/cmd.c (revision 3774)
+++ libibverbs/src/cmd.c (working copy)
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -304,6 +305,65 @@ int ibv_cmd_create_cq(struct ibv_context
return 0;
}
+int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{ /* Poll ibcq through cmd_fd; returns how many entries of wc[] were filled, or -1 on failure. */
+ struct ibv_poll_cq_resp *reply;
+ struct ibv_kern_wc *src;
+ struct ibv_poll_cq req;
+ struct ibv_wc *dst;
+ int reply_len;
+ int filled = -1;
+ int k;
+ /* The reply is a fixed header followed by room for ne kernel-format completions. */
+ reply_len = sizeof *reply + ne * sizeof *src;
+ reply = malloc(reply_len);
+ if (reply == NULL)
+ return -1;
+
+ IBV_INIT_CMD_RESP(&req, sizeof req, POLL_CQ, reply, reply_len);
+ req.ne = ne;
+ req.cq_handle = ibcq->handle;
+ /* Anything other than a complete write of the request counts as failure. */
+ if (write(ibcq->context->cmd_fd, &req, sizeof req) != sizeof req)
+ goto done;
+ /* Translate each kernel-format completion into the caller's struct ibv_wc. */
+ for (k = 0; k < reply->count; k++) {
+ src = &reply->wc[k];
+ dst = &wc[k];
+ dst->wr_id = src->wr_id;
+ dst->status = src->status;
+ dst->opcode = src->opcode;
+ dst->vendor_err = src->vendor_err;
+ dst->byte_len = src->byte_len;
+ dst->imm_data = src->imm_data;
+ dst->qp_num = src->qp_num;
+ dst->src_qp = src->src_qp;
+ dst->wc_flags = src->wc_flags;
+ dst->pkey_index = src->pkey_index;
+ dst->slid = src->slid;
+ dst->sl = src->sl;
+ dst->dlid_path_bits = src->dlid_path_bits;
+ }
+ filled = reply->count;
+done:
+ free(reply);
+ return filled;
+}
+
+int ibv_cmd_req_notify_cq(struct ibv_cq *ibcq, int solicited)
+{ /* Send REQ_NOTIFY_CQ for ibcq; returns 0, or errno if the command is not fully written. */
+ struct ibv_req_notify_cq cmd;
+
+ IBV_INIT_CMD(&cmd, sizeof cmd, REQ_NOTIFY_CQ);
+ cmd.cq_handle = ibcq->handle;
+ cmd.solicited = !!solicited; /* normalize to 0/1; the previous "? 0 : 1" inverted the caller's request */
+
+ if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+ return errno;
+
+ return 0;
+}
+
static int ibv_cmd_destroy_cq_v1(struct ibv_cq *cq)
{
struct ibv_destroy_cq_v1 cmd;
@@ -441,6 +501,7 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
qp->handle = resp.qp_handle;
qp->qp_num = resp.qpn;
+ qp->qp_type = attr->qp_type;
return 0;
}
@@ -518,6 +579,251 @@ static int ibv_cmd_destroy_qp_v1(struct
return 0;
}
+int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+ struct ibv_send_wr **bad_wr)
+{ /* Marshal the send WR list into one POST_SEND command and write it to cmd_fd. */
+ struct ibv_post_send *cmd;
+ struct ibv_post_send_resp resp;
+ struct ibv_send_wr *i;
+ struct ibv_kern_send_wr *n, *tmp;
+ struct ibv_sge *s;
+ unsigned wr_count = 0;
+ unsigned sge_count = 0;
+ int size; /* total command length in bytes: header + WRs + SGEs */
+ int ret = 0; /* 0 on success, errno when the command is not fully written */
+ /* First pass: count requests and SGEs so the command can be sized. */
+ for (i = wr; i; i = i->next) {
+ wr_count++;
+ sge_count += i->num_sge;
+ }
+
+ size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+ cmd = alloca(size);
+
+ IBV_INIT_CMD_RESP(cmd, size, POST_SEND, &resp, sizeof resp);
+ cmd->qp_handle = ibqp->handle;
+ cmd->wr_count = wr_count;
+ cmd->sge_count = sge_count;
+ cmd->wqe_size = sizeof *n;
+ /* Layout after the header: wr_count kernel WRs, then every SGE back to back. */
+ n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd);
+ s = (struct ibv_sge *) (n + wr_count);
+
+ tmp = n;
+ for (i = wr; i; i = i->next) {
+ tmp->wr_id = i->wr_id;
+ tmp->num_sge = i->num_sge;
+ tmp->opcode = i->opcode;
+ tmp->send_flags = i->send_flags;
+ tmp->imm_data = i->imm_data;
+ if (ibqp->qp_type == IBV_QPT_UD) { /* UD sends carry the AH handle and remote QP info */
+ tmp->wr.ud.ah = i->wr.ud.ah->handle;
+ tmp->wr.ud.remote_qpn = i->wr.ud.remote_qpn;
+ tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
+ } else { /* other QP types: only RDMA and atomic opcodes have extra fields */
+ switch(i->opcode) {
+ case IBV_WR_RDMA_WRITE:
+ case IBV_WR_RDMA_WRITE_WITH_IMM:
+ case IBV_WR_RDMA_READ:
+ tmp->wr.rdma.remote_addr =
+ i->wr.rdma.remote_addr;
+ tmp->wr.rdma.rkey = i->wr.rdma.rkey;
+ break;
+ case IBV_WR_ATOMIC_CMP_AND_SWP:
+ case IBV_WR_ATOMIC_FETCH_AND_ADD:
+ tmp->wr.atomic.remote_addr =
+ i->wr.atomic.remote_addr;
+ tmp->wr.atomic.compare_add =
+ i->wr.atomic.compare_add;
+ tmp->wr.atomic.swap = i->wr.atomic.swap;
+ tmp->wr.atomic.rkey = i->wr.atomic.rkey;
+ break;
+ default:
+ break;
+ }
+ }
+ /* Append this request's SGEs to the shared SGE area. */
+ if (tmp->num_sge) {
+ memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+ s += tmp->num_sge;
+ }
+
+ tmp++;
+ }
+ /* cmd is a pointer, so the length to check is size; sizeof cmd is only the pointer size. */
+ resp.bad_wr = 0;
+ if (write(ibqp->context->cmd_fd, cmd, size) != size)
+ ret = errno;
+ /* resp.bad_wr is a 1-based index into the posted list; 0 means none was flagged. */
+ wr_count = resp.bad_wr;
+ if (wr_count) {
+ i = wr;
+ while (--wr_count)
+ i = i->next;
+ *bad_wr = i;
+ } else if (ret)
+ *bad_wr = wr; /* failed with no index reported: point at the head of the list */
+ return ret;
+}
+
+int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+{ /* Marshal the receive WR list into one POST_RECV command and write it to cmd_fd. */
+ struct ibv_post_recv *cmd;
+ struct ibv_post_recv_resp resp;
+ struct ibv_recv_wr *i;
+ struct ibv_kern_recv_wr *n, *tmp;
+ struct ibv_sge *s;
+ unsigned wr_count = 0;
+ unsigned sge_count = 0;
+ int size; /* total command length in bytes: header + WRs + SGEs */
+ int ret = 0; /* 0 on success, errno when the command is not fully written */
+ /* First pass: count requests and SGEs so the command can be sized. */
+ for (i = wr; i; i = i->next) {
+ wr_count++;
+ sge_count += i->num_sge;
+ }
+
+ size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+ cmd = alloca(size);
+
+ IBV_INIT_CMD_RESP(cmd, size, POST_RECV, &resp, sizeof resp);
+ cmd->qp_handle = ibqp->handle;
+ cmd->wr_count = wr_count;
+ cmd->sge_count = sge_count;
+ cmd->wqe_size = sizeof *n;
+ /* Layout after the header: wr_count kernel WRs, then every SGE back to back. */
+ n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+ s = (struct ibv_sge *) (n + wr_count);
+
+ tmp = n;
+ for (i = wr; i; i = i->next) {
+ tmp->wr_id = i->wr_id;
+ tmp->num_sge = i->num_sge;
+ /* Append this request's SGEs to the shared SGE area. */
+ if (tmp->num_sge) {
+ memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+ s += tmp->num_sge;
+ }
+
+ tmp++;
+ }
+ /* cmd is a pointer, so the length to check is size; sizeof cmd is only the pointer size. */
+ resp.bad_wr = 0;
+ if (write(ibqp->context->cmd_fd, cmd, size) != size)
+ ret = errno;
+ /* resp.bad_wr is a 1-based index into the posted list; 0 means none was flagged. */
+ wr_count = resp.bad_wr;
+ if (wr_count) {
+ i = wr;
+ while (--wr_count)
+ i = i->next;
+ *bad_wr = i;
+ } else if (ret)
+ *bad_wr = wr; /* failed with no index reported: point at the head of the list */
+ return ret;
+}
+
+int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
+ struct ibv_recv_wr **bad_wr)
+{ /* Marshal the receive WR list into one POST_SRQ_RECV command and write it to cmd_fd. */
+ struct ibv_post_srq_recv *cmd;
+ struct ibv_post_srq_recv_resp resp;
+ struct ibv_recv_wr *i;
+ struct ibv_kern_recv_wr *n, *tmp;
+ struct ibv_sge *s;
+ unsigned wr_count = 0;
+ unsigned sge_count = 0;
+ int size; /* total command length in bytes: header + WRs + SGEs */
+ int ret = 0; /* 0 on success, errno when the command is not fully written */
+ /* First pass: count requests and SGEs so the command can be sized. */
+ for (i = wr; i; i = i->next) {
+ wr_count++;
+ sge_count += i->num_sge;
+ }
+
+ size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+ cmd = alloca(size);
+
+ IBV_INIT_CMD_RESP(cmd, size, POST_SRQ_RECV, &resp, sizeof resp);
+ cmd->srq_handle = srq->handle;
+ cmd->wr_count = wr_count;
+ cmd->sge_count = sge_count;
+ cmd->wqe_size = sizeof *n;
+ /* Layout after the header: wr_count kernel WRs, then every SGE back to back. */
+ n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+ s = (struct ibv_sge *) (n + wr_count);
+
+ tmp = n;
+ for (i = wr; i; i = i->next) {
+ tmp->wr_id = i->wr_id;
+ tmp->num_sge = i->num_sge;
+ /* Append this request's SGEs to the shared SGE area. */
+ if (tmp->num_sge) {
+ memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+ s += tmp->num_sge;
+ }
+
+ tmp++;
+ }
+ /* cmd is a pointer, so the length to check is size; sizeof cmd is only the pointer size. */
+ resp.bad_wr = 0;
+ if (write(srq->context->cmd_fd, cmd, size) != size)
+ ret = errno;
+ /* resp.bad_wr is a 1-based index into the posted list; 0 means none was flagged. */
+ wr_count = resp.bad_wr;
+ if (wr_count) {
+ i = wr;
+ while (--wr_count)
+ i = i->next;
+ *bad_wr = i;
+ } else if (ret)
+ *bad_wr = wr; /* failed with no index reported: point at the head of the list */
+ return ret;
+}
+
+int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
+ struct ibv_ah_attr *attr)
+{ /* Send CREATE_AH built from attr; on success record the returned handle in ah. */
+ struct ibv_create_ah_resp reply;
+ struct ibv_create_ah req;
+ struct ibv_kern_ah_attr *dst = &req.attr;
+ IBV_INIT_CMD_RESP(&req, sizeof req, CREATE_AH, &reply, sizeof reply);
+ req.pd_handle = pd->handle;
+ req.user_handle = (uintptr_t) ah;
+ /* Global route part of the attributes. */
+ memcpy(dst->grh.dgid, attr->grh.dgid.raw, sizeof dst->grh.dgid);
+ dst->grh.traffic_class = attr->grh.traffic_class;
+ dst->grh.hop_limit = attr->grh.hop_limit;
+ dst->grh.sgid_index = attr->grh.sgid_index;
+ dst->grh.flow_label = attr->grh.flow_label;
+ /* Remaining path attributes. */
+ dst->port_num = attr->port_num;
+ dst->is_global = attr->is_global;
+ dst->static_rate = attr->static_rate;
+ dst->src_path_bits = attr->src_path_bits;
+ dst->sl = attr->sl;
+ dst->dlid = attr->dlid;
+ /* Anything other than a complete write of the request counts as failure. */
+ if (write(pd->context->cmd_fd, &req, sizeof req) != sizeof req)
+ return errno;
+ ah->handle = reply.handle;
+ return 0;
+}
+
+int ibv_cmd_destroy_ah(struct ibv_ah *ah)
+{ /* Send DESTROY_AH for ah->handle; returns 0, or errno if the command is not fully written. */
+ struct ibv_destroy_ah req;
+ ssize_t written;
+
+ IBV_INIT_CMD(&req, sizeof req, DESTROY_AH);
+ req.ah_handle = ah->handle;
+ written = write(ah->context->cmd_fd, &req, sizeof req);
+ if (written == sizeof req)
+ return 0;
+ return errno;
+}
+
int ibv_cmd_destroy_qp(struct ibv_qp *qp)
{
struct ibv_destroy_qp cmd;
More information about the general
mailing list