[openib-general] [RFC] libibverbs changes for PathScale merge

Roland Dreier rolandd at cisco.com
Thu Oct 13 14:22:26 PDT 2005


Here are the changes to libibverbs required to support PathScale's
driver.  Again, I'm happy with them and would just like to get
comments on them before I commit them to svn.

Thanks,
  Roland

--- libibverbs/include/infiniband/driver.h	(revision 3774)
+++ libibverbs/include/infiniband/driver.h	(working copy)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -92,6 +93,8 @@ extern int ibv_cmd_create_cq(struct ibv_
 			     int comp_vector, struct ibv_cq *cq,
 			     struct ibv_create_cq *cmd, size_t cmd_size,
 			     struct ibv_create_cq_resp *resp, size_t resp_size);
+extern int ibv_cmd_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
+extern int ibv_cmd_req_notify_cq(struct ibv_cq *cq, int solicited);
 extern int ibv_cmd_destroy_cq(struct ibv_cq *cq);
 
 extern int ibv_cmd_create_srq(struct ibv_pd *pd,
@@ -111,6 +114,15 @@ extern int ibv_cmd_modify_qp(struct ibv_
 			     enum ibv_qp_attr_mask attr_mask,
 			     struct ibv_modify_qp *cmd, size_t cmd_size);
 extern int ibv_cmd_destroy_qp(struct ibv_qp *qp);
+extern int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+			     struct ibv_send_wr **bad_wr);
+extern int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+			     struct ibv_recv_wr **bad_wr);
+extern int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
+				 struct ibv_recv_wr **bad_wr);
+extern int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
+			     struct ibv_ah_attr *attr);
+extern int ibv_cmd_destroy_ah(struct ibv_ah *ah);
 extern int ibv_cmd_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
 extern int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
 
--- libibverbs/include/infiniband/verbs.h	(revision 3774)
+++ libibverbs/include/infiniband/verbs.h	(working copy)
@@ -2,6 +2,7 @@
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2004 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -488,6 +489,7 @@ struct ibv_qp {
 	uint32_t		handle;
 	uint32_t		qp_num;
 	enum ibv_qp_state       state;
+	enum ibv_qp_type	qp_type;
 
 	pthread_mutex_t		mutex;
 	pthread_cond_t		cond;
@@ -513,6 +515,7 @@ struct ibv_cq {
 struct ibv_ah {
 	struct ibv_context     *context;
 	struct ibv_pd	       *pd;
+	uint32_t		handle;
 };
 
 struct ibv_device;
--- libibverbs/include/infiniband/kern-abi.h	(revision 3774)
+++ libibverbs/include/infiniband/kern-abi.h	(working copy)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -93,8 +94,11 @@ enum {
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure containing a 64-bit member is padded
+ *    to a multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */
 
 struct ibv_kern_async_event {
@@ -298,6 +302,47 @@ struct ibv_create_cq_resp {
 	__u32 cqe;
 };
 
+struct ibv_kern_wc {	/* one completion entry in ibv_poll_cq_resp.wc[]; fields sum to 48 bytes */
+        __u64  wr_id;
+        __u32  status;
+        __u32  opcode;
+        __u32  vendor_err;
+        __u32  byte_len;
+        __u32  imm_data;
+        __u32  qp_num;
+        __u32  src_qp;
+        __u32  wc_flags;
+        __u16  pkey_index;
+        __u16  slid;
+        __u8   sl;
+        __u8   dlid_path_bits;	/* last field ibv_cmd_poll_cq() copies into struct ibv_wc */
+	__u8   port_num;	/* not copied out by ibv_cmd_poll_cq() */
+	__u8   reserved;	/* explicit pad: 47 -> 48 bytes, a multiple of 8 */
+};
+
+struct ibv_poll_cq {	/* POLL_CQ command written to cmd_fd by ibv_cmd_poll_cq() */
+	__u32 command;	/* command/in_words/out_words/response: presumably filled by IBV_INIT_CMD_RESP -- confirm */
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 cq_handle;	/* ibv_cq.handle of the CQ being polled */
+	__u32 ne;	/* caller's ne; the response buffer is sized for this many entries */
+};
+
+struct ibv_poll_cq_resp {	/* response buffer handed to the POLL_CQ command */
+	__u32 count;	/* number of leading wc[] entries ibv_cmd_poll_cq() copies out; also its return value */
+	__u32 reserved;
+	struct ibv_kern_wc wc[0];	/* variable-length tail; ibv_cmd_poll_cq() allocates room for ne entries */
+};
+
+struct ibv_req_notify_cq {	/* REQ_NOTIFY_CQ command; issued with IBV_INIT_CMD, i.e. no response buffer */
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 cq_handle;	/* ibv_cq.handle of the CQ the request applies to */
+	__u32 solicited;	/* flag computed from the solicited argument of ibv_cmd_req_notify_cq() */
+};
+
 struct ibv_destroy_cq {
 	__u32 command;
 	__u16 in_words;
@@ -400,6 +445,130 @@ struct ibv_destroy_qp_resp {
 	__u32 events_reported;
 };
 
+struct ibv_kern_send_wr {	/* fixed-size image of one struct ibv_send_wr, built by ibv_cmd_post_send() */
+	__u64 wr_id;
+	__u32 num_sge;	/* count only; the SGEs themselves follow the whole WR array */
+	__u32 opcode;
+	__u32 send_flags;
+	__u32 imm_data;
+	union {
+		struct {	/* filled for RDMA write / write-with-imm / read on non-UD QPs */
+			__u64 remote_addr;
+			__u32 rkey;
+			__u32 reserved;
+		} rdma;
+		struct {	/* filled for atomic cmp-and-swap / fetch-and-add on non-UD QPs */
+			__u64 remote_addr;
+			__u64 compare_add;
+			__u64 swap;
+			__u32 rkey;
+			__u32 reserved;
+		} atomic;
+		struct {	/* filled for every WR when the QP type is IBV_QPT_UD */
+			__u32 ah;	/* ibv_ah.handle of the address handle, not a pointer */
+			__u32 remote_qpn;
+			__u32 remote_qkey;
+			__u32 reserved;
+		} ud;
+	} wr;
+};
+
+struct ibv_post_send {	/* POST_SEND command header built by ibv_cmd_post_send() */
+	__u32 command;	/* command/in_words/out_words/response: presumably filled by IBV_INIT_CMD_RESP -- confirm */
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 qp_handle;	/* ibv_qp.handle of the target QP */
+	__u32 wr_count;	/* number of entries in send_wr[] */
+	__u32 sge_count;	/* sum of num_sge over all WRs */
+	__u32 wqe_size;	/* sizeof(struct ibv_kern_send_wr) as seen by userspace */
+	struct ibv_kern_send_wr send_wr[0];	/* wr_count entries, then sge_count struct ibv_sge */
+};
+
+struct ibv_post_send_resp {	/* response to POST_SEND */
+	__u32 bad_wr;	/* 0 = none; otherwise 1-based position in the list of the WR returned via *bad_wr */
+};
+
+struct ibv_kern_recv_wr {	/* fixed-size image of one struct ibv_recv_wr; used by POST_RECV and POST_SRQ_RECV */
+	__u64 wr_id;
+	__u32 num_sge;	/* count only; the SGEs themselves follow the whole WR array */
+	__u32 reserved;	/* not written by ibv_cmd_post_recv() / ibv_cmd_post_srq_recv() */
+};
+
+struct ibv_post_recv {	/* POST_RECV command header built by ibv_cmd_post_recv() */
+	__u32 command;	/* command/in_words/out_words/response: presumably filled by IBV_INIT_CMD_RESP -- confirm */
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 qp_handle;	/* ibv_qp.handle of the target QP */
+	__u32 wr_count;	/* number of entries in recv_wr[] */
+	__u32 sge_count;	/* sum of num_sge over all WRs */
+	__u32 wqe_size;	/* sizeof(struct ibv_kern_recv_wr) as seen by userspace */
+	struct ibv_kern_recv_wr recv_wr[0];	/* wr_count entries, then sge_count struct ibv_sge */
+};
+
+struct ibv_post_recv_resp {	/* response to POST_RECV */
+	__u32 bad_wr;	/* 0 = none; otherwise 1-based position in the list of the WR returned via *bad_wr */
+};
+
+struct ibv_post_srq_recv {	/* POST_SRQ_RECV command header built by ibv_cmd_post_srq_recv() */
+	__u32 command;	/* command/in_words/out_words/response: presumably filled by IBV_INIT_CMD_RESP -- confirm */
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 srq_handle;	/* ibv_srq.handle of the target SRQ */
+	__u32 wr_count;	/* number of entries in recv_wr[] */
+	__u32 sge_count;	/* sum of num_sge over all WRs */
+	__u32 wqe_size;	/* sizeof(struct ibv_kern_recv_wr) as seen by userspace */
+	struct ibv_kern_recv_wr recv_wr[0];	/* wr_count entries, then sge_count struct ibv_sge */
+};
+
+struct ibv_post_srq_recv_resp {	/* response to POST_SRQ_RECV */
+	__u32 bad_wr;	/* 0 = none; otherwise 1-based position in the list of the WR returned via *bad_wr */
+};
+
+struct ibv_kern_global_route {	/* filled from attr->grh by ibv_cmd_create_ah() */
+	__u8  dgid[16];	/* raw GID bytes, copied from grh.dgid.raw */
+	__u32 flow_label;
+	__u8  sgid_index;
+	__u8  hop_limit;
+	__u8  traffic_class;
+	__u8  reserved;	/* not written by ibv_cmd_create_ah() */
+};
+
+struct ibv_kern_ah_attr {	/* image of struct ibv_ah_attr embedded in the CREATE_AH command */
+	struct ibv_kern_global_route grh;
+	__u16 dlid;
+	__u8  sl;
+	__u8  src_path_bits;
+	__u8  static_rate;
+	__u8  is_global;
+	__u8  port_num;
+	__u8  reserved;	/* not written by ibv_cmd_create_ah() */
+};
+
+struct ibv_create_ah {	/* CREATE_AH command built by ibv_cmd_create_ah() */
+	__u32 command;	/* command/in_words/out_words/response: presumably filled by IBV_INIT_CMD_RESP -- confirm */
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 user_handle;	/* address of the caller's struct ibv_ah, carried as an integer */
+	__u32 pd_handle;	/* ibv_pd.handle of the owning PD */
+	__u32 reserved;	/* not written by ibv_cmd_create_ah() */
+	struct ibv_kern_ah_attr attr;
+};
+
+struct ibv_create_ah_resp {	/* response to CREATE_AH */
+	__u32 handle;	/* stored into ibv_ah.handle by ibv_cmd_create_ah() */
+};
+
+struct ibv_destroy_ah {	/* DESTROY_AH command; issued with IBV_INIT_CMD, i.e. no response buffer */
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 ah_handle;	/* ibv_ah.handle of the address handle to destroy */
+};
+
 struct ibv_attach_mcast {
 	__u32 command;
 	__u16 in_words;
--- libibverbs/src/libibverbs.map	(revision 3774)
+++ libibverbs/src/libibverbs.map	(working copy)
@@ -41,6 +41,8 @@ IBVERBS_1.0 {
 		ibv_cmd_reg_mr;
 		ibv_cmd_dereg_mr;
 		ibv_cmd_create_cq;
+		ibv_cmd_poll_cq;
+		ibv_cmd_req_notify_cq;
 		ibv_cmd_destroy_cq;
 		ibv_cmd_create_srq;
 		ibv_cmd_modify_srq;
@@ -48,6 +50,11 @@ IBVERBS_1.0 {
 		ibv_cmd_create_qp;
 		ibv_cmd_modify_qp;
 		ibv_cmd_destroy_qp;
+		ibv_cmd_post_send;
+		ibv_cmd_post_recv;
+		ibv_cmd_post_srq_recv;
+		ibv_cmd_create_ah;
+		ibv_cmd_destroy_ah;
 		ibv_cmd_attach_mcast;
 		ibv_cmd_detach_mcast;
 	local: *;
--- libibverbs/src/cmd.c	(revision 3774)
+++ libibverbs/src/cmd.c	(working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -304,6 +305,65 @@ int ibv_cmd_create_cq(struct ibv_context
 	return 0;
 }
 
+int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+	/* Issue POLL_CQ for up to ne entries; returns the count copied to wc, or -1. */
+	struct ibv_poll_cq       req;
+	struct ibv_poll_cq_resp *reply;
+	struct ibv_kern_wc      *src;
+	struct ibv_wc           *dst;
+	int                      reply_len, done, k;
+
+	/* Reply buffer: fixed header followed by room for ne kernel entries. */
+	reply_len = sizeof *reply + ne * sizeof(struct ibv_kern_wc);
+	reply     = malloc(reply_len);
+	if (!reply)
+		return -1;
+
+	IBV_INIT_CMD_RESP(&req, sizeof req, POLL_CQ, reply, reply_len);
+	req.cq_handle = ibcq->handle;
+	req.ne        = ne;
+
+	done = -1;	/* returned unchanged if the command write fails */
+	if (write(ibcq->context->cmd_fd, &req, sizeof req) == sizeof req) {
+		/* Copy each reported entry, field by field, into the caller's array. */
+		for (k = 0; k < reply->count; k++) {
+			src = &reply->wc[k];
+			dst = &wc[k];
+			dst->wr_id          = src->wr_id;
+			dst->status         = src->status;
+			dst->opcode         = src->opcode;
+			dst->vendor_err     = src->vendor_err;
+			dst->byte_len       = src->byte_len;
+			dst->imm_data       = src->imm_data;
+			dst->qp_num         = src->qp_num;
+			dst->src_qp         = src->src_qp;
+			dst->wc_flags       = src->wc_flags;
+			dst->pkey_index     = src->pkey_index;
+			dst->slid           = src->slid;
+			dst->sl             = src->sl;
+			dst->dlid_path_bits = src->dlid_path_bits;
+		}
+		done = reply->count;
+	}
+	free(reply);
+	return done;
+}
+
+int ibv_cmd_req_notify_cq(struct ibv_cq *ibcq, int solicited)
+{
+	struct ibv_req_notify_cq cmd;
+	/* Send REQ_NOTIFY_CQ for ibcq; returns 0, or errno if the write fails. */
+	IBV_INIT_CMD(&cmd, sizeof cmd, REQ_NOTIFY_CQ);
+	cmd.cq_handle = ibcq->handle;
+	cmd.solicited = !!solicited;	/* normalise to 0/1 without inverting the caller's flag */
+
+	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	return 0;
+}
+
 static int ibv_cmd_destroy_cq_v1(struct ibv_cq *cq)
 {
 	struct ibv_destroy_cq_v1 cmd;
@@ -441,6 +501,7 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
 
 	qp->handle  = resp.qp_handle;
 	qp->qp_num  = resp.qpn;
+	qp->qp_type = attr->qp_type;
 
 	return 0;
 }
@@ -518,6 +579,251 @@ static int ibv_cmd_destroy_qp_v1(struct 
 	return 0;
 }
 
+int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+		      struct ibv_send_wr **bad_wr)
+{
+	struct ibv_post_send     *cmd;
+	struct ibv_post_send_resp resp;
+	struct ibv_send_wr       *i;
+	struct ibv_kern_send_wr  *n, *tmp;
+	struct ibv_sge           *s;
+	unsigned                  wr_count = 0;
+	unsigned                  sge_count = 0;
+	int                       size;
+	int                       ret = 0;
+	/* Post the WR list via cmd_fd; returns 0 or errno, failing WR in *bad_wr. */
+	for (i = wr; i; i = i->next) {
+		wr_count++;
+		sge_count += i->num_sge;
+	}
+	/* One buffer: command header, wr_count kernel WRs, then all SGEs in order. */
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd  = alloca(size);
+
+	IBV_INIT_CMD_RESP(cmd, size, POST_SEND, &resp, sizeof resp);
+	cmd->qp_handle = ibqp->handle;
+	cmd->wr_count  = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size  = sizeof *n;
+
+	n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);	/* SGE area starts after the last WR */
+
+	tmp = n;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id      = i->wr_id;
+		tmp->num_sge    = i->num_sge;
+		tmp->opcode     = i->opcode;
+		tmp->send_flags = i->send_flags;
+		tmp->imm_data   = i->imm_data;
+		if (ibqp->qp_type == IBV_QPT_UD) {	/* UD: AH handle plus remote QPN/Q_Key */
+			tmp->wr.ud.ah          = i->wr.ud.ah->handle;
+			tmp->wr.ud.remote_qpn  = i->wr.ud.remote_qpn;
+			tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
+		} else {
+			switch (i->opcode) {
+			case IBV_WR_RDMA_WRITE:
+			case IBV_WR_RDMA_WRITE_WITH_IMM:
+			case IBV_WR_RDMA_READ:
+				tmp->wr.rdma.remote_addr =
+					i->wr.rdma.remote_addr;
+				tmp->wr.rdma.rkey = i->wr.rdma.rkey;
+				break;
+			case IBV_WR_ATOMIC_CMP_AND_SWP:
+			case IBV_WR_ATOMIC_FETCH_AND_ADD:
+				tmp->wr.atomic.remote_addr =
+					i->wr.atomic.remote_addr;
+				tmp->wr.atomic.compare_add =
+					i->wr.atomic.compare_add;
+				tmp->wr.atomic.swap = i->wr.atomic.swap;
+				tmp->wr.atomic.rkey = i->wr.atomic.rkey;
+				break;
+			default:	/* other opcodes use no union member */
+				break;
+			}
+		}
+
+		if (tmp->num_sge) {
+			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+			s += tmp->num_sge;
+		}
+
+		tmp++;
+	}
+
+	resp.bad_wr = 0;	/* stays 0 unless a failing WR index is written back */
+	if (write(ibqp->context->cmd_fd, cmd, size) != size)	/* whole buffer; cmd is a pointer */
+		ret = errno;
+
+	wr_count = resp.bad_wr;	/* 1-based position of the failing WR, 0 if none */
+	if (wr_count) {
+		i = wr;
+		while (--wr_count)
+			i = i->next;
+		*bad_wr = i;
+	} else if (ret)
+		*bad_wr = wr;	/* failure without an index: report the head of the list */
+	return ret;
+}
+
+int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		      struct ibv_recv_wr **bad_wr)
+{
+	struct ibv_post_recv     *cmd;
+	struct ibv_post_recv_resp resp;
+	struct ibv_recv_wr       *i;
+	struct ibv_kern_recv_wr  *n, *tmp;
+	struct ibv_sge           *s;
+	unsigned                  wr_count = 0;
+	unsigned                  sge_count = 0;
+	int                       size;
+	int                       ret = 0;
+	/* Post the WR list via cmd_fd; returns 0 or errno, failing WR in *bad_wr. */
+	for (i = wr; i; i = i->next) {
+		wr_count++;
+		sge_count += i->num_sge;
+	}
+	/* One buffer: command header, wr_count kernel WRs, then all SGEs in order. */
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd  = alloca(size);
+
+	IBV_INIT_CMD_RESP(cmd, size, POST_RECV, &resp, sizeof resp);
+	cmd->qp_handle = ibqp->handle;
+	cmd->wr_count  = wr_count;
+	cmd->sge_count = sge_count;
+	cmd->wqe_size  = sizeof *n;
+
+	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);	/* SGE area starts after the last WR */
+
+	tmp = n;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id   = i->wr_id;
+		tmp->num_sge = i->num_sge;
+
+		if (tmp->num_sge) {
+			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+			s += tmp->num_sge;
+		}
+
+		tmp++;
+	}
+
+	resp.bad_wr = 0;	/* stays 0 unless a failing WR index is written back */
+	if (write(ibqp->context->cmd_fd, cmd, size) != size)	/* whole buffer; cmd is a pointer */
+		ret = errno;
+
+	wr_count = resp.bad_wr;	/* 1-based position of the failing WR, 0 if none */
+	if (wr_count) {
+		i = wr;
+		while (--wr_count)
+			i = i->next;
+		*bad_wr = i;
+	} else if (ret)
+		*bad_wr = wr;	/* failure without an index: report the head of the list */
+	return ret;
+}
+
+int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
+			  struct ibv_recv_wr **bad_wr)
+{
+	struct ibv_post_srq_recv *cmd;
+	struct ibv_post_srq_recv_resp resp;
+	struct ibv_recv_wr       *i;
+	struct ibv_kern_recv_wr  *n, *tmp;
+	struct ibv_sge           *s;
+	unsigned                  wr_count = 0;
+	unsigned                  sge_count = 0;
+	int                       size;
+	int                       ret = 0;
+	/* Post the WR list via cmd_fd; returns 0 or errno, failing WR in *bad_wr. */
+	for (i = wr; i; i = i->next) {
+		wr_count++;
+		sge_count += i->num_sge;
+	}
+	/* One buffer: command header, wr_count kernel WRs, then all SGEs in order. */
+	size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
+	cmd  = alloca(size);
+
+	IBV_INIT_CMD_RESP(cmd, size, POST_SRQ_RECV, &resp, sizeof resp);
+	cmd->srq_handle = srq->handle;
+	cmd->wr_count   = wr_count;
+	cmd->sge_count  = sge_count;
+	cmd->wqe_size   = sizeof *n;
+
+	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
+	s = (struct ibv_sge *) (n + wr_count);	/* SGE area starts after the last WR */
+
+	tmp = n;
+	for (i = wr; i; i = i->next) {
+		tmp->wr_id   = i->wr_id;
+		tmp->num_sge = i->num_sge;
+
+		if (tmp->num_sge) {
+			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
+			s += tmp->num_sge;
+		}
+
+		tmp++;
+	}
+
+	resp.bad_wr = 0;	/* stays 0 unless a failing WR index is written back */
+	if (write(srq->context->cmd_fd, cmd, size) != size)	/* whole buffer; cmd is a pointer */
+		ret = errno;
+
+	wr_count = resp.bad_wr;	/* 1-based position of the failing WR, 0 if none */
+	if (wr_count) {
+		i = wr;
+		while (--wr_count)
+			i = i->next;
+		*bad_wr = i;
+	} else if (ret)
+		*bad_wr = wr;	/* failure without an index: report the head of the list */
+	return ret;
+}
+
+int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
+		      struct ibv_ah_attr *attr)
+{
+	/* Issue CREATE_AH on pd's context; returns 0 and sets ah->handle, or errno. */
+	struct ibv_create_ah      req;
+	struct ibv_create_ah_resp reply;
+
+	IBV_INIT_CMD_RESP(&req, sizeof req, CREATE_AH, &reply, sizeof reply);
+	req.pd_handle              = pd->handle;
+	req.user_handle            = (uintptr_t) ah;
+	memcpy(req.attr.grh.dgid, attr->grh.dgid.raw, 16);
+	req.attr.grh.traffic_class = attr->grh.traffic_class;
+	req.attr.grh.hop_limit     = attr->grh.hop_limit;
+	req.attr.grh.sgid_index    = attr->grh.sgid_index;
+	req.attr.grh.flow_label    = attr->grh.flow_label;
+	req.attr.port_num          = attr->port_num;
+	req.attr.is_global         = attr->is_global;
+	req.attr.static_rate       = attr->static_rate;
+	req.attr.src_path_bits     = attr->src_path_bits;
+	req.attr.sl                = attr->sl;
+	req.attr.dlid              = attr->dlid;
+
+	if (write(pd->context->cmd_fd, &req, sizeof req) == sizeof req) {
+		ah->handle = reply.handle;
+		return 0;
+	}
+	return errno;	/* write failed or was short */
+}
+
+int ibv_cmd_destroy_ah(struct ibv_ah *ah)
+{
+	/* Issue DESTROY_AH for ah->handle on the context's cmd_fd; returns 0 or errno. */
+	struct ibv_destroy_ah req;
+	int                   sent;
+
+	IBV_INIT_CMD(&req, sizeof req, DESTROY_AH);
+	req.ah_handle = ah->handle;
+
+	sent = write(ah->context->cmd_fd, &req, sizeof req) == sizeof req;
+	return sent ? 0 : errno;
+}
+
 int ibv_cmd_destroy_qp(struct ibv_qp *qp)
 {
 	struct ibv_destroy_qp      cmd;



More information about the general mailing list