[openib-general] [RFC] [PATCH 2/7] ibrdmaverbs src files

Krishna Kumar krkumar2 at in.ibm.com
Mon Jul 10 03:24:16 PDT 2006


Sorry, this should be [PATCH 4/7]. I also seem to have
missed the "Signed-off-by" line on the earlier patches.
Repeatedly restarting Evolution, which failed each time
with a "glibc free" error, led me to forget it by the time
the individual mails were sent out :-)

thanks,

- KK

On Mon, 2006-07-10 at 15:52 +0530, Krishna Kumar wrote:
> This library provides equivalent functionality to
> libibverbs, but changes the data types and verb
> API's to be transport neutral. This patch contains
> the source files.
> 
> Signed-off-by: Krishna Kumar <krkumar2 at in.ibm.com>
> 
> diff -ruNp ORG/librdmaverbs/src/cmd.c NEW/librdmaverbs/src/cmd.c
> --- ORG/librdmaverbs/src/cmd.c	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/cmd.c	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,1060 @@
> +/*
> + * Copyright (c) 2005 Topspin Communications.  All rights reserved.
> + * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
> + * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * $Id: cmd.c 7631 2006-06-02 19:53:25Z swise $
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif /* HAVE_CONFIG_H */
> +
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <errno.h>
> +#include <alloca.h>
> +#include <string.h>
> +
> +#include "rdmaverbs.h"
> +
> +static int rdma_cmd_get_context_v2(struct rdma_context *context,
> +				  struct rdma_get_context *new_cmd,
> +				  size_t new_cmd_size,
> +				  struct rdma_get_context_resp *resp,
> +				  size_t resp_size)
> +{
> +	struct rdma_abi_compat_v2 *t;
> +	struct rdma_get_context_v2 *cmd;
> +	size_t cmd_size;
> +	uint32_t cq_fd;
> +
> +	t = malloc(sizeof *t);
> +	if (!t)
> +		return ENOMEM;
> +	pthread_mutex_init(&t->in_use, NULL);
> +
> +	cmd_size = sizeof *cmd + new_cmd_size - sizeof *new_cmd;
> +	cmd      = alloca(cmd_size);
> +	memcpy(cmd->driver_data, new_cmd->driver_data, new_cmd_size - sizeof
> *new_cmd);
> +
> +	RDMA_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp, resp_size);
> +	cmd->cq_fd_tab = (uintptr_t) &cq_fd;
> +
> +	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
> +		return errno;
> +
> +	context->async_fd         = resp->async_fd;
> +	context->num_comp_vectors = 1;
> +	t->channel.fd		  = cq_fd;
> +	context->abi_compat       = t;
> +
> +	return 0;
> +}
> +
/*
 * Fetch the device context from the kernel via the command fd.
 * Fills in context->async_fd and context->num_comp_vectors from the
 * kernel's response.  Returns 0 on success or errno on a short write.
 */
int rdma_cmd_get_context(struct rdma_context *context, struct rdma_get_context *cmd,
			size_t cmd_size, struct rdma_get_context_resp *resp,
			size_t resp_size)
{
	/* Old kernels (ABI <= 2) report CQ events differently. */
	if (abi_ver <= 2)
		return rdma_cmd_get_context_v2(context, cmd, cmd_size, resp, resp_size);

	RDMA_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp, resp_size);

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	context->async_fd         = resp->async_fd;
	context->num_comp_vectors = resp->num_comp_vectors;

	return 0;
}
> +
/*
 * Query device attributes.  The kernel's response is copied field by
 * field into *device_attr; the raw firmware version is returned
 * separately in *raw_fw_ver (fw_ver's string form is zeroed here —
 * presumably formatted later by the provider; TODO confirm).
 * Returns 0 on success or errno on a short write.
 */
int rdma_cmd_query_device(struct rdma_context *context,
			 struct rdma_device_attr *device_attr,
			 uint64_t *raw_fw_ver,
			 struct rdma_query_device *cmd, size_t cmd_size)
{
	struct rdma_query_device_resp resp;

	RDMA_INIT_CMD_RESP(cmd, cmd_size, QUERY_DEVICE, &resp, sizeof resp);

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	memset(device_attr->fw_ver, 0, sizeof device_attr->fw_ver);
	*raw_fw_ver			       = resp.fw_ver;
	device_attr->node_guid 		       = resp.node_guid;
	device_attr->sys_image_guid 	       = resp.sys_image_guid;
	device_attr->max_mr_size 	       = resp.max_mr_size;
	device_attr->page_size_cap 	       = resp.page_size_cap;
	device_attr->vendor_id 		       = resp.vendor_id;
	device_attr->vendor_part_id 	       = resp.vendor_part_id;
	device_attr->hw_ver 		       = resp.hw_ver;
	device_attr->max_qp 		       = resp.max_qp;
	device_attr->max_qp_wr 		       = resp.max_qp_wr;
	device_attr->device_cap_flags 	       = resp.device_cap_flags;
	device_attr->max_sge 		       = resp.max_sge;
	device_attr->max_sge_rd 	       = resp.max_sge_rd;
	device_attr->max_cq 		       = resp.max_cq;
	device_attr->max_cqe 		       = resp.max_cqe;
	device_attr->max_mr 		       = resp.max_mr;
	device_attr->max_pd 		       = resp.max_pd;
	device_attr->max_qp_rd_atom 	       = resp.max_qp_rd_atom;
	device_attr->max_ee_rd_atom 	       = resp.max_ee_rd_atom;
	device_attr->max_res_rd_atom 	       = resp.max_res_rd_atom;
	device_attr->max_qp_init_rd_atom       = resp.max_qp_init_rd_atom;
	device_attr->max_ee_init_rd_atom       = resp.max_ee_init_rd_atom;
	device_attr->atomic_cap 	       = resp.atomic_cap;
	device_attr->max_ee 		       = resp.max_ee;
	device_attr->max_rdd 		       = resp.max_rdd;
	device_attr->max_mw 		       = resp.max_mw;
	device_attr->max_raw_ipv6_qp 	       = resp.max_raw_ipv6_qp;
	device_attr->max_raw_ethy_qp 	       = resp.max_raw_ethy_qp;
	device_attr->max_mcast_grp 	       = resp.max_mcast_grp;
	device_attr->max_mcast_qp_attach       = resp.max_mcast_qp_attach;
	device_attr->max_total_mcast_qp_attach = resp.max_total_mcast_qp_attach;
	device_attr->max_ah 		       = resp.max_ah;
	device_attr->max_fmr 		       = resp.max_fmr;
	device_attr->max_map_per_fmr 	       = resp.max_map_per_fmr;
	device_attr->max_srq 		       = resp.max_srq;
	device_attr->max_srq_wr 	       = resp.max_srq_wr;
	device_attr->max_srq_sge 	       = resp.max_srq_sge;
	device_attr->max_pkeys 		       = resp.max_pkeys;
	device_attr->local_ca_ack_delay        = resp.local_ca_ack_delay;
	device_attr->phys_port_cnt	       = resp.phys_port_cnt;

	return 0;
}
> +
/*
 * Query the attributes of one port (port_num) on the device and copy
 * the kernel's response into *port_attr.
 * Returns 0 on success or errno on a short write.
 */
int rdma_cmd_query_port(struct rdma_context *context, uint8_t port_num,
		       struct rdma_port_attr *port_attr,
		       struct rdma_query_port *cmd, size_t cmd_size)
{
	struct rdma_query_port_resp resp;

	RDMA_INIT_CMD_RESP(cmd, cmd_size, QUERY_PORT, &resp, sizeof resp);
	cmd->port_num = port_num;

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	port_attr->state      	   = resp.state;
	port_attr->max_mtu         = resp.max_mtu;
	port_attr->active_mtu      = resp.active_mtu;
	port_attr->gid_tbl_len     = resp.gid_tbl_len;
	port_attr->port_cap_flags  = resp.port_cap_flags;
	port_attr->max_msg_sz      = resp.max_msg_sz;
	port_attr->bad_pkey_cntr   = resp.bad_pkey_cntr;
	port_attr->qkey_viol_cntr  = resp.qkey_viol_cntr;
	port_attr->pkey_tbl_len    = resp.pkey_tbl_len;
	port_attr->lid 	      	   = resp.lid;
	port_attr->sm_lid 	   = resp.sm_lid;
	port_attr->lmc 	      	   = resp.lmc;
	port_attr->max_vl_num      = resp.max_vl_num;
	port_attr->sm_sl      	   = resp.sm_sl;
	port_attr->subnet_timeout  = resp.subnet_timeout;
	port_attr->init_type_reply = resp.init_type_reply;
	port_attr->active_width    = resp.active_width;
	port_attr->active_speed    = resp.active_speed;
	port_attr->phys_state      = resp.phys_state;

	return 0;
}
> +
/*
 * Allocate a protection domain in the kernel and record its handle in
 * pd->handle.  Returns 0 on success or errno on a short write.
 */
int rdma_cmd_alloc_pd(struct rdma_context *context, struct rdma_pd *pd,
		     struct rdma_alloc_pd *cmd, size_t cmd_size,
		     struct rdma_alloc_pd_resp *resp, size_t resp_size)
{
	RDMA_INIT_CMD_RESP(cmd, cmd_size, ALLOC_PD, resp, resp_size);

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	pd->handle = resp->pd_handle;

	return 0;
}
> +
> +int rdma_cmd_dealloc_pd(struct rdma_pd *pd)
> +{
> +	struct rdma_dealloc_pd cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, DEALLOC_PD);
> +	cmd.pd_handle = pd->handle;
> +
> +	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
/*
 * Register a memory region [addr, addr+length) with the kernel under
 * protection domain pd.  hca_va is the virtual address the HCA should
 * use for the region; access gives the permitted access flags.  On
 * success fills in mr->handle/lkey/rkey and returns 0; returns errno
 * on a short write.
 */
int rdma_cmd_reg_mr(struct rdma_pd *pd, void *addr, size_t length,
		   uint64_t hca_va, enum rdma_access_flags access,
		   struct rdma_mr *mr, struct rdma_reg_mr *cmd,
		   size_t cmd_size)
{
	struct rdma_reg_mr_resp resp;

	RDMA_INIT_CMD_RESP(cmd, cmd_size, REG_MR, &resp, sizeof resp);

	cmd->start 	  = (uintptr_t) addr;
	cmd->length 	  = length;
	cmd->hca_va 	  = hca_va;
	cmd->pd_handle 	  = pd->handle;
	cmd->access_flags = access;

	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	mr->handle  = resp.mr_handle;
	mr->lkey    = resp.lkey;
	mr->rkey    = resp.rkey;

	return 0;
}
> +
> +int rdma_cmd_dereg_mr(struct rdma_mr *mr)
> +{
> +	struct rdma_dereg_mr cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, DEREG_MR);
> +	cmd.mr_handle = mr->handle;
> +
> +	if (write(mr->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
/*
 * ABI v2 variant of create_cq: rebuilds the caller's command in the
 * older wire layout (which carried an event_handler field and no
 * completion-channel/comp-vector fields), preserving any trailing
 * provider driver_data.  Returns 0 on success or errno on failure.
 */
static int rdma_cmd_create_cq_v2(struct rdma_context *context, int cqe,
				struct rdma_cq *cq,
				struct rdma_create_cq *new_cmd, size_t new_cmd_size,
				struct rdma_create_cq_resp *resp, size_t resp_size)
{
	struct rdma_create_cq_v2 *cmd;
	size_t cmd_size;

	/* Size the v2 command to hold the same driver_data tail. */
	cmd_size = sizeof *cmd + new_cmd_size - sizeof *new_cmd;
	cmd      = alloca(cmd_size);
	memcpy(cmd->driver_data, new_cmd->driver_data, new_cmd_size - sizeof *new_cmd);

	RDMA_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, resp, resp_size);
	cmd->user_handle   = (uintptr_t) cq;	/* echoed back in async/comp events */
	cmd->cqe           = cqe;
	cmd->event_handler = 0;

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	cq->handle = resp->cq_handle;
	cq->cqe    = resp->cqe;	/* kernel may round the requested size up */

	return 0;
}
> +
/*
 * Create a completion queue with at least cqe entries, optionally
 * bound to a completion channel and a completion vector.  On success
 * fills in cq->handle and cq->cqe and returns 0; returns errno on a
 * short write.
 */
int rdma_cmd_create_cq(struct rdma_context *context, int cqe,
		      struct rdma_comp_channel *channel,
		      int comp_vector, struct rdma_cq *cq,
		      struct rdma_create_cq *cmd, size_t cmd_size,
		      struct rdma_create_cq_resp *resp, size_t resp_size)
{
	/* Old kernels (ABI <= 2) use a different command layout. */
	if (abi_ver <= 2)
		return rdma_cmd_create_cq_v2(context, cqe, cq,
					    cmd, cmd_size, resp, resp_size);

	RDMA_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, resp, resp_size);
	cmd->user_handle   = (uintptr_t) cq;	/* echoed back in CQ events */
	cmd->cqe           = cqe;
	cmd->comp_vector   = comp_vector;
	cmd->comp_channel  = channel ? channel->fd : -1;	/* -1 = no channel */
	cmd->reserved      = 0;

	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	cq->handle = resp->cq_handle;
	cq->cqe    = resp->cqe;	/* kernel may round the requested size up */

	return 0;
}
> +
> +int rdma_cmd_poll_cq(struct rdma_cq *ibcq, int ne, struct rdma_wc *wc)
> +{
> +	struct rdma_poll_cq       cmd;
> +	struct rdma_poll_cq_resp *resp;
> +	int                      i;
> +	int                      rsize;
> +	int                      ret;
> +
> +	rsize = sizeof *resp + ne * sizeof(struct rdma_kern_wc);
> +	resp  = malloc(rsize);
> +	if (!resp)
> +		return -1;
> +
> +	RDMA_INIT_CMD_RESP(&cmd, sizeof cmd, POLL_CQ, resp, rsize);
> +	cmd.cq_handle = ibcq->handle;
> +	cmd.ne        = ne;
> +
> +	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) {
> +		ret = -1;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < resp->count; i++) {
> +		wc[i].wr_id 	     = resp->wc[i].wr_id;
> +		wc[i].status 	     = resp->wc[i].status;
> +		wc[i].opcode 	     = resp->wc[i].opcode;
> +		wc[i].vendor_err     = resp->wc[i].vendor_err;
> +		wc[i].byte_len 	     = resp->wc[i].byte_len;
> +		wc[i].imm_data 	     = resp->wc[i].imm_data;
> +		wc[i].qp_num 	     = resp->wc[i].qp_num;
> +		wc[i].src_qp 	     = resp->wc[i].src_qp;
> +		wc[i].wc_flags 	     = resp->wc[i].wc_flags;
> +		wc[i].pkey_index     = resp->wc[i].pkey_index;
> +		wc[i].slid 	     = resp->wc[i].slid;
> +		wc[i].sl 	     = resp->wc[i].sl;
> +		wc[i].dlid_path_bits = resp->wc[i].dlid_path_bits;
> +	}
> +
> +	ret = resp->count;
> +
> +out:
> +	free(resp);
> +	return ret;
> +}
> +
> +int rdma_cmd_req_notify_cq(struct rdma_cq *ibcq, int solicited_only)
> +{
> +	struct rdma_req_notify_cq cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, REQ_NOTIFY_CQ);
> +	cmd.cq_handle = ibcq->handle;
> +	cmd.solicited = !!solicited_only;
> +
> +	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
/*
 * Resize an existing CQ to hold at least cqe entries.  On success
 * updates cq->cqe with the size the kernel actually granted and
 * returns 0; returns errno on a short write.
 */
int rdma_cmd_resize_cq(struct rdma_cq *cq, int cqe,
		      struct rdma_resize_cq *cmd, size_t cmd_size)
{
	struct rdma_resize_cq_resp resp;

	RDMA_INIT_CMD_RESP(cmd, cmd_size, RESIZE_CQ, &resp, sizeof resp);
	cmd->cq_handle = cq->handle;
	cmd->cqe       = cqe;

	if (write(cq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	cq->cqe = resp.cqe;	/* kernel may round the requested size up */

	return 0;
}
> +
> +static int rdma_cmd_destroy_cq_v1(struct rdma_cq *cq)
> +{
> +	struct rdma_destroy_cq_v1 cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, DESTROY_CQ);
> +	cmd.cq_handle = cq->handle;
> +
> +	if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
/*
 * Destroy a CQ.  After the kernel confirms destruction it reports how
 * many completion/async events it delivered for this CQ; wait (on
 * cq->cond) until the event threads have consumed that many, so no
 * event handler can touch the CQ after we return.
 * Returns 0 on success or errno on a short write.
 */
int rdma_cmd_destroy_cq(struct rdma_cq *cq)
{
	struct rdma_destroy_cq      cmd;
	struct rdma_destroy_cq_resp resp;

	if (abi_ver == 1)
		return rdma_cmd_destroy_cq_v1(cq);

	RDMA_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_CQ, &resp, sizeof resp);
	cmd.cq_handle = cq->handle;

	if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	/* Drain outstanding events: the event readers signal cq->cond as
	 * they bump the *_completed counters. */
	pthread_mutex_lock(&cq->mutex);
	while (cq->comp_events_completed  != resp.comp_events_reported ||
	       cq->async_events_completed != resp.async_events_reported)
		pthread_cond_wait(&cq->cond, &cq->mutex);
	pthread_mutex_unlock(&cq->mutex);

	return 0;
}
> +
/*
 * Create a shared receive queue on protection domain pd with the
 * limits in attr.  On success fills in srq->handle; on ABI > 5 the
 * kernel also reports the actual max_wr/max_sge granted, which are
 * written back into attr.  Returns 0 on success or errno on failure.
 */
int rdma_cmd_create_srq(struct rdma_pd *pd,
		       struct rdma_srq *srq, struct rdma_srq_init_attr *attr,
		       struct rdma_create_srq *cmd, size_t cmd_size,
		       struct rdma_create_srq_resp *resp, size_t resp_size)
{
	RDMA_INIT_CMD_RESP(cmd, cmd_size, CREATE_SRQ, resp, resp_size);
	cmd->user_handle = (uintptr_t) srq;	/* echoed back in SRQ events */
	cmd->pd_handle 	 = pd->handle;
	cmd->max_wr      = attr->attr.max_wr;
	cmd->max_sge     = attr->attr.max_sge;
	cmd->srq_limit   = attr->attr.srq_limit;

	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	srq->handle = resp->srq_handle;

	if (abi_ver > 5) {
		attr->attr.max_wr = resp->max_wr;
		attr->attr.max_sge = resp->max_sge;
	} else {
		/* Old kernels used the shorter v5 response layout; shift the
		 * provider-private tail so it lands where the provider
		 * expects it relative to the new, larger struct. */
		struct rdma_create_srq_resp_v5 *resp_v5 =
			(struct rdma_create_srq_resp_v5 *) resp;

		memmove((void *) resp + sizeof *resp,
			(void *) resp_v5 + sizeof *resp_v5,
			resp_size - sizeof *resp);
	}

	return 0;
}
> +
/*
 * ABI v3 variant of modify_srq: rebuilds the command in the older
 * wire layout (which also carried max_sge and a reserved word),
 * preserving any trailing provider driver_data.
 * Returns 0 on success or errno on a short write.
 */
static int rdma_cmd_modify_srq_v3(struct rdma_srq *srq,
				 struct rdma_srq_attr *srq_attr,
				 enum rdma_srq_attr_mask srq_attr_mask,
				 struct rdma_modify_srq *new_cmd,
				 size_t new_cmd_size)
{
	struct rdma_modify_srq_v3 *cmd;
	size_t cmd_size;

	/* Size the v3 command to hold the same driver_data tail. */
	cmd_size = sizeof *cmd + new_cmd_size - sizeof *new_cmd;
	cmd      = alloca(cmd_size);
	memcpy(cmd->driver_data, new_cmd->driver_data, new_cmd_size - sizeof *new_cmd);

	RDMA_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);

	cmd->srq_handle	= srq->handle;
	cmd->attr_mask	= srq_attr_mask;
	cmd->max_wr	= srq_attr->max_wr;
	cmd->srq_limit	= srq_attr->srq_limit;
	cmd->max_sge	= 0;	/* not modifiable; v3 layout still has the field */
	cmd->reserved	= 0;

	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	return 0;
}
> +
/*
 * Modify an SRQ's attributes (max_wr and/or srq_limit, selected by
 * srq_attr_mask).  Returns 0 on success or errno on a short write.
 */
int rdma_cmd_modify_srq(struct rdma_srq *srq,
		       struct rdma_srq_attr *srq_attr,
		       enum rdma_srq_attr_mask srq_attr_mask,
		       struct rdma_modify_srq *cmd, size_t cmd_size)
{
	/* ABI 3 kernels expect the older command layout. */
	if (abi_ver == 3)
		return rdma_cmd_modify_srq_v3(srq, srq_attr, srq_attr_mask,
					     cmd, cmd_size);

	RDMA_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);

	cmd->srq_handle	= srq->handle;
	cmd->attr_mask	= srq_attr_mask;
	cmd->max_wr	= srq_attr->max_wr;
	cmd->srq_limit	= srq_attr->srq_limit;

	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	return 0;
}
> +
/*
 * Query an SRQ's current attributes (max_wr, max_sge, srq_limit) into
 * *srq_attr.  Returns 0 on success or errno on a short write.
 */
int rdma_cmd_query_srq(struct rdma_srq *srq, struct rdma_srq_attr *srq_attr,
		      struct rdma_query_srq *cmd, size_t cmd_size)
{
	struct rdma_query_srq_resp resp;

	RDMA_INIT_CMD_RESP(cmd, cmd_size, QUERY_SRQ, &resp, sizeof resp);
	cmd->srq_handle = srq->handle;

	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	srq_attr->max_wr    = resp.max_wr;
	srq_attr->max_sge   = resp.max_sge;
	srq_attr->srq_limit = resp.srq_limit;

	return 0;
}
> +
> +static int rdma_cmd_destroy_srq_v1(struct rdma_srq *srq)
> +{
> +	struct rdma_destroy_srq_v1 cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, DESTROY_SRQ);
> +	cmd.srq_handle = srq->handle;
> +
> +	if (write(srq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
/*
 * Destroy an SRQ.  After the kernel confirms destruction it reports
 * how many async events it delivered for this SRQ; wait (on
 * srq->cond) until the event thread has consumed that many, so no
 * event handler can touch the SRQ after we return.
 * Returns 0 on success or errno on a short write.
 */
int rdma_cmd_destroy_srq(struct rdma_srq *srq)
{
	struct rdma_destroy_srq      cmd;
	struct rdma_destroy_srq_resp resp;

	if (abi_ver == 1)
		return rdma_cmd_destroy_srq_v1(srq);

	RDMA_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_SRQ, &resp, sizeof resp);
	cmd.srq_handle = srq->handle;

	if (write(srq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	/* Drain outstanding events before returning. */
	pthread_mutex_lock(&srq->mutex);
	while (srq->events_completed != resp.events_reported)
		pthread_cond_wait(&srq->cond, &srq->mutex);
	pthread_mutex_unlock(&srq->mutex);

	return 0;
}
> +
/*
 * Create a queue pair on protection domain pd with the capabilities
 * in attr.  On success fills in qp->handle and qp->qp_num; on ABI > 3
 * the kernel reports the capabilities actually granted, which are
 * written back into attr->cap.  Older ABIs used shorter response
 * layouts, so the provider-private tail is shifted into place.
 * Returns 0 on success or errno on a short write.
 */
int rdma_cmd_create_qp(struct rdma_pd *pd,
		      struct rdma_qp *qp, struct rdma_qp_init_attr *attr,
		      struct rdma_create_qp *cmd, size_t cmd_size,
		      struct rdma_create_qp_resp *resp, size_t resp_size)
{
	RDMA_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, resp, resp_size);

	cmd->user_handle     = (uintptr_t) qp;	/* echoed back in QP events */
	cmd->pd_handle 	     = pd->handle;
	cmd->send_cq_handle  = attr->send_cq->handle;
	cmd->recv_cq_handle  = attr->recv_cq->handle;
	cmd->srq_handle      = attr->srq ? attr->srq->handle : 0;
	cmd->max_send_wr     = attr->cap.max_send_wr;
	cmd->max_recv_wr     = attr->cap.max_recv_wr;
	cmd->max_send_sge    = attr->cap.max_send_sge;
	cmd->max_recv_sge    = attr->cap.max_recv_sge;
	cmd->max_inline_data = attr->cap.max_inline_data;
	cmd->sq_sig_all	     = attr->sq_sig_all;
	cmd->qp_type 	     = attr->qp_type;
	cmd->is_srq 	     = !!attr->srq;

	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	qp->handle 		  = resp->qp_handle;
	qp->qp_num 		  = resp->qpn;

	if (abi_ver > 3) {
		/* Kernel reports the capabilities it actually granted. */
		attr->cap.max_recv_sge    = resp->max_recv_sge;
		attr->cap.max_send_sge    = resp->max_send_sge;
		attr->cap.max_recv_wr     = resp->max_recv_wr;
		attr->cap.max_send_wr     = resp->max_send_wr;
		attr->cap.max_inline_data = resp->max_inline_data;
	}

	if (abi_ver == 4) {
		/* Shift the provider-private tail past the fields the v4
		 * response layout did not have. */
		struct rdma_create_qp_resp_v4 *resp_v4 =
			(struct rdma_create_qp_resp_v4 *) resp;

		memmove((void *) resp + sizeof *resp,
			(void *) resp_v4 + sizeof *resp_v4,
			resp_size - sizeof *resp);
	} else if (abi_ver <= 3) {
		/* Same adjustment for the even shorter v3 layout. */
		struct rdma_create_qp_resp_v3 *resp_v3 =
			(struct rdma_create_qp_resp_v3 *) resp;

		memmove((void *) resp + sizeof *resp,
			(void *) resp_v3 + sizeof *resp_v3,
			resp_size - sizeof *resp);
	}

	return 0;
}
> +
/*
 * Query a QP's current attributes.  The kernel's response is unpacked
 * into *attr (including the primary and alternate path/AH attributes)
 * and *init_attr; the init_attr CQ/SRQ/type fields come from our
 * cached copies on the qp itself, not from the kernel.
 * Returns 0 on success or errno on a short write.
 */
int rdma_cmd_query_qp(struct rdma_qp *qp, struct rdma_qp_attr *attr,
		     enum rdma_qp_attr_mask attr_mask,
		     struct rdma_query_qp *cmd, size_t cmd_size,
		     struct rdma_qp_init_attr *init_attr)
{
	struct rdma_query_qp_resp resp;

	RDMA_INIT_CMD_RESP(cmd, cmd_size, QUERY_QP, &resp, sizeof resp);
	cmd->qp_handle = qp->handle;
	cmd->attr_mask = attr_mask;

	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	attr->qkey                          = resp.qkey;
	attr->rq_psn                        = resp.rq_psn;
	attr->sq_psn                        = resp.sq_psn;
	attr->dest_qp_num                   = resp.dest_qp_num;
	attr->qp_access_flags               = resp.qp_access_flags;
	attr->pkey_index                    = resp.pkey_index;
	attr->alt_pkey_index                = resp.alt_pkey_index;
	attr->qp_state                      = resp.qp_state;
	attr->cur_qp_state                  = resp.cur_qp_state;
	attr->path_mtu                      = resp.path_mtu;
	attr->path_mig_state                = resp.path_mig_state;
	attr->en_sqd_async_notify           = resp.en_sqd_async_notify;
	attr->max_rd_atomic                 = resp.max_rd_atomic;
	attr->max_dest_rd_atomic            = resp.max_dest_rd_atomic;
	attr->min_rnr_timer                 = resp.min_rnr_timer;
	attr->port_num                      = resp.port_num;
	attr->timeout                       = resp.timeout;
	attr->retry_cnt                     = resp.retry_cnt;
	attr->rnr_retry                     = resp.rnr_retry;
	attr->alt_port_num                  = resp.alt_port_num;
	attr->alt_timeout                   = resp.alt_timeout;
	attr->cap.max_send_wr               = resp.max_send_wr;
	attr->cap.max_recv_wr               = resp.max_recv_wr;
	attr->cap.max_send_sge              = resp.max_send_sge;
	attr->cap.max_recv_sge              = resp.max_recv_sge;
	attr->cap.max_inline_data           = resp.max_inline_data;

	/* Primary path address-handle attributes (GID is 16 raw bytes). */
	memcpy(attr->ah_attr.grh.dgid.raw, resp.dest.dgid, 16);
	attr->ah_attr.grh.flow_label        = resp.dest.flow_label;
	attr->ah_attr.dlid                  = resp.dest.dlid;
	attr->ah_attr.grh.sgid_index        = resp.dest.sgid_index;
	attr->ah_attr.grh.hop_limit         = resp.dest.hop_limit;
	attr->ah_attr.grh.traffic_class     = resp.dest.traffic_class;
	attr->ah_attr.sl                    = resp.dest.sl;
	attr->ah_attr.src_path_bits         = resp.dest.src_path_bits;
	attr->ah_attr.static_rate           = resp.dest.static_rate;
	attr->ah_attr.is_global             = resp.dest.is_global;
	attr->ah_attr.port_num              = resp.dest.port_num;

	/* Alternate path address-handle attributes. */
	memcpy(attr->alt_ah_attr.grh.dgid.raw, resp.alt_dest.dgid, 16);
	attr->alt_ah_attr.grh.flow_label    = resp.alt_dest.flow_label;
	attr->alt_ah_attr.dlid              = resp.alt_dest.dlid;
	attr->alt_ah_attr.grh.sgid_index    = resp.alt_dest.sgid_index;
	attr->alt_ah_attr.grh.hop_limit     = resp.alt_dest.hop_limit;
	attr->alt_ah_attr.grh.traffic_class = resp.alt_dest.traffic_class;
	attr->alt_ah_attr.sl                = resp.alt_dest.sl;
	attr->alt_ah_attr.src_path_bits     = resp.alt_dest.src_path_bits;
	attr->alt_ah_attr.static_rate       = resp.alt_dest.static_rate;
	attr->alt_ah_attr.is_global         = resp.alt_dest.is_global;
	attr->alt_ah_attr.port_num          = resp.alt_dest.port_num;

	/* Creation-time attributes cached on the qp at create time. */
	init_attr->qp_context               = qp->qp_context;
	init_attr->send_cq                  = qp->send_cq;
	init_attr->recv_cq                  = qp->recv_cq;
	init_attr->srq                      = qp->srq;
	init_attr->qp_type                  = qp->qp_type;
	init_attr->cap.max_send_wr          = resp.max_send_wr;
	init_attr->cap.max_recv_wr          = resp.max_recv_wr;
	init_attr->cap.max_send_sge         = resp.max_send_sge;
	init_attr->cap.max_recv_sge         = resp.max_recv_sge;
	init_attr->cap.max_inline_data      = resp.max_inline_data;
	init_attr->sq_sig_all               = resp.sq_sig_all;

	return 0;
}
> +
/*
 * Modify a QP's attributes.  Every field of *attr is packed into the
 * command; attr_mask tells the kernel which of them to apply.
 * Returns 0 on success or errno on a short write.
 */
int rdma_cmd_modify_qp(struct rdma_qp *qp, struct rdma_qp_attr *attr,
		      enum rdma_qp_attr_mask attr_mask,
		      struct rdma_modify_qp *cmd, size_t cmd_size)
{
	RDMA_INIT_CMD(cmd, cmd_size, MODIFY_QP);

	cmd->qp_handle 		 = qp->handle;
	cmd->attr_mask 		 = attr_mask;
	cmd->qkey 		 = attr->qkey;
	cmd->rq_psn 		 = attr->rq_psn;
	cmd->sq_psn 		 = attr->sq_psn;
	cmd->dest_qp_num 	 = attr->dest_qp_num;
	cmd->qp_access_flags 	 = attr->qp_access_flags;
	cmd->pkey_index		 = attr->pkey_index;
	cmd->alt_pkey_index 	 = attr->alt_pkey_index;
	cmd->qp_state 		 = attr->qp_state;
	cmd->cur_qp_state 	 = attr->cur_qp_state;
	cmd->path_mtu 		 = attr->path_mtu;
	cmd->path_mig_state 	 = attr->path_mig_state;
	cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
	cmd->max_rd_atomic 	 = attr->max_rd_atomic;
	cmd->max_dest_rd_atomic  = attr->max_dest_rd_atomic;
	cmd->min_rnr_timer 	 = attr->min_rnr_timer;
	cmd->port_num 		 = attr->port_num;
	cmd->timeout 		 = attr->timeout;
	cmd->retry_cnt 		 = attr->retry_cnt;
	cmd->rnr_retry 		 = attr->rnr_retry;
	cmd->alt_port_num 	 = attr->alt_port_num;
	cmd->alt_timeout 	 = attr->alt_timeout;

	/* Primary path address-handle attributes (GID is 16 raw bytes). */
	memcpy(cmd->dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
	cmd->dest.flow_label 	    = attr->ah_attr.grh.flow_label;
	cmd->dest.dlid 		    = attr->ah_attr.dlid;
	cmd->dest.sgid_index 	    = attr->ah_attr.grh.sgid_index;
	cmd->dest.hop_limit 	    = attr->ah_attr.grh.hop_limit;
	cmd->dest.traffic_class     = attr->ah_attr.grh.traffic_class;
	cmd->dest.sl 		    = attr->ah_attr.sl;
	cmd->dest.src_path_bits     = attr->ah_attr.src_path_bits;
	cmd->dest.static_rate 	    = attr->ah_attr.static_rate;
	cmd->dest.is_global 	    = attr->ah_attr.is_global;
	cmd->dest.port_num 	    = attr->ah_attr.port_num;

	/* Alternate path address-handle attributes. */
	memcpy(cmd->alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
	cmd->alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
	cmd->alt_dest.dlid 	    = attr->alt_ah_attr.dlid;
	cmd->alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
	cmd->alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
	cmd->alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
	cmd->alt_dest.sl 	    = attr->alt_ah_attr.sl;
	cmd->alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
	cmd->alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
	cmd->alt_dest.is_global     = attr->alt_ah_attr.is_global;
	cmd->alt_dest.port_num 	    = attr->alt_ah_attr.port_num;

	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		return errno;

	return 0;
}
> +
> +static int rdma_cmd_destroy_qp_v1(struct rdma_qp *qp)
> +{
> +	struct rdma_destroy_qp_v1 cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, DESTROY_QP);
> +	cmd.qp_handle = qp->handle;
> +
> +	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
/*
 * Post a chain of send work requests to a QP through the kernel.
 * The user's rdma_send_wr list is flattened into one command buffer:
 * header, then wr_count fixed-size kernel WRs, then all scatter/gather
 * entries.  On error, *bad_wr is pointed at the WR the kernel rejected.
 * Returns 0 on success or errno on a failed write.
 *
 * NOTE(review): the command buffer is alloca'd and its size is
 * unbounded (driven by the caller's WR list) — a very long chain could
 * overflow the stack; confirm acceptable for expected callers.
 */
int rdma_cmd_post_send(struct rdma_qp *ibqp, struct rdma_send_wr *wr,
		      struct rdma_send_wr **bad_wr)
{
	struct rdma_post_send     *cmd;
	struct rdma_post_send_resp resp;
	struct rdma_send_wr       *i;
	struct rdma_kern_send_wr  *n, *tmp;
	struct rdma_sge           *s;
	unsigned                  wr_count = 0;
	unsigned                  sge_count = 0;
	int                       cmd_size;
	int                       ret = 0;

	/* First pass: size the command buffer. */
	for (i = wr; i; i = i->next) {
		wr_count++;
		sge_count += i->num_sge;
	}

	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
	cmd  = alloca(cmd_size);

	RDMA_INIT_CMD_RESP(cmd, cmd_size, POST_SEND, &resp, sizeof resp);
	cmd->qp_handle = ibqp->handle;
	cmd->wr_count  = wr_count;
	cmd->sge_count = sge_count;
	cmd->wqe_size  = sizeof *n;

	/* Kernel WRs follow the header; SGEs follow the WRs. */
	n = (struct rdma_kern_send_wr *) ((void *) cmd + sizeof *cmd);
	s = (struct rdma_sge *) (n + wr_count);

	/* Second pass: translate each user WR into the wire format. */
	tmp = n;
	for (i = wr; i; i = i->next) {
		tmp->wr_id 	= i->wr_id;
		tmp->num_sge 	= i->num_sge;
		tmp->opcode 	= i->opcode;
		tmp->send_flags = i->send_flags;
		tmp->imm_data 	= i->imm_data;
		if (ibqp->qp_type == RDMA_QPT_UD) {
			/* UD WRs carry addressing info instead of RDMA/atomic. */
			tmp->wr.ud.ah 	       = i->wr.ud.ah->handle;
			tmp->wr.ud.remote_qpn  = i->wr.ud.remote_qpn;
			tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
		} else {
			switch(i->opcode) {
			case RDMA_WR_RDMA_WRITE:
			case RDMA_WR_RDMA_WRITE_WITH_IMM:
			case RDMA_WR_RDMA_READ:
				tmp->wr.rdma.remote_addr =
					i->wr.rdma.remote_addr;
				tmp->wr.rdma.rkey = i->wr.rdma.rkey;
				break;
			case RDMA_WR_ATOMIC_CMP_AND_SWP:
			case RDMA_WR_ATOMIC_FETCH_AND_ADD:
				tmp->wr.atomic.remote_addr =
					i->wr.atomic.remote_addr;
				tmp->wr.atomic.compare_add =
					i->wr.atomic.compare_add;
				tmp->wr.atomic.swap = i->wr.atomic.swap;
				tmp->wr.atomic.rkey = i->wr.atomic.rkey;
				break;
			default:
				break;	/* e.g. plain sends need no extra fields */
			}
		}

		if (tmp->num_sge) {
			/* rdma_sge is assumed wire-compatible with the user's
			 * sg_list entries — TODO confirm. */
			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
			s += tmp->num_sge;
		}

		tmp++;
	}

	resp.bad_wr = 0;
	if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		ret = errno;

	/* bad_wr appears to be a 1-based index of the failing WR (0 means
	 * none) — walk the list to hand back the matching user WR. */
	wr_count = resp.bad_wr;
	if (wr_count) {
		i = wr;
		while (--wr_count)
			i = i->next;
		*bad_wr = i;
	}

	return ret;
}
> +
/*
 * Post a chain of receive work requests to a QP through the kernel.
 * Same flattening scheme as rdma_cmd_post_send, but receive WRs carry
 * only wr_id and the SGE list.  On error, *bad_wr is pointed at the
 * WR the kernel rejected.  Returns 0 on success or errno on failure.
 */
int rdma_cmd_post_recv(struct rdma_qp *ibqp, struct rdma_recv_wr *wr,
		      struct rdma_recv_wr **bad_wr)
{
	struct rdma_post_recv     *cmd;
	struct rdma_post_recv_resp resp;
	struct rdma_recv_wr       *i;
	struct rdma_kern_recv_wr  *n, *tmp;
	struct rdma_sge           *s;
	unsigned                  wr_count = 0;
	unsigned                  sge_count = 0;
	int                       cmd_size;
	int                       ret = 0;

	/* First pass: size the command buffer. */
	for (i = wr; i; i = i->next) {
		wr_count++;
		sge_count += i->num_sge;
	}

	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
	cmd  = alloca(cmd_size);

	RDMA_INIT_CMD_RESP(cmd, cmd_size, POST_RECV, &resp, sizeof resp);
	cmd->qp_handle = ibqp->handle;
	cmd->wr_count  = wr_count;
	cmd->sge_count = sge_count;
	cmd->wqe_size  = sizeof *n;

	/* Kernel WRs follow the header; SGEs follow the WRs. */
	n = (struct rdma_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
	s = (struct rdma_sge *) (n + wr_count);

	/* Second pass: translate each user WR into the wire format. */
	tmp = n;
	for (i = wr; i; i = i->next) {
		tmp->wr_id   = i->wr_id;
		tmp->num_sge = i->num_sge;

		if (tmp->num_sge) {
			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
			s += tmp->num_sge;
		}

		tmp++;
	}

	resp.bad_wr = 0;
	if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
		ret = errno;

	/* bad_wr appears to be a 1-based index of the failing WR (0 means
	 * none) — walk the list to hand back the matching user WR. */
	wr_count = resp.bad_wr;
	if (wr_count) {
		i = wr;
		while (--wr_count)
			i = i->next;
		*bad_wr = i;
	}

	return ret;
}
> +
/*
 * Post a chain of receive work requests to a shared receive queue.
 * Identical in structure to rdma_cmd_post_recv, but addressed by SRQ
 * handle.  On error, *bad_wr is pointed at the WR the kernel rejected.
 * Returns 0 on success or errno on a failed write.
 */
int rdma_cmd_post_srq_recv(struct rdma_srq *srq, struct rdma_recv_wr *wr,
		      struct rdma_recv_wr **bad_wr)
{
	struct rdma_post_srq_recv *cmd;
	struct rdma_post_srq_recv_resp resp;
	struct rdma_recv_wr       *i;
	struct rdma_kern_recv_wr  *n, *tmp;
	struct rdma_sge           *s;
	unsigned                  wr_count = 0;
	unsigned                  sge_count = 0;
	int                       cmd_size;
	int                       ret = 0;

	/* First pass: size the command buffer. */
	for (i = wr; i; i = i->next) {
		wr_count++;
		sge_count += i->num_sge;
	}

	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
	cmd  = alloca(cmd_size);

	RDMA_INIT_CMD_RESP(cmd, cmd_size, POST_SRQ_RECV, &resp, sizeof resp);
	cmd->srq_handle = srq->handle;
	cmd->wr_count  = wr_count;
	cmd->sge_count = sge_count;
	cmd->wqe_size  = sizeof *n;

	/* Kernel WRs follow the header; SGEs follow the WRs. */
	n = (struct rdma_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
	s = (struct rdma_sge *) (n + wr_count);

	/* Second pass: translate each user WR into the wire format. */
	tmp = n;
	for (i = wr; i; i = i->next) {
		tmp->wr_id = i->wr_id;
		tmp->num_sge = i->num_sge;

		if (tmp->num_sge) {
			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
			s += tmp->num_sge;
		}

		tmp++;
	}

	resp.bad_wr = 0;
	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
		ret = errno;

	/* bad_wr appears to be a 1-based index of the failing WR (0 means
	 * none) — walk the list to hand back the matching user WR. */
	wr_count = resp.bad_wr;
	if (wr_count) {
		i = wr;
		while (--wr_count)
			i = i->next;
		*bad_wr = i;
	}

	return ret;
}
> +
/*
 * Create an address handle on protection domain 'pd' from 'attr'.
 *
 * Marshals the AH attributes field-by-field into the fixed-layout kernel
 * command (including the 16-byte GRH destination GID) and writes it to
 * the command fd.  On success the kernel-assigned handle is stored in
 * ah->handle.
 *
 * Returns 0 on success or errno from the failed write().
 */
int rdma_cmd_create_ah(struct rdma_pd *pd, struct rdma_ah *ah,
		      struct rdma_ah_attr *attr)
{
	struct rdma_create_ah      cmd;
	struct rdma_create_ah_resp resp;

	RDMA_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_AH, &resp, sizeof resp);
	/* user_handle lets the kernel hand the AH pointer back in events. */
	cmd.user_handle            = (uintptr_t) ah;
	cmd.pd_handle              = pd->handle;
	cmd.attr.dlid              = attr->dlid;
	cmd.attr.sl                = attr->sl;
	cmd.attr.src_path_bits     = attr->src_path_bits;
	cmd.attr.static_rate       = attr->static_rate;
	cmd.attr.is_global         = attr->is_global;
	cmd.attr.port_num          = attr->port_num;
	cmd.attr.grh.flow_label    = attr->grh.flow_label;
	cmd.attr.grh.sgid_index    = attr->grh.sgid_index;
	cmd.attr.grh.hop_limit     = attr->grh.hop_limit;
	cmd.attr.grh.traffic_class = attr->grh.traffic_class;
	memcpy(cmd.attr.grh.dgid, attr->grh.dgid.raw, 16);

	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	ah->handle = resp.handle;

	return 0;
}
> +
> +int rdma_cmd_destroy_ah(struct rdma_ah *ah)
> +{
> +	struct rdma_destroy_ah cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, DESTROY_AH);
> +	cmd.ah_handle = ah->handle;
> +
> +	if (write(ah->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
/*
 * Destroy a queue pair via the kernel command channel.
 *
 * Falls back to the v1 ABI helper when abi_ver == 1.  After the kernel
 * acknowledges the destroy, waits (under qp->mutex) until userspace has
 * acknowledged as many async events as the kernel reported for this QP,
 * so no event referencing the dead QP is still in flight.
 *
 * Returns 0 on success or errno from the failed write().
 */
int rdma_cmd_destroy_qp(struct rdma_qp *qp)
{
	struct rdma_destroy_qp      cmd;
	struct rdma_destroy_qp_resp resp;

	if (abi_ver == 1)
		return rdma_cmd_destroy_qp_v1(qp);

	RDMA_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_QP, &resp, sizeof resp);
	cmd.qp_handle = qp->handle;

	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
		return errno;

	/* rdma_ack_async_event() bumps events_completed and signals cond. */
	pthread_mutex_lock(&qp->mutex);
	while (qp->events_completed != resp.events_reported)
		pthread_cond_wait(&qp->cond, &qp->mutex);
	pthread_mutex_unlock(&qp->mutex);

	return 0;
}
> +
> +int rdma_cmd_attach_mcast(struct rdma_qp *qp, union rdma_gid *gid,
> uint16_t lid)
> +{
> +	struct rdma_attach_mcast cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, ATTACH_MCAST);
> +	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
> +	cmd.qp_handle = qp->handle;
> +	cmd.mlid      = lid;
> +
> +	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> +
> +int rdma_cmd_detach_mcast(struct rdma_qp *qp, union rdma_gid *gid,
> uint16_t lid)
> +{
> +	struct rdma_detach_mcast cmd;
> +
> +	RDMA_INIT_CMD(&cmd, sizeof cmd, DETACH_MCAST);
> +	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
> +	cmd.qp_handle = qp->handle;
> +	cmd.mlid      = lid;
> +
> +	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
> +		return errno;
> +
> +	return 0;
> +}
> diff -ruNp ORG/librdmaverbs/src/device.c NEW/librdmaverbs/src/device.c
> --- ORG/librdmaverbs/src/device.c	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/device.c	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,270 @@
> +/*
> + * Copyright (c) 2004, 2005 Topspin Communications.  All rights
> reserved.
> + * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * $Id: device.c 7631 2006-06-02 19:53:25Z swise $
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif /* HAVE_CONFIG_H */
> +
> +#include <stdio.h>
> +#include <netinet/in.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <alloca.h>
> +
> +#include <rdma/arch.h>
> +
> +#include "rdmaverbs.h"
> +
/* Guards lazy initialization of the module-global device table below. */
static pthread_mutex_t device_list_lock = PTHREAD_MUTEX_INITIALIZER;
static int num_devices;				/* 0 until rdmaverbs_init() has run */
static struct rdma_device **device_list;	/* filled once by rdmaverbs_init() */
> +
> +struct rdma_device **rdma_get_device_list(int *num)
> +{
> +	struct rdma_device **l;
> +	int i;
> +
> +	pthread_mutex_lock(&device_list_lock);
> +
> +	if (!num_devices)
> +		num_devices = rdmaverbs_init(&device_list);
> +
> +	l = calloc(num_devices + 1, sizeof (struct rdma_device *));
> +	for (i = 0; i < num_devices; ++i)
> +		l[i] = device_list[i];
> +
> +	pthread_mutex_unlock(&device_list_lock);
> +
> +	if (num)
> +		*num = l ? num_devices : 0;
> +
> +	return l;
> +}
> +
/*
 * Free an array returned by rdma_get_device_list().  Only the array is
 * released; the rdma_device structs remain owned by the library.
 */
void rdma_free_device_list(struct rdma_device **list)
{
	free(list);
}
> +
/* Return the device's kernel name (e.g. its ibdev name); not a copy. */
const char *rdma_get_device_name(struct rdma_device *device)
{
	return device->name;
}
> +
> +uint64_t rdma_get_device_guid(struct rdma_device *device)
> +{
> +	char attr[24];
> +	uint64_t guid = 0;
> +	uint16_t parts[4];
> +	int i;
> +
> +	if (rdma_read_sysfs_file(device->ibdev_path, "node_guid",
> +				attr, sizeof attr) < 0)
> +		return 0;
> +
> +	if (sscanf(attr, "%hx:%hx:%hx:%hx",
> +		   parts, parts + 1, parts + 2, parts + 3) != 4)
> +		return 0;
> +
> +	for (i = 0; i < 4; ++i)
> +		guid = (guid << 16) | parts[i];
> +
> +	return htonll(guid);
> +}
> +
> +static enum rdma_node_type query_node_type(struct rdma_device *device)
> +{
> +	char node_desc[24];
> +	char node_str[24];
> +	int node_type;
> +
> +	if (rdma_read_sysfs_file(device->ibdev_path, "node_type",
> +				node_desc, sizeof(node_desc)) < 0)
> +		return RDMA_NODE_UNKNOWN;
> +
> +	sscanf(node_desc, "%d: %s\n", (int*)&node_type, node_str);
> +	return (enum rdma_node_type) node_type;
> +}
> +
> +struct rdma_context *rdma_open_device(struct rdma_device *device)
> +{
> +	char *devpath;
> +	int cmd_fd;
> +	struct rdma_context *context;
> +
> +	asprintf(&devpath, "/dev/infiniband/%s", device->dev_name);
> +
> +	/*
> +	 * We'll only be doing writes, but we need O_RDWR in case the
> +	 * provider needs to mmap() the file.
> +	 */
> +	cmd_fd = open(devpath, O_RDWR);
> +	free(devpath);
> +
> +	if (cmd_fd < 0)
> +		return NULL;
> +
> +	device->node_type = query_node_type(device);
> +
> +	context = device->ops.alloc_context(device, cmd_fd);
> +	if (!context)
> +		goto err;
> +
> +	context->device = device;
> +	context->cmd_fd = cmd_fd;
> +
> +	return context;
> +
> +err:
> +	close(cmd_fd);
> +
> +	return NULL;
> +}
> +
/*
 * Tear down a verbs context.
 *
 * The fds are saved before free_context() because the provider frees the
 * context structure.  For ABI versions <= 2 a compat structure holds an
 * extra completion-channel fd that must also be closed.
 *
 * Always returns 0.
 */
int rdma_close_device(struct rdma_context *context)
{
	int async_fd = context->async_fd;
	int cmd_fd   = context->cmd_fd;
	int cq_fd    = -1;

	if (abi_ver <= 2) {
		struct rdma_abi_compat_v2 *t = context->abi_compat;
		cq_fd = t->channel.fd;
		free(context->abi_compat);
	}

	/* Provider releases the context; our saved fds stay valid. */
	context->device->ops.free_context(context);

	close(async_fd);
	close(cmd_fd);
	if (abi_ver <= 2)
		close(cq_fd);

	return 0;
}
> +
/*
 * Read one asynchronous event from the context's async fd (blocking) and
 * translate the kernel's opaque element field into the matching union
 * member based on the event type: CQ, QP, SRQ, or a plain port number.
 *
 * Returns 0 on success, -1 on a short or failed read.
 */
int rdma_get_async_event(struct rdma_context *context,
			struct rdma_async_event *event)
{
	struct rdma_kern_async_event ev;

	if (read(context->async_fd, &ev, sizeof ev) != sizeof ev)
		return -1;

	event->event_type = ev.event_type;

	switch (event->event_type) {
	case RDMA_EVENT_CQ_ERR:
		/* ev.element carries the user_handle we registered: a pointer. */
		event->element.cq = (void *) (uintptr_t) ev.element;
		break;

	case RDMA_EVENT_QP_FATAL:
	case RDMA_EVENT_QP_REQ_ERR:
	case RDMA_EVENT_QP_ACCESS_ERR:
	case RDMA_EVENT_COMM_EST:
	case RDMA_EVENT_SQ_DRAINED:
	case RDMA_EVENT_PATH_MIG:
	case RDMA_EVENT_PATH_MIG_ERR:
	case RDMA_EVENT_QP_LAST_WQE_REACHED:
		event->element.qp = (void *) (uintptr_t) ev.element;
		break;

	case RDMA_EVENT_SRQ_ERR:
	case RDMA_EVENT_SRQ_LIMIT_REACHED:
		event->element.srq = (void *) (uintptr_t) ev.element;
		break;

	default:
		/* Port events and anything unrecognized: element is a port. */
		event->element.port_num = ev.element;
		break;
	}

	return 0;
}
> +
/*
 * Acknowledge an async event previously returned by rdma_get_async_event().
 *
 * For CQ/QP/SRQ events this bumps the object's events_completed counter
 * and signals its condition variable; the destroy paths (e.g.
 * rdma_cmd_destroy_qp) wait on that counter so an object is not torn down
 * while events referencing it are still outstanding.  Port events need no
 * acknowledgment.
 */
void rdma_ack_async_event(struct rdma_async_event *event)
{
	switch (event->event_type) {
	case RDMA_EVENT_CQ_ERR:
	{
		struct rdma_cq *cq = event->element.cq;

		pthread_mutex_lock(&cq->mutex);
		++cq->async_events_completed;
		pthread_cond_signal(&cq->cond);
		pthread_mutex_unlock(&cq->mutex);

		return;
	}

	case RDMA_EVENT_QP_FATAL:
	case RDMA_EVENT_QP_REQ_ERR:
	case RDMA_EVENT_QP_ACCESS_ERR:
	case RDMA_EVENT_COMM_EST:
	case RDMA_EVENT_SQ_DRAINED:
	case RDMA_EVENT_PATH_MIG:
	case RDMA_EVENT_PATH_MIG_ERR:
	case RDMA_EVENT_QP_LAST_WQE_REACHED:
	{
		struct rdma_qp *qp = event->element.qp;

		pthread_mutex_lock(&qp->mutex);
		++qp->events_completed;
		pthread_cond_signal(&qp->cond);
		pthread_mutex_unlock(&qp->mutex);

		return;
	}

	case RDMA_EVENT_SRQ_ERR:
	case RDMA_EVENT_SRQ_LIMIT_REACHED:
	{
		struct rdma_srq *srq = event->element.srq;

		pthread_mutex_lock(&srq->mutex);
		++srq->events_completed;
		pthread_cond_signal(&srq->cond);
		pthread_mutex_unlock(&srq->mutex);

		return;
	}

	default:
		return;
	}
}
> diff -ruNp ORG/librdmaverbs/src/init.c NEW/librdmaverbs/src/init.c
> --- ORG/librdmaverbs/src/init.c	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/init.c	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,274 @@
> +/*
> + * Copyright (c) 2004, 2005 Topspin Communications.  All rights
> reserved.
> + * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * $Id: init.c 7631 2006-06-02 19:53:25Z swise $
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif /* HAVE_CONFIG_H */
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <glob.h>
> +#include <stdio.h>
> +#include <dlfcn.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <dirent.h>
> +
> +#include "rdmaverbs.h"
> +
> +#ifndef OPENRDMA_DRIVER_PATH_ENV
> +#  define OPENRDMA_DRIVER_PATH_ENV "OPENRDMA_DRIVER_PATH"
> +#endif
> +
/* Kernel uverbs ABI version; set once by check_abi_version(). */
HIDDEN int abi_ver;

static char default_path[] = DRIVER_PATH;	/* compiled-in driver dir */
static const char *user_path;	/* from OPENRDMA_DRIVER_PATH; only when not SUID */

/* Singly-linked list of loaded provider drivers, newest first. */
static struct rdma_driver *driver_list;
> +
> +static void load_driver(char *so_path)
> +{
> +	void *dlhandle;
> +	rdma_driver_init_func init_func;
> +	struct rdma_driver *driver;
> +
> +	dlhandle = dlopen(so_path, RTLD_NOW);
> +	if (!dlhandle) {
> +		fprintf(stderr, PFX "Warning: couldn't load driver %s: %s\n",
> +			so_path, dlerror());
> +		return;
> +	}
> +
> +	dlerror();
> +	init_func = dlsym(dlhandle, "rdma_driver_init");
> +	if (dlerror() != NULL || !init_func) {
> +		dlclose(dlhandle);
> +		return;
> +	}
> +
> +	driver = malloc(sizeof *driver);
> +	if (!driver) {
> +		fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n",
> so_path);
> +		dlclose(dlhandle);
> +		return;
> +	}
> +
> +	driver->init_func = init_func;
> +	driver->next      = driver_list;
> +	driver_list       = driver;
> +}
> +
> +static void find_drivers(char *dir)
> +{
> +	size_t len = strlen(dir);
> +	glob_t so_glob;
> +	char *pat;
> +	int ret;
> +	int i;
> +
> +	if (!len)
> +		return;
> +
> +	while (len && dir[len - 1] == '/')
> +		dir[--len] = '\0';
> +
> +	asprintf(&pat, "%s/*.so", dir);
> +
> +	ret = glob(pat, 0, NULL, &so_glob);
> +	free(pat);
> +
> +	if (ret) {
> +		if (ret != GLOB_NOMATCH)
> +			fprintf(stderr, PFX "Warning: couldn't search %s\n", pat);
> +		return;
> +	}
> +
> +	for (i = 0; i < so_glob.gl_pathc; ++i)
> +		load_driver(so_glob.gl_pathv[i]);
> +
> +	globfree(&so_glob);
> +}
> +
/*
 * Try every loaded provider driver against one uverbs sysfs device entry
 * and return the rdma_device from the first driver that claims it, with
 * its path/name fields filled in.  Returns NULL (with a warning) when the
 * device has no ibdev attribute or no driver accepts it.
 */
static struct rdma_device *init_drivers(const char *class_path,
				       const char *dev_name)
{
	struct rdma_driver *driver;
	struct rdma_device *dev;
	/* NOTE(review): this local shadows the global abi_ver on purpose —
	 * it is the per-device driver ABI, not the uverbs ABI. */
	int abi_ver = 0;
	char sys_path[RDMA_SYSFS_PATH_MAX];
	char ibdev_name[RDMA_SYSFS_NAME_MAX];
	char value[8];

	snprintf(sys_path, sizeof sys_path, "%s/%s",
		 class_path, dev_name);

	/* Missing abi_version attr just leaves abi_ver at 0. */
	if (rdma_read_sysfs_file(sys_path, "abi_version", value, sizeof value) > 0)
		abi_ver = strtol(value, NULL, 10);

	if (rdma_read_sysfs_file(sys_path, "ibdev", ibdev_name, sizeof ibdev_name) < 0) {
		fprintf(stderr, PFX "Warning: no ibdev class attr for %s\n",
			sys_path);
		return NULL;
	}

	/* First driver whose init_func accepts the device wins. */
	for (driver = driver_list; driver; driver = driver->next) {
		dev = driver->init_func(sys_path, abi_ver);
		if (!dev)
			continue;

		dev->driver = driver;
		strcpy(dev->dev_path, sys_path);
		snprintf(dev->ibdev_path, RDMA_SYSFS_PATH_MAX, "%s/class/infiniband/%s",
			 rdma_get_sysfs_path(), ibdev_name);
		strcpy(dev->dev_name, dev_name);
		strcpy(dev->name, ibdev_name);

		return dev;
	}

	fprintf(stderr, PFX "Warning: no userspace device-specific driver found for %s\n"
		"	driver search path: ", dev_name);
	if (user_path)
		fprintf(stderr, "%s:", user_path);
	fprintf(stderr, "%s\n", default_path);

	return NULL;
}
> +
> +static int check_abi_version(const char *path)
> +{
> +	char value[8];
> +
> +	if (rdma_read_sysfs_file(path, "class/infiniband_verbs/abi_version",
> +				value, sizeof value) < 0) {
> +		fprintf(stderr, PFX "Fatal: couldn't read uverbs ABI version.\n");
> +		return -1;
> +	}
> +
> +	abi_ver = strtol(value, NULL, 10);
> +
> +	if (abi_ver < RDMA_USER_VERBS_MIN_ABI_VERSION ||
> +	    abi_ver > RDMA_USER_VERBS_MAX_ABI_VERSION) {
> +		fprintf(stderr, PFX "Fatal: kernel ABI version %d "
> +			"doesn't match library version %d.\n",
> +			abi_ver, RDMA_USER_VERBS_MAX_ABI_VERSION);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +HIDDEN int rdmaverbs_init(struct rdma_device ***list)
> +{
> +	const char *sysfs_path;
> +	char *wr_path, *dir;
> +	char class_path[RDMA_SYSFS_PATH_MAX];
> +	DIR *class_dir;
> +	struct dirent *dent;
> +	struct rdma_device *device;
> +	struct rdma_device **new_list;
> +	int num_devices = 0;
> +	int list_size = 0;
> +
> +	*list = NULL;
> +
> +	if (rdma_init_mem_map())
> +		return 0;
> +
> +	find_drivers(default_path);
> +
> +	/*
> +	 * Only follow use path passed in through the calling user's
> +	 * environment if we're not running SUID.
> +	 */
> +	if (getuid() == geteuid()) {
> +		user_path = getenv(OPENRDMA_DRIVER_PATH_ENV);
> +		if (user_path) {
> +			wr_path = strdupa(user_path);
> +			while ((dir = strsep(&wr_path, ";:")))
> +				find_drivers(dir);
> +		}
> +	}
> +
> +	/*
> +	 * Now check if a driver is statically linked.  Since we push
> +	 * drivers onto our driver list, the last driver we find will
> +	 * be the first one we try.
> +	 */
> +	load_driver(NULL);
> +
> +	sysfs_path = rdma_get_sysfs_path();
> +	if (!sysfs_path) {
> +		fprintf(stderr, PFX "Fatal: couldn't find sysfs mount.\n");
> +		return 0;
> +	}
> +
> +	if (check_abi_version(sysfs_path))
> +		return 0;
> +
> +	snprintf(class_path, sizeof class_path, "%s/class/infiniband_verbs",
> +		 sysfs_path);
> +	class_dir = opendir(class_path);
> +	if (!class_dir) {
> +		fprintf(stderr, PFX "Fatal: couldn't open sysfs class "
> +			"directory '%s'.\n", class_path);
> +		return 0;
> +	}
> +
> +	while ((dent = readdir(class_dir))) {
> +		if (dent->d_name[0] == '.' || dent->d_type == DT_REG)
> +			continue;
> +
> +		device = init_drivers(class_path, dent->d_name);
> +		if (!device)
> +			continue;
> +
> +		if (list_size <= num_devices) {
> +			list_size = list_size ? list_size * 2 : 1;
> +			new_list = realloc(*list, list_size * sizeof (struct rdma_device
> *));
> +			if (!new_list)
> +				goto out;
> +			*list = new_list;
> +		}
> +
> +		(*list)[num_devices++] = device;
> +	}
> +
> +	closedir(class_dir);
> +
> +out:
> +	return num_devices;
> +}
> diff -ruNp ORG/librdmaverbs/src/librdmaverbs.map
> NEW/librdmaverbs/src/librdmaverbs.map
> --- ORG/librdmaverbs/src/librdmaverbs.map	1969-12-31 16:00:00.000000000
> -0800
> +++ NEW/librdmaverbs/src/librdmaverbs.map	2006-07-10 18:07:47.000000000
> -0700
> @@ -0,0 +1,76 @@
> +IBVERBS_1.0 {
> +	global:
> +		rdma_get_device_list;
> +		rdma_free_device_list;
> +		rdma_get_device_name;
> +		rdma_get_device_guid;
> +		rdma_open_device;
> +		rdma_close_device;
> +		rdma_get_async_event;
> +		rdma_ack_async_event;
> +		rdma_query_device;
> +		rdma_query_port;
> +		rdma_query_gid;
> +		rdma_query_pkey;
> +		rdma_alloc_pd;
> +		rdma_dealloc_pd;
> +		rdma_reg_mr;
> +		rdma_dereg_mr;
> +		rdma_create_comp_channel;
> +		rdma_destroy_comp_channel;
> +		rdma_create_cq;
> +		rdma_resize_cq;
> +		rdma_destroy_cq;
> +		rdma_get_cq_event;
> +		rdma_ack_cq_events;
> +		rdma_create_srq;
> +		rdma_modify_srq;
> +		rdma_query_srq;
> +		rdma_destroy_srq;
> +		rdmav_create_qp;
> +		rdma_query_qp;
> +		rdma_modify_qp;
> +		rdmav_destroy_qp;
> +		rdma_create_ah;
> +		rdma_destroy_ah;
> +		rdma_attach_mcast;
> +		rdma_detach_mcast;
> +		rdma_cmd_get_context;
> +		rdma_cmd_query_device;
> +		rdma_cmd_query_port;
> +		rdma_cmd_query_gid;
> +		rdma_cmd_query_pkey;
> +		rdma_cmd_alloc_pd;
> +		rdma_cmd_dealloc_pd;
> +		rdma_cmd_reg_mr;
> +		rdma_cmd_dereg_mr;
> +		rdma_cmd_create_cq;
> +		rdma_cmd_poll_cq;
> +		rdma_cmd_req_notify_cq;
> +		rdma_cmd_resize_cq;
> +		rdma_cmd_destroy_cq;
> +		rdma_cmd_create_srq;
> +		rdma_cmd_modify_srq;
> +		rdma_cmd_query_srq;
> +		rdma_cmd_destroy_srq;
> +		rdma_cmd_create_qp;
> +		rdma_cmd_query_qp;
> +		rdma_cmd_modify_qp;
> +		rdma_cmd_destroy_qp;
> +		rdma_cmd_post_send;
> +		rdma_cmd_post_recv;
> +		rdma_cmd_post_srq_recv;
> +		rdma_cmd_create_ah;
> +		rdma_cmd_destroy_ah;
> +		rdma_cmd_attach_mcast;
> +		rdma_cmd_detach_mcast;
> +		rdma_copy_qp_attr_from_kern;
> +		rdma_copy_path_rec_from_kern;
> +		rdma_copy_path_rec_to_kern;
> +		rdma_rate_to_mult;
> +		mult_to_rdma_rate;
> +		rdma_get_sysfs_path;
> +		rdma_read_sysfs_file;
> +
> +	local: *;
> +};
> diff -ruNp ORG/librdmaverbs/src/marshall.c
> NEW/librdmaverbs/src/marshall.c
> --- ORG/librdmaverbs/src/marshall.c	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/marshall.c	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,142 @@
> +/*
> + * Copyright (c) 2005 Intel Corporation.  All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif /* HAVE_CONFIG_H */
> +
> +#include <string.h>
> +
> +#include <rdma/marshall.h>
> +
/*
 * Translate a kernel-format address-handle attribute into the userspace
 * struct: straight field-by-field copy plus the raw 16-byte GID.
 */
static void rdma_copy_ah_attr_from_kern(struct rdma_ah_attr *dst,
				       struct rdma_kern_ah_attr *src)
{
	memcpy(dst->grh.dgid.raw, src->grh.dgid, sizeof dst->grh.dgid);
	dst->grh.flow_label = src->grh.flow_label;
	dst->grh.sgid_index = src->grh.sgid_index;
	dst->grh.hop_limit = src->grh.hop_limit;
	dst->grh.traffic_class = src->grh.traffic_class;

	dst->dlid = src->dlid;
	dst->sl = src->sl;
	dst->src_path_bits = src->src_path_bits;
	dst->static_rate = src->static_rate;
	dst->is_global = src->is_global;
	dst->port_num = src->port_num;
}
> +
/*
 * Translate kernel-format QP attributes into the userspace struct.
 * Flat field copies, with the kernel's flattened capability fields
 * regrouped under dst->cap and both AH attributes converted via
 * rdma_copy_ah_attr_from_kern().
 */
void rdma_copy_qp_attr_from_kern(struct rdma_qp_attr *dst,
				struct rdma_kern_qp_attr *src)
{
	dst->cur_qp_state = src->cur_qp_state;
	dst->path_mtu = src->path_mtu;
	dst->path_mig_state = src->path_mig_state;
	dst->qkey = src->qkey;
	dst->rq_psn = src->rq_psn;
	dst->sq_psn = src->sq_psn;
	dst->dest_qp_num = src->dest_qp_num;
	dst->qp_access_flags = src->qp_access_flags;

	/* Kernel keeps the caps flat; userspace groups them in a struct. */
	dst->cap.max_send_wr = src->max_send_wr;
	dst->cap.max_recv_wr = src->max_recv_wr;
	dst->cap.max_send_sge = src->max_send_sge;
	dst->cap.max_recv_sge = src->max_recv_sge;
	dst->cap.max_inline_data = src->max_inline_data;

	rdma_copy_ah_attr_from_kern(&dst->ah_attr, &src->ah_attr);
	rdma_copy_ah_attr_from_kern(&dst->alt_ah_attr, &src->alt_ah_attr);

	dst->pkey_index = src->pkey_index;
	dst->alt_pkey_index = src->alt_pkey_index;
	dst->en_sqd_async_notify = src->en_sqd_async_notify;
	dst->sq_draining = src->sq_draining;
	dst->max_rd_atomic = src->max_rd_atomic;
	dst->max_dest_rd_atomic = src->max_dest_rd_atomic;
	dst->min_rnr_timer = src->min_rnr_timer;
	dst->port_num = src->port_num;
	dst->timeout = src->timeout;
	dst->retry_cnt = src->retry_cnt;
	dst->rnr_retry = src->rnr_retry;
	dst->alt_port_num = src->alt_port_num;
	dst->alt_timeout = src->alt_timeout;
}
> +
/*
 * Translate a kernel-format SA path record into the userspace struct:
 * raw GID copies plus flat field copies.  Mirror of
 * rdma_copy_path_rec_to_kern().
 */
void rdma_copy_path_rec_from_kern(struct rdma_sa_path_rec *dst,
				 struct rdma_kern_path_rec *src)
{
	memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
	memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);

	dst->dlid		= src->dlid;
	dst->slid		= src->slid;
	dst->raw_traffic	= src->raw_traffic;
	dst->flow_label		= src->flow_label;
	dst->hop_limit		= src->hop_limit;
	dst->traffic_class	= src->traffic_class;
	dst->reversible		= src->reversible;
	dst->numb_path		= src->numb_path;
	dst->pkey		= src->pkey;
	dst->sl			= src->sl;
	dst->mtu_selector	= src->mtu_selector;
	dst->mtu		= src->mtu;
	dst->rate_selector	= src->rate_selector;
	dst->rate		= src->rate;
	dst->packet_life_time	= src->packet_life_time;
	dst->preference		= src->preference;
	dst->packet_life_time_selector = src->packet_life_time_selector;
}
> +
/*
 * Translate a userspace SA path record into kernel format.  Exact inverse
 * of rdma_copy_path_rec_from_kern().
 */
void rdma_copy_path_rec_to_kern(struct rdma_kern_path_rec *dst,
			       struct rdma_sa_path_rec *src)
{
	memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
	memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);

	dst->dlid		= src->dlid;
	dst->slid		= src->slid;
	dst->raw_traffic	= src->raw_traffic;
	dst->flow_label		= src->flow_label;
	dst->hop_limit		= src->hop_limit;
	dst->traffic_class	= src->traffic_class;
	dst->reversible		= src->reversible;
	dst->numb_path		= src->numb_path;
	dst->pkey		= src->pkey;
	dst->sl			= src->sl;
	dst->mtu_selector	= src->mtu_selector;
	dst->mtu		= src->mtu;
	dst->rate_selector	= src->rate_selector;
	dst->rate		= src->rate;
	dst->packet_life_time	= src->packet_life_time;
	dst->preference		= src->preference;
	dst->packet_life_time_selector = src->packet_life_time_selector;
}
> diff -ruNp ORG/librdmaverbs/src/memory.c NEW/librdmaverbs/src/memory.c
> --- ORG/librdmaverbs/src/memory.c	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/memory.c	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,258 @@
> +/*
> + * Copyright (c) 2004, 2005 Topspin Communications.  All rights
> reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * $Id: memory.c 6987 2006-05-08 15:18:51Z tom $
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif /* HAVE_CONFIG_H */
> +
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +
> +#include "rdmaverbs.h"
> +
> +/*
> + * We keep a linked list of page ranges that have been locked along
> with a
> + * reference count to manage overlapping registrations, etc.
> + *
> + * Eventually we should turn this into an RB-tree or something similar
> + * to avoid the O(n) cost of registering/unregistering memory.
> + */
> +
/*
 * One node per contiguous run of pages with the same lock refcount.
 * Nodes partition the address space; boundaries are page-aligned and
 * [start, end] is inclusive.
 */
struct rdma_mem_node {
	struct rdma_mem_node *prev, *next;
	uintptr_t            start, end;	/* inclusive range bounds */
	int                  refcnt;		/* overlapping registrations */
};

static struct {
	struct rdma_mem_node *first;	/* sentinel covering [0, UINTPTR_MAX] */
	pthread_mutex_t      mutex;	/* serializes lock/unlock_range */
	uintptr_t            page_size;
} mem_map;
> +
> +int rdma_init_mem_map(void)
> +{
> +	struct rdma_mem_node *node = NULL;
> +
> +	node = malloc(sizeof *node);
> +	if (!node)
> +		goto fail;
> +
> +	node->prev   = node->next = NULL;
> +	node->start  = 0;
> +	node->end    = UINTPTR_MAX;
> +	node->refcnt = 0;
> +
> +	mem_map.first = node;
> +
> +	mem_map.page_size = sysconf(_SC_PAGESIZE);
> +	if (mem_map.page_size < 0)
> +		goto fail;
> +
> +	if (pthread_mutex_init(&mem_map.mutex, NULL))
> +		goto fail;
> +
> +	return 0;
> +
> +fail:
> +	if (node)
> +		free(node);
> +
> +	return -1;
> +}
> +
> +static struct rdma_mem_node *__mm_find_first(uintptr_t start, uintptr_t
> end)
> +{
> +	struct rdma_mem_node *node = mem_map.first;
> +
> +	while (node) {
> +		if ((node->start <= start && node->end >= start) ||
> +		    (node->start <= end   && node->end >= end))
> +			break;
> +		node = node->next;
> +	}
> +
> +	return node;
> +}
> +
/* Predecessor in the range list (NULL for the sentinel head). */
static struct rdma_mem_node *__mm_prev(struct rdma_mem_node *node)
{
	return node->prev;
}
> +
/* Successor in the range list (NULL at the end). */
static struct rdma_mem_node *__mm_next(struct rdma_mem_node *node)
{
	return node->next;
}
> +
/* Insert 'new' into the doubly-linked list immediately after 'node'. */
static void __mm_add(struct rdma_mem_node *node,
		     struct rdma_mem_node *new)
{
	new->prev  = node;
	new->next  = node->next;
	node->next = new;
	if (new->next)
		new->next->prev = new;
}
> +
/*
 * Unlink 'node' from the list.  Does NOT free it — callers keep or
 * release the memory themselves.
 */
static void __mm_remove(struct rdma_mem_node *node)
{
	/* Never have to remove the first node, so we can use prev */
	node->prev->next = node->next;
	if (node->next)
		node->next->prev = node->prev;
}
> +
/*
 * Bump the lock refcount on every page overlapping [base, base+size) and
 * mlock() each sub-range whose count goes 0 -> 1.  Boundary nodes are
 * split so refcounts stay exact per page range.
 *
 * Returns 0 on success, -1 on allocation failure, or mlock()'s return
 * value on lock failure.  NOTE(review): on a mid-loop failure, earlier
 * sub-ranges keep their incremented refcounts — the operation is not
 * rolled back; confirm callers treat this as fatal.
 */
int rdma_lock_range(void *base, size_t size)
{
	uintptr_t start, end;
	struct rdma_mem_node *node, *tmp;
	int ret = 0;

	if (!size)
		return 0;

	/* Round start down and end up to page boundaries; end is inclusive. */
	start = (uintptr_t) base & ~(mem_map.page_size - 1);
	end   = ((uintptr_t) (base + size + mem_map.page_size - 1) &
		 ~(mem_map.page_size - 1)) - 1;

	pthread_mutex_lock(&mem_map.mutex);

	/* Sentinel spans the whole address space, so this never misses. */
	node = __mm_find_first(start, end);

	/* Split off the head so 'start' begins exactly on a node boundary. */
	if (node->start < start) {
		tmp = malloc(sizeof *tmp);
		if (!tmp) {
			ret = -1;
			goto out;
		}

		tmp->start  = start;
		tmp->end    = node->end;
		tmp->refcnt = node->refcnt;
		node->end   = start - 1;

		__mm_add(node, tmp);
		node = tmp;
	}

	/* Walk covered nodes, splitting the tail node if it overhangs 'end'. */
	while (node->start <= end) {
		if (node->end > end) {
			tmp = malloc(sizeof *tmp);
			if (!tmp) {
				ret = -1;
				goto out;
			}

			tmp->start  = end + 1;
			tmp->end    = node->end;
			tmp->refcnt = node->refcnt;
			node->end   = end;

			__mm_add(node, tmp);
		}


		/* First registration of this sub-range: pin it. */
		if (node->refcnt++ == 0) {
			ret = mlock((void *) node->start,
				    node->end - node->start + 1);
			if (ret)
				goto out;
		}

		node = __mm_next(node);
	}

out:
	pthread_mutex_unlock(&mem_map.mutex);

	return ret;
}
> +
> +int rdma_unlock_range(void *base, size_t size)
> +{
> +	uintptr_t start, end;
> +	struct rdma_mem_node *node, *tmp;
> +	int ret = 0;
> +
> +	if (!size)
> +		return 0;
> +
> +	start = (uintptr_t) base & ~(mem_map.page_size - 1);
> +	end   = ((uintptr_t) (base + size + mem_map.page_size - 1) &
> +		 ~(mem_map.page_size - 1)) - 1;
> +
> +	pthread_mutex_lock(&mem_map.mutex);
> +
> +	node = __mm_find_first(start, end);
> +
> +	if (node->start != start) {
> +		ret = -1;
> +		goto out;
> +	}
> +
> +	while (node && node->end <= end) {
> +		if (--node->refcnt == 0) {
> +			ret = munlock((void *) node->start,
> +				      node->end - node->start + 1);
> +		}
> +
> +		if (__mm_prev(node) && node->refcnt == __mm_prev(node)->refcnt) {
> +			__mm_prev(node)->end = node->end;
> +			tmp = __mm_prev(node);
> +			__mm_remove(node);
> +			node = tmp;
> +		}
> +
> +		node = __mm_next(node);
> +	}
> +
> +	if (node && node->refcnt == __mm_prev(node)->refcnt) {
> +		__mm_prev(node)->end = node->end;
> +		tmp = __mm_prev(node);
> +		__mm_remove(node);
> +	}
> +
> +	if (node->end != end) {
> +		ret = -1;
> +		goto out;
> +	}
> +
> +out:
> +	pthread_mutex_unlock(&mem_map.mutex);
> +
> +	return ret;
> +}
> diff -ruNp ORG/librdmaverbs/src/rdmaverbs.h
> NEW/librdmaverbs/src/rdmaverbs.h
> --- ORG/librdmaverbs/src/rdmaverbs.h	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/rdmaverbs.h	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,87 @@
> +/*
> + * Copyright (c) 2004, 2005 Topspin Communications.  All rights
> reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +#ifndef RDMA_VERBS_H
> +#define RDMA_VERBS_H
> +
> +#include <pthread.h>
> +
> +#include <rdma/driver.h>
> +
> +#define HIDDEN		__attribute__((visibility ("hidden")))
> +
> +#define INIT		__attribute__((constructor))
> +#define FINI		__attribute__((destructor))
> +
> +#define PFX		"librdmaverbs: "
> +
> +struct rdma_driver {
> +	rdma_driver_init_func	init_func;
> +	struct rdma_driver      *next;
> +};
> +
> +struct rdma_abi_compat_v2 {
> +	struct rdma_comp_channel	channel;
> +	pthread_mutex_t		in_use;
> +};
> +
> +extern HIDDEN int abi_ver;
> +
> +extern HIDDEN int rdmaverbs_init(struct rdma_device ***list);
> +
> +extern HIDDEN int rdma_init_mem_map(void);
> +extern HIDDEN int rdma_lock_range(void *base, size_t size);
> +extern HIDDEN int rdma_unlock_range(void *base, size_t size);
> +
> +#define RDMA_INIT_CMD(cmd, size, opcode)					\
> +	do {								\
> +		if (abi_ver > 2)					\
> +			(cmd)->command = RDMA_USER_VERBS_CMD_##opcode;	\
> +		else							\
> +			(cmd)->command = RDMA_USER_VERBS_CMD_##opcode##_V2; \
> +		(cmd)->in_words  = (size) / 4;				\
> +		(cmd)->out_words = 0;					\
> +	} while (0)
> +
> +#define RDMA_INIT_CMD_RESP(cmd, size, opcode, out, outsize)		\
> +	do {								\
> +		if (abi_ver > 2)					\
> +			(cmd)->command = RDMA_USER_VERBS_CMD_##opcode;	\
> +		else							\
> +			(cmd)->command = RDMA_USER_VERBS_CMD_##opcode##_V2; \
> +		(cmd)->in_words  = (size) / 4;				\
> +		(cmd)->out_words = (outsize) / 4;			\
> +		(cmd)->response  = (uintptr_t) (out);			\
> +	} while (0)
> +
> +#endif /* RDMA_VERBS_H */
> diff -ruNp ORG/librdmaverbs/src/sysfs.c NEW/librdmaverbs/src/sysfs.c
> --- ORG/librdmaverbs/src/sysfs.c	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/sysfs.c	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,104 @@
> +/*
> + * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * $Id$
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif /* HAVE_CONFIG_H */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <string.h>
> +
> +#include "rdmaverbs.h"
> +
> +static char *sysfs_path;
> +
> +const char *rdma_get_sysfs_path(void)
> +{
> +	char *env = NULL;
> +
> +	if (sysfs_path)
> +		return sysfs_path;
> +
> +	/*
> +	 * Only use the path passed in through the calling user's
> +	 * environment if we're not running SUID.
> +	 */
> +	if (getuid() == geteuid())
> +		env = getenv("SYSFS_PATH");
> +
> +	if (env) {
> +		int len;
> +
> +		sysfs_path = strndup(env, RDMA_SYSFS_PATH_MAX);
> +		len = strlen(sysfs_path);
> +		while (len > 0 && sysfs_path[len - 1] == '/') {
> +			--len;
> +			sysfs_path[len] = '\0';
> +		}
> +	} else
> +		sysfs_path = "/sys";
> +
> +	return sysfs_path;
> +}
> +
> +int rdma_read_sysfs_file(const char *dir, const char *file,
> +			char *buf, size_t size)
> +{
> +	char *path;
> +	int fd;
> +	int len;
> +
> +	asprintf(&path, "%s/%s", dir, file);
> +
> +	fd = open(path, O_RDONLY);
> +	if (fd < 0) {
> +		free(path);
> +		return -1;
> +	}
> +
> +	len = read(fd, buf, size);
> +
> +	close(fd);
> +	free(path);
> +
> +	if (len > 0 && buf[len - 1] == '\n')
> +		buf[--len] = '\0';
> +
> +	return len;
> +}
> diff -ruNp ORG/librdmaverbs/src/verbs.c NEW/librdmaverbs/src/verbs.c
> --- ORG/librdmaverbs/src/verbs.c	1969-12-31 16:00:00.000000000 -0800
> +++ NEW/librdmaverbs/src/verbs.c	2006-07-10 18:07:47.000000000 -0700
> @@ -0,0 +1,408 @@
> +/*
> + * Copyright (c) 2005 Topspin Communications.  All rights reserved.
> + * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + * $Id: verbs.c 7631 2006-06-02 19:53:25Z swise $
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif /* HAVE_CONFIG_H */
> +
> +#include <stdio.h>
> +#include <netinet/in.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <errno.h>
> +
> +#include "rdmaverbs.h"
> +
> +int rdma_rate_to_mult(enum rdma_rate rate)
> +{
> +	switch (rate) {
> +	case RDMA_RATE_2_5_GBPS: return  1;
> +	case RDMA_RATE_5_GBPS:   return  2;
> +	case RDMA_RATE_10_GBPS:  return  4;
> +	case RDMA_RATE_20_GBPS:  return  8;
> +	case RDMA_RATE_30_GBPS:  return 12;
> +	case RDMA_RATE_40_GBPS:  return 16;
> +	case RDMA_RATE_60_GBPS:  return 24;
> +	case RDMA_RATE_80_GBPS:  return 32;
> +	case RDMA_RATE_120_GBPS: return 48;
> +	default:           return -1;
> +	}
> +}
> +
> +enum rdma_rate mult_to_rdma_rate(int mult)
> +{
> +	switch (mult) {
> +	case 1:  return RDMA_RATE_2_5_GBPS;
> +	case 2:  return RDMA_RATE_5_GBPS;
> +	case 4:  return RDMA_RATE_10_GBPS;
> +	case 8:  return RDMA_RATE_20_GBPS;
> +	case 12: return RDMA_RATE_30_GBPS;
> +	case 16: return RDMA_RATE_40_GBPS;
> +	case 24: return RDMA_RATE_60_GBPS;
> +	case 32: return RDMA_RATE_80_GBPS;
> +	case 48: return RDMA_RATE_120_GBPS;
> +	default: return RDMA_RATE_MAX;
> +	}
> +}
> +
> +int rdma_query_device(struct rdma_context *context,
> +		     struct rdma_device_attr *device_attr)
> +{
> +	return context->ops.query_device(context, device_attr);
> +}
> +
> +int rdma_query_port(struct rdma_context *context, uint8_t port_num,
> +		   struct rdma_port_attr *port_attr)
> +{
> +	return context->ops.query_port(context, port_num, port_attr);
> +}
> +
> +int rdma_query_gid(struct rdma_context *context, uint8_t port_num,
> +		  int index, union rdma_gid *gid)
> +{
> +	char name[24];
> +	char attr[41];
> +	uint16_t val;
> +	int i;
> +
> +	snprintf(name, sizeof name, "ports/%d/gids/%d", port_num, index);
> +
> +	if (rdma_read_sysfs_file(context->device->ibdev_path, name,
> +				attr, sizeof attr) < 0)
> +		return -1;
> +
> +	for (i = 0; i < 8; ++i) {
> +		if (sscanf(attr + i * 5, "%hx", &val) != 1)
> +			return -1;
> +		gid->raw[i * 2    ] = val >> 8;
> +		gid->raw[i * 2 + 1] = val & 0xff;
> +	}
> +
> +	return 0;
> +}
> +
> +int rdma_query_pkey(struct rdma_context *context, uint8_t port_num,
> +		   int index, uint16_t *pkey)
> +{
> +	char name[24];
> +	char attr[8];
> +	uint16_t val;
> +
> +	snprintf(name, sizeof name, "ports/%d/pkeys/%d", port_num, index);
> +
> +	if (rdma_read_sysfs_file(context->device->ibdev_path, name,
> +				attr, sizeof attr) < 0)
> +		return -1;
> +
> +	if (sscanf(attr, "%hx", &val) != 1)
> +		return -1;
> +
> +	*pkey = htons(val);
> +	return 0;
> +}
> +
> +struct rdma_pd *rdma_alloc_pd(struct rdma_context *context)
> +{
> +	struct rdma_pd *pd;
> +
> +	pd = context->ops.alloc_pd(context);
> +	if (pd)
> +		pd->context = context;
> +
> +	return pd;
> +}
> +
> +int rdma_dealloc_pd(struct rdma_pd *pd)
> +{
> +	return pd->context->ops.dealloc_pd(pd);
> +}
> +
> +struct rdma_mr *rdma_reg_mr(struct rdma_pd *pd, void *addr,
> +			  size_t length, enum rdma_access_flags access)
> +{
> +	struct rdma_mr *mr;
> +
> +	mr = pd->context->ops.reg_mr(pd, addr, length, access);
> +	if (mr) {
> +		mr->context = pd->context;
> +		mr->pd      = pd;
> +	}
> +
> +	return mr;
> +}
> +
> +int rdma_dereg_mr(struct rdma_mr *mr)
> +{
> +	return mr->context->ops.dereg_mr(mr);
> +}
> +
> +static struct rdma_comp_channel *rdma_create_comp_channel_v2(struct
> rdma_context *context)
> +{
> +	struct rdma_abi_compat_v2 *t = context->abi_compat;
> +	static int warned;
> +
> +	if (!pthread_mutex_trylock(&t->in_use))
> +		return &t->channel;
> +
> +	if (!warned) {
> +		fprintf(stderr, PFX "Warning: kernel's ABI version %d limits
> capacity.\n"
> +			"    Only one completion channel can be created per context.\n",
> +			abi_ver);
> +		++warned;
> +	}
> +
> +	return NULL;
> +}
> +
> +struct rdma_comp_channel *rdma_create_comp_channel(struct rdma_context
> *context)
> +{
> +	struct rdma_comp_channel            *channel;
> +	struct rdma_create_comp_channel      cmd;
> +	struct rdma_create_comp_channel_resp resp;
> +
> +	if (abi_ver <= 2)
> +		return rdma_create_comp_channel_v2(context);
> +
> +	channel = malloc(sizeof *channel);
> +	if (!channel)
> +		return NULL;
> +
> +	RDMA_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_COMP_CHANNEL, &resp,
> sizeof resp);
> +	if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) {
> +		free(channel);
> +		return NULL;
> +	}
> +
> +	channel->fd = resp.fd;
> +
> +	return channel;
> +}
> +
> +static int rdma_destroy_comp_channel_v2(struct rdma_comp_channel
> *channel)
> +{
> +	struct rdma_abi_compat_v2 *t = (struct rdma_abi_compat_v2 *) channel;
> +	pthread_mutex_unlock(&t->in_use);
> +	return 0;
> +}
> +
> +int rdma_destroy_comp_channel(struct rdma_comp_channel *channel)
> +{
> +	if (abi_ver <= 2)
> +		return rdma_destroy_comp_channel_v2(channel);
> +
> +	close(channel->fd);
> +	free(channel);
> +
> +	return 0;
> +}
> +
> +struct rdma_cq *rdma_create_cq(struct rdma_context *context, int cqe,
> void *cq_context,
> +			     struct rdma_comp_channel *channel, int comp_vector)
> +{
> +	struct rdma_cq *cq = context->ops.create_cq(context, cqe, channel,
> +						   comp_vector);
> +
> +	if (cq) {
> +		cq->context    	     	   = context;
> +		cq->cq_context 	     	   = cq_context;
> +		cq->comp_events_completed  = 0;
> +		cq->async_events_completed = 0;
> +		pthread_mutex_init(&cq->mutex, NULL);
> +		pthread_cond_init(&cq->cond, NULL);
> +	}
> +
> +	return cq;
> +}
> +
> +int rdma_resize_cq(struct rdma_cq *cq, int cqe)
> +{
> +	if (!cq->context->ops.resize_cq)
> +		return ENOSYS;
> +
> +	return cq->context->ops.resize_cq(cq, cqe);
> +}
> +
> +int rdma_destroy_cq(struct rdma_cq *cq)
> +{
> +	return cq->context->ops.destroy_cq(cq);
> +}
> +
> +
> +int rdma_get_cq_event(struct rdma_comp_channel *channel,
> +		     struct rdma_cq **cq, void **cq_context)
> +{
> +	struct rdma_comp_event ev;
> +
> +	if (read(channel->fd, &ev, sizeof ev) != sizeof ev)
> +		return -1;
> +
> +	*cq         = (struct rdma_cq *) (uintptr_t) ev.cq_handle;
> +	*cq_context = (*cq)->cq_context;
> +
> +	if ((*cq)->context->ops.cq_event)
> +		(*cq)->context->ops.cq_event(*cq);
> +
> +	return 0;
> +}
> +
> +void rdma_ack_cq_events(struct rdma_cq *cq, unsigned int nevents)
> +{
> +	pthread_mutex_lock(&cq->mutex);
> +	cq->comp_events_completed += nevents;
> +	pthread_cond_signal(&cq->cond);
> +	pthread_mutex_unlock(&cq->mutex);
> +}
> +
> +struct rdma_srq *rdma_create_srq(struct rdma_pd *pd,
> +			       struct rdma_srq_init_attr *srq_init_attr)
> +{
> +	struct rdma_srq *srq;
> +
> +	if (!pd->context->ops.create_srq)
> +		return NULL;
> +
> +	srq = pd->context->ops.create_srq(pd, srq_init_attr);
> +	if (srq) {
> +		srq->context          = pd->context;
> +		srq->srq_context      = srq_init_attr->srq_context;
> +		srq->pd               = pd;
> +		srq->events_completed = 0;
> +		pthread_mutex_init(&srq->mutex, NULL);
> +		pthread_cond_init(&srq->cond, NULL);
> +	}
> +
> +	return srq;
> +}
> +
> +int rdma_modify_srq(struct rdma_srq *srq,
> +		   struct rdma_srq_attr *srq_attr,
> +		   enum rdma_srq_attr_mask srq_attr_mask)
> +{
> +	return srq->context->ops.modify_srq(srq, srq_attr, srq_attr_mask);
> +}
> +
> +int rdma_query_srq(struct rdma_srq *srq, struct rdma_srq_attr
> *srq_attr)
> +{
> +	return srq->context->ops.query_srq(srq, srq_attr);
> +}
> +
> +int rdma_destroy_srq(struct rdma_srq *srq)
> +{
> +	return srq->context->ops.destroy_srq(srq);
> +}
> +
> +struct rdma_qp *rdmav_create_qp(struct rdma_pd *pd,
> +			     struct rdma_qp_init_attr *qp_init_attr)
> +{
> +	struct rdma_qp *qp = pd->context->ops.create_qp(pd, qp_init_attr);
> +
> +	if (qp) {
> +		qp->context    	     = pd->context;
> +		qp->qp_context 	     = qp_init_attr->qp_context;
> +		qp->pd         	     = pd;
> +		qp->send_cq    	     = qp_init_attr->send_cq;
> +		qp->recv_cq    	     = qp_init_attr->recv_cq;
> +		qp->srq        	     = qp_init_attr->srq;
> +		qp->qp_type          = qp_init_attr->qp_type;
> +		qp->events_completed = 0;
> +		pthread_mutex_init(&qp->mutex, NULL);
> +		pthread_cond_init(&qp->cond, NULL);
> +	}
> +
> +	return qp;
> +}
> +
> +int rdma_query_qp(struct rdma_qp *qp, struct rdma_qp_attr *attr,
> +		 enum rdma_qp_attr_mask attr_mask,
> +		 struct rdma_qp_init_attr *init_attr)
> +{
> +	int ret;
> +
> +	ret = qp->context->ops.query_qp(qp, attr, attr_mask, init_attr);
> +	if (ret)
> +		return ret;
> +
> +	if (attr_mask & RDMA_QP_STATE)
> +		qp->state = attr->qp_state;
> +
> +	return 0;
> +}
> +
> +int rdma_modify_qp(struct rdma_qp *qp, struct rdma_qp_attr *attr,
> +		  enum rdma_qp_attr_mask attr_mask)
> +{
> +	int ret;
> +
> +	ret = qp->context->ops.modify_qp(qp, attr, attr_mask);
> +	if (ret)
> +		return ret;
> +
> +	if (attr_mask & RDMA_QP_STATE)
> +		qp->state = attr->qp_state;
> +
> +	return 0;
> +}
> +
> +int rdmav_destroy_qp(struct rdma_qp *qp)
> +{
> +	return qp->context->ops.destroy_qp(qp);
> +}
> +
> +struct rdma_ah *rdma_create_ah(struct rdma_pd *pd, struct rdma_ah_attr
> *attr)
> +{
> +	struct rdma_ah *ah = pd->context->ops.create_ah(pd, attr);
> +
> +	if (ah) {
> +		ah->context = pd->context;
> +		ah->pd      = pd;
> +	}
> +
> +	return ah;
> +}
> +
> +int rdma_destroy_ah(struct rdma_ah *ah)
> +{
> +	return ah->context->ops.destroy_ah(ah);
> +}
> +
> +int rdma_attach_mcast(struct rdma_qp *qp, union rdma_gid *gid, uint16_t
> lid)
> +{
> +	return qp->context->ops.attach_mcast(qp, gid, lid);
> +}
> +
> +int rdma_detach_mcast(struct rdma_qp *qp, union rdma_gid *gid, uint16_t
> lid)
> +{
> +	return qp->context->ops.detach_mcast(qp, gid, lid);
> +}
> 





More information about the general mailing list