[ofa-general] [PATCH/RFC] Add support for "send with invalidate" to libibverbs

Steve Wise swise at opengridcomputing.com
Tue Apr 1 15:21:24 PDT 2008


looks ok to me.

Roland Dreier wrote:
> In kernel commit c80cf84d ("IB/core: Add support for "send with
> invalidate" work requests"), which is currently queued for 2.6.26, I
> added support for send with invalidate work requests on the kernel side
> of things.  This patch adds the matching support to libibverbs.
> 
> There is one part that's a bit tricky: in ibv_cmd_query_device(), I
> added a bit of code to move IBV_DEVICE_SEND_W_INV to the reserved bit
> where it used to be.  This is to make sure that the userspace low-level
> driver for the device in question really supports send with invalidate.
> To see why this is necessary, suppose that we didn't do this and a user
> had a system with
> 
>  - a new kernel with a low-level driver that sets the
>    IB_DEVICE_SEND_W_INV bit
>  - a new libibverbs with send with invalidate support
>  - an old userspace driver that has no send with invalidate support
> 
> In this case send with invalidate requests would be silently turned into
> plain send requests with no way for an application to know this.  With
> the approach in my patch, the application will not see
> IBV_DEVICE_SEND_W_INV set and hence should not use send with invalidate
> requests.
> 
> This scheme means that low-level drivers that support send with
> invalidate should add some autoconf code that checks if
> IBV_DEVICE_KERNEL_SEND_W_INV is defined, and if so, compile in code in
> the query_device method that sets IBV_DEVICE_SEND_W_INV if
> ibv_cmd_query_device() returns IBV_DEVICE_KERNEL_SEND_W_INV set.
> 
> This patch also adds enum values for a few more device capability bits
> defined in the kernel.
> 
> Does this approach make sense to people?
> ---
> diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
> index 0db083a..ee799bb 100644
> --- a/include/infiniband/kern-abi.h
> +++ b/include/infiniband/kern-abi.h
> @@ -592,6 +592,10 @@ struct ibv_kern_send_wr {
>  			__u32 remote_qkey;
>  			__u32 reserved;
>  		} ud;
> +		struct {
> +			__u32 rkey;
> +			__u32 reserved;
> +		} invalidate;
>  	} wr;
>  };
>  
> diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
> index a51bb9d..679386a 100644
> --- a/include/infiniband/verbs.h
> +++ b/include/infiniband/verbs.h
> @@ -92,7 +92,18 @@ enum ibv_device_cap_flags {
>  	IBV_DEVICE_SYS_IMAGE_GUID	= 1 << 11,
>  	IBV_DEVICE_RC_RNR_NAK_GEN	= 1 << 12,
>  	IBV_DEVICE_SRQ_RESIZE		= 1 << 13,
> -	IBV_DEVICE_N_NOTIFY_CQ		= 1 << 14
> +	IBV_DEVICE_N_NOTIFY_CQ		= 1 << 14,
> +	IBV_DEVICE_ZERO_STAG		= 1 << 15,
> +	/*
> +	 * IBV_DEVICE_KERNEL_SEND_W_INV is used by libibverbs to
> +	 * signal to low-level driver libraries that the kernel set
> +	 * the "send with invalidate" capability bit.  Applications
> +	 * should only test IBV_DEVICE_SEND_W_INV and never look at
> +	 * IBV_DEVICE_KERNEL_SEND_W_INV.
> +	 */
> +	IBV_DEVICE_KERNEL_SEND_W_INV	= 1 << 16,
> +	IBV_DEVICE_MEM_WINDOW		= 1 << 17,
> +	IBV_DEVICE_SEND_W_INV		= 1 << 21
>  };
>  
>  enum ibv_atomic_cap {
> @@ -492,7 +503,8 @@ enum ibv_send_flags {
>  	IBV_SEND_FENCE		= 1 << 0,
>  	IBV_SEND_SIGNALED	= 1 << 1,
>  	IBV_SEND_SOLICITED	= 1 << 2,
> -	IBV_SEND_INLINE		= 1 << 3
> +	IBV_SEND_INLINE		= 1 << 3,
> +	IBV_SEND_INVALIDATE	= 1 << 6
>  };
>  
>  struct ibv_sge {
> @@ -525,6 +537,9 @@ struct ibv_send_wr {
>  			uint32_t	remote_qpn;
>  			uint32_t	remote_qkey;
>  		} ud;
> +		struct {
> +			uint32_t	rkey;
> +		} invalidate;
>  	} wr;
>  };
>  
> diff --git a/src/cmd.c b/src/cmd.c
> index 9db8aa6..3e0ff0a 100644
> --- a/src/cmd.c
> +++ b/src/cmd.c
> @@ -159,6 +159,17 @@ int ibv_cmd_query_device(struct ibv_context *context,
>  	device_attr->local_ca_ack_delay        = resp.local_ca_ack_delay;
>  	device_attr->phys_port_cnt	       = resp.phys_port_cnt;
>  
> +	/*
> +	 * If the kernel driver says that it supports send with
> +	 * invalidate work requests, then move the flag to
> +	 * IBV_DEVICE_KERNEL_SEND_W_INV so that the low-level driver
> +	 * gets a chance to make sure it supports the operation as well.
> +	 */
> +	if (device_attr->device_cap_flags & IBV_DEVICE_SEND_W_INV) {
> +		device_attr->device_cap_flags &= ~IBV_DEVICE_SEND_W_INV;
> +		device_attr->device_cap_flags |= ~IBV_DEVICE_KERNEL_SEND_W_INV;
> +	}
> +
>  	return 0;
>  }
>  
> @@ -859,6 +870,11 @@ int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
>  					i->wr.rdma.remote_addr;
>  				tmp->wr.rdma.rkey = i->wr.rdma.rkey;
>  				break;
> +			case IBV_WR_SEND:
> +			case IBV_WR_SEND_WITH_IMM:
> +				tmp->wr.invalidate.rkey =
> +					i->wr.invalidate.rkey;
> +				break;
>  			case IBV_WR_ATOMIC_CMP_AND_SWP:
>  			case IBV_WR_ATOMIC_FETCH_AND_ADD:
>  				tmp->wr.atomic.remote_addr =
> _______________________________________________
> general mailing list
> general at lists.openfabrics.org
> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
> 
> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general



More information about the general mailing list