[ofa-general] [PATCH RFC] sharing userspace IB objects

Michael S. Tsirkin mst at dev.mellanox.co.il
Mon Jun 25 06:06:04 PDT 2007


> > Quoting Roland Dreier <rdreier at cisco.com>:
> > Subject: Re: Sharing userspace IB objects
> > 
> >  > Can you please elaborate a little bit more on what steps are required to
> >  > achieve this? I have a connection manager running as a separate process from
> >  > the apps which would be sending/receiving data on QPs. I was hoping to
> >  > create IB objects via CM and be made sharable to the apps.
> > 
> > You would have to do a lot of hacking of low-level stuff (libibverbs
> > and whatever userspace driver libraries you need) to handle passing
> > file descriptors through unix domain sockets and figure out a way to
> > make the CQ/QP buffers visible in the address space of the process
> > that will actually use them.  And also handle doorbell pages etc.
>
> This is related to scalability stuff that Dror presented at Sonoma
> http://www.openfabrics.org/archives/spring2007sonoma/Tuesday%20May%201/gdror%20Next%20Generation%20Hardware%20Assists%20And%20Scalability2.pdf
> 
> See especially the shared send queue slide.
> 
> So, since the need seems to be there, I started thinking about how this could be done.
> Basically, we could create shared memory objects (shm_open) and use these
> for all hardware-accessible registers, as well as necessary control (head/tail pointers,
> spinlocks used for protection, etc).
> 
> If we do this, we can use unix domain sockets for everything,
> a client just mmaps the fd that it got. Does this make sense?

OK, here's a draft showing what an API to do this could look like.

Basically the idea is that we'd ask low-level drivers to provide an
(optional) API to
1. allocate context and all its objects inside a shared memory object
2. pack and unpack objects from/to unix domain socket messages

So to share a QP, the server would
A. open shared context, create pd, cq, qp
B. listen on unix domain socket
C. pack the context, pd, cq, qp
D. send them to clients that connect

The client would
A. create unix domain socket
B. connect to server
C. get message from server
D. unpack context, pd, cq, qp

Roland, all, any comments on the API?
Next, I'm going to look at adding this support into some low-level drivers.

---

diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index acc1b82..b16e186 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -38,6 +38,7 @@
 
 #include <stdint.h>
 #include <pthread.h>
+#include <sys/socket.h>
 
 #ifdef __cplusplus
 #  define BEGIN_C_DECLS extern "C" {
@@ -601,6 +602,9 @@ struct ibv_device;
 struct ibv_context;
 
 struct ibv_device_ops {
+	struct ibv_context *	(*alloc_shared_context)(struct ibv_device *device,
+							int cmd_fd,
+							int shm_fd, off_t offset);
 	struct ibv_context *	(*alloc_context)(struct ibv_device *device, int cmd_fd);
 	void			(*free_context)(struct ibv_context *context);
 };
@@ -680,6 +684,26 @@ struct ibv_context_ops {
 	int			(*detach_mcast)(struct ibv_qp *qp, union ibv_gid *gid,
 						uint16_t lid);
 	void			(*async_event)(struct ibv_async_event *event);
+
+	int (*context_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_context *);
+	int (*pd_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_pd *);
+	int (*mr_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_mr *);
+	int (*mw_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_mw *);
+	int (*srq_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_srq *);
+	int (*cq_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_cq *);
+	int (*qp_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_qp *);
+	int (*comp_channel_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_comp_channel *);
+	int (*ah_csmg_pack)(struct msghdr *, struct cmsghdr **,struct ibv_ah *);
+
+	struct ibv_context *(*context_cmsg_unpack)(struct ibv_device *, struct msghdr *, struct cmsghdr **);
+	struct ibv_pd *(*pd_cmsg_unpack)(struct ibv_context *, struct msghdr *, struct cmsghdr **);
+	struct ibv_mr *(*mr_cmsg_unpack)(struct ibv_pd *, struct msghdr *, struct cmsghdr **);
+	struct ibv_mw *(*mw_cmsg_unpack)(struct ibv_pd *, struct msghdr *, struct cmsghdr **);
+	struct ibv_srq *(*srq_cmsg_unpack)(struct ibv_pd *, struct msghdr *, struct cmsghdr **);
+	struct ibv_comp_channel *(*comp_channel_cmsg_unpack)(struct ibv_context *, struct msghdr *, struct cmsghdr **);
+	struct ibv_cq *(*cq_cmsg_unpack)(struct ibv_context *, void *cq_context, struct ibv_comp_channel *, struct msghdr *, struct cmsghdr **);
+	struct ibv_qp *(*qp_cmsg_unpack)(struct ibv_pd *pd, struct ibv_qp_init_attr *init_attr, struct msghdr *, struct cmsghdr **);
+	struct ibv_ah *(*ah_cmsg_unpack)(struct ibv_pd *pd, struct msghdr *, struct cmsghdr **);
 };
 
 struct ibv_context {
@@ -1074,6 +1098,30 @@ int ibv_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
  */
 int ibv_fork_init(void);
 
+struct ibv_context *ibv_open_shared_device(struct ibv_device *device,
+					   int fd, off_t offset);
+int ibv_cmsg_space(struct ibv_context *);
+
+int ibv_context_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_context *);
+int ibv_pd_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_pd *);
+int ibv_mr_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_mr *);
+int ibv_mw_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_mw *);
+int ibv_srq_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_srq *);
+int ibv_cq_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_cq *);
+int ibv_qp_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_qp *);
+int ibv_comp_channel_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_comp_channel *);
+int ibv_ah_csmg_pack(struct msghdr *, struct cmsghdr **,struct ibv_ah *);
+
+struct ibv_context *ibv_context_cmsg_unpack(struct ibv_device *, struct msghdr *, struct cmsghdr **);
+struct ibv_pd *ibv_pd_cmsg_unpack(struct ibv_context *, struct msghdr *, struct cmsghdr **);
+struct ibv_mr *ibv_mr_cmsg_unpack(struct ibv_pd *, struct msghdr *, struct cmsghdr **);
+struct ibv_mw *ibv_mw_cmsg_unpack(struct ibv_pd *, struct msghdr *, struct cmsghdr **);
+struct ibv_srq *ibv_srq_cmsg_unpack(struct ibv_pd *, struct msghdr *, struct cmsghdr **);
+struct ibv_comp_channel *ibv_comp_channel_cmsg_unpack(struct ibv_context *, struct msghdr *, struct cmsghdr **);
+struct ibv_cq *ibv_cq_cmsg_unpack(struct ibv_context *, void *cq_context, struct ibv_comp_channel *, struct msghdr *, struct cmsghdr **);
+struct ibv_qp *ibv_qp_cmsg_unpack(struct ibv_pd *pd, struct ibv_qp_init_attr *init_attr, struct msghdr *, struct cmsghdr **);
+struct ibv_ah *ibv_ah_cmsg_unpack(struct ibv_pd *pd, struct msghdr *, struct cmsghdr **);
+
 END_C_DECLS
 
 #  undef __attribute_const

-- 
MST



More information about the general mailing list