[ofa-general] [PATCHv4] libibverbs: Add RDMAoE support

Eli Cohen eli at mellanox.co.il
Wed Aug 5 01:34:22 PDT 2009


Extend the ibv_query_port() verb to return the port's transport protocol, which
can be one of RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP or RDMA_TRANSPORT_RDMAOE.
Applications can use this to find out whether they must use a GRH, as is the
case with RDMAoE.  Add a new command, exposed as ibv_cmd_get_mac(), to get the
MAC address of the remote port that a UD address vector refers to.  Update
ibv_rc_pingpong and ibv_ud_pingpong to accept a remote GID (-g/--gid) so that
they can be used with an RDMAoE port.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
Changed the way a port is described from its link type to its transport
protocol type. This patch is tagged v4 to match the version of the
corresponding kernel patches.


 examples/devinfo.c            |   15 ++++++++++++
 examples/pingpong.c           |    9 +++++++
 examples/pingpong.h           |    2 +
 examples/rc_pingpong.c        |   50 ++++++++++++++++++++++++++++++++--------
 examples/ud_pingpong.c        |   38 +++++++++++++++++++++++++++----
 include/infiniband/driver.h   |    1 +
 include/infiniband/kern-abi.h |   25 ++++++++++++++++++--
 include/infiniband/verbs.h    |   12 +++++++++
 src/cmd.c                     |   20 ++++++++++++++++
 src/libibverbs.map            |    1 +
 10 files changed, 155 insertions(+), 18 deletions(-)

diff --git a/examples/devinfo.c b/examples/devinfo.c
index caa5d5f..a42a6dc 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -175,6 +175,20 @@ static int print_all_port_gids(struct ibv_context *ctx, uint8_t port_num, int tb
 	return rc;
 }
 
+static const char *transport_type_str(enum rdma_transport_type type)
+{
+	/* Translate each known transport into the name devinfo prints. */
+	if (type == RDMA_TRANSPORT_IB)
+		return "IB";
+	if (type == RDMA_TRANSPORT_IWARP)
+		return "IWARP";
+	if (type == RDMA_TRANSPORT_RDMAOE)
+		return "RDMAOE";
+
+	/* Any other value is reported as unknown rather than rejected. */
+	return "Unknown";
+}
+
 static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 {
 	struct ibv_context *ctx;
@@ -273,6 +287,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
+		printf("\t\t\ttransport_type:\t\t%s\n", transport_type_str(port_attr.transport));
 
 		if (verbose) {
 			printf("\t\t\tmax_msg_sz:\t\t0x%x\n", port_attr.max_msg_sz);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index b916f59..d4a46e4 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -31,6 +31,8 @@
  */
 
 #include "pingpong.h"
+#include <arpa/inet.h>
+#include <stdlib.h>
 
 enum ibv_mtu pp_mtu_to_enum(int mtu)
 {
@@ -53,3 +55,10 @@ uint16_t pp_get_local_lid(struct ibv_context *context, int port)
 
 	return attr.lid;
 }
+
+/* Fetch the attributes of @port; returns what ibv_query_port() returns. */
+int pp_get_port_info(struct ibv_context *context, int port,
+		     struct ibv_port_attr *attr)
+{
+	return ibv_query_port(context, port, attr);
+}
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 71d7c3f..16d3466 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -37,5 +37,7 @@
 
 enum ibv_mtu pp_mtu_to_enum(int mtu);
 uint16_t pp_get_local_lid(struct ibv_context *context, int port);
+int pp_get_port_info(struct ibv_context *context, int port,
+		     struct ibv_port_attr *attr);
 
 #endif /* IBV_PINGPONG_H */
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index 26fa45c..4250cdf 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -67,6 +67,8 @@ struct pingpong_context {
 	int			 size;
 	int			 rx_depth;
 	int			 pending;
+	struct ibv_port_attr     portinfo;
+	union ibv_gid		 dgid;
 };
 
 struct pingpong_dest {
@@ -94,6 +96,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
 			.port_num	= port
 		}
 	};
+
+	if (ctx->dgid.global.interface_id) {
+		attr.ah_attr.is_global = 1;
+		attr.ah_attr.grh.hop_limit = 1;
+		attr.ah_attr.grh.dgid = ctx->dgid;
+	}
 	if (ibv_modify_qp(ctx->qp, &attr,
 			  IBV_QP_STATE              |
 			  IBV_QP_AV                 |
@@ -289,11 +297,11 @@ out:
 
 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 					    int rx_depth, int port,
-					    int use_event)
+					    int use_event, int is_server)
 {
 	struct pingpong_context *ctx;
 
-	ctx = malloc(sizeof *ctx);
+	ctx = calloc(1, sizeof *ctx);
 	if (!ctx)
 		return NULL;
 
@@ -306,7 +314,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 		return NULL;
 	}
 
-	memset(ctx->buf, 0, size);
+	memset(ctx->buf, 0x7b + is_server, size);
 
 	ctx->context = ibv_open_device(ib_dev);
 	if (!ctx->context) {
@@ -481,6 +489,7 @@ static void usage(const char *argv0)
 	printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
 	printf("  -l, --sl=<sl>          service level value\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -504,6 +513,7 @@ int main(int argc, char *argv[])
 	int                      rcnt, scnt;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char			*grh = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -520,10 +530,11 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -575,6 +586,10 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+			grh = strdupa(optarg);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -614,7 +629,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
+	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event, !servername);
 	if (!ctx)
 		return 1;
 
@@ -630,17 +645,31 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 
-	my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
-	my_dest.qpn = ctx->qp->qp_num;
-	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Couldn't get local LID\n");
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
 		return 1;
 	}
 
+	my_dest.lid = ctx->portinfo.lid;
+	if (ctx->portinfo.transport == RDMA_TRANSPORT_RDMAOE) {
+		/* An RDMAoE port needs a GRH, so a valid remote GID is required. */
+		if (!grh) {
+			fprintf(stderr, "must specify remote GID\n");
+			return 1;
+		}
+		if (inet_pton(AF_INET6, grh, &ctx->dgid) != 1) {
+			fprintf(stderr, "Invalid remote GID %s\n", grh);
+			return 1;
+		}
+	} else if (!my_dest.lid) {
+		fprintf(stderr, "Couldn't get local LID\n");
+		return 1;
+	}
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
 	       my_dest.lid, my_dest.qpn, my_dest.psn);
 
 	if (servername)
 		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
 	else
@@ -705,6 +734,7 @@ int main(int argc, char *argv[])
 					fprintf(stderr, "poll CQ failed %d\n", ne);
 					return 1;
 				}
+
 			} while (!use_event && ne < 1);
 
 			for (i = 0; i < ne; ++i) {
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 8f3d50b..b3aa55d 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -68,6 +68,8 @@ struct pingpong_context {
 	int			 size;
 	int			 rx_depth;
 	int			 pending;
+	struct ibv_port_attr     portinfo;
+	union ibv_gid            dgid;
 };
 
 struct pingpong_dest {
@@ -105,6 +107,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
 		return 1;
 	}
 
+	if (ctx->dgid.global.interface_id) {
+		ah_attr.is_global = 1;
+		ah_attr.grh.hop_limit = 1;
+		ah_attr.grh.dgid = ctx->dgid;
+	}
+
 	ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
 	if (!ctx->ah) {
 		fprintf(stderr, "Failed to create AH\n");
@@ -478,6 +486,7 @@ static void usage(const char *argv0)
 	printf("  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n");
 	printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -500,6 +509,7 @@ int main(int argc, char *argv[])
 	int                      rcnt, scnt;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char			*gid = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -515,10 +525,11 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:r:n:l:e", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -563,6 +574,10 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+			gid = strdupa(optarg);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -618,12 +633,25 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 
-	my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
+		return 1;
+	}
+	my_dest.lid = ctx->portinfo.lid;
+	/* IB needs a local LID; any other transport needs a valid remote GID. */
 	my_dest.qpn = ctx->qp->qp_num;
 	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Couldn't get local LID\n");
-		return 1;
+	if (ctx->portinfo.transport == RDMA_TRANSPORT_IB) {
+		if (!my_dest.lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+	} else if (!gid) {
+		fprintf(stderr, "must specify remote GID\n");
+		return 1;
+	} else if (inet_pton(AF_INET6, gid, &ctx->dgid) != 1) {
+		fprintf(stderr, "Invalid remote GID %s\n", gid);
+		return 1;
 	}
 
 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index 67a3bf8..cbd261f 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -131,6 +131,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
 int ibv_cmd_destroy_ah(struct ibv_ah *ah);
 int ibv_cmd_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
 int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac);
 
 int ibv_dontfork_range(void *base, size_t size);
 int ibv_dofork_range(void *base, size_t size);
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..7823da8 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -46,7 +46,7 @@
  * The minimum and maximum kernel ABI that we can handle.
  */
 #define IB_USER_VERBS_MIN_ABI_VERSION	1
-#define IB_USER_VERBS_MAX_ABI_VERSION	6
+#define IB_USER_VERBS_MAX_ABI_VERSION	7
 
 enum {
 	IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,7 +85,8 @@ enum {
 	IB_USER_VERBS_CMD_MODIFY_SRQ,
 	IB_USER_VERBS_CMD_QUERY_SRQ,
 	IB_USER_VERBS_CMD_DESTROY_SRQ,
-	IB_USER_VERBS_CMD_POST_SRQ_RECV
+	IB_USER_VERBS_CMD_POST_SRQ_RECV,
+	IB_USER_VERBS_CMD_GET_MAC,
 };
 
 /*
@@ -223,7 +224,8 @@ struct ibv_query_port_resp {
 	__u8  active_width;
 	__u8  active_speed;
 	__u8  phys_state;
-	__u8  reserved[3];
+	__u8  transport;
+	__u8  reserved[2];
 };
 
 struct ibv_alloc_pd {
@@ -798,6 +800,7 @@ enum {
 	IB_USER_VERBS_CMD_QUERY_SRQ_V2,
 	IB_USER_VERBS_CMD_DESTROY_SRQ_V2,
 	IB_USER_VERBS_CMD_POST_SRQ_RECV_V2,
+	IB_USER_VERBS_CMD_GET_MAC_V2 = -1,
 	/*
 	 * Set commands that didn't exist to -1 so our compile-time
 	 * trick opcodes in IBV_INIT_CMD() doesn't break.
@@ -878,4 +881,20 @@ struct ibv_create_srq_resp_v5 {
 	__u32 srq_handle;
 };
 
+struct ibv_get_mac {	/* command buffer that ibv_cmd_get_mac() writes to cmd_fd */
+	__u32 command;	/* command .. response: presumably set by IBV_INIT_CMD_RESP() -- TODO confirm */
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 pd_handle;	/* taken from pd->handle by ibv_cmd_get_mac() */
+	__u8  port;	/* port number supplied by the caller */
+	__u8  reserved[3];	/* rounds port up to a 4-byte boundary */
+	__u8  dgid[16];	/* 16 GID bytes copied from the caller's gid buffer */
+};
+
+struct ibv_get_mac_resp {	/* response buffer paired with struct ibv_get_mac */
+	__u8	mac[6];	/* MAC bytes; ibv_cmd_get_mac() copies all 6 to its caller */
+	__u16	reserved;	/* brings the struct size to 8 bytes */
+};
+
 #endif /* KERN_ABI_H */
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index a04cc62..f81f17f 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -61,6 +61,7 @@ union ibv_gid {
 		uint64_t	subnet_prefix;
 		uint64_t	interface_id;
 	} global;
+	uint32_t		dwords[4];
 };
 
 enum ibv_node_type {
@@ -161,6 +162,16 @@ enum ibv_port_state {
 	IBV_PORT_ACTIVE_DEFER	= 5
 };
 
+enum rdma_transport_type {	/* transport protocol of a port; stored in ibv_port_attr.transport */
+	RDMA_TRANSPORT_IB,
+	RDMA_TRANSPORT_IWARP,
+	RDMA_TRANSPORT_RDMAOE	/* ibv_rc_pingpong insists on a remote GID for such ports */
+};
+enum ibv_port_link_type {	/* NOTE(review): not referenced elsewhere in this patch -- confirm it is still needed */
+	PORT_LINK_IB,
+	PORT_LINK_ETH
+};
+
 struct ibv_port_attr {
 	enum ibv_port_state	state;
 	enum ibv_mtu		max_mtu;
@@ -181,6 +192,7 @@ struct ibv_port_attr {
 	uint8_t			active_width;
 	uint8_t			active_speed;
 	uint8_t			phys_state;
+	enum rdma_transport_type transport;
 };
 
 enum ibv_event_type {
diff --git a/src/cmd.c b/src/cmd.c
index 66d7134..30754ac 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -162,6 +162,7 @@ int ibv_cmd_query_device(struct ibv_context *context,
 	return 0;
 }
 
+#include <stdio.h>
 int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 		       struct ibv_port_attr *port_attr,
 		       struct ibv_query_port *cmd, size_t cmd_size)
@@ -196,6 +197,7 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 	port_attr->active_width    = resp.active_width;
 	port_attr->active_speed    = resp.active_speed;
 	port_attr->phys_state      = resp.phys_state;
+	port_attr->transport       = resp.transport;
 
 	return 0;
 }
@@ -1122,3 +1124,21 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
 
 	return 0;
 }
+
+/* Issue GET_MAC for @gid on @port of @pd's device; fills @mac[6]. Returns 0 or errno. */
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac)
+{
+	struct ibv_get_mac cmd;
+	struct ibv_get_mac_resp resp;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, GET_MAC, &resp, sizeof resp);
+	cmd.pd_handle = pd->handle;
+	cmd.port = port;
+	memset(cmd.reserved, 0, sizeof cmd.reserved);	/* don't hand stack bytes to the kernel */
+	memcpy(cmd.dgid, gid, sizeof cmd.dgid);
+	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	memcpy(mac, resp.mac, sizeof resp.mac);
+	return 0;
+}
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 1827da0..1688e73 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -64,6 +64,7 @@ IBVERBS_1.0 {
 		ibv_cmd_destroy_ah;
 		ibv_cmd_attach_mcast;
 		ibv_cmd_detach_mcast;
+		ibv_cmd_get_mac;
 		ibv_copy_qp_attr_from_kern;
 		ibv_copy_path_rec_from_kern;
 		ibv_copy_path_rec_to_kern;
-- 
1.6.3.3




More information about the general mailing list