[ewg] [PATCH] libibverbs: Add RDMAoE support

Eli Cohen eli at mellanox.co.il
Mon Jun 15 06:42:36 PDT 2009


Extend the ibv_query_port() verb to return enum ibv_port_link_type which
reports the link type to be either IB or Ethernet. This can be used by
applications to know if they must use GRH as is the case in RDMAoE.
Add a new system call to get the MAC address of the remote port that a UD
address vector refers to.
Update ibv_rc_pingpong and ibv_ud_pingpong to accept a remote GID so that they
can be used with an RDMAoE port.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
 examples/devinfo.c            |   13 ++++++++++
 examples/pingpong.c           |   31 +++++++++++++++++++++++++
 examples/pingpong.h           |    3 ++
 examples/rc_pingpong.c        |   50 ++++++++++++++++++++++++++++++++--------
 examples/ud_pingpong.c        |   37 +++++++++++++++++++++++++++---
 include/infiniband/driver.h   |    1 +
 include/infiniband/kern-abi.h |   23 +++++++++++++++++-
 include/infiniband/verbs.h    |    7 +++++
 src/cmd.c                     |   20 ++++++++++++++++
 src/libibverbs.map            |    1 +
 10 files changed, 170 insertions(+), 16 deletions(-)

diff --git a/examples/devinfo.c b/examples/devinfo.c
index caa5d5f..fc9dbb9 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -175,6 +175,18 @@ static int print_all_port_gids(struct ibv_context *ctx, uint8_t port_num, int tb
 	return rc;
 }
 
+static const char *link_type_str(enum ibv_port_link_type type)
+{
+	/* Printable name of a port's link type; "Unknown" for anything else. */
+	if (type == PORT_LINK_IB)
+		return "PORT_LINK_IB";
+
+	if (type == PORT_LINK_ETH)
+		return "PORT_LINK_ETH";
+
+	return "Unknown";
+}
+
 static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 {
 	struct ibv_context *ctx;
@@ -273,6 +285,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
+		printf("\t\t\tlink_type:\t\t%s\n", link_type_str(port_attr.link_type));
 
 		if (verbose) {
 			printf("\t\t\tmax_msg_sz:\t\t0x%x\n", port_attr.max_msg_sz);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index b916f59..e53e2fa 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -31,6 +31,8 @@
  */
 
 #include "pingpong.h"
+#include <arpa/inet.h>
+#include <stdlib.h>
 
 enum ibv_mtu pp_mtu_to_enum(int mtu)
 {
@@ -53,3 +55,32 @@ uint16_t pp_get_local_lid(struct ibv_context *context, int port)
 
 	return attr.lid;
 }
+
+int pp_get_port_info(struct ibv_context *context, int port,
+		     struct ibv_port_attr *attr)
+{
+	return ibv_query_port(context, port, attr);	/* status passed through as-is */
+}
+
+/* Parse up to 32 hex digits from grh into gid as four network-order dwords.
+ * Shorter input is tolerated: the dwords it does not cover are left zero. */
+void str2gid(char *grh, union ibv_gid *gid)
+{
+	char word[9];	/* 8 hex digits + NUL; grh itself is never written */
+	int i, j;
+
+	for (i = 0; i < 4; ++i)
+		gid->dwords[i] = 0;
+
+	for (i = 0; i < 4; ++i) {
+		/* Stop at the terminator instead of reading past a short string. */
+		for (j = 0; j < 8 && grh[8 * i + j]; ++j)
+			word[j] = grh[8 * i + j];
+		word[j] = 0;
+
+		gid->dwords[i] = htonl(strtoul(word, NULL, 16));
+		if (j < 8)
+			break;	/* input ended early; remaining dwords stay 0 */
+	}
+}
+
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 71d7c3f..8c82b32 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -37,5 +37,8 @@
 
 enum ibv_mtu pp_mtu_to_enum(int mtu);
 uint16_t pp_get_local_lid(struct ibv_context *context, int port);
+int pp_get_port_info(struct ibv_context *context, int port,
+		     struct ibv_port_attr *attr);
+void str2gid(char *grh, union ibv_gid *gid);
 
 #endif /* IBV_PINGPONG_H */
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index 26fa45c..23cad7a 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -67,6 +67,8 @@ struct pingpong_context {
 	int			 size;
 	int			 rx_depth;
 	int			 pending;
+	struct ibv_port_attr     portinfo;
+	union ibv_gid		 dgid;
 };
 
 struct pingpong_dest {
@@ -94,6 +96,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
 			.port_num	= port
 		}
 	};
+
+	if (ctx->dgid.global.interface_id) {
+		attr.ah_attr.is_global = 1;
+		attr.ah_attr.grh.hop_limit = 1;	
+		attr.ah_attr.grh.dgid = ctx->dgid;
+	}
 	if (ibv_modify_qp(ctx->qp, &attr,
 			  IBV_QP_STATE              |
 			  IBV_QP_AV                 |
@@ -289,11 +297,11 @@ out:
 
 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 					    int rx_depth, int port,
-					    int use_event)
+					    int use_event, int is_server)
 {
 	struct pingpong_context *ctx;
 
-	ctx = malloc(sizeof *ctx);
+	ctx = calloc(1, sizeof *ctx);
 	if (!ctx)
 		return NULL;
 
@@ -306,7 +314,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 		return NULL;
 	}
 
-	memset(ctx->buf, 0, size);
+	memset(ctx->buf, 0x7b + is_server, size);
 
 	ctx->context = ibv_open_device(ib_dev);
 	if (!ctx->context) {
@@ -481,6 +489,7 @@ static void usage(const char *argv0)
 	printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
 	printf("  -l, --sl=<sl>          service level value\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -504,6 +513,7 @@ int main(int argc, char *argv[])
 	int                      rcnt, scnt;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char			*grh = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -520,10 +530,11 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -575,6 +586,10 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+			grh = strdupa(optarg);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -614,7 +629,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
+	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event, !servername);
 	if (!ctx)
 		return 1;
 
@@ -630,17 +645,31 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 
-	my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
-	my_dest.qpn = ctx->qp->qp_num;
-	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Couldn't get local LID\n");
+	
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
 		return 1;
 	}
 
+	my_dest.lid = ctx->portinfo.lid;
+	if (ctx->portinfo.link_type == PORT_LINK_ETH) {
+		if (!grh) {
+			fprintf(stderr, "must specify remote GID\n");
+			return 1;
+		}
+		str2gid(grh, &ctx->dgid);
+	} else {
+		if (!my_dest.lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+	}
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
 	       my_dest.lid, my_dest.qpn, my_dest.psn);
 
+
 	if (servername)
 		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
 	else
@@ -705,6 +734,7 @@ int main(int argc, char *argv[])
 					fprintf(stderr, "poll CQ failed %d\n", ne);
 					return 1;
 				}
+
 			} while (!use_event && ne < 1);
 
 			for (i = 0; i < ne; ++i) {
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 8f3d50b..3709b16 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -68,6 +68,8 @@ struct pingpong_context {
 	int			 size;
 	int			 rx_depth;
 	int			 pending;
+	struct ibv_port_attr     portinfo;
+	union ibv_gid            dgid;
 };
 
 struct pingpong_dest {
@@ -105,6 +107,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
 		return 1;
 	}
 
+	if (ctx->dgid.global.interface_id) {
+		ah_attr.is_global = 1;
+		ah_attr.grh.hop_limit = 1;
+		ah_attr.grh.dgid = ctx->dgid;
+	}
+
 	ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
 	if (!ctx->ah) {
 		fprintf(stderr, "Failed to create AH\n");
@@ -478,6 +486,7 @@ static void usage(const char *argv0)
 	printf("  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n");
 	printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid              specify remote gid\n");
 }
 
 int main(int argc, char *argv[])
@@ -500,6 +509,7 @@ int main(int argc, char *argv[])
 	int                      rcnt, scnt;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char			*gid = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -515,6 +525,7 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
@@ -563,6 +574,11 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+                        gid = strdupa(optarg);
+			printf("gid = %s\n", gid);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -618,12 +634,25 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 
-	my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
+		return 1;
+	}
+	my_dest.lid = ctx->portinfo.lid;
+
 	my_dest.qpn = ctx->qp->qp_num;
 	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Couldn't get local LID\n");
-		return 1;
+	if (ctx->portinfo.link_type == PORT_LINK_IB) {
+		if (!my_dest.lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+	} else {
+		if (!gid) {
+			fprintf(stderr, "must specify remote GID\n");
+			return 1;
+		}
+		str2gid(gid, &ctx->dgid);
 	}
 
 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index 67a3bf8..cbd261f 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -131,6 +131,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
 int ibv_cmd_destroy_ah(struct ibv_ah *ah);
 int ibv_cmd_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
 int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac);
 
 int ibv_dontfork_range(void *base, size_t size);
 int ibv_dofork_range(void *base, size_t size);
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..1dc5f0c 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -85,7 +85,8 @@ enum {
 	IB_USER_VERBS_CMD_MODIFY_SRQ,
 	IB_USER_VERBS_CMD_QUERY_SRQ,
 	IB_USER_VERBS_CMD_DESTROY_SRQ,
-	IB_USER_VERBS_CMD_POST_SRQ_RECV
+	IB_USER_VERBS_CMD_POST_SRQ_RECV,
+	IB_USER_VERBS_CMD_GET_MAC
 };
 
 /*
@@ -223,7 +224,8 @@ struct ibv_query_port_resp {
 	__u8  active_width;
 	__u8  active_speed;
 	__u8  phys_state;
-	__u8  reserved[3];
+	__u8  link_type;
+	__u8  reserved[2];
 };
 
 struct ibv_alloc_pd {
@@ -803,6 +805,7 @@ enum {
 	 * trick opcodes in IBV_INIT_CMD() doesn't break.
 	 */
 	IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL_V2 = -1,
+	IB_USER_VERBS_CMD_GET_MAC_V2 = -1,
 };
 
 struct ibv_destroy_cq_v1 {
@@ -878,4 +881,20 @@ struct ibv_create_srq_resp_v5 {
 	__u32 srq_handle;
 };
 
+struct ibv_get_mac {	/* request for IB_USER_VERBS_CMD_GET_MAC */
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 pd_handle;	/* ibv_pd handle, set from pd->handle */
+	__u8  port;		/* port number passed by the caller */
+	__u8  reserved[3];
+	__u8  dgid[16];		/* GID to resolve, copied from the caller */
+};
+
+struct ibv_get_mac_resp {	/* reply to struct ibv_get_mac */
+	__u8	mac[6];		/* copied out to the caller's mac buffer */
+	__u16	reserved;
+};
+
 #endif /* KERN_ABI_H */
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index a04cc62..badbb02 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -61,6 +61,7 @@ union ibv_gid {
 		uint64_t	subnet_prefix;
 		uint64_t	interface_id;
 	} global;
+	uint32_t		dwords[4];
 };
 
 enum ibv_node_type {
@@ -161,6 +162,11 @@ enum ibv_port_state {
 	IBV_PORT_ACTIVE_DEFER	= 5
 };
 
+enum ibv_port_link_type {
+	PORT_LINK_IB,	/* port link type reported as IB */
+	PORT_LINK_ETH	/* port link type reported as Ethernet (RDMAoE) */
+};
+
 struct ibv_port_attr {
 	enum ibv_port_state	state;
 	enum ibv_mtu		max_mtu;
@@ -181,6 +187,7 @@ struct ibv_port_attr {
 	uint8_t			active_width;
 	uint8_t			active_speed;
 	uint8_t			phys_state;
+	enum ibv_port_link_type link_type;
 };
 
 enum ibv_event_type {
diff --git a/src/cmd.c b/src/cmd.c
index 66d7134..9007bd3 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -162,6 +162,7 @@ int ibv_cmd_query_device(struct ibv_context *context,
 	return 0;
 }
 
+#include <stdio.h>
 int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 		       struct ibv_port_attr *port_attr,
 		       struct ibv_query_port *cmd, size_t cmd_size)
@@ -196,6 +197,7 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 	port_attr->active_width    = resp.active_width;
 	port_attr->active_speed    = resp.active_speed;
 	port_attr->phys_state      = resp.phys_state;
+	port_attr->link_type       = resp.link_type;
 
 	return 0;
 }
@@ -1122,3 +1124,21 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
 
 	return 0;
 }
+
+/* Ask the kernel for the MAC address behind @gid on @port; returns 0 or errno. */
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac)
+{
+	struct ibv_get_mac cmd;
+	struct ibv_get_mac_resp resp;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, GET_MAC, &resp, sizeof resp);
+	memcpy(cmd.dgid, gid, sizeof cmd.dgid);
+	cmd.pd_handle = pd->handle;
+	cmd.port = port;
+	memset(cmd.reserved, 0, sizeof cmd.reserved);	/* sent as-is: no stack garbage */
+
+	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+	memcpy(mac, resp.mac, sizeof resp.mac);	/* mac must hold 6 bytes */
+	return 0;
+}
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 1827da0..1688e73 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -64,6 +64,7 @@ IBVERBS_1.0 {
 		ibv_cmd_destroy_ah;
 		ibv_cmd_attach_mcast;
 		ibv_cmd_detach_mcast;
+		ibv_cmd_get_mac;
 		ibv_copy_qp_attr_from_kern;
 		ibv_copy_path_rec_from_kern;
 		ibv_copy_path_rec_to_kern;
-- 
1.6.3.1




More information about the ewg mailing list