[ewg] Re: [PATCH] rdmaoe/libibverbs: handle binary compatibility

Eli Cohen eli at dev.mellanox.co.il
Thu Dec 10 13:14:55 PST 2009


On Thu, Dec 10, 2009 at 10:33:53AM -0700, Jason Gunthorpe wrote:
> Could you prepare this based on Roland's tree? This patch won't apply.
> 

I quote two patches, one for libibverbs based on 74638ac, and the
other for libmlx4 based on 444f634. I changed the padding handling as
you requested for libibverbs. You also need to apply a patch to the
kernel driver to match the new values for link_layer. I put it here
too.

libibverbs:


diff --git a/examples/devinfo.c b/examples/devinfo.c
index 84f95c7..393ec04 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -184,6 +184,19 @@ static int print_all_port_gids(struct ibv_context *ctx, uint8_t port_num, int tb
 	return rc;
 }
 
+static const char *link_layer_str(uint8_t link_layer)
+{
+	switch (link_layer) {
+	case IBV_LINK_LAYER_UNSPECIFIED:
+	case IBV_LINK_LAYER_INFINIBAND:
+		return "IB";
+	case IBV_LINK_LAYER_ETHERNET:
+		return "Ethernet";
+	default:
+		return "Unknown";
+	}
+}
+
 static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 {
 	struct ibv_context *ctx;
@@ -284,6 +297,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 		printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
 		printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
 		printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
+		printf("\t\t\tlink_layer:\t\t%s\n", link_layer_str(port_attr.link_layer));
 
 		if (verbose) {
 			printf("\t\t\tmax_msg_sz:\t\t0x%x\n", port_attr.max_msg_sz);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index b916f59..d4a46e4 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -31,6 +31,8 @@
  */
 
 #include "pingpong.h"
+#include <arpa/inet.h>
+#include <stdlib.h>
 
 enum ibv_mtu pp_mtu_to_enum(int mtu)
 {
@@ -53,3 +55,10 @@ uint16_t pp_get_local_lid(struct ibv_context *context, int port)
 
 	return attr.lid;
 }
+
+int pp_get_port_info(struct ibv_context *context, int port,
+		     struct ibv_port_attr *attr)
+{
+	return ibv_query_port(context, port, attr);
+}
+
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 71d7c3f..16d3466 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -37,5 +37,7 @@
 
 enum ibv_mtu pp_mtu_to_enum(int mtu);
 uint16_t pp_get_local_lid(struct ibv_context *context, int port);
+int pp_get_port_info(struct ibv_context *context, int port,
+		     struct ibv_port_attr *attr);
 
 #endif /* IBV_PINGPONG_H */
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index fa969e0..4d0bd0d 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -67,6 +67,8 @@ struct pingpong_context {
 	int			 size;
 	int			 rx_depth;
 	int			 pending;
+	struct ibv_port_attr     portinfo;
+	union ibv_gid		 dgid;
 };
 
 struct pingpong_dest {
@@ -94,6 +96,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
 			.port_num	= port
 		}
 	};
+
+	if (ctx->dgid.global.interface_id) {
+		attr.ah_attr.is_global = 1;
+		attr.ah_attr.grh.hop_limit = 1;
+		attr.ah_attr.grh.dgid = ctx->dgid;
+	}
 	if (ibv_modify_qp(ctx->qp, &attr,
 			  IBV_QP_STATE              |
 			  IBV_QP_AV                 |
@@ -289,11 +297,11 @@ out:
 
 static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 					    int rx_depth, int port,
-					    int use_event)
+					    int use_event, int is_server)
 {
 	struct pingpong_context *ctx;
 
-	ctx = malloc(sizeof *ctx);
+	ctx = calloc(1, sizeof *ctx);
 	if (!ctx)
 		return NULL;
 
@@ -306,7 +314,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 		return NULL;
 	}
 
-	memset(ctx->buf, 0, size);
+	memset(ctx->buf, 0x7b + is_server, size);
 
 	ctx->context = ibv_open_device(ib_dev);
 	if (!ctx->context) {
@@ -481,6 +489,7 @@ static void usage(const char *argv0)
 	printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
 	printf("  -l, --sl=<sl>          service level value\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -504,6 +513,7 @@ int main(int argc, char *argv[])
 	int                      rcnt, scnt;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char			*grh = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -520,10 +530,11 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -576,6 +587,10 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+			grh = strdupa(optarg);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -615,7 +630,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
+	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event, !servername);
 	if (!ctx)
 		return 1;
 
@@ -631,17 +646,31 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 
-	my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
-	my_dest.qpn = ctx->qp->qp_num;
-	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Couldn't get local LID\n");
+
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
 		return 1;
 	}
 
+	my_dest.lid = ctx->portinfo.lid;
+	if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+		if (!grh) {
+			fprintf(stderr, "Must supply remote gid\n");
+			return 1;
+		}
+		inet_pton(AF_INET6, grh, &ctx->dgid);
+	} else {
+		if (!my_dest.lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+	}
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
 	       my_dest.lid, my_dest.qpn, my_dest.psn);
 
+
 	if (servername)
 		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
 	else
@@ -706,6 +735,7 @@ int main(int argc, char *argv[])
 					fprintf(stderr, "poll CQ failed %d\n", ne);
 					return 1;
 				}
+
 			} while (!use_event && ne < 1);
 
 			for (i = 0; i < ne; ++i) {
diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c
index 1e36c57..eda9013 100644
--- a/examples/srq_pingpong.c
+++ b/examples/srq_pingpong.c
@@ -71,6 +71,8 @@ struct pingpong_context {
 	int			 num_qp;
 	int			 rx_depth;
 	int			 pending[MAX_QP];
+	struct ibv_port_attr	 portinfo;
+	union ibv_gid		 dgid;
 };
 
 struct pingpong_dest {
@@ -101,6 +103,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu m
 				.port_num	= port
 			}
 		};
+
+		if (ctx->dgid.global.interface_id) {
+			attr.ah_attr.is_global = 1;
+			attr.ah_attr.grh.hop_limit = 1;
+			attr.ah_attr.grh.dgid = ctx->dgid;
+		}
 		if (ibv_modify_qp(ctx->qp[i], &attr,
 				  IBV_QP_STATE              |
 				  IBV_QP_AV                 |
@@ -327,7 +335,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 	struct pingpong_context *ctx;
 	int i;
 
-	ctx = malloc(sizeof *ctx);
+	ctx = calloc(1, sizeof *ctx);
 	if (!ctx)
 		return NULL;
 
@@ -551,6 +559,7 @@ static void usage(const char *argv0)
 	printf("  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n");
 	printf("  -l, --sl=<sl>          service level value\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -578,6 +587,7 @@ int main(int argc, char *argv[])
 	int                      i;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char			*grh = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -595,10 +605,11 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:e", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -655,6 +666,10 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+			grh = strdupa(optarg);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -722,13 +737,25 @@ int main(int argc, char *argv[])
 
 	memset(my_dest, 0, sizeof my_dest);
 
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
+		return 1;
+	}
 	for (i = 0; i < num_qp; ++i) {
 		my_dest[i].qpn = ctx->qp[i]->qp_num;
 		my_dest[i].psn = lrand48() & 0xffffff;
-		my_dest[i].lid = pp_get_local_lid(ctx->context, ib_port);
-		if (!my_dest[i].lid) {
-			fprintf(stderr, "Couldn't get local LID\n");
-			return 1;
+		if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+			if (!grh) {
+				fprintf(stderr, "Must supply remote gid\n");
+				return 1;
+			}
+			inet_pton(AF_INET6, grh, &ctx->dgid);
+		} else {
+			my_dest[i].lid = ctx->portinfo.lid;
+			if (!my_dest[i].lid) {
+				fprintf(stderr, "Couldn't get local LID\n");
+				return 1;
+			}
 		}
 
 		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c
index 6f31247..2bc7da5 100644
--- a/examples/uc_pingpong.c
+++ b/examples/uc_pingpong.c
@@ -67,6 +67,8 @@ struct pingpong_context {
 	int			 size;
 	int			 rx_depth;
 	int			 pending;
+	struct ibv_port_attr	 portinfo;
+	union ibv_gid		 dgid;
 };
 
 struct pingpong_dest {
@@ -92,6 +94,13 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
 			.port_num	= port
 		}
 	};
+
+	if (ctx->dgid.global.interface_id) {
+		attr.ah_attr.is_global = 1;
+		attr.ah_attr.grh.hop_limit = 1;
+		attr.ah_attr.grh.dgid = ctx->dgid;
+	}
+
 	if (ibv_modify_qp(ctx->qp, &attr,
 			  IBV_QP_STATE              |
 			  IBV_QP_AV                 |
@@ -281,7 +290,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
 {
 	struct pingpong_context *ctx;
 
-	ctx = malloc(sizeof *ctx);
+	ctx = calloc(1, sizeof *ctx);
 	if (!ctx)
 		return NULL;
 
@@ -469,6 +478,7 @@ static void usage(const char *argv0)
 	printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
 	printf("  -l, --sl=<sl>          service level value\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid=<remote gid> gid of the other port\n");
 }
 
 int main(int argc, char *argv[])
@@ -492,6 +502,7 @@ int main(int argc, char *argv[])
 	int                      rcnt, scnt;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char			*grh = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -508,10 +519,11 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -564,6 +576,10 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+			grh = strdupa(optarg);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -619,14 +635,27 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 
-	my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
-	my_dest.qpn = ctx->qp->qp_num;
-	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Couldn't get local LID\n");
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
 		return 1;
 	}
 
+	my_dest.lid = ctx->portinfo.lid;
+	if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+		if (!grh) {
+			fprintf(stderr, "Must supply remote gid\n");
+			return 1;
+		}
+		inet_pton(AF_INET6, grh, &ctx->dgid);
+	} else {
+		if (!my_dest.lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+	}
+	my_dest.qpn = ctx->qp->qp_num;
+	my_dest.psn = lrand48() & 0xffffff;
+
 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
 	       my_dest.lid, my_dest.qpn, my_dest.psn);
 
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 6f10212..e30d6d6 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -68,6 +68,8 @@ struct pingpong_context {
 	int			 size;
 	int			 rx_depth;
 	int			 pending;
+	struct ibv_port_attr     portinfo;
+	union ibv_gid            dgid;
 };
 
 struct pingpong_dest {
@@ -105,6 +107,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
 		return 1;
 	}
 
+	if (ctx->dgid.global.interface_id) {
+		ah_attr.is_global = 1;
+		ah_attr.grh.hop_limit = 1;
+		ah_attr.grh.dgid = ctx->dgid;
+	}
+
 	ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
 	if (!ctx->ah) {
 		fprintf(stderr, "Failed to create AH\n");
@@ -478,6 +486,7 @@ static void usage(const char *argv0)
 	printf("  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n");
 	printf("  -n, --iters=<iters>    number of exchanges (default 1000)\n");
 	printf("  -e, --events           sleep on CQ events (default poll)\n");
+	printf("  -g, --gid              specify remote gid\n");
 }
 
 int main(int argc, char *argv[])
@@ -500,6 +509,7 @@ int main(int argc, char *argv[])
 	int                      rcnt, scnt;
 	int                      num_cq_events = 0;
 	int                      sl = 0;
+	char 			*gid = NULL;
 
 	srand48(getpid() * time(NULL));
 
@@ -515,10 +525,11 @@ int main(int argc, char *argv[])
 			{ .name = "iters",    .has_arg = 1, .val = 'n' },
 			{ .name = "sl",       .has_arg = 1, .val = 'l' },
 			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ .name = "gid",      .has_arg = 1, .val = 'g' },
 			{ 0 }
 		};
 
-		c = getopt_long(argc, argv, "p:d:i:s:r:n:l:e", long_options, NULL);
+		c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:", long_options, NULL);
 		if (c == -1)
 			break;
 
@@ -563,6 +574,10 @@ int main(int argc, char *argv[])
 			++use_event;
 			break;
 
+		case 'g':
+                        gid = strdupa(optarg);
+			break;
+
 		default:
 			usage(argv[0]);
 			return 1;
@@ -618,12 +633,25 @@ int main(int argc, char *argv[])
 			return 1;
 		}
 
-	my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
+	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+		fprintf(stderr, "Couldn't get port info\n");
+		return 1;
+	}
+	my_dest.lid = ctx->portinfo.lid;
+
 	my_dest.qpn = ctx->qp->qp_num;
 	my_dest.psn = lrand48() & 0xffffff;
-	if (!my_dest.lid) {
-		fprintf(stderr, "Couldn't get local LID\n");
-		return 1;
+	if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) {
+		if (!my_dest.lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+	} else {
+		if (!gid) {
+			fprintf(stderr, "must specify remote GID\n");
+			return 1;
+		}
+		inet_pton(AF_INET6, gid, &ctx->dgid);
 	}
 
 	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index 9a81416..8d7c2c6 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -131,6 +131,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
 int ibv_cmd_destroy_ah(struct ibv_ah *ah);
 int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
 int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid);
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac);
 
 int ibv_dontfork_range(void *base, size_t size);
 int ibv_dofork_range(void *base, size_t size);
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..8ef8844 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -46,7 +46,7 @@
  * The minimum and maximum kernel ABI that we can handle.
  */
 #define IB_USER_VERBS_MIN_ABI_VERSION	1
-#define IB_USER_VERBS_MAX_ABI_VERSION	6
+#define IB_USER_VERBS_MAX_ABI_VERSION	7
 
 enum {
 	IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,7 +85,8 @@ enum {
 	IB_USER_VERBS_CMD_MODIFY_SRQ,
 	IB_USER_VERBS_CMD_QUERY_SRQ,
 	IB_USER_VERBS_CMD_DESTROY_SRQ,
-	IB_USER_VERBS_CMD_POST_SRQ_RECV
+	IB_USER_VERBS_CMD_POST_SRQ_RECV,
+	IB_USER_VERBS_CMD_GET_MAC
 };
 
 /*
@@ -223,7 +224,8 @@ struct ibv_query_port_resp {
 	__u8  active_width;
 	__u8  active_speed;
 	__u8  phys_state;
-	__u8  reserved[3];
+	__u8  link_layer;
+	__u8  reserved[2];
 };
 
 struct ibv_alloc_pd {
@@ -798,6 +800,7 @@ enum {
 	IB_USER_VERBS_CMD_QUERY_SRQ_V2,
 	IB_USER_VERBS_CMD_DESTROY_SRQ_V2,
 	IB_USER_VERBS_CMD_POST_SRQ_RECV_V2,
+	IB_USER_VERBS_CMD_GET_MAC_V2 = -1,
 	/*
 	 * Set commands that didn't exist to -1 so our compile-time
 	 * trick opcodes in IBV_INIT_CMD() doesn't break.
@@ -878,4 +881,20 @@ struct ibv_create_srq_resp_v5 {
 	__u32 srq_handle;
 };
 
+struct ibv_get_mac {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u32 pd_handle;
+	__u8  port;
+	__u8  reserved[3];
+	__u8  dgid[16];
+};
+
+struct ibv_get_mac_resp {
+	__u8	mac[6];
+	__u16	reserved;
+};
+
 #endif /* KERN_ABI_H */
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index 0f1cb2e..ecdbc6f 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -38,6 +38,7 @@
 
 #include <stdint.h>
 #include <pthread.h>
+#include <string.h>
 
 #ifdef __cplusplus
 #  define BEGIN_C_DECLS extern "C" {
@@ -61,6 +62,7 @@ union ibv_gid {
 		uint64_t	subnet_prefix;
 		uint64_t	interface_id;
 	} global;
+	uint32_t		dwords[4];
 };
 
 enum ibv_node_type {
@@ -161,6 +163,12 @@ enum ibv_port_state {
 	IBV_PORT_ACTIVE_DEFER	= 5
 };
 
+enum {
+	IBV_LINK_LAYER_UNSPECIFIED,
+	IBV_LINK_LAYER_INFINIBAND,
+	IBV_LINK_LAYER_ETHERNET,
+};
+
 struct ibv_port_attr {
 	enum ibv_port_state	state;
 	enum ibv_mtu		max_mtu;
@@ -181,6 +189,8 @@ struct ibv_port_attr {
 	uint8_t			active_width;
 	uint8_t			active_speed;
 	uint8_t			phys_state;
+	uint8_t			link_layer;
+	uint8_t			pad;
 };
 
 enum ibv_event_type {
@@ -693,6 +703,16 @@ struct ibv_context {
 	void		       *abi_compat;
 };
 
+static inline int ___ibv_query_port(struct ibv_context *context,
+				    uint8_t port_num,
+				    struct ibv_port_attr *port_attr)
+{
+	port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED;
+	port_attr->pad = 0;
+
+	return context->ops.query_port(context, port_num, port_attr);
+}
+
 /**
  * ibv_get_device_list - Get list of IB devices currently available
  * @num_devices: optional.  if non-NULL, set to the number of devices
@@ -1097,4 +1117,7 @@ END_C_DECLS
 
 #  undef __attribute_const
 
+#define ibv_query_port(context, port_num, port_attr) \
+	___ibv_query_port(context, port_num, port_attr)
+
 #endif /* INFINIBAND_VERBS_H */
diff --git a/src/cmd.c b/src/cmd.c
index cbd5288..5183d59 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -162,6 +162,7 @@ int ibv_cmd_query_device(struct ibv_context *context,
 	return 0;
 }
 
+#include <stdio.h>
 int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 		       struct ibv_port_attr *port_attr,
 		       struct ibv_query_port *cmd, size_t cmd_size)
@@ -196,6 +197,7 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
 	port_attr->active_width    = resp.active_width;
 	port_attr->active_speed    = resp.active_speed;
 	port_attr->phys_state      = resp.phys_state;
+	port_attr->link_layer      = resp.link_layer;
 
 	return 0;
 }
@@ -1122,3 +1124,22 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t l
 
 	return 0;
 }
+
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac)
+{
+	struct ibv_get_mac cmd;
+	struct ibv_get_mac_resp resp;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, GET_MAC, &resp, sizeof resp);
+	memcpy(cmd.dgid, gid, sizeof cmd.dgid);
+	cmd.pd_handle = pd->handle;
+	cmd.port = port;
+
+	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	memcpy(mac, resp.mac, 6);
+
+	return 0;
+}
+
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 1827da0..1688e73 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -64,6 +64,7 @@ IBVERBS_1.0 {
 		ibv_cmd_destroy_ah;
 		ibv_cmd_attach_mcast;
 		ibv_cmd_detach_mcast;
+		ibv_cmd_get_mac;
 		ibv_copy_qp_attr_from_kern;
 		ibv_copy_path_rec_from_kern;
 		ibv_copy_path_rec_to_kern;
diff --git a/src/verbs.c b/src/verbs.c
index ba3c0a4..2b175b6 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -86,6 +86,7 @@ default_symver(__ibv_query_device, ibv_query_device);
 int __ibv_query_port(struct ibv_context *context, uint8_t port_num,
 		     struct ibv_port_attr *port_attr)
 {
+	port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED;
 	return context->ops.query_port(context, port_num, port_attr);
 }
 default_symver(__ibv_query_port, ibv_query_port);



libmlx4:


diff --git a/src/mlx4.h b/src/mlx4.h
index 4445998..661255b 100644
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -236,11 +236,14 @@ struct mlx4_av {
 	uint8_t				hop_limit;
 	uint32_t			sl_tclass_flowlabel;
 	uint8_t				dgid[16];
+	uint8_t				mac[8];
 };
 
 struct mlx4_ah {
 	struct ibv_ah			ibv_ah;
 	struct mlx4_av			av;
+	uint16_t			vlan;
+	uint8_t				mac[6];
 };
 
 static inline unsigned long align(unsigned long val, unsigned long align)
diff --git a/src/qp.c b/src/qp.c
index d194ae3..cd8fab0 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -143,6 +143,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
 	dseg->dqpn = htonl(wr->wr.ud.remote_qpn);
 	dseg->qkey = htonl(wr->wr.ud.remote_qkey);
+	dseg->vlan = htons(to_mah(wr->wr.ud.ah)->vlan);
+	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6);
 }
 
 static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
diff --git a/src/verbs.c b/src/verbs.c
index 1ac1362..667ef68 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -614,9 +614,21 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
 	return 0;
 }
 
+static int mcast_mac(uint8_t *mac)
+{
+	int i;
+	uint8_t val = 0xff;
+
+	for (i = 0; i < 6; ++i)
+		val &= mac[i];
+
+	return val == 0xff;
+}
+
 struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
 {
 	struct mlx4_ah *ah;
+	struct ibv_port_attr port_attr;
 
 	ah = malloc(sizeof *ah);
 	if (!ah)
@@ -642,7 +654,24 @@ struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
 		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
 	}
 
+	if (ibv_query_port(pd->context, attr->port_num, &port_attr))
+		goto err;
+
+	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
+		if (ibv_cmd_get_mac(pd, attr->port_num, ah->av.dgid, ah->mac))
+			goto err;
+
+		ah->vlan = 0;
+		if (mcast_mac(ah->mac))
+			ah->av.dlid = htons(0xc000);
+
+	}
+
+
 	return &ah->ibv_ah;
+err:
+	free(ah);
+	return NULL;
 }
 
 int mlx4_destroy_ah(struct ibv_ah *ah)
diff --git a/src/wqe.h b/src/wqe.h
index 6f7f309..bbd22ba 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -78,7 +78,8 @@ struct mlx4_wqe_datagram_seg {
 	uint32_t		av[8];
 	uint32_t		dqpn;
 	uint32_t		qkey;
-	uint32_t		reserved[2];
+	uint16_t		vlan;
+	uint8_t			mac[6];
 };
 
 struct mlx4_wqe_data_seg {


kernel driver:

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 012aadf..d592bd2 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -452,7 +452,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 	resp.active_width    = attr.active_width;
 	resp.active_speed    = attr.active_speed;
 	resp.phys_state      = attr.phys_state;
-	resp.transport	     = attr.transport;
+	resp.transport	     = attr.transport == RDMA_TRANSPORT_RDMAOE ?
+		IB_LINK_LAYER_ETHERNET : IB_LINK_LAYER_INFINIBAND;
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bf6e860..57653b7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -73,6 +73,12 @@ enum rdma_transport_type {
 	RDMA_TRANSPORT_RDMAOE
 };
 
+enum {
+	IV_LINK_LAYER_UNSPECIFIED,
+	IB_LINK_LAYER_INFINIBAND,
+	IB_LINK_LAYER_ETHERNET,
+};
+
 enum ib_device_cap_flags {
 	IB_DEVICE_RESIZE_MAX_WR		= 1,
 	IB_DEVICE_BAD_PKEY_CNTR		= (1<<1),
-- 
1.6.5.5




More information about the ewg mailing list