[ofa-general] [PATCHv4] libibverbs: Add RDMAoE support
Eli Cohen
eli at mellanox.co.il
Wed Aug 5 01:34:22 PDT 2009
Extend the ibv_query_port() verb to return the port's transport protocol, which
is one of RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP or RDMA_TRANSPORT_RDMAOE.
Applications can use this to determine whether they must use a GRH, as is the
case with RDMAoE. Add a new user verbs command, IB_USER_VERBS_CMD_GET_MAC
(wrapped by ibv_cmd_get_mac()), to get the MAC address of the remote port that
a UD address vector refers to. Update ibv_rc_pingpong and ibv_ud_pingpong to
accept a remote GID so that they can be used with an RDMAoE port.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
Changed the reference to a port from link type to protocol type. This
patch is tagged v4 to create correspondence with the kernel patches.
examples/devinfo.c | 15 ++++++++++++
examples/pingpong.c | 9 +++++++
examples/pingpong.h | 2 +
examples/rc_pingpong.c | 50 ++++++++++++++++++++++++++++++++--------
examples/ud_pingpong.c | 38 +++++++++++++++++++++++++++----
include/infiniband/driver.h | 1 +
include/infiniband/kern-abi.h | 25 ++++++++++++++++++--
include/infiniband/verbs.h | 12 +++++++++
src/cmd.c | 20 ++++++++++++++++
src/libibverbs.map | 1 +
10 files changed, 155 insertions(+), 18 deletions(-)
diff --git a/examples/devinfo.c b/examples/devinfo.c
index caa5d5f..a42a6dc 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -175,6 +175,20 @@ static int print_all_port_gids(struct ibv_context *ctx, uint8_t port_num, int tb
return rc;
}
+/* Printable name of a port transport protocol; "Unknown" if unrecognised. */
+static const char *transport_type_str(enum rdma_transport_type type)
+{
+	const char *name = "Unknown";
+
+	if (type == RDMA_TRANSPORT_IB)
+		name = "IB";
+	else if (type == RDMA_TRANSPORT_IWARP)
+		name = "IWARP";
+	else if (type == RDMA_TRANSPORT_RDMAOE)
+		name = "RDMAOE";
+	return name;
+}
+
static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
{
struct ibv_context *ctx;
@@ -273,6 +287,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid);
printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid);
printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc);
+ printf("\t\t\ttransport_type:\t\t%s\n", transport_type_str(port_attr.transport));
if (verbose) {
printf("\t\t\tmax_msg_sz:\t\t0x%x\n", port_attr.max_msg_sz);
diff --git a/examples/pingpong.c b/examples/pingpong.c
index b916f59..d4a46e4 100644
--- a/examples/pingpong.c
+++ b/examples/pingpong.c
@@ -31,6 +31,8 @@
*/
#include "pingpong.h"
+#include <arpa/inet.h>
+#include <stdlib.h>
enum ibv_mtu pp_mtu_to_enum(int mtu)
{
@@ -53,3 +55,10 @@ uint16_t pp_get_local_lid(struct ibv_context *context, int port)
return attr.lid;
}
+
+/* Query the attributes of @port into @attr; non-zero return means failure. */
+int pp_get_port_info(struct ibv_context *context, int port,
+		     struct ibv_port_attr *attr)
+{
+	return ibv_query_port(context, port, attr);
+}
diff --git a/examples/pingpong.h b/examples/pingpong.h
index 71d7c3f..16d3466 100644
--- a/examples/pingpong.h
+++ b/examples/pingpong.h
@@ -37,5 +37,7 @@
enum ibv_mtu pp_mtu_to_enum(int mtu);
uint16_t pp_get_local_lid(struct ibv_context *context, int port);
+int pp_get_port_info(struct ibv_context *context, int port,
+ struct ibv_port_attr *attr);
#endif /* IBV_PINGPONG_H */
diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c
index 26fa45c..4250cdf 100644
--- a/examples/rc_pingpong.c
+++ b/examples/rc_pingpong.c
@@ -67,6 +67,8 @@ struct pingpong_context {
int size;
int rx_depth;
int pending;
+ struct ibv_port_attr portinfo;
+ union ibv_gid dgid;
};
struct pingpong_dest {
@@ -94,6 +96,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
.port_num = port
}
};
+
+ if (ctx->dgid.global.interface_id) {
+ attr.ah_attr.is_global = 1;
+ attr.ah_attr.grh.hop_limit = 1;
+ attr.ah_attr.grh.dgid = ctx->dgid;
+ }
if (ibv_modify_qp(ctx->qp, &attr,
IBV_QP_STATE |
IBV_QP_AV |
@@ -289,11 +297,11 @@ out:
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
int rx_depth, int port,
- int use_event)
+ int use_event, int is_server)
{
struct pingpong_context *ctx;
- ctx = malloc(sizeof *ctx);
+ ctx = calloc(1, sizeof *ctx);
if (!ctx)
return NULL;
@@ -306,7 +314,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
return NULL;
}
- memset(ctx->buf, 0, size);
+ memset(ctx->buf, 0x7b + is_server, size);
ctx->context = ibv_open_device(ib_dev);
if (!ctx->context) {
@@ -481,6 +489,7 @@ static void usage(const char *argv0)
printf(" -n, --iters=<iters> number of exchanges (default 1000)\n");
printf(" -l, --sl=<sl> service level value\n");
printf(" -e, --events sleep on CQ events (default poll)\n");
+ printf(" -g, --gid=<remote gid> gid of the other port\n");
}
int main(int argc, char *argv[])
@@ -504,6 +513,7 @@ int main(int argc, char *argv[])
int rcnt, scnt;
int num_cq_events = 0;
int sl = 0;
+ char *grh = NULL;
srand48(getpid() * time(NULL));
@@ -520,10 +530,11 @@ int main(int argc, char *argv[])
{ .name = "iters", .has_arg = 1, .val = 'n' },
{ .name = "sl", .has_arg = 1, .val = 'l' },
{ .name = "events", .has_arg = 0, .val = 'e' },
+ { .name = "gid", .has_arg = 1, .val = 'g' },
{ 0 }
};
- c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, NULL);
+ c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL);
if (c == -1)
break;
@@ -575,6 +586,10 @@ int main(int argc, char *argv[])
++use_event;
break;
+ case 'g':
+ grh = strdupa(optarg);
+ break;
+
default:
usage(argv[0]);
return 1;
@@ -614,7 +629,7 @@ int main(int argc, char *argv[])
}
}
- ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
+ ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event, !servername);
if (!ctx)
return 1;
@@ -630,17 +645,31 @@ int main(int argc, char *argv[])
return 1;
}
- my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
- my_dest.qpn = ctx->qp->qp_num;
- my_dest.psn = lrand48() & 0xffffff;
- if (!my_dest.lid) {
- fprintf(stderr, "Couldn't get local LID\n");
+
+ if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+ fprintf(stderr, "Couldn't get port info\n");
return 1;
}
+ my_dest.lid = ctx->portinfo.lid;
+	if (ctx->portinfo.transport == RDMA_TRANSPORT_RDMAOE) {
+		/*
+		 * RDMAoE requires a GRH: -g must give a parsable remote GID.
+		 */
+		if (!grh || inet_pton(AF_INET6, grh, &ctx->dgid) != 1) {
+			fprintf(stderr, "must specify a valid remote GID\n");
+			return 1;
+		}
+	} else if (!my_dest.lid) {
+		fprintf(stderr, "Couldn't get local LID\n");
+		return 1;
+	}
+ my_dest.qpn = ctx->qp->qp_num;
+ my_dest.psn = lrand48() & 0xffffff;
printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
my_dest.lid, my_dest.qpn, my_dest.psn);
+
if (servername)
rem_dest = pp_client_exch_dest(servername, port, &my_dest);
else
@@ -705,6 +734,7 @@ int main(int argc, char *argv[])
fprintf(stderr, "poll CQ failed %d\n", ne);
return 1;
}
+
} while (!use_event && ne < 1);
for (i = 0; i < ne; ++i) {
diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c
index 8f3d50b..b3aa55d 100644
--- a/examples/ud_pingpong.c
+++ b/examples/ud_pingpong.c
@@ -68,6 +68,8 @@ struct pingpong_context {
int size;
int rx_depth;
int pending;
+ struct ibv_port_attr portinfo;
+ union ibv_gid dgid;
};
struct pingpong_dest {
@@ -105,6 +107,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
return 1;
}
+ if (ctx->dgid.global.interface_id) {
+ ah_attr.is_global = 1;
+ ah_attr.grh.hop_limit = 1;
+ ah_attr.grh.dgid = ctx->dgid;
+ }
+
ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
if (!ctx->ah) {
fprintf(stderr, "Failed to create AH\n");
@@ -478,6 +486,7 @@ static void usage(const char *argv0)
printf(" -r, --rx-depth=<dep> number of receives to post at a time (default 500)\n");
printf(" -n, --iters=<iters> number of exchanges (default 1000)\n");
printf(" -e, --events sleep on CQ events (default poll)\n");
+ printf(" -g, --gid specify remote gid\n");
}
int main(int argc, char *argv[])
@@ -500,6 +509,7 @@ int main(int argc, char *argv[])
int rcnt, scnt;
int num_cq_events = 0;
int sl = 0;
+ char *gid = NULL;
srand48(getpid() * time(NULL));
@@ -515,10 +525,11 @@ int main(int argc, char *argv[])
{ .name = "iters", .has_arg = 1, .val = 'n' },
{ .name = "sl", .has_arg = 1, .val = 'l' },
{ .name = "events", .has_arg = 0, .val = 'e' },
+ { .name = "gid", .has_arg = 1, .val = 'g' },
{ 0 }
};
- c = getopt_long(argc, argv, "p:d:i:s:r:n:l:e", long_options, NULL);
+ c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:", long_options, NULL);
if (c == -1)
break;
@@ -563,6 +574,10 @@ int main(int argc, char *argv[])
++use_event;
break;
+ case 'g':
+ gid = strdupa(optarg);
+ break;
+
default:
usage(argv[0]);
return 1;
@@ -618,12 +633,25 @@ int main(int argc, char *argv[])
return 1;
}
- my_dest.lid = pp_get_local_lid(ctx->context, ib_port);
+ if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
+ fprintf(stderr, "Couldn't get port info\n");
+ return 1;
+ }
+ my_dest.lid = ctx->portinfo.lid;
+
my_dest.qpn = ctx->qp->qp_num;
my_dest.psn = lrand48() & 0xffffff;
- if (!my_dest.lid) {
- fprintf(stderr, "Couldn't get local LID\n");
- return 1;
+	if (ctx->portinfo.transport == RDMA_TRANSPORT_IB) {
+		if (!my_dest.lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+	} else if (!gid || inet_pton(AF_INET6, gid, &ctx->dgid) != 1) {
+		/*
+		 * Off IB the remote GID (-g) is required and must parse.
+		 */
+		fprintf(stderr, "must specify a valid remote GID\n");
+		return 1;
}
printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h
index 67a3bf8..cbd261f 100644
--- a/include/infiniband/driver.h
+++ b/include/infiniband/driver.h
@@ -131,6 +131,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
int ibv_cmd_destroy_ah(struct ibv_ah *ah);
int ibv_cmd_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid);
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac);
int ibv_dontfork_range(void *base, size_t size);
int ibv_dofork_range(void *base, size_t size);
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index 0db083a..7823da8 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -46,7 +46,7 @@
* The minimum and maximum kernel ABI that we can handle.
*/
#define IB_USER_VERBS_MIN_ABI_VERSION 1
-#define IB_USER_VERBS_MAX_ABI_VERSION 6
+#define IB_USER_VERBS_MAX_ABI_VERSION 7
enum {
IB_USER_VERBS_CMD_GET_CONTEXT,
@@ -85,7 +85,8 @@ enum {
IB_USER_VERBS_CMD_MODIFY_SRQ,
IB_USER_VERBS_CMD_QUERY_SRQ,
IB_USER_VERBS_CMD_DESTROY_SRQ,
- IB_USER_VERBS_CMD_POST_SRQ_RECV
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ IB_USER_VERBS_CMD_GET_MAC,
};
/*
@@ -223,7 +224,8 @@ struct ibv_query_port_resp {
__u8 active_width;
__u8 active_speed;
__u8 phys_state;
- __u8 reserved[3];
+ __u8 transport;
+ __u8 reserved[2];
};
struct ibv_alloc_pd {
@@ -798,6 +800,7 @@ enum {
IB_USER_VERBS_CMD_QUERY_SRQ_V2,
IB_USER_VERBS_CMD_DESTROY_SRQ_V2,
IB_USER_VERBS_CMD_POST_SRQ_RECV_V2,
+ IB_USER_VERBS_CMD_GET_MAC_V2 = -1,
/*
* Set commands that didn't exist to -1 so our compile-time
* trick opcodes in IBV_INIT_CMD() doesn't break.
@@ -878,4 +881,20 @@ struct ibv_create_srq_resp_v5 {
__u32 srq_handle;
};
+struct ibv_get_mac { /* command buffer that ibv_cmd_get_mac() writes to cmd_fd */
+ __u32 command; /* header fields; presumably filled by IBV_INIT_CMD_RESP() — TODO confirm */
+ __u16 in_words;
+ __u16 out_words;
+ __u64 response;
+ __u32 pd_handle; /* taken from pd->handle */
+ __u8 port; /* port number supplied by the caller */
+ __u8 reserved[3]; /* reserved bytes following port */
+ __u8 dgid[16]; /* 16 bytes copied from the caller's gid buffer */
+};
+
+struct ibv_get_mac_resp { /* response buffer paired with struct ibv_get_mac */
+ __u8 mac[6]; /* ibv_cmd_get_mac() copies these bytes to the caller's mac buffer */
+ __u16 reserved;
+};
+
#endif /* KERN_ABI_H */
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index a04cc62..f81f17f 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -61,6 +61,7 @@ union ibv_gid {
uint64_t subnet_prefix;
uint64_t interface_id;
} global;
+ uint32_t dwords[4];
};
enum ibv_node_type {
@@ -161,6 +162,16 @@ enum ibv_port_state {
IBV_PORT_ACTIVE_DEFER = 5
};
+enum rdma_transport_type {	/* port transport protocol (ibv_port_attr.transport) */
+	RDMA_TRANSPORT_IB     = 0,
+	RDMA_TRANSPORT_IWARP  = 1,
+	RDMA_TRANSPORT_RDMAOE = 2
+};
+enum ibv_port_link_type {	/* NOTE(review): not referenced elsewhere in this patch — confirm it is still wanted */
+	PORT_LINK_IB  = 0,
+	PORT_LINK_ETH = 1
+};
+
struct ibv_port_attr {
enum ibv_port_state state;
enum ibv_mtu max_mtu;
@@ -181,6 +192,7 @@ struct ibv_port_attr {
uint8_t active_width;
uint8_t active_speed;
uint8_t phys_state;
+ enum rdma_transport_type transport;
};
enum ibv_event_type {
diff --git a/src/cmd.c b/src/cmd.c
index 66d7134..30754ac 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -162,6 +162,7 @@ int ibv_cmd_query_device(struct ibv_context *context,
return 0;
}
+#include <stdio.h>
int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
struct ibv_port_attr *port_attr,
struct ibv_query_port *cmd, size_t cmd_size)
@@ -196,6 +197,7 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
port_attr->active_width = resp.active_width;
port_attr->active_speed = resp.active_speed;
port_attr->phys_state = resp.phys_state;
+ port_attr->transport = resp.transport;
return 0;
}
@@ -1122,3 +1124,21 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
return 0;
}
+
+/* Resolve @gid on @port to the remote port's 6-byte MAC; returns 0 or errno. */
+int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac)
+{
+	struct ibv_get_mac cmd;
+	struct ibv_get_mac_resp resp;
+
+	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, GET_MAC, &resp, sizeof resp);
+	cmd.pd_handle = pd->handle;
+	cmd.port = port;
+	/* reserved[] travels to the kernel too; don't send stack garbage. */
+	memset(cmd.reserved, 0, sizeof cmd.reserved);
+	memcpy(cmd.dgid, gid, sizeof cmd.dgid);
+	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+	memcpy(mac, resp.mac, sizeof resp.mac);
+	return 0;
+}
diff --git a/src/libibverbs.map b/src/libibverbs.map
index 1827da0..1688e73 100644
--- a/src/libibverbs.map
+++ b/src/libibverbs.map
@@ -64,6 +64,7 @@ IBVERBS_1.0 {
ibv_cmd_destroy_ah;
ibv_cmd_attach_mcast;
ibv_cmd_detach_mcast;
+ ibv_cmd_get_mac;
ibv_copy_qp_attr_from_kern;
ibv_copy_path_rec_from_kern;
ibv_copy_path_rec_to_kern;
--
1.6.3.3
More information about the general
mailing list