[ofw] patch 1/2 Add support for RDMAoEth to the low level driver
Tzachi Dar
tzachid at mellanox.co.il
Thu Jan 21 09:42:39 PST 2010
Hi James,
I'm currently working on an urgent issue, so I don't have time to look
at it right now.
In any case, please note that I'm looking for a solution that will allow
people to work in one of three modes:
eth only
ib only
LLE - which actually means that eth as well as IB will work (with IB
running over eth, though).
This is also the reason that I have commented out the code in
mlx4_register_mac.
Thanks
Tzachi
________________________________
From: James Yang [mailto:jyang at xsigo.com]
Sent: Tuesday, January 19, 2010 8:37 PM
To: Tzachi Dar; ofw at lists.openfabrics.org
Subject: RE: [ofw] patch 1/2 Add support for RDMAoEth to the low
level driver
The following patch is needed so that it will also work in IB mode.
The HCA PDO was created twice in the old patch, which caused driver
startup to fail.
By the way, it seems IPoIB didn't work well with the patch, at least on
win2008. Interface mismatch?
Thanks,
James
Index: drv.c
===================================================================
--- drv.c (revision 2617)
+++ drv.c (working copy)
@@ -95,7 +95,6 @@
#endif
-static
NTSTATUS
__create_child(
__in WDFDEVICE Device,
@@ -228,7 +227,7 @@
if ( p_fdo->children_created )
goto end;
-
+
// eventually we'll have all information about children in
Registry
// DriverEntry will read it into a Global storage and
// this routine will create all the children on base on this
info
@@ -244,6 +243,16 @@
break;
}
eth_created = TRUE;
+
+ //For now we it's either IB or ETH, and we always create
LLE if it's ETH
+ if((number_of_ib_ports > 0) && (mdev->caps.port_type[1] ==
MLX4_PORT_TYPE_IB) ) {
+ status = __create_child(Device, BUS_HARDWARE_IDS,
BUS_HARDWARE_DESCRIPTION, 0 );
+ if (!NT_SUCCESS(status)) {
+ MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV,
("__create_child (ib)failed with 0x%x\n", status));
+ break;
+ }
+ ib_created = TRUE;
+ }
} else {
if (eth_created){
//
@@ -869,6 +878,9 @@
goto err;
}
+ pdev->p_wdf_device = Device;
+ pdev->ib_hca_created = 0;
+
// start the card
status = __start_card(Device, p_fdo );
if( !NT_SUCCESS( status ) )
________________________________
From: ofw-bounces at lists.openfabrics.org
[mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Tzachi Dar
Sent: Wednesday, December 02, 2009 8:22 AM
To: ofw at lists.openfabrics.org
Subject: [ofw] patch 1/2 Add support for RDMAoEth to the low
level driver
Here are the changes in the low level driver:
Thanks
Tzachi
Index: hw/mlx4/kernel/bus/core/ud_header.c
===================================================================
--- hw/mlx4/kernel/bus/core/ud_header.c (revision 2617)
+++ hw/mlx4/kernel/bus/core/ud_header.c (working copy)
@@ -62,6 +62,15 @@
{ STRUCT_FIELD_INIT(lrh, source_lid, 1, 16, 16) }
};
+static const struct ib_field eth_table[] = {
+ { STRUCT_FIELD_INIT(eth, dmac_h, 0, 0, 32) },
+ { STRUCT_FIELD_INIT(eth, dmac_l, 1, 0, 16) },
+ { STRUCT_FIELD_INIT(eth, smac_h, 1, 16,16) },
+ { STRUCT_FIELD_INIT(eth, smac_l, 2, 0 ,32) },
+ { STRUCT_FIELD_INIT(eth, type, 3, 0, 16)}
+};
+
+
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD_INIT(grh, ip_version, 0, 0, 4) },
{ STRUCT_FIELD_INIT(grh, traffic_class, 0, 4, 8) },
@@ -279,3 +288,93 @@
return 0;
}
EXPORT_SYMBOL(ib_ud_header_unpack);
+
+/**
+ * ib_rdmaoe_ud_header_init - Initialize UD header structure
+ * @payload_bytes:Length of packet payload
+ * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @header:Structure to initialize
+ *
+ * ib_rdmaoe_ud_header_init() initializes the grh.ip_version,
grh.payload_length,
+ * grh.next_header, bth.opcode, bth.pad_count and
+ * bth.transport_header_version fields of a &struct
eth_ud_header given
+ * the payload length and whether a GRH will be included.
+ */
+void ib_rdmaoe_ud_header_init(int payload_bytes,
+ int grh_present,
+ struct eth_ud_header *header)
+{
+ int header_len;
+
+ memset(header, 0, sizeof *header);
+
+ header_len =
+ sizeof header->eth +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES;
+ if (grh_present)
+ header_len += IB_GRH_BYTES;
+
+ header->grh_present = grh_present;
+ if (grh_present) {
+ header->grh.ip_version = 6;
+ header->grh.payload_length =
+ cpu_to_be16((IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) & ~3); /* round up */
+ header->grh.next_header = 0x1b;
+ }
+
+ if (header->immediate_present)
+ header->bth.opcode =
IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ else
+ header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ header->bth.pad_count =(u8) ((4 -
payload_bytes) & 3);
+ header->bth.transport_header_version = 0;
+}
+
+
+
+/**
+ * rdmaoe_ud_header_pack - Pack UD header struct into eth wire
format
+ * @header:UD header struct
+ * @buf:Buffer to pack into
+ *
+ * ib_ud_header_pack() packs the UD header structure @header
into wire
+ * format in the buffer @buf.
+ */
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+ void *buf)
+{
+ int len = 0;
+
+ ib_pack(eth_table, ARRAY_SIZE(eth_table),
+ &header->eth, buf);
+ len += IB_ETH_BYTES;
+
+ if (header->grh_present) {
+ ib_pack(grh_table, ARRAY_SIZE(grh_table),
+ &header->grh, (u8*)buf + len);
+ len += IB_GRH_BYTES;
+ }
+
+ ib_pack(bth_table, ARRAY_SIZE(bth_table),
+ &header->bth, (u8*)buf + len);
+ len += IB_BTH_BYTES;
+
+ ib_pack(deth_table, ARRAY_SIZE(deth_table),
+ &header->deth, (u8*)buf + len);
+ len += IB_DETH_BYTES;
+
+ if (header->immediate_present) {
+ memcpy((u8*)buf + len, &header->immediate_data,
+ sizeof header->immediate_data);
+ len += sizeof header->immediate_data;
+ }
+
+ return len;
+}
+
+
Index: hw/mlx4/kernel/bus/core/verbs.c
===================================================================
--- hw/mlx4/kernel/bus/core/verbs.c (revision 2617)
+++ hw/mlx4/kernel/bus/core/verbs.c (working copy)
@@ -336,3 +336,28 @@
}
EXPORT_SYMBOL(ib_destroy_ah);
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ case RDMA_NODE_IB_SWITCH:
+ case RDMA_NODE_IB_ROUTER:
+ return RDMA_TRANSPORT_IB;
+ case RDMA_NODE_RNIC:
+ return RDMA_TRANSPORT_IWARP;
+ default:
+ ASSERT(FALSE);
+ return 0;
+ }
+}
+
+enum rdma_transport_type rdma_port_get_transport(struct
ib_device *device,
+ u8 port_num)
+{
+ return device->get_port_transport ?
+ device->get_port_transport(device, port_num) :
+ rdma_node_get_transport(device->node_type);
+}
+EXPORT_SYMBOL(rdma_port_get_transport);
+
Index: hw/mlx4/kernel/bus/drv/drv.c
===================================================================
--- hw/mlx4/kernel/bus/drv/drv.c (revision 2617)
+++ hw/mlx4/kernel/bus/drv/drv.c (working copy)
@@ -95,7 +95,6 @@
#endif
-static
NTSTATUS
__create_child(
__in WDFDEVICE Device,
@@ -228,13 +227,21 @@
if ( p_fdo->children_created )
goto end;
-
+
// eventually we'll have all information about children in
Registry
// DriverEntry will read it into a Global storage and
// this routine will create all the children on base on this
info
number_of_ib_ports = mlx4_count_ib_ports(mdev);
ASSERT(number_of_ib_ports >=0 && number_of_ib_ports <=2);
+ //For now we it's either IB or ETH, and we always create LLE
if it's ETH
+ if((number_of_ib_ports > 0) && (mdev->caps.port_type[1] ==
MLX4_PORT_TYPE_IB) ) {
+ status = __create_child(Device, BUS_HARDWARE_IDS,
BUS_HARDWARE_DESCRIPTION, 0 );
+ if (!NT_SUCCESS(status)) {
+ MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV,
("__create_child (ib)failed with 0x%x\n", status));
+ }
+ }
+
for (i = 1; i <= mdev->caps.num_ports; i++) {
if (mlx4_is_enabled_port(mdev, i)) {
if(mlx4_is_eth_port(mdev, i)) {
@@ -869,6 +876,9 @@
goto err;
}
+ pdev->p_wdf_device = Device;
+ pdev->ib_hca_created = 0;
+
// start the card
status = __start_card(Device, p_fdo );
if( !NT_SUCCESS( status ) )
Index: hw/mlx4/kernel/bus/drv/stat.c
===================================================================
--- hw/mlx4/kernel/bus/drv/stat.c (revision 2617)
+++ hw/mlx4/kernel/bus/drv/stat.c (working copy)
@@ -113,7 +113,7 @@
void st_print_mlx_header( struct mlx4_dev *mdev, struct
mlx4_ib_sqp *sqp, struct mlx4_wqe_mlx_seg *mlx )
{
if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_UDH )
- __print_ud_header( mdev, &sqp->ud_header );
+ __print_ud_header( mdev, &sqp->hdr.ib );
if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_WQE )
__print_mlx( mdev, mlx );
}
Index: hw/mlx4/kernel/bus/ib/ah.c
===================================================================
--- hw/mlx4/kernel/bus/ib/ah.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/ah.c (working copy)
@@ -32,68 +32,199 @@
#include "mlx4_ib.h"
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct
ib_ah_attr *ah_attr)
+static inline int rdma_link_local_addr(struct in6_addr *addr)
{
+ if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) &&
+ addr->s6_addr32[1] == 0)
+ return 1;
+ else
+ return 0;
+}
+
+inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+ memcpy(mac, &addr->s6_addr[8], 3);
+ memcpy(mac + 3, &addr->s6_addr[13], 3);
+ mac[0] ^= 2;
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+ return addr->s6_addr[0] == 0xff ? 1 : 0;
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8
*mac)
+{
+ int i;
+
+ mac[0] = 0x33;
+ mac[1] = 0x33;
+ for (i = 2; i < 6; ++i)
+ mac[i] = addr->s6_addr[i + 10];
+
+}
+
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah_attr,
+ u8 *mac, int *is_mcast)
+{
+ int err = 0;
+ struct sockaddr_in6 dst;
+
+ UNREFERENCED_PARAMETER(dev);
+
+ *is_mcast = 0;
+ memcpy(dst.sin6_addr.s6_addr, ah_attr->grh.dgid.raw,
sizeof(ah_attr->grh.dgid.raw));
+
+ if (rdma_link_local_addr(&dst.sin6_addr))
+ rdma_get_ll_mac(&dst.sin6_addr, mac);
+ else if (rdma_is_multicast_addr(&dst.sin6_addr)) {
+ rdma_get_mcast_mac(&dst.sin6_addr, mac);
+ *is_mcast = 1;
+ } else {
+ err = -EINVAL; //jyang:todo
+ ASSERT(FALSE);
+ }
+ return err;
+}
+
+static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct
ib_ah_attr *ah_attr,
+ struct mlx4_ib_ah *ah)
+{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
- struct mlx4_ib_ah *ah;
if (mlx4_is_barred(pd->device->dma_device))
return ERR_PTR(-EFAULT);
- ah = kmalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
- memset(&ah->av, 0, sizeof ah->av);
-
- ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
(ah_attr->port_num << 24));
- ah->av.g_slid = ah_attr->src_path_bits;
- ah->av.dlid = cpu_to_be16(ah_attr->dlid);
- if (ah_attr->static_rate) {
- ah->av.stat_rate = ah_attr->static_rate +
MLX4_STAT_RATE_OFFSET;
- while (ah->av.stat_rate > IB_RATE_2_5_GBPS +
MLX4_STAT_RATE_OFFSET &&
- !(1 << ah->av.stat_rate &
dev->caps.stat_rate_support))
- --ah->av.stat_rate;
- }
- ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
(ah_attr->port_num << 24));
+ ah->av.ib.g_slid = ah_attr->src_path_bits;
if (ah_attr->ah_flags & IB_AH_GRH) {
- ah->av.g_slid |= 0x80;
- ah->av.gid_index = ah_attr->grh.sgid_index;
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.sl_tclass_flowlabel |=
+ ah->av.ib.g_slid |= 0x80;
+ ah->av.ib.gid_index = ah_attr->grh.sgid_index;
+ ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.ib.sl_tclass_flowlabel |=
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
ah_attr->grh.flow_label);
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+ memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
}
+ ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
+ if (ah_attr->static_rate) {
+ ah->av.ib.stat_rate = ah_attr->static_rate +
MLX4_STAT_RATE_OFFSET;
+ while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS +
MLX4_STAT_RATE_OFFSET &&
+ !(1 << ah->av.ib.stat_rate &
dev->caps.stat_rate_support))
+ --ah->av.ib.stat_rate;
+ }
+ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl <<
28);
+
return &ah->ibah;
}
+struct ib_ah *create_rdmaoe_ah(struct ib_pd *pd, struct
ib_ah_attr *ah_attr,
+ struct mlx4_ib_ah *ah)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+ struct mlx4_dev *dev = ibdev->dev;
+ u8 mac[6];
+ int err;
+ int is_mcast;
+
+ if (mlx4_is_barred(pd->device->dma_device))
+ return ERR_PTR(-EFAULT);
+
+ err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast);
+ if (err)
+ return ERR_PTR(err);
+
+ memcpy(ah->av.eth.mac_0_1, mac, 2);
+ memcpy(ah->av.eth.mac_2_5, mac + 2, 4);
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
(ah_attr->port_num << 24));
+ ah->av.ib.g_slid = 0x80;
+ if (ah_attr->static_rate) {
+ ah->av.ib.stat_rate = ah_attr->static_rate +
MLX4_STAT_RATE_OFFSET;
+ while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS +
MLX4_STAT_RATE_OFFSET &&
+ !(1 << ah->av.ib.stat_rate &
dev->caps.stat_rate_support))
+ --ah->av.ib.stat_rate;
+ }
+
+ /*
+ * HW requires multicast LID so we just choose one.
+ */
+ if (is_mcast)
+ ah->av.ib.dlid = cpu_to_be16(0xc000);
+
+ memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
+ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl <<
28);
+
+ return &ah->ibah;
+}
+
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct
ib_ah_attr *ah_attr)
+{
+ struct mlx4_ib_ah *ah;
+ enum rdma_transport_type transport;
+
+ struct ib_ah *ret;
+
+ ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ transport = rdma_port_get_transport(pd->device,
ah_attr->port_num);
+ if (transport == RDMA_TRANSPORT_RDMAOE) {
+ if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+ ret = ERR_PTR(-EINVAL);
+ goto out;
+ } else {
+ /* TBD: need to handle the case when we get called
+ in an atomic context and there we might sleep. We
+ don't expect this currently since we're working with
+ link local addresses which we can translate without
+ going to sleep */
+ ret = create_rdmaoe_ah(pd, ah_attr, ah);
+ if (IS_ERR(ret))
+ goto out;
+ else
+ return ret;
+ }
+ } else
+ return create_ib_ah(pd, ah_attr, ah); /* never fails */
+
+out:
+ kfree(ah);
+ return ret;
+}
+
+
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr
*ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
+ enum rdma_transport_type transport;
+ transport = rdma_port_get_transport(ibah->device,
ah_attr->port_num);
+
if (mlx4_is_barred(ibah->device->dma_device))
return -EFAULT;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(ah->av.dlid);
- ah_attr->sl =
(u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);
- ah_attr->port_num = (u8)(be32_to_cpu(ah->av.port_pd) >>
24);
- if (ah->av.stat_rate)
- ah_attr->static_rate = ah->av.stat_rate -
MLX4_STAT_RATE_OFFSET;
- ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+ ah_attr->dlid = transport == RDMA_TRANSPORT_IB ?
be16_to_cpu(ah->av.ib.dlid) : 0;
+ ah_attr->sl =
(u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28);
+ ah_attr->port_num = (u8)(be32_to_cpu(ah->av.ib.port_pd)
>> 24);
+ if (ah->av.ib.stat_rate)
+ ah_attr->static_rate = ah->av.ib.stat_rate -
MLX4_STAT_RATE_OFFSET;
+ ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
if (mlx4_ib_ah_grh_present(ah)) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.traffic_class =
- (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20);
+ (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20);
ah_attr->grh.flow_label =
- be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.sgid_index = ah->av.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
+ ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
+ ah_attr->grh.sgid_index = ah->av.ib.gid_index;
+ memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
}
return 0;
@@ -108,7 +239,7 @@
// Leo: temporary
int mlx4_ib_modify_ah( struct ib_ah *ibah, struct ib_ah_attr
*ah_attr )
{
- struct mlx4_av *av = &to_mah(ibah)->av;
+ struct mlx4_av *av = &to_mah(ibah)->av.ib;
struct mlx4_dev *dev = to_mdev(ibah->pd->device)->dev;
if (mlx4_is_barred(dev))
Index: hw/mlx4/kernel/bus/ib/main.c
===================================================================
--- hw/mlx4/kernel/bus/ib/main.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/main.c (working copy)
@@ -133,31 +133,21 @@
return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+
+static enum rdma_transport_type
+mlx4_ib_port_get_transport(struct ib_device *device, u8
port_num)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct mlx4_dev *dev = to_mdev(device)->dev;
- if (mlx4_is_barred(ibdev->dma_device))
- return -EFAULT;
-
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad)
- goto out;
+ return dev->caps.port_mask & (1 << (port_num - 1)) ?
+ RDMA_TRANSPORT_IB : RDMA_TRANSPORT_RDMAOE;
+}
- memset(props, 0, sizeof *props);
- init_query_mad(in_mad);
- in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
- in_mad->attr_mod = cpu_to_be32(port);
-
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL,
in_mad, out_mad);
- if (err)
- goto out;
-
+static void ib_link_query_port(struct ib_device *ibdev, u8
port,
+ struct ib_port_attr *props,
+ struct ib_smp *out_mad)
+{
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data +
18));
@@ -177,7 +167,64 @@
props->subnet_timeout = out_mad->data[51] & 0x1f;
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
+ props->transport= RDMA_TRANSPORT_IB;
+}
+static void eth_link_query_port(struct ib_device *ibdev, u8
port,
+ struct ib_port_attr *props,
+ struct ib_smp *out_mad)
+{
+
+ props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data
+ 20));
+ props->gid_tbl_len =
to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
+ props->pkey_tbl_len =
(u16)to_mdev(ibdev)->dev->caps.pkey_table_len[port];
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data
+ 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data
+ 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ //props->active_mtu = rdmaoe->mtu[port - 1];
+ props->active_mtu = 1500; //jyang:hardcoded
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
+ props->transport= RDMA_TRANSPORT_RDMAOE;
+
+ //props->state = netif_running(ndev) && netif_oper_up(ndev)
?
+ // IB_PORT_ACTIVE : IB_PORT_DOWN;
+ props->state = IB_PORT_ACTIVE; //jyang: just hardcoded it now
+ props->phys_state = props->state;
+}
+
+
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL,
in_mad, out_mad);
+ if (err)
+ goto out;
+
+ mlx4_ib_port_get_transport(ibdev, port) == RDMA_TRANSPORT_IB ?
+ ib_link_query_port(ibdev, port, props, out_mad) :
+ eth_link_query_port(ibdev, port, props, out_mad);
+
out:
kfree(in_mad);
kfree(out_mad);
@@ -522,6 +569,7 @@
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
ibdev->ib_dev.query_device = mlx4_ib_query_device;
ibdev->ib_dev.query_port = mlx4_ib_query_port;
+ ibdev->ib_dev.get_port_transport = mlx4_ib_port_get_transport;
ibdev->ib_dev.query_gid_chunk = mlx4_ib_query_gid_chunk;
ibdev->ib_dev.query_pkey_chunk = mlx4_ib_query_pkey_chunk;
ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
Index: hw/mlx4/kernel/bus/ib/mlx4_ib.h
===================================================================
--- hw/mlx4/kernel/bus/ib/mlx4_ib.h (revision 2617)
+++ hw/mlx4/kernel/bus/ib/mlx4_ib.h (working copy)
@@ -165,14 +165,15 @@
struct mlx4_ib_ah {
struct ib_ah ibah;
- struct mlx4_av av;
+ union mlx4_ext_av av;
};
+
enum {
/*
* Largest possible UD header: send with GRH and immediate
data.
*/
- MLX4_IB_UD_HEADER_SIZE = 72
+ MLX4_IB_UD_HEADER_SIZE = 76
};
struct mlx4_ib_sqp {
@@ -180,7 +181,10 @@
int pkey_index;
u32 qkey;
u32 send_psn;
- struct ib_ud_header ud_header;
+ union {
+ struct ib_ud_header ib;
+ struct eth_ud_header eth;
+ } hdr;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
};
@@ -340,9 +344,14 @@
int __init mlx4_ib_init(void);
void __exit mlx4_ib_cleanup(void);
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah_attr,
+ u8 *mac, int *is_mcast);
+
+
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
- return !!(ah->av.g_slid & 0x80);
+ return !!(ah->av.ib.g_slid & 0x80);
+
}
#endif /* MLX4_IB_H */
Index: hw/mlx4/kernel/bus/ib/qp.c
===================================================================
--- hw/mlx4/kernel/bus/ib/qp.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/qp.c (working copy)
@@ -46,10 +46,16 @@
enum {
MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
- MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
+ MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
+ MLX4_IB_LINK_TYPE_IB = 0,
+ MLX4_IB_LINK_TYPE_ETH = 1
};
enum {
+ MLX4_RDMAOE_ETHERTYPE = 0x8915
+};
+
+enum {
MLX4_IB_MIN_SQ_STRIDE = 6
};
@@ -65,6 +71,8 @@
__constant_cpu_to_be32(MLX4_OPCODE_NOP) /* [IB_WR_NOP]
*/
};
+extern inline void rdma_get_ll_mac(struct in6_addr *addr, u8
*mac);
+
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_sqp, qp);
@@ -724,6 +732,12 @@
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah,
struct mlx4_qp_path *path, u8 port)
{
+ int err;
+ int is_eth = rdma_port_get_transport(&dev->ib_dev, port) ==
+ RDMA_TRANSPORT_RDMAOE ? 1 : 0;
+ u8 mac[6];
+ int is_mcast;
+
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
if (ah->static_rate) {
@@ -754,7 +768,21 @@
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
- return 0;
+ if (is_eth) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -1;
+
+ err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast);
+ if (err)
+ return err;
+
+ memcpy(path->dmac, mac, 6);
+ path->ackto = MLX4_IB_LINK_TYPE_ETH;
+ /* use index 0 into MAC table for RDMAoE */
+ path->grh_mylmc &= 0x80;
+ }
+
+ return 0;
}
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
@@ -1146,79 +1174,132 @@
return opcode;
}
+
+
+
static int build_mlx_header(struct mlx4_ib_sqp *sqp,
ib_send_wr_t *wr,
- void *wqe)
+ void *wqe, unsigned *mlx_seg_len)
{
enum ib_wr_opcode opcode = to_wr_opcode(wr);
struct ib_device *ib_dev =
&to_mdev(sqp->qp.ibqp.device)->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof
*mlx);
struct mlx4_ib_ah *ah = to_mah((struct ib_ah
*)wr->dgrm.ud.h_av);
- __be16 pkey;
+ u16 pkey;
int send_size;
int header_size;
int spc;
- u32 i;
+ u16 i;
+ struct ib_ud_header *ib = NULL;
+ struct eth_ud_header *eth = NULL;
+ struct ib_unpacked_grh *grh;
+ struct ib_unpacked_bth *bth;
+ struct ib_unpacked_deth *deth;
+ u8 *tmp;
+ u8 mac[6];
send_size = 0;
for (i = 0; i < wr->num_ds; ++i)
send_size += wr->ds_array[i].length;
- ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah),
&sqp->ud_header);
+ if (rdma_port_get_transport(sqp->qp.ibqp.device, sqp->qp.port)
== RDMA_TRANSPORT_IB) {
- sqp->ud_header.lrh.service_level =
- (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);
- sqp->ud_header.lrh.destination_lid = ah->av.dlid;
- sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid
& 0x7f);
+ ib = &sqp->hdr.ib;
+ grh = &ib->grh;
+ bth = &ib->bth;
+ deth = &ib->deth;
+ ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), ib);
+ ib->lrh.service_level =
+ (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28);
+ ib->lrh.destination_lid = ah->av.ib.dlid;
+ ib->lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid &
0x7f);
+ } else {
+ eth = &sqp->hdr.eth;
+ grh = &eth->grh;
+ bth = &eth->bth;
+ deth = &eth->deth;
+ ib_rdmaoe_ud_header_init(send_size,
mlx4_ib_ah_grh_present(ah), eth);
+ }
+
+
if (mlx4_ib_ah_grh_present(ah)) {
- sqp->ud_header.grh.traffic_class =
- (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) &
0xff);
- sqp->ud_header.grh.flow_label =
- ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
- sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
- ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >>
24),
- ah->av.gid_index, &sqp->ud_header.grh.source_gid);
- memcpy(sqp->ud_header.grh.destination_gid.raw,
- ah->av.dgid, 16);
+ grh->traffic_class =
+ (u8)((be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) &
0xff);
+ grh->flow_label =
+ ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ grh->hop_limit = ah->av.ib.hop_limit;
+ ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.ib.port_pd)
>> 24),
+ ah->av.ib.gid_index, &grh->source_gid);
+ memcpy(grh->destination_gid.raw,
+ ah->av.ib.dgid, 16);
}
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ?
MLX4_WQE_MLX_VL15 : 0) |
- (sqp->ud_header.lrh.destination_lid ==
- XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
- (sqp->ud_header.lrh.service_level << 8));
- mlx->rlid = sqp->ud_header.lrh.destination_lid;
+ if (ib) {
+ mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ?
MLX4_WQE_MLX_VL15 : 0) |
+ (ib->lrh.destination_lid ==
+ IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
+ (ib->lrh.service_level << 8));
+ mlx->rlid = ib->lrh.destination_lid;
+
+ }
+
switch (opcode) {
case IB_WR_SEND:
- sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
- sqp->ud_header.immediate_present = 0;
+ bth->opcode = IB_OPCODE_UD_SEND_ONLY;
+ if (ib)
+ ib->immediate_present = 0;
+ else
+ eth->immediate_present = 0;
break;
case IB_WR_SEND_WITH_IMM:
- sqp->ud_header.bth.opcode =
IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
- sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->immediate_data;
+ bth->opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ if (ib) {
+ ib->immediate_present = 1;
+ ib->immediate_data = wr->immediate_data;
+ } else {
+ eth->immediate_present = 1;
+ eth->immediate_data = wr->immediate_data;
+ }
break;
default:
return -EINVAL;
}
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15
: 0;
- if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
- sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
- sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt &
IB_SEND_OPT_SOLICITED));
+ if (ib) {
+ ib->lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ if (ib->lrh.destination_lid == IB_LID_PERMISSIVE)
+ ib->lrh.source_lid = IB_LID_PERMISSIVE;
+ } else {
+ memcpy(eth->eth.dmac_h, ah->av.eth.mac_0_1, 2);
+ memcpy(eth->eth.dmac_h + 2, ah->av.eth.mac_2_5, 2);
+ memcpy(eth->eth.dmac_l, ah->av.eth.mac_2_5 + 2, 2);
+ rdma_get_ll_mac((struct in6_addr *)&grh->source_gid, mac);
+
+ tmp = mac;
+ memcpy(eth->eth.smac_h, tmp, 2);
+ memcpy(eth->eth.smac_l, tmp + 2, 4);
+ eth->eth.type = cpu_to_be16(MLX4_RDMAOE_ETHERTYPE);
+ }
+
+ bth->solicited_event = (u8)(!!(wr->send_opt &
IB_SEND_SOLICITED));
+
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index,
&pkey);
else
ib_get_cached_pkey(ib_dev, sqp->qp.port,
wr->dgrm.ud.pkey_index, &pkey);
- sqp->ud_header.bth.pkey = pkey;
- sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;
- sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1
<< 24) - 1));
- sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey &
0x00000080 ?
- cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
- sqp->ud_header.deth.source_qpn =
cpu_to_be32(sqp->qp.ibqp.qp_num);
+ bth->pkey = pkey;
+ bth->destination_qpn = wr->dgrm.ud.remote_qp;
+ bth->psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ deth->qkey = wr->dgrm.ud.remote_qkey & 0x80000000 ?
+ cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
+ deth->source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
- header_size = ib_ud_header_pack(&sqp->ud_header,
sqp->header_buf);
+ if (ib)
+ header_size = ib_ud_header_pack(ib, sqp->header_buf);
+ else
+ header_size = rdmaoe_ud_header_pack(eth, sqp->header_buf);
#if 0
{
@@ -1271,7 +1352,10 @@
i = 2;
}
- return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) +
header_size, 16);
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size,
16);
+ return 0;
+
}
static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq,
struct ib_cq *ib_cq)
@@ -1314,9 +1398,13 @@
static void set_datagram_seg(struct mlx4_wqe_datagram_seg
*dseg,
ib_send_wr_t *wr)
{
+
memcpy(dseg->av, &to_mah((struct ib_ah
*)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av));
dseg->dqpn = wr->dgrm.ud.remote_qp;
dseg->qkey = wr->dgrm.ud.remote_qkey;
+ dseg->vlan = to_mah((struct ib_ah
*)wr->dgrm.ud.h_av)->av.eth.vlan;
+ memcpy(dseg->mac_0_1, to_mah((struct ib_ah
*)wr->dgrm.ud.h_av)->av.eth.mac_0_1, 6);
+
}
static void set_mlx_icrc_seg(void *dseg)
@@ -1398,7 +1486,7 @@
int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,
ib_send_wr_t **bad_wr)
{
- enum ib_wr_opcode opcode;
+ enum ib_wr_opcode opcode;// = to_wr_opcode(wr);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_dev *dev = to_mdev(ibqp->device)->dev;
u8 *wqe /*, *wqe_start*/;
@@ -1525,16 +1613,14 @@
case IB_QPT_SMI:
case IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), wr, ctrl);
+ err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
if (err < 0) {
if (bad_wr)
*bad_wr = wr;
goto out;
}
-
- wqe += err;
- size += err / 16;
-
+ wqe += seglen;
+ size += seglen / 16;
err = 0;
break;
Index: hw/mlx4/kernel/bus/inc/cmd.h
===================================================================
--- hw/mlx4/kernel/bus/inc/cmd.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/cmd.h (working copy)
@@ -138,6 +138,7 @@
MLX4_SET_PORT_MAC_TABLE = 0x2,
MLX4_SET_PORT_VLAN_TABLE = 0x3,
MLX4_SET_PORT_PRIO_MAP = 0x4,
+ MLX4_SET_PORT_GID_TABLE = 0x5,
};
struct mlx4_dev;
Index: hw/mlx4/kernel/bus/inc/device.h
===================================================================
--- hw/mlx4/kernel/bus/inc/device.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/device.h (working copy)
@@ -208,8 +208,9 @@
int log_num_prios;
int num_fc_exch;
enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
- enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
- int reserved_fexch_mpts_base;
+ u32 port_mask;
+ enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
+ int reserved_fexch_mpts_base;
int total_reserved_qps;
};
@@ -343,6 +344,28 @@
u8 dgid[16];
};
+struct mlx4_eth_av {
+ __be32 port_pd;
+ u8 reserved1;
+ u8 smac_idx;
+ u16 reserved2;
+ u8 reserved3;
+ u8 gid_index;
+ u8 stat_rate;
+ u8 hop_limit;
+ __be32 sl_tclass_flowlabel;
+ u8 dgid[16];
+ u32 reserved4[2];
+ __be16 vlan;
+ u8 mac_0_1[2];
+ u8 mac_2_5[4];
+};
+
+union mlx4_ext_av {
+ struct mlx4_av ib;
+ struct mlx4_eth_av eth;
+};
+
#define MLX4_DEV_SIGNATURE 0xf1b34a6e
struct mlx4_dev_params {
Index: hw/mlx4/kernel/bus/inc/ib_pack.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_pack.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/ib_pack.h (working copy)
@@ -39,6 +39,7 @@
enum {
IB_LRH_BYTES = 8,
+ IB_ETH_BYTES = 14,
IB_GRH_BYTES = 40,
IB_BTH_BYTES = 12,
IB_DETH_BYTES = 8
@@ -212,6 +213,15 @@
__be32 source_qpn;
};
+struct ib_unpacked_eth {
+ u8 dmac_h[4];
+ u8 dmac_l[2];
+ u8 smac_h[2];
+ u8 smac_l[4];
+ __be16 type;
+};
+
+
struct ib_ud_header {
struct ib_unpacked_lrh lrh;
int grh_present;
@@ -222,6 +232,19 @@
__be32 immediate_data;
};
+
+
+struct eth_ud_header {
+ struct ib_unpacked_eth eth;
+ int grh_present;
+ struct ib_unpacked_grh grh;
+ struct ib_unpacked_bth bth;
+ struct ib_unpacked_deth deth;
+ int immediate_present;
+ __be32 immediate_data;
+};
+
+
void ib_pack(const struct ib_field *desc,
int desc_len,
void *structure,
@@ -236,10 +259,18 @@
int grh_present,
struct ib_ud_header *header);
+void ib_rdmaoe_ud_header_init(int payload_bytes,
+ int grh_present,
+ struct eth_ud_header *header);
+
int ib_ud_header_pack(struct ib_ud_header *header,
void *buf);
int ib_ud_header_unpack(void *buf,
struct ib_ud_header *header);
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+ void *buf);
+
+
#endif /* IB_PACK_H */
Index: hw/mlx4/kernel/bus/inc/ib_verbs.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_verbs.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/ib_verbs.h (working copy)
@@ -53,6 +53,34 @@
#include "ib_verbs_ex.h"
+/*
+ * IPv6 address structure
+ */
+
+struct in6_addr /* 16 bytes of IPv6 address, accessible as 16 x 8-bit, 8 x 16-bit or 4 x 32-bit units */
+{
+ union
+ {
+ __u8 u6_addr8[16];
+ __be16 u6_addr16[8]; /* big-endian 16-bit view */
+ __be32 u6_addr32[4]; /* big-endian 32-bit view */
+ } in6_u;
+#define s6_addr in6_u.u6_addr8 /* shorthand: x.s6_addr expands to x.in6_u.u6_addr8 */
+#define s6_addr16 in6_u.u6_addr16
+#define s6_addr32 in6_u.u6_addr32
+};
+
+
+struct sockaddr_in6 { /* driver-local IPv6 socket address; embeds the struct in6_addr declared above */
+ unsigned short int sin6_family; /* AF_INET6 */
+ __be16 sin6_port; /* Transport layer port # */
+ __be32 sin6_flowinfo; /* IPv6 flow information */
+ struct in6_addr sin6_addr; /* IPv6 address */
+ __u32 sin6_scope_id; /* scope id (new in RFC2553) */
+};
+
+#define AF_INET6 10 /* IP version 6 */ /* NOTE(review): 10 is the Linux value; the Windows SDK defines AF_INET6 as 23 - confirm this is never mixed with Winsock headers or passed to Windows socket APIs */
+
enum rdma_node_type {
/* IB values map to NodeInfo:NodeType. */
RDMA_NODE_IB_CA = 1,
@@ -63,7 +91,8 @@
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
- RDMA_TRANSPORT_IWARP
+ RDMA_TRANSPORT_IWARP,
+ RDMA_TRANSPORT_RDMAOE
};
enum rdma_transport_type
@@ -231,6 +260,7 @@
u8 active_width;
u8 active_speed;
u8 phys_state;
+ enum rdma_transport_type transport;
};
enum ib_device_modify_flags {
@@ -633,6 +663,10 @@
IB_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD,
IB_WR_LSO,
+ IB_WR_SEND_WITH_INV,
+ IB_WR_RDMA_READ_WITH_INV,
+ IB_WR_LOCAL_INV,
+ IB_WR_FAST_REG_MR,
IB_WR_NOP
};
@@ -920,6 +954,9 @@
int (*query_port)(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr);
+ enum rdma_transport_type (*get_port_transport)(struct
ib_device *device,
+ u8 port_num);
+
int (*query_gid_chunk)(struct ib_device *device,
u8 port_num, int index,
union ib_gid gid[8], int size);
@@ -1127,6 +1164,11 @@
int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr);
+enum rdma_transport_type rdma_port_get_transport(struct
ib_device *device,
+ u8 port_num);
+int rdma_is_transport_supported(struct ib_device *device,
+ enum rdma_transport_type transport);
+
int ib_query_gid_chunk(struct ib_device *device,
u8 port_num, int index, union ib_gid gid[8], int size);
Index: hw/mlx4/kernel/bus/inc/qp.h
===================================================================
--- hw/mlx4/kernel/bus/inc/qp.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/qp.h (working copy)
@@ -113,7 +113,9 @@
u8 snooper_flags;
u8 reserved3[2];
u8 counter_index;
- u8 reserved4[7];
+ u8 reserved4;
+ u8 dmac[6];
+
};
struct mlx4_qp_context {
@@ -213,7 +215,9 @@
__be32 av[8];
__be32 dqpn;
__be32 qkey;
- __be32 reservd[2];
+ __be16 vlan;
+ u8 mac_0_1[2];
+ u8 mac_2_5[4];
};
#pragma warning( disable : 4200)
Index: hw/mlx4/kernel/bus/net/main.c
===================================================================
--- hw/mlx4/kernel/bus/net/main.c (revision 2617)
+++ hw/mlx4/kernel/bus/net/main.c (working copy)
@@ -139,7 +139,9 @@
int count = 0;
for (i = 0; i < dev->caps.num_ports; i++) {
- if (dev->caps.port_type[i+1] == MLX4_PORT_TYPE_IB) {
+ if ((dev->caps.port_type[i+1] == MLX4_PORT_TYPE_IB) ||
+ (dev->caps.port_type[i+1] == MLX4_PORT_TYPE_ETH))
+ {
count++;
}
}
@@ -170,6 +172,16 @@
return FALSE;
}
+static void mlx4_set_port_mask(struct mlx4_dev *dev) /* rebuilds dev->caps.port_mask from dev->caps.port_type[] */
+{
+ int i;
+
+ dev->caps.port_mask = 0; /* start from an empty mask */
+ for (i = 1; i <= dev->caps.num_ports; ++i) /* ports are numbered from 1; port_type[0] is not read */
+ if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) /* only IB ports contribute a bit */
+ dev->caps.port_mask |= 1 << (i - 1); /* port i maps to bit (i - 1) */
+}
+
static int mlx4_dev_cap(struct mlx4_dev *dev, struct
mlx4_dev_cap *dev_cap)
{
int err;
@@ -309,6 +321,8 @@
++num_eth_ports;
}
+ mlx4_set_port_mask(dev);
+
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] =
dev_cap->reserved_qps;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
Index: hw/mlx4/kernel/bus/net/port.c
===================================================================
--- hw/mlx4/kernel/bus/net/port.c (revision 2617)
+++ hw/mlx4/kernel/bus/net/port.c (working copy)
@@ -33,7 +33,9 @@
#include "mlx4.h"
#include "cmd.h"
+#include "public.h"
+extern NTSTATUS __create_child(); /* defined in drv.c (this patch drops its 'static'); NOTE(review): an empty parameter list is not a prototype in C, so the four-argument call in mlx4_register_mac is not type-checked - consider declaring the full signature in a shared header */
void mlx4_init_mac_table(struct mlx4_dev *dev, u8 port)
{
@@ -60,6 +62,10 @@
table->refs[i] = 0;
}
table->max = 1 << dev->caps.log_num_vlans;
+ if(table->max > MLX4_MAX_VLAN_NUM)
+ {
+ table->max = MLX4_MAX_VLAN_NUM;
+ }
table->total = 0;
}
@@ -84,6 +90,52 @@
return err;
}
+static void mlx4_addrconf_ifid_eui48_win(u8 *eui, u64 mac) /* writes an 8-byte modified EUI-64 interface ID to eui[0..7] from the 6 bytes at offset 2 of mac */
+{
+ u8 *p = (u8*)&mac+2; // skip the first 2 bytes of the u64 in memory; the next 6 are used as the MAC (the visible caller passes cpu_to_be64(mac))
+ memcpy(eui, p, 3); /* first 3 MAC bytes -> eui[0..2] */
+ memcpy(eui + 5, p + 3, 3); /* last 3 MAC bytes -> eui[5..7] */
+ eui[3] = 0xFF; /* 0xFF, 0xFE filler between the two halves */
+ eui[4] = 0xFE;
+ eui[0] ^= 2; /* flip bit 0x02 of the first byte */
+}
+
+
+static int update_ipv6_gids_win(struct mlx4_dev *dev, int port,
int clear, u64 mac) /* returns the mlx4_cmd() result, -ENOMEM if the scratch table cannot be allocated, or PTR_ERR() of a failed mailbox allocation */
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids, *tmpgids;
+ int err;
+
+ tmpgids = kzalloc(128 * sizeof *gids, GFP_ATOMIC); /* scratch table of 128 GID entries; presumably zero-filled by kzalloc, so entries not set below stay zero */
+ if (!tmpgids)
+ return -ENOMEM;
+
+ if (!clear) { /* when clear is non-zero, entry 0 is left as allocated too */
+ mlx4_addrconf_ifid_eui48_win(&tmpgids[0].raw[8],
cpu_to_be64(mac)); /* bytes 8..15 of entry 0 receive the interface ID derived from mac */
+ tmpgids[0].global.subnet_prefix =
cpu_to_be64(0xfe80000000000000LL); /* subnet prefix fe80:: (IPv6 link-local) */
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto out; /* nothing was sent; only the scratch table needs freeing */
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, tmpgids, 128 * sizeof *gids); /* copy all 128 entries into the command mailbox */
+
+ err = mlx4_cmd(dev, mailbox->dma.da, MLX4_SET_PORT_GID_TABLE
<< 8 | port,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); /* SET_PORT command; input modifier is (MLX4_SET_PORT_GID_TABLE << 8) | port */
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+out:
+ kfree(tmpgids); /* reached on both the success path and the mailbox-allocation failure path */
+ return err;
+}
+
+
int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac,
int *index)
{
struct mlx4_mac_table *table =
@@ -112,7 +164,7 @@
}
mlx4_dbg(dev, "Free mac index is %d\n", free);
- if (table->total == table->max) {
+ if (table->total == table->max || free < 0) {
/* No free mac entries */
err = -ENOSPC;
goto out;
@@ -132,6 +184,20 @@
*index = free;
++table->total;
+
+ //update port guid with mac address
+ update_ipv6_gids_win(dev, port, 0, mac);
+
+ if(!InterlockedExchange(&dev->pdev->ib_hca_created, 1))
+ {
+ NTSTATUS status = STATUS_SUCCESS;
+ status = __create_child(dev->pdev->p_wdf_device,
BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 );
+ if (!NT_SUCCESS(status)) {
+ mlx4_err(dev, "__create_child (ib)failed with 0x%x\n",
status);
+ dev->pdev->ib_hca_created = FALSE;
+ }
+ }
+
out:
up(&table->mac_sem);
return err;
@@ -207,7 +273,7 @@
}
}
- if (table->total == table->max) {
+ if (table->total == table->max || free < 0) {
/* No free vlan entries */
err = -ENOSPC;
goto out;
Index: hw/mlx4/kernel/bus/net/SOURCES
===================================================================
--- hw/mlx4/kernel/bus/net/SOURCES (revision 2617)
+++ hw/mlx4/kernel/bus/net/SOURCES (working copy)
@@ -31,7 +31,7 @@
srq.c \
port.c \
-INCLUDES=..;..\inc;..\..\inc;..\core\$O;..\..\..\..\..\inc;..\..\..\..\
..\inc\kernel;
+INCLUDES=..;..\inc;..\..\inc;..\..\..\inc;..\core\$O;..\..\..\..\..\inc
;..\..\..\..\..\inc\kernel;
C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS
-D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS
#-DFORCE_LIVEFISH
Index: hw/mlx4/kernel/hca/av.c
===================================================================
--- hw/mlx4/kernel/hca/av.c (revision 2617)
+++ hw/mlx4/kernel/hca/av.c (working copy)
@@ -74,6 +74,7 @@
p_ib_ah = p_ib_pd->device->create_ah(p_ib_pd, &ah_attr);
if (IS_ERR(p_ib_ah)) {
err = PTR_ERR(p_ib_ah);
+ status = errno_to_iberr(err);
HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed
(%d)\n", err));
goto err_create_ah;
}
Index: hw/mlx4/kernel/hca/data.c
===================================================================
--- hw/mlx4/kernel/hca/data.c (revision 2617)
+++ hw/mlx4/kernel/hca/data.c (working copy)
@@ -339,6 +339,7 @@
ibal_port_p->max_vls = mthca_port_p->max_vl_num;
ibal_port_p->sm_lid = cl_ntoh16(mthca_port_p->sm_lid);
ibal_port_p->sm_sl = mthca_port_p->sm_sl;
+ ibal_port_p->transport = mthca_port_p->transport;
ibal_port_p->link_state = (mthca_port_p->state != 0) ?
(uint8_t)mthca_port_p->state : IB_LINK_DOWN;
ibal_port_p->num_gids =
(uint16_t)mthca_port_p->gid_tbl_len;
ibal_port_p->num_pkeys = mthca_port_p->pkey_tbl_len;
Index: hw/mlx4/kernel/inc/l2w.h
===================================================================
--- hw/mlx4/kernel/inc/l2w.h (revision 2617)
+++ hw/mlx4/kernel/inc/l2w.h (working copy)
@@ -185,6 +185,8 @@
DMA_ADAPTER * p_dma_adapter; /* HCA adapter object */
DEVICE_OBJECT * p_self_do; /* mlx4_bus's FDO */
DEVICE_OBJECT * pdo; /* mlx4_bus's PDO */
+ PVOID p_wdf_device; /* wdf_device
*/
+ LONG ib_hca_created;
// mlx4_ib: various objects and info
struct ib_device * ib_dev;
// mlx4_net: various objects and info
Index: inc/iba/ib_types.h
===================================================================
--- inc/iba/ib_types.h (revision 2617)
+++ inc/iba/ib_types.h (working copy)
@@ -9419,6 +9419,8 @@
TO_LONG_PTR(ib_gid_t*, p_gid_table);
TO_LONG_PTR(ib_net16_t*,p_pkey_table);
+ enum rdma_transport_type transport;
+
} ib_port_attr_t;
/*
* SEE ALSO
Index: ulp/opensm/user/include/iba/ib_types.h
===================================================================
--- ulp/opensm/user/include/iba/ib_types.h (revision 2617)
+++ ulp/opensm/user/include/iba/ib_types.h (working copy)
@@ -8676,6 +8676,7 @@
ib_gid_t *p_gid_table;
ib_net16_t *p_pkey_table;
+ enum rdma_transport_type transport;
} ib_port_attr_t;
/*
* SEE ALSO
Index: ulp/opensm/user/include/iba/ib_types_extended.h
===================================================================
--- ulp/opensm/user/include/iba/ib_types_extended.h (revision
2617)
+++ ulp/opensm/user/include/iba/ib_types_extended.h (working
copy)
@@ -586,6 +586,7 @@
TO_LONG_PTR(ib_gid_t*, p_gid_table);
TO_LONG_PTR(ib_net16_t*,p_pkey_table);
+ enum rdma_transport_type transport;
} ib_port_attr_t;
/*
* SEE ALSO
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20100121/489f1683/attachment.html>
More information about the ofw
mailing list