[ofw] patch 1/2 Add support for RDMAoEth to the low level driver
Tzachi Dar
tzachid at mellanox.co.il
Wed Dec 2 08:21:35 PST 2009
Here are the changes in the low level driver:
Thanks
Tzachi
Index: hw/mlx4/kernel/bus/core/ud_header.c
===================================================================
--- hw/mlx4/kernel/bus/core/ud_header.c (revision 2617)
+++ hw/mlx4/kernel/bus/core/ud_header.c (working copy)
@@ -62,6 +62,15 @@
{ STRUCT_FIELD_INIT(lrh, source_lid, 1, 16, 16) }
};
+static const struct ib_field eth_table[] = {
+ { STRUCT_FIELD_INIT(eth, dmac_h, 0, 0, 32) },
+ { STRUCT_FIELD_INIT(eth, dmac_l, 1, 0, 16) },
+ { STRUCT_FIELD_INIT(eth, smac_h, 1, 16,16) },
+ { STRUCT_FIELD_INIT(eth, smac_l, 2, 0 ,32) },
+ { STRUCT_FIELD_INIT(eth, type, 3, 0, 16)}
+};
+
+
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD_INIT(grh, ip_version, 0, 0, 4) },
{ STRUCT_FIELD_INIT(grh, traffic_class, 0, 4, 8) },
@@ -279,3 +288,93 @@
return 0;
}
EXPORT_SYMBOL(ib_ud_header_unpack);
+
+/**
+ * ib_rdmaoe_ud_header_init - Initialize UD header structure
+ * @payload_bytes:Length of packet payload
+ * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @header:Structure to initialize
+ *
+ * ib_rdmaoe_ud_header_init() initializes the grh.ip_version,
+ * grh.payload_length, grh.next_header, bth.opcode, bth.pad_count and
+ * bth.transport_header_version fields of a &struct eth_ud_header given
+ * the payload length and whether a GRH will be included.
+ */
+void ib_rdmaoe_ud_header_init(int payload_bytes,
+ int grh_present,
+ struct eth_ud_header *header)
+{
+ int header_len;
+
+ memset(header, 0, sizeof *header);
+
+ header_len =
+ sizeof header->eth +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES;
+ if (grh_present)
+ header_len += IB_GRH_BYTES;
+
+ header->grh_present = grh_present;
+ if (grh_present) {
+ header->grh.ip_version = 6;
+ header->grh.payload_length =
+ cpu_to_be16((IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4 + /* ICRC */
+ 3) & ~3); /* round up */
+ header->grh.next_header = 0x1b;
+ }
+
+ if (header->immediate_present)
+ header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ else
+ header->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ header->bth.pad_count =(u8) ((4 - payload_bytes) & 3);
+ header->bth.transport_header_version = 0;
+}
+
+
+
+/**
+ * rdmaoe_ud_header_pack - Pack UD header struct into eth wire format
+ * @header:UD header struct
+ * @buf:Buffer to pack into
+ *
+ * rdmaoe_ud_header_pack() packs the UD header structure @header into wire
+ * format in the buffer @buf.
+ */
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+ void *buf)
+{
+ int len = 0;
+
+ ib_pack(eth_table, ARRAY_SIZE(eth_table),
+ &header->eth, buf);
+ len += IB_ETH_BYTES;
+
+ if (header->grh_present) {
+ ib_pack(grh_table, ARRAY_SIZE(grh_table),
+ &header->grh, (u8*)buf + len);
+ len += IB_GRH_BYTES;
+ }
+
+ ib_pack(bth_table, ARRAY_SIZE(bth_table),
+ &header->bth, (u8*)buf + len);
+ len += IB_BTH_BYTES;
+
+ ib_pack(deth_table, ARRAY_SIZE(deth_table),
+ &header->deth, (u8*)buf + len);
+ len += IB_DETH_BYTES;
+
+ if (header->immediate_present) {
+ memcpy((u8*)buf + len, &header->immediate_data,
+ sizeof header->immediate_data);
+ len += sizeof header->immediate_data;
+ }
+
+ return len;
+}
+
+
Index: hw/mlx4/kernel/bus/core/verbs.c
===================================================================
--- hw/mlx4/kernel/bus/core/verbs.c (revision 2617)
+++ hw/mlx4/kernel/bus/core/verbs.c (working copy)
@@ -336,3 +336,28 @@
}
EXPORT_SYMBOL(ib_destroy_ah);
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ case RDMA_NODE_IB_SWITCH:
+ case RDMA_NODE_IB_ROUTER:
+ return RDMA_TRANSPORT_IB;
+ case RDMA_NODE_RNIC:
+ return RDMA_TRANSPORT_IWARP;
+ default:
+ ASSERT(FALSE);
+ return 0;
+ }
+}
+
+enum rdma_transport_type rdma_port_get_transport(struct ib_device
*device,
+ u8 port_num)
+{
+ return device->get_port_transport ?
+ device->get_port_transport(device, port_num) :
+ rdma_node_get_transport(device->node_type);
+}
+EXPORT_SYMBOL(rdma_port_get_transport);
+
Index: hw/mlx4/kernel/bus/drv/drv.c
===================================================================
--- hw/mlx4/kernel/bus/drv/drv.c (revision 2617)
+++ hw/mlx4/kernel/bus/drv/drv.c (working copy)
@@ -95,7 +95,6 @@
#endif
-static
NTSTATUS
__create_child(
__in WDFDEVICE Device,
@@ -228,13 +227,21 @@
if ( p_fdo->children_created )
goto end;
-
+
// eventually we'll have all information about children in Registry
// DriverEntry will read it into a Global storage and
// this routine will create all the children on base on this info
number_of_ib_ports = mlx4_count_ib_ports(mdev);
ASSERT(number_of_ib_ports >=0 && number_of_ib_ports <=2);
+ // For now it's either IB or ETH, and we always create LLE if it's ETH
+ if((number_of_ib_ports > 0) && (mdev->caps.port_type[1] ==
MLX4_PORT_TYPE_IB) ) {
+ status = __create_child(Device, BUS_HARDWARE_IDS,
BUS_HARDWARE_DESCRIPTION, 0 );
+ if (!NT_SUCCESS(status)) {
+ MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child
(ib)failed with 0x%x\n", status));
+ }
+ }
+
for (i = 1; i <= mdev->caps.num_ports; i++) {
if (mlx4_is_enabled_port(mdev, i)) {
if(mlx4_is_eth_port(mdev, i)) {
@@ -869,6 +876,9 @@
goto err;
}
+ pdev->p_wdf_device = Device;
+ pdev->ib_hca_created = 0;
+
// start the card
status = __start_card(Device, p_fdo );
if( !NT_SUCCESS( status ) )
Index: hw/mlx4/kernel/bus/drv/stat.c
===================================================================
--- hw/mlx4/kernel/bus/drv/stat.c (revision 2617)
+++ hw/mlx4/kernel/bus/drv/stat.c (working copy)
@@ -113,7 +113,7 @@
void st_print_mlx_header( struct mlx4_dev *mdev, struct mlx4_ib_sqp
*sqp, struct mlx4_wqe_mlx_seg *mlx )
{
if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_UDH )
- __print_ud_header( mdev, &sqp->ud_header );
+ __print_ud_header( mdev, &sqp->hdr.ib );
if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_WQE )
__print_mlx( mdev, mlx );
}
Index: hw/mlx4/kernel/bus/ib/ah.c
===================================================================
--- hw/mlx4/kernel/bus/ib/ah.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/ah.c (working copy)
@@ -32,68 +32,199 @@
#include "mlx4_ib.h"
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr
*ah_attr)
+static inline int rdma_link_local_addr(struct in6_addr *addr)
{
+ if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) &&
+ addr->s6_addr32[1] == 0)
+ return 1;
+ else
+ return 0;
+}
+
+inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+ memcpy(mac, &addr->s6_addr[8], 3);
+ memcpy(mac + 3, &addr->s6_addr[13], 3);
+ mac[0] ^= 2;
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+ return addr->s6_addr[0] == 0xff ? 1 : 0;
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+ int i;
+
+ mac[0] = 0x33;
+ mac[1] = 0x33;
+ for (i = 2; i < 6; ++i)
+ mac[i] = addr->s6_addr[i + 10];
+
+}
+
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah_attr,
+ u8 *mac, int *is_mcast)
+{
+ int err = 0;
+ struct sockaddr_in6 dst;
+
+ UNREFERENCED_PARAMETER(dev);
+
+ *is_mcast = 0;
+ memcpy(dst.sin6_addr.s6_addr, ah_attr->grh.dgid.raw,
sizeof(ah_attr->grh.dgid.raw));
+
+ if (rdma_link_local_addr(&dst.sin6_addr))
+ rdma_get_ll_mac(&dst.sin6_addr, mac);
+ else if (rdma_is_multicast_addr(&dst.sin6_addr)) {
+ rdma_get_mcast_mac(&dst.sin6_addr, mac);
+ *is_mcast = 1;
+ } else {
+ err = -EINVAL; //jyang:todo
+ ASSERT(FALSE);
+ }
+ return err;
+}
+
+static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr
*ah_attr,
+ struct mlx4_ib_ah *ah)
+{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
- struct mlx4_ib_ah *ah;
if (mlx4_is_barred(pd->device->dma_device))
return ERR_PTR(-EFAULT);
- ah = kmalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
- memset(&ah->av, 0, sizeof ah->av);
-
- ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num <<
24));
- ah->av.g_slid = ah_attr->src_path_bits;
- ah->av.dlid = cpu_to_be16(ah_attr->dlid);
- if (ah_attr->static_rate) {
- ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
- while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
- !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
- --ah->av.stat_rate;
- }
- ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num
<< 24));
+ ah->av.ib.g_slid = ah_attr->src_path_bits;
if (ah_attr->ah_flags & IB_AH_GRH) {
- ah->av.g_slid |= 0x80;
- ah->av.gid_index = ah_attr->grh.sgid_index;
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.sl_tclass_flowlabel |=
+ ah->av.ib.g_slid |= 0x80;
+ ah->av.ib.gid_index = ah_attr->grh.sgid_index;
+ ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.ib.sl_tclass_flowlabel |=
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
ah_attr->grh.flow_label);
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+ memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
}
+ ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
+ if (ah_attr->static_rate) {
+ ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET
&&
+ !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+ --ah->av.ib.stat_rate;
+ }
+ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
return &ah->ibah;
}
+struct ib_ah *create_rdmaoe_ah(struct ib_pd *pd, struct ib_ah_attr
*ah_attr,
+ struct mlx4_ib_ah *ah)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+ struct mlx4_dev *dev = ibdev->dev;
+ u8 mac[6];
+ int err;
+ int is_mcast;
+
+ if (mlx4_is_barred(pd->device->dma_device))
+ return ERR_PTR(-EFAULT);
+
+ err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast);
+ if (err)
+ return ERR_PTR(err);
+
+ memcpy(ah->av.eth.mac_0_1, mac, 2);
+ memcpy(ah->av.eth.mac_2_5, mac + 2, 4);
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num
<< 24));
+ ah->av.ib.g_slid = 0x80;
+ if (ah_attr->static_rate) {
+ ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET
&&
+ !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+ --ah->av.ib.stat_rate;
+ }
+
+ /*
+ * HW requires multicast LID so we just choose one.
+ */
+ if (is_mcast)
+ ah->av.ib.dlid = cpu_to_be16(0xc000);
+
+ memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
+ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
+ return &ah->ibah;
+}
+
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr
*ah_attr)
+{
+ struct mlx4_ib_ah *ah;
+ enum rdma_transport_type transport;
+
+ struct ib_ah *ret;
+
+ ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ transport = rdma_port_get_transport(pd->device, ah_attr->port_num);
+ if (transport == RDMA_TRANSPORT_RDMAOE) {
+ if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+ ret = ERR_PTR(-EINVAL);
+ goto out;
+ } else {
+ /* TBD: need to handle the case when we get called
+ in an atomic context and there we might sleep. We
+ don't expect this currently since we're working with
+ link local addresses which we can translate without
+ going to sleep */
+ ret = create_rdmaoe_ah(pd, ah_attr, ah);
+ if (IS_ERR(ret))
+ goto out;
+ else
+ return ret;
+ }
+ } else
+ return create_ib_ah(pd, ah_attr, ah); /* never fails */
+
+out:
+ kfree(ah);
+ return ret;
+}
+
+
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
+ enum rdma_transport_type transport;
+ transport = rdma_port_get_transport(ibah->device, ah_attr->port_num);
+
if (mlx4_is_barred(ibah->device->dma_device))
return -EFAULT;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(ah->av.dlid);
- ah_attr->sl = (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >>
28);
- ah_attr->port_num = (u8)(be32_to_cpu(ah->av.port_pd) >> 24);
- if (ah->av.stat_rate)
- ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
- ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+ ah_attr->dlid = transport == RDMA_TRANSPORT_IB ?
be16_to_cpu(ah->av.ib.dlid) : 0;
+ ah_attr->sl = (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel)
>> 28);
+ ah_attr->port_num = (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24);
+ if (ah->av.ib.stat_rate)
+ ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
+ ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
if (mlx4_ib_ah_grh_present(ah)) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.traffic_class =
- (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20);
+ (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20);
ah_attr->grh.flow_label =
- be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.sgid_index = ah->av.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
+ ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
+ ah_attr->grh.sgid_index = ah->av.ib.gid_index;
+ memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
}
return 0;
@@ -108,7 +239,7 @@
// Leo: temporary
int mlx4_ib_modify_ah( struct ib_ah *ibah, struct ib_ah_attr *ah_attr )
{
- struct mlx4_av *av = &to_mah(ibah)->av;
+ struct mlx4_av *av = &to_mah(ibah)->av.ib;
struct mlx4_dev *dev = to_mdev(ibah->pd->device)->dev;
if (mlx4_is_barred(dev))
Index: hw/mlx4/kernel/bus/ib/main.c
===================================================================
--- hw/mlx4/kernel/bus/ib/main.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/main.c (working copy)
@@ -133,31 +133,21 @@
return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+
+static enum rdma_transport_type
+mlx4_ib_port_get_transport(struct ib_device *device, u8 port_num)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct mlx4_dev *dev = to_mdev(device)->dev;
- if (mlx4_is_barred(ibdev->dma_device))
- return -EFAULT;
-
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad)
- goto out;
+ return dev->caps.port_mask & (1 << (port_num - 1)) ?
+ RDMA_TRANSPORT_IB : RDMA_TRANSPORT_RDMAOE;
+}
- memset(props, 0, sizeof *props);
- init_query_mad(in_mad);
- in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
- in_mad->attr_mod = cpu_to_be32(port);
-
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad,
out_mad);
- if (err)
- goto out;
-
+static void ib_link_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props,
+ struct ib_smp *out_mad)
+{
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -177,7 +167,64 @@
props->subnet_timeout = out_mad->data[51] & 0x1f;
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
+ props->transport= RDMA_TRANSPORT_IB;
+}
+static void eth_link_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props,
+ struct ib_smp *out_mad)
+{
+
+ props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
+ props->pkey_tbl_len =
(u16)to_mdev(ibdev)->dev->caps.pkey_table_len[port];
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ //props->active_mtu = rdmaoe->mtu[port - 1];
+ props->active_mtu = 1500; //jyang:hardcoded
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
+ props->transport= RDMA_TRANSPORT_RDMAOE;
+
+ //props->state = netif_running(ndev) && netif_oper_up(ndev) ?
+ // IB_PORT_ACTIVE : IB_PORT_DOWN;
+ props->state = IB_PORT_ACTIVE; //jyang: just hardcoded it now
+ props->phys_state = props->state;
+}
+
+
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(props, 0, sizeof *props);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad,
out_mad);
+ if (err)
+ goto out;
+
+ mlx4_ib_port_get_transport(ibdev, port) == RDMA_TRANSPORT_IB ?
+ ib_link_query_port(ibdev, port, props, out_mad) :
+ eth_link_query_port(ibdev, port, props, out_mad);
+
out:
kfree(in_mad);
kfree(out_mad);
@@ -522,6 +569,7 @@
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
ibdev->ib_dev.query_device = mlx4_ib_query_device;
ibdev->ib_dev.query_port = mlx4_ib_query_port;
+ ibdev->ib_dev.get_port_transport = mlx4_ib_port_get_transport;
ibdev->ib_dev.query_gid_chunk = mlx4_ib_query_gid_chunk;
ibdev->ib_dev.query_pkey_chunk = mlx4_ib_query_pkey_chunk;
ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
Index: hw/mlx4/kernel/bus/ib/mlx4_ib.h
===================================================================
--- hw/mlx4/kernel/bus/ib/mlx4_ib.h (revision 2617)
+++ hw/mlx4/kernel/bus/ib/mlx4_ib.h (working copy)
@@ -165,14 +165,15 @@
struct mlx4_ib_ah {
struct ib_ah ibah;
- struct mlx4_av av;
+ union mlx4_ext_av av;
};
+
enum {
/*
* Largest possible UD header: send with GRH and immediate data.
*/
- MLX4_IB_UD_HEADER_SIZE = 72
+ MLX4_IB_UD_HEADER_SIZE = 76
};
struct mlx4_ib_sqp {
@@ -180,7 +181,10 @@
int pkey_index;
u32 qkey;
u32 send_psn;
- struct ib_ud_header ud_header;
+ union {
+ struct ib_ud_header ib;
+ struct eth_ud_header eth;
+ } hdr;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
};
@@ -340,9 +344,14 @@
int __init mlx4_ib_init(void);
void __exit mlx4_ib_cleanup(void);
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah_attr,
+ u8 *mac, int *is_mcast);
+
+
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
- return !!(ah->av.g_slid & 0x80);
+ return !!(ah->av.ib.g_slid & 0x80);
+
}
#endif /* MLX4_IB_H */
Index: hw/mlx4/kernel/bus/ib/qp.c
===================================================================
--- hw/mlx4/kernel/bus/ib/qp.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/qp.c (working copy)
@@ -46,10 +46,16 @@
enum {
MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
- MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
+ MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
+ MLX4_IB_LINK_TYPE_IB = 0,
+ MLX4_IB_LINK_TYPE_ETH = 1
};
enum {
+ MLX4_RDMAOE_ETHERTYPE = 0x8915
+};
+
+enum {
MLX4_IB_MIN_SQ_STRIDE = 6
};
@@ -65,6 +71,8 @@
__constant_cpu_to_be32(MLX4_OPCODE_NOP) /* [IB_WR_NOP] */
};
+extern inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac);
+
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_sqp, qp);
@@ -724,6 +732,12 @@
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah,
struct mlx4_qp_path *path, u8 port)
{
+ int err;
+ int is_eth = rdma_port_get_transport(&dev->ib_dev, port) ==
+ RDMA_TRANSPORT_RDMAOE ? 1 : 0;
+ u8 mac[6];
+ int is_mcast;
+
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
if (ah->static_rate) {
@@ -754,7 +768,21 @@
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
- return 0;
+ if (is_eth) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -1;
+
+ err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast);
+ if (err)
+ return err;
+
+ memcpy(path->dmac, mac, 6);
+ path->ackto = MLX4_IB_LINK_TYPE_ETH;
+ /* use index 0 into MAC table for RDMAoE */
+ path->grh_mylmc &= 0x80;
+ }
+
+ return 0;
}
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
@@ -1146,79 +1174,132 @@
return opcode;
}
+
+
+
static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,
- void *wqe)
+ void *wqe, unsigned *mlx_seg_len)
{
enum ib_wr_opcode opcode = to_wr_opcode(wr);
struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);
struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);
- __be16 pkey;
+ u16 pkey;
int send_size;
int header_size;
int spc;
- u32 i;
+ u16 i;
+ struct ib_ud_header *ib = NULL;
+ struct eth_ud_header *eth = NULL;
+ struct ib_unpacked_grh *grh;
+ struct ib_unpacked_bth *bth;
+ struct ib_unpacked_deth *deth;
+ u8 *tmp;
+ u8 mac[6];
send_size = 0;
for (i = 0; i < wr->num_ds; ++i)
send_size += wr->ds_array[i].length;
- ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah),
&sqp->ud_header);
+ if (rdma_port_get_transport(sqp->qp.ibqp.device, sqp->qp.port) ==
RDMA_TRANSPORT_IB) {
- sqp->ud_header.lrh.service_level =
- (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);
- sqp->ud_header.lrh.destination_lid = ah->av.dlid;
- sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid &
0x7f);
+ ib = &sqp->hdr.ib;
+ grh = &ib->grh;
+ bth = &ib->bth;
+ deth = &ib->deth;
+ ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), ib);
+ ib->lrh.service_level =
+ (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28);
+ ib->lrh.destination_lid = ah->av.ib.dlid;
+ ib->lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ } else {
+ eth = &sqp->hdr.eth;
+ grh = ð->grh;
+ bth = ð->bth;
+ deth = ð->deth;
+ ib_rdmaoe_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), eth);
+ }
+
+
if (mlx4_ib_ah_grh_present(ah)) {
- sqp->ud_header.grh.traffic_class =
- (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);
- sqp->ud_header.grh.flow_label =
- ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
- sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
- ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),
- ah->av.gid_index, &sqp->ud_header.grh.source_gid);
- memcpy(sqp->ud_header.grh.destination_gid.raw,
- ah->av.dgid, 16);
+ grh->traffic_class =
+ (u8)((be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff);
+ grh->flow_label =
+ ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ grh->hop_limit = ah->av.ib.hop_limit;
+ ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24),
+ ah->av.ib.gid_index, &grh->source_gid);
+ memcpy(grh->destination_gid.raw,
+ ah->av.ib.dgid, 16);
}
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 :
0) |
- (sqp->ud_header.lrh.destination_lid ==
- XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
- (sqp->ud_header.lrh.service_level << 8));
- mlx->rlid = sqp->ud_header.lrh.destination_lid;
+ if (ib) {
+ mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 :
0) |
+ (ib->lrh.destination_lid ==
+ IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
+ (ib->lrh.service_level << 8));
+ mlx->rlid = ib->lrh.destination_lid;
+
+ }
+
switch (opcode) {
case IB_WR_SEND:
- sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
- sqp->ud_header.immediate_present = 0;
+ bth->opcode = IB_OPCODE_UD_SEND_ONLY;
+ if (ib)
+ ib->immediate_present = 0;
+ else
+ eth->immediate_present = 0;
break;
case IB_WR_SEND_WITH_IMM:
- sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
- sqp->ud_header.immediate_present = 1;
- sqp->ud_header.immediate_data = wr->immediate_data;
+ bth->opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ if (ib) {
+ ib->immediate_present = 1;
+ ib->immediate_data = wr->immediate_data;
+ } else {
+ eth->immediate_present = 1;
+ eth->immediate_data = wr->immediate_data;
+ }
break;
default:
return -EINVAL;
}
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
- if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
- sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
- sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt &
IB_SEND_OPT_SOLICITED));
+ if (ib) {
+ ib->lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ if (ib->lrh.destination_lid == IB_LID_PERMISSIVE)
+ ib->lrh.source_lid = IB_LID_PERMISSIVE;
+ } else {
+ memcpy(eth->eth.dmac_h, ah->av.eth.mac_0_1, 2);
+ memcpy(eth->eth.dmac_h + 2, ah->av.eth.mac_2_5, 2);
+ memcpy(eth->eth.dmac_l, ah->av.eth.mac_2_5 + 2, 2);
+ rdma_get_ll_mac((struct in6_addr *)&grh->source_gid, mac);
+
+ tmp = mac;
+ memcpy(eth->eth.smac_h, tmp, 2);
+ memcpy(eth->eth.smac_l, tmp + 2, 4);
+ eth->eth.type = cpu_to_be16(MLX4_RDMAOE_ETHERTYPE);
+ }
+
+ bth->solicited_event = (u8)(!!(wr->send_opt & IB_SEND_SOLICITED));
+
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
else
ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index,
&pkey);
- sqp->ud_header.bth.pkey = pkey;
- sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;
- sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) -
1));
- sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?
- cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ bth->pkey = pkey;
+ bth->destination_qpn = wr->dgrm.ud.remote_qp;
+ bth->psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ deth->qkey = wr->dgrm.ud.remote_qkey & 0x80000000 ?
+ cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
+ deth->source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
- header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
+ if (ib)
+ header_size = ib_ud_header_pack(ib, sqp->header_buf);
+ else
+ header_size = rdmaoe_ud_header_pack(eth, sqp->header_buf);
#if 0
{
@@ -1271,7 +1352,10 @@
i = 2;
}
- return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size,
16);
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+ return 0;
+
}
static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct
ib_cq *ib_cq)
@@ -1314,9 +1398,13 @@
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
ib_send_wr_t *wr)
{
+
memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof
(struct mlx4_av));
dseg->dqpn = wr->dgrm.ud.remote_qp;
dseg->qkey = wr->dgrm.ud.remote_qkey;
+ dseg->vlan = to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av.eth.vlan;
+ memcpy(dseg->mac_0_1, to_mah((struct ib_ah
*)wr->dgrm.ud.h_av)->av.eth.mac_0_1, 6);
+
}
static void set_mlx_icrc_seg(void *dseg)
@@ -1398,7 +1486,7 @@
int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,
ib_send_wr_t **bad_wr)
{
- enum ib_wr_opcode opcode;
+ enum ib_wr_opcode opcode;// = to_wr_opcode(wr);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_dev *dev = to_mdev(ibqp->device)->dev;
u8 *wqe /*, *wqe_start*/;
@@ -1525,16 +1613,14 @@
case IB_QPT_SMI:
case IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), wr, ctrl);
+ err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
if (err < 0) {
if (bad_wr)
*bad_wr = wr;
goto out;
}
-
- wqe += err;
- size += err / 16;
-
+ wqe += seglen;
+ size += seglen / 16;
err = 0;
break;
Index: hw/mlx4/kernel/bus/inc/cmd.h
===================================================================
--- hw/mlx4/kernel/bus/inc/cmd.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/cmd.h (working copy)
@@ -138,6 +138,7 @@
MLX4_SET_PORT_MAC_TABLE = 0x2,
MLX4_SET_PORT_VLAN_TABLE = 0x3,
MLX4_SET_PORT_PRIO_MAP = 0x4,
+ MLX4_SET_PORT_GID_TABLE = 0x5,
};
struct mlx4_dev;
Index: hw/mlx4/kernel/bus/inc/device.h
===================================================================
--- hw/mlx4/kernel/bus/inc/device.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/device.h (working copy)
@@ -208,8 +208,9 @@
int log_num_prios;
int num_fc_exch;
enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
- enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
- int reserved_fexch_mpts_base;
+ u32 port_mask;
+ enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
+ int reserved_fexch_mpts_base;
int total_reserved_qps;
};
@@ -343,6 +344,28 @@
u8 dgid[16];
};
+struct mlx4_eth_av {
+ __be32 port_pd;
+ u8 reserved1;
+ u8 smac_idx;
+ u16 reserved2;
+ u8 reserved3;
+ u8 gid_index;
+ u8 stat_rate;
+ u8 hop_limit;
+ __be32 sl_tclass_flowlabel;
+ u8 dgid[16];
+ u32 reserved4[2];
+ __be16 vlan;
+ u8 mac_0_1[2];
+ u8 mac_2_5[4];
+};
+
+union mlx4_ext_av {
+ struct mlx4_av ib;
+ struct mlx4_eth_av eth;
+};
+
#define MLX4_DEV_SIGNATURE 0xf1b34a6e
struct mlx4_dev_params {
Index: hw/mlx4/kernel/bus/inc/ib_pack.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_pack.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/ib_pack.h (working copy)
@@ -39,6 +39,7 @@
enum {
IB_LRH_BYTES = 8,
+ IB_ETH_BYTES = 14,
IB_GRH_BYTES = 40,
IB_BTH_BYTES = 12,
IB_DETH_BYTES = 8
@@ -212,6 +213,15 @@
__be32 source_qpn;
};
+struct ib_unpacked_eth {
+ u8 dmac_h[4];
+ u8 dmac_l[2];
+ u8 smac_h[2];
+ u8 smac_l[4];
+ __be16 type;
+};
+
+
struct ib_ud_header {
struct ib_unpacked_lrh lrh;
int grh_present;
@@ -222,6 +232,19 @@
__be32 immediate_data;
};
+
+
+struct eth_ud_header {
+ struct ib_unpacked_eth eth;
+ int grh_present;
+ struct ib_unpacked_grh grh;
+ struct ib_unpacked_bth bth;
+ struct ib_unpacked_deth deth;
+ int immediate_present;
+ __be32 immediate_data;
+};
+
+
void ib_pack(const struct ib_field *desc,
int desc_len,
void *structure,
@@ -236,10 +259,18 @@
int grh_present,
struct ib_ud_header *header);
+void ib_rdmaoe_ud_header_init(int payload_bytes,
+ int grh_present,
+ struct eth_ud_header *header);
+
int ib_ud_header_pack(struct ib_ud_header *header,
void *buf);
int ib_ud_header_unpack(void *buf,
struct ib_ud_header *header);
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+ void *buf);
+
+
#endif /* IB_PACK_H */
Index: hw/mlx4/kernel/bus/inc/ib_verbs.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_verbs.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/ib_verbs.h (working copy)
@@ -53,6 +53,34 @@
#include "ib_verbs_ex.h"
+/*
+ * IPv6 address structure
+ */
+
+struct in6_addr
+{
+ union
+ {
+ __u8 u6_addr8[16];
+ __be16 u6_addr16[8];
+ __be32 u6_addr32[4];
+ } in6_u;
+#define s6_addr in6_u.u6_addr8
+#define s6_addr16 in6_u.u6_addr16
+#define s6_addr32 in6_u.u6_addr32
+};
+
+
+struct sockaddr_in6 {
+ unsigned short int sin6_family; /* AF_INET6 */
+ __be16 sin6_port; /* Transport layer port # */
+ __be32 sin6_flowinfo; /* IPv6 flow information */
+ struct in6_addr sin6_addr; /* IPv6 address */
+ __u32 sin6_scope_id; /* scope id (new in RFC2553) */
+};
+
+#define AF_INET6 10 /* IP version 6 */
+
enum rdma_node_type {
/* IB values map to NodeInfo:NodeType. */
RDMA_NODE_IB_CA = 1,
@@ -63,7 +91,8 @@
enum rdma_transport_type {
RDMA_TRANSPORT_IB,
- RDMA_TRANSPORT_IWARP
+ RDMA_TRANSPORT_IWARP,
+ RDMA_TRANSPORT_RDMAOE
};
enum rdma_transport_type
@@ -231,6 +260,7 @@
u8 active_width;
u8 active_speed;
u8 phys_state;
+ enum rdma_transport_type transport;
};
enum ib_device_modify_flags {
@@ -633,6 +663,10 @@
IB_WR_ATOMIC_CMP_AND_SWP,
IB_WR_ATOMIC_FETCH_AND_ADD,
IB_WR_LSO,
+ IB_WR_SEND_WITH_INV,
+ IB_WR_RDMA_READ_WITH_INV,
+ IB_WR_LOCAL_INV,
+ IB_WR_FAST_REG_MR,
IB_WR_NOP
};
@@ -920,6 +954,9 @@
int (*query_port)(struct ib_device *device,
u8 port_num,
struct ib_port_attr *port_attr);
+ enum rdma_transport_type (*get_port_transport)(struct ib_device
*device,
+ u8 port_num);
+
int (*query_gid_chunk)(struct ib_device *device,
u8 port_num, int index,
union ib_gid gid[8], int size);
@@ -1127,6 +1164,11 @@
int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr);
+enum rdma_transport_type rdma_port_get_transport(struct ib_device
*device,
+ u8 port_num);
+int rdma_is_transport_supported(struct ib_device *device,
+ enum rdma_transport_type transport);
+
int ib_query_gid_chunk(struct ib_device *device,
u8 port_num, int index, union ib_gid gid[8], int size);
Index: hw/mlx4/kernel/bus/inc/qp.h
===================================================================
--- hw/mlx4/kernel/bus/inc/qp.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/qp.h (working copy)
@@ -113,7 +113,9 @@
u8 snooper_flags;
u8 reserved3[2];
u8 counter_index;
- u8 reserved4[7];
+ u8 reserved4;
+ u8 dmac[6];
+
};
struct mlx4_qp_context {
@@ -213,7 +215,9 @@
__be32 av[8];
__be32 dqpn;
__be32 qkey;
- __be32 reservd[2];
+ __be16 vlan;
+ u8 mac_0_1[2];
+ u8 mac_2_5[4];
};
#pragma warning( disable : 4200)
Index: hw/mlx4/kernel/bus/net/main.c
===================================================================
--- hw/mlx4/kernel/bus/net/main.c (revision 2617)
+++ hw/mlx4/kernel/bus/net/main.c (working copy)
@@ -139,7 +139,9 @@
int count = 0;
for (i = 0; i < dev->caps.num_ports; i++) {
- if (dev->caps.port_type[i+1] == MLX4_PORT_TYPE_IB) {
+ if ((dev->caps.port_type[i+1] == MLX4_PORT_TYPE_IB) ||
+ (dev->caps.port_type[i+1] == MLX4_PORT_TYPE_ETH))
+ {
count++;
}
}
@@ -170,6 +172,16 @@
return FALSE;
}
+/*
+ * mlx4_set_port_mask - recompute dev->caps.port_mask from port types.
+ *
+ * Sets bit (i - 1) for every port i whose type is MLX4_PORT_TYPE_IB;
+ * Ethernet (RDMAoE) ports are left clear, so the resulting mask
+ * identifies the InfiniBand-only ports.  Ports are numbered from 1.
+ */
+static void mlx4_set_port_mask(struct mlx4_dev *dev)
+{
+ int i;
+
+ dev->caps.port_mask = 0;
+ for (i = 1; i <= dev->caps.num_ports; ++i)
+ if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB)
+ dev->caps.port_mask |= 1 << (i - 1);
+}
+
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap
*dev_cap)
{
int err;
@@ -309,6 +321,8 @@
++num_eth_ports;
}
+ mlx4_set_port_mask(dev);
+
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
Index: hw/mlx4/kernel/bus/net/port.c
===================================================================
--- hw/mlx4/kernel/bus/net/port.c (revision 2617)
+++ hw/mlx4/kernel/bus/net/port.c (working copy)
@@ -33,7 +33,9 @@
#include "mlx4.h"
#include "cmd.h"
+#include "public.h"
+extern NTSTATUS __create_child();
void mlx4_init_mac_table(struct mlx4_dev *dev, u8 port)
{
@@ -60,6 +62,10 @@
table->refs[i] = 0;
}
table->max = 1 << dev->caps.log_num_vlans;
+ if(table->max > MLX4_MAX_VLAN_NUM)
+ {
+ table->max = MLX4_MAX_VLAN_NUM;
+ }
table->total = 0;
}
@@ -84,6 +90,52 @@
return err;
}
+/*
+ * mlx4_addrconf_ifid_eui48_win - build a modified EUI-64 interface
+ * identifier from a 48-bit MAC address (RFC 4291, Appendix A).
+ *
+ * @eui: output buffer, 8 bytes (bytes 8..15 of a GID).
+ * @mac: MAC address; the caller passes it through cpu_to_be64(), so
+ *       the six significant bytes sit at offsets 2..7 of the u64.
+ *
+ * Layout produced: mac[0..2] | 0xFF 0xFE | mac[3..5], then the
+ * universal/local bit of the first byte is inverted per the RFC.
+ */
+static void mlx4_addrconf_ifid_eui48_win(u8 *eui, u64 mac)
+{
+ u8 *p = (u8*)&mac+2; //mac 6 bytes
+ memcpy(eui, p, 3);
+ memcpy(eui + 5, p + 3, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+}
+
+
+/*
+ * update_ipv6_gids_win - rewrite a port's GID table via SET_PORT.
+ *
+ * Builds a zeroed 128-entry GID table; unless @clear is non-zero,
+ * entry 0 is filled with the link-local prefix fe80::/64 and an
+ * interface id derived from @mac, then the whole table is pushed to
+ * firmware with MLX4_CMD_SET_PORT (modifier MLX4_SET_PORT_GID_TABLE).
+ * Returns 0 on success or a negative errno.
+ *
+ * NOTE(review): 128 is an assumed GID-table size -- confirm against
+ * the device capabilities.  GFP_ATOMIC suggests this is meant to be
+ * callable from a non-blocking context -- verify callers.
+ * NOTE(review): the signature below and the mlx4_cmd() call further
+ * down appear line-wrapped by the mail client; this hunk will not
+ * apply as-is and must be re-joined before use.
+ */
+static int update_ipv6_gids_win(struct mlx4_dev *dev, int port, int
clear, u64 mac)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids, *tmpgids;
+ int err;
+
+ tmpgids = kzalloc(128 * sizeof *gids, GFP_ATOMIC);
+ if (!tmpgids)
+ return -ENOMEM;
+
+ if (!clear) {
+ mlx4_addrconf_ifid_eui48_win(&tmpgids[0].raw[8], cpu_to_be64(mac));
+ tmpgids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
+ goto out;
+ }
+
+ /* Stage the table in the DMA-able mailbox buffer for the command. */
+ gids = mailbox->buf;
+ memcpy(gids, tmpgids, 128 * sizeof *gids);
+
+ err = mlx4_cmd(dev, mailbox->dma.da, MLX4_SET_PORT_GID_TABLE << 8 |
port,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+out:
+ kfree(tmpgids);
+ return err;
+}
+
+
int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int
*index)
{
struct mlx4_mac_table *table =
@@ -112,7 +164,7 @@
}
mlx4_dbg(dev, "Free mac index is %d\n", free);
- if (table->total == table->max) {
+ if (table->total == table->max || free < 0) {
/* No free mac entries */
err = -ENOSPC;
goto out;
@@ -132,6 +184,20 @@
*index = free;
++table->total;
+
+ //update port guid with mac address
+ update_ipv6_gids_win(dev, port, 0, mac);
+
+ if(!InterlockedExchange(&dev->pdev->ib_hca_created, 1))
+ {
+ NTSTATUS status = STATUS_SUCCESS;
+ status = __create_child(dev->pdev->p_wdf_device, BUS_HARDWARE_IDS,
BUS_HARDWARE_DESCRIPTION, 0 );
+ if (!NT_SUCCESS(status)) {
+ mlx4_err(dev, "__create_child (ib)failed with 0x%x\n", status);
+ dev->pdev->ib_hca_created = FALSE;
+ }
+ }
+
out:
up(&table->mac_sem);
return err;
@@ -207,7 +273,7 @@
}
}
- if (table->total == table->max) {
+ if (table->total == table->max || free < 0) {
/* No free vlan entries */
err = -ENOSPC;
goto out;
Index: hw/mlx4/kernel/bus/net/SOURCES
===================================================================
--- hw/mlx4/kernel/bus/net/SOURCES (revision 2617)
+++ hw/mlx4/kernel/bus/net/SOURCES (working copy)
@@ -31,7 +31,7 @@
srq.c \
port.c \
-INCLUDES=..;..\inc;..\..\inc;..\core\$O;..\..\..\..\..\inc;..\..\..\..\
..\inc\kernel;
+INCLUDES=..;..\inc;..\..\inc;..\..\..\inc;..\core\$O;..\..\..\..\..\inc
;..\..\..\..\..\inc\kernel;
C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS
-D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS
#-DFORCE_LIVEFISH
Index: hw/mlx4/kernel/hca/av.c
===================================================================
--- hw/mlx4/kernel/hca/av.c (revision 2617)
+++ hw/mlx4/kernel/hca/av.c (working copy)
@@ -74,6 +74,7 @@
p_ib_ah = p_ib_pd->device->create_ah(p_ib_pd, &ah_attr);
if (IS_ERR(p_ib_ah)) {
err = PTR_ERR(p_ib_ah);
+ status = errno_to_iberr(err);
HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed (%d)\n",
err));
goto err_create_ah;
}
Index: hw/mlx4/kernel/hca/data.c
===================================================================
--- hw/mlx4/kernel/hca/data.c (revision 2617)
+++ hw/mlx4/kernel/hca/data.c (working copy)
@@ -339,6 +339,7 @@
ibal_port_p->max_vls = mthca_port_p->max_vl_num;
ibal_port_p->sm_lid = cl_ntoh16(mthca_port_p->sm_lid);
ibal_port_p->sm_sl = mthca_port_p->sm_sl;
+ ibal_port_p->transport = mthca_port_p->transport;
ibal_port_p->link_state = (mthca_port_p->state != 0) ?
(uint8_t)mthca_port_p->state : IB_LINK_DOWN;
ibal_port_p->num_gids = (uint16_t)mthca_port_p->gid_tbl_len;
ibal_port_p->num_pkeys = mthca_port_p->pkey_tbl_len;
Index: hw/mlx4/kernel/inc/l2w.h
===================================================================
--- hw/mlx4/kernel/inc/l2w.h (revision 2617)
+++ hw/mlx4/kernel/inc/l2w.h (working copy)
@@ -185,6 +185,8 @@
DMA_ADAPTER * p_dma_adapter; /* HCA adapter object */
DEVICE_OBJECT * p_self_do; /* mlx4_bus's FDO */
DEVICE_OBJECT * pdo; /* mlx4_bus's PDO */
+ PVOID p_wdf_device; /* wdf_device */
+ LONG ib_hca_created;
// mlx4_ib: various objects and info
struct ib_device * ib_dev;
// mlx4_net: various objects and info
Index: inc/iba/ib_types.h
===================================================================
--- inc/iba/ib_types.h (revision 2617)
+++ inc/iba/ib_types.h (working copy)
@@ -9419,6 +9419,8 @@
TO_LONG_PTR(ib_gid_t*, p_gid_table);
TO_LONG_PTR(ib_net16_t*,p_pkey_table);
+ enum rdma_transport_type transport;
+
} ib_port_attr_t;
/*
* SEE ALSO
Index: ulp/opensm/user/include/iba/ib_types.h
===================================================================
--- ulp/opensm/user/include/iba/ib_types.h (revision 2617)
+++ ulp/opensm/user/include/iba/ib_types.h (working copy)
@@ -8676,6 +8676,7 @@
ib_gid_t *p_gid_table;
ib_net16_t *p_pkey_table;
+ enum rdma_transport_type transport;
} ib_port_attr_t;
/*
* SEE ALSO
Index: ulp/opensm/user/include/iba/ib_types_extended.h
===================================================================
--- ulp/opensm/user/include/iba/ib_types_extended.h (revision 2617)
+++ ulp/opensm/user/include/iba/ib_types_extended.h (working copy)
@@ -586,6 +586,7 @@
TO_LONG_PTR(ib_gid_t*, p_gid_table);
TO_LONG_PTR(ib_net16_t*,p_pkey_table);
+ enum rdma_transport_type transport;
} ib_port_attr_t;
/*
* SEE ALSO
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20091202/a3bddac3/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lle_mlx4_ofed.patch
Type: application/octet-stream
Size: 42086 bytes
Desc: lle_mlx4_ofed.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20091202/a3bddac3/attachment.obj>
More information about the ofw
mailing list