[ofw] patch 1/2 Add support for RDMAoEth to the low level driver

Tzachi Dar tzachid at mellanox.co.il
Wed Dec 2 08:21:35 PST 2009


Here are the changes in the low level driver:
 
Thanks
Tzachi
 
Index: hw/mlx4/kernel/bus/core/ud_header.c
===================================================================
--- hw/mlx4/kernel/bus/core/ud_header.c (revision 2617)
+++ hw/mlx4/kernel/bus/core/ud_header.c (working copy)
@@ -62,6 +62,15 @@
  { STRUCT_FIELD_INIT(lrh, source_lid, 1, 16, 16) }
 };
 
+static const struct ib_field eth_table[]  = {
+ { STRUCT_FIELD_INIT(eth, dmac_h, 0, 0, 32) },
+ { STRUCT_FIELD_INIT(eth, dmac_l, 1, 0, 16) },
+ { STRUCT_FIELD_INIT(eth, smac_h, 1, 16, 16) },
+ { STRUCT_FIELD_INIT(eth, smac_l, 2, 0, 32) },
+ { STRUCT_FIELD_INIT(eth, type, 3, 0, 16) }
+};
+
+
 static const struct ib_field grh_table[]  = {
  { STRUCT_FIELD_INIT(grh, ip_version, 0, 0, 4) },
  { STRUCT_FIELD_INIT(grh, traffic_class, 0, 4, 8) },
@@ -279,3 +288,93 @@
  return 0;
 }
 EXPORT_SYMBOL(ib_ud_header_unpack);
+
+/**
+ * ib_rdmaoe_ud_header_init - Initialize UD header structure
+ * @payload_bytes:Length of packet payload
+ * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @header:Structure to initialize
+ *
+ * ib_rdmaoe_ud_header_init() initializes the grh.ip_version, grh.payload_length,
+ * grh.next_header, bth.opcode, bth.pad_count and
+ * bth.transport_header_version fields of a &struct eth_ud_header given
+ * the payload length and whether a GRH will be included.
+ */
+void ib_rdmaoe_ud_header_init(int           payload_bytes,
+      int          grh_present,
+      struct eth_ud_header    *header)
+{
+ int header_len;
+
+ memset(header, 0, sizeof *header);
+
+ header_len =
+  sizeof header->eth  +
+  IB_BTH_BYTES  +
+  IB_DETH_BYTES;
+ if (grh_present)
+  header_len += IB_GRH_BYTES;
+
+ header->grh_present          = grh_present;
+ if (grh_present) {
+  header->grh.ip_version      = 6;
+  header->grh.payload_length  =
+   cpu_to_be16((IB_BTH_BYTES     +
+         IB_DETH_BYTES    +
+         payload_bytes    +
+         4                + /* ICRC     */
+         3) & ~3);          /* round up */
+  header->grh.next_header     = 0x1b;
+ }
+
+ if (header->immediate_present)
+  header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ else
+  header->bth.opcode           = IB_OPCODE_UD_SEND_ONLY;
+ header->bth.pad_count                =(u8) ((4 - payload_bytes) & 3);
+ header->bth.transport_header_version = 0;
+}
+
+
+
+/**
+ * rdmaoe_ud_header_pack - Pack UD header struct into eth wire format
+ * @header:UD header struct
+ * @buf:Buffer to pack into
+ *
+ * ib_ud_header_pack() packs the UD header structure @header into wire
+ * format in the buffer @buf.
+ */
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+         void                 *buf)
+{
+ int len = 0;
+
+ ib_pack(eth_table, ARRAY_SIZE(eth_table),
+  &header->eth, buf);
+ len += IB_ETH_BYTES;
+
+ if (header->grh_present) {
+  ib_pack(grh_table, ARRAY_SIZE(grh_table),
+   &header->grh, (u8*)buf + len);
+  len += IB_GRH_BYTES;
+ }
+
+ ib_pack(bth_table, ARRAY_SIZE(bth_table),
+  &header->bth, (u8*)buf + len);
+ len += IB_BTH_BYTES;
+
+ ib_pack(deth_table, ARRAY_SIZE(deth_table),
+  &header->deth, (u8*)buf + len);
+ len += IB_DETH_BYTES;
+
+ if (header->immediate_present) {
+  memcpy((u8*)buf + len, &header->immediate_data,
+         sizeof header->immediate_data);
+  len += sizeof header->immediate_data;
+ }
+
+ return len;
+}
+
+
Index: hw/mlx4/kernel/bus/core/verbs.c
===================================================================
--- hw/mlx4/kernel/bus/core/verbs.c (revision 2617)
+++ hw/mlx4/kernel/bus/core/verbs.c (working copy)
@@ -336,3 +336,28 @@
 }
 EXPORT_SYMBOL(ib_destroy_ah);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ case RDMA_NODE_IB_SWITCH:
+ case RDMA_NODE_IB_ROUTER:
+  return RDMA_TRANSPORT_IB;
+ case RDMA_NODE_RNIC:
+  return RDMA_TRANSPORT_IWARP;
+ default:
+  ASSERT(FALSE);
+  return 0;
+ }
+}
+
+enum rdma_transport_type rdma_port_get_transport(struct ib_device *device,
+       u8 port_num)
+{
+ return device->get_port_transport ?
+  device->get_port_transport(device, port_num) :
+  rdma_node_get_transport(device->node_type);
+}
+EXPORT_SYMBOL(rdma_port_get_transport);
+
Index: hw/mlx4/kernel/bus/drv/drv.c
===================================================================
--- hw/mlx4/kernel/bus/drv/drv.c (revision 2617)
+++ hw/mlx4/kernel/bus/drv/drv.c (working copy)
@@ -95,7 +95,6 @@
 
 #endif
 
-static 
 NTSTATUS
 __create_child(
  __in WDFDEVICE  Device,
@@ -228,13 +227,21 @@
 
  if ( p_fdo->children_created )
   goto end;
- 
+
  // eventually we'll have all information about children in Registry
  // DriverEntry will read it into a Global storage and
  // this routine will create all the children on base on this info
  number_of_ib_ports = mlx4_count_ib_ports(mdev);
  ASSERT(number_of_ib_ports >=0 && number_of_ib_ports <=2);
 
+ //For now it's either IB or ETH, and we always create LLE if it's ETH
+ if((number_of_ib_ports > 0) && (mdev->caps.port_type[1] == MLX4_PORT_TYPE_IB) ) {
+  status = __create_child(Device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 );
+  if (!NT_SUCCESS(status)) {
+    MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (ib) failed with 0x%x\n", status));
+  }
+ }
+
  for (i = 1; i <= mdev->caps.num_ports; i++) {
         if (mlx4_is_enabled_port(mdev, i)) {
             if(mlx4_is_eth_port(mdev, i)) {
@@ -869,6 +876,9 @@
   goto err;
  }
 
+ pdev->p_wdf_device = Device;
+ pdev->ib_hca_created = 0;
+
  // start the card
  status = __start_card(Device, p_fdo );
  if( !NT_SUCCESS( status ) ) 
Index: hw/mlx4/kernel/bus/drv/stat.c
===================================================================
--- hw/mlx4/kernel/bus/drv/stat.c (revision 2617)
+++ hw/mlx4/kernel/bus/drv/stat.c (working copy)
@@ -113,7 +113,7 @@
 void st_print_mlx_header( struct mlx4_dev *mdev, struct mlx4_ib_sqp
*sqp, struct mlx4_wqe_mlx_seg *mlx )
 {
  if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_UDH )
- __print_ud_header( mdev, &sqp->ud_header );
+  __print_ud_header( mdev, &sqp->hdr.ib );
  if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_WQE )
   __print_mlx( mdev, mlx );
 }
Index: hw/mlx4/kernel/bus/ib/ah.c
===================================================================
--- hw/mlx4/kernel/bus/ib/ah.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/ah.c (working copy)
@@ -32,68 +32,199 @@
 
 #include "mlx4_ib.h"
 
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr
*ah_attr)
+static inline int rdma_link_local_addr(struct in6_addr *addr)
 {
+ if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) &&
+     addr->s6_addr32[1] == 0)
+  return 1;
+ else
+  return 0;
+}
+
+inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+ memcpy(mac, &addr->s6_addr[8], 3);
+ memcpy(mac + 3, &addr->s6_addr[13], 3);
+ mac[0] ^= 2;   
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+ return addr->s6_addr[0] == 0xff ? 1 : 0;
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+ int i;
+
+ mac[0] = 0x33;
+ mac[1] = 0x33;
+ for (i = 2; i < 6; ++i)
+  mac[i] = addr->s6_addr[i + 10];
+
+}
+
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
+   u8 *mac, int *is_mcast)
+{
+ int err = 0;
+ struct sockaddr_in6 dst;
+
+ UNREFERENCED_PARAMETER(dev);
+
+ *is_mcast = 0;
+ memcpy(dst.sin6_addr.s6_addr, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw));
+
+ if (rdma_link_local_addr(&dst.sin6_addr))
+  rdma_get_ll_mac(&dst.sin6_addr, mac);
+ else if (rdma_is_multicast_addr(&dst.sin6_addr)) {
+  rdma_get_mcast_mac(&dst.sin6_addr, mac);
+  *is_mcast = 1;
+ } else {
+  err = -EINVAL; //jyang:todo
+  ASSERT(FALSE);
+ }
+ return err;
+}
+
+static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr
*ah_attr,
+      struct mlx4_ib_ah *ah)
+{
  struct mlx4_dev *dev = to_mdev(pd->device)->dev;
- struct mlx4_ib_ah *ah;
 
  if (mlx4_is_barred(pd->device->dma_device))
   return ERR_PTR(-EFAULT);
 
- ah = kmalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
-  return ERR_PTR(-ENOMEM);
 
- memset(&ah->av, 0, sizeof ah->av);
-
- ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num <<
24));
- ah->av.g_slid  = ah_attr->src_path_bits;
- ah->av.dlid    = cpu_to_be16(ah_attr->dlid);
- if (ah_attr->static_rate) {
-  ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
-  while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
-         !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
-   --ah->av.stat_rate;
- }
- ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+ ah->av.ib.g_slid  = ah_attr->src_path_bits;
  if (ah_attr->ah_flags & IB_AH_GRH) {
-  ah->av.g_slid   |= 0x80;
-  ah->av.gid_index = ah_attr->grh.sgid_index;
-  ah->av.hop_limit = ah_attr->grh.hop_limit;
-  ah->av.sl_tclass_flowlabel |=
+  ah->av.ib.g_slid   |= 0x80;
+  ah->av.ib.gid_index = ah_attr->grh.sgid_index;
+  ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+  ah->av.ib.sl_tclass_flowlabel |=
    cpu_to_be32((ah_attr->grh.traffic_class << 20) |
         ah_attr->grh.flow_label);
-  memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+  memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
  }
 
+ ah->av.ib.dlid    = cpu_to_be16(ah_attr->dlid);
+ if (ah_attr->static_rate) {
+  ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+  while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET
&&
+         !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+   --ah->av.ib.stat_rate;
+ }
+ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
  return &ah->ibah;
 }
 
+struct ib_ah *create_rdmaoe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+       struct mlx4_ib_ah *ah)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+ struct mlx4_dev *dev = ibdev->dev;
+ u8 mac[6];
+ int err;
+ int is_mcast;
+
+ if (mlx4_is_barred(pd->device->dma_device))
+  return ERR_PTR(-EFAULT);
+
+ err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast);
+ if (err)
+  return ERR_PTR(err);
+
+ memcpy(ah->av.eth.mac_0_1, mac, 2);
+ memcpy(ah->av.eth.mac_2_5, mac + 2, 4);
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+ ah->av.ib.g_slid = 0x80;
+ if (ah_attr->static_rate) {
+  ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+  while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET
&&
+         !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+   --ah->av.ib.stat_rate;
+ }
+
+ /*
+  * HW requires multicast LID so we just choose one.
+  */
+ if (is_mcast)
+  ah->av.ib.dlid = cpu_to_be16(0xc000);
+
+ memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
+ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
+ return &ah->ibah;
+}
+
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr
*ah_attr)
+{
+ struct mlx4_ib_ah *ah;
+ enum rdma_transport_type transport;
+
+ struct ib_ah *ret;
+
+ ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah)
+  return ERR_PTR(-ENOMEM);
+
+ transport = rdma_port_get_transport(pd->device, ah_attr->port_num);
+ if (transport == RDMA_TRANSPORT_RDMAOE) {
+  if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+   ret = ERR_PTR(-EINVAL);
+   goto out;
+  } else {
+   /* TBD: need to handle the case when we get called
+   in an atomic context and there we might sleep. We
+   don't expect this currently since we're working with
+   link local addresses which we can translate without
+   going to sleep */
+   ret = create_rdmaoe_ah(pd, ah_attr, ah);
+   if (IS_ERR(ret))
+    goto out;
+   else
+    return ret;
+  }
+ } else
+  return create_ib_ah(pd, ah_attr, ah); /* never fails */
+
+out:
+ kfree(ah);
+ return ret;
+}
+
+
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 {
  struct mlx4_ib_ah *ah = to_mah(ibah);
+ enum rdma_transport_type transport;
 
+ transport = rdma_port_get_transport(ibah->device, ah_attr->port_num);
+
  if (mlx4_is_barred(ibah->device->dma_device))
   return -EFAULT;
 
  memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid        = be16_to_cpu(ah->av.dlid);
- ah_attr->sl        = (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >>
28);
- ah_attr->port_num      = (u8)(be32_to_cpu(ah->av.port_pd) >> 24);
- if (ah->av.stat_rate)
-  ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
- ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+ ah_attr->dlid        = transport == RDMA_TRANSPORT_IB ?
be16_to_cpu(ah->av.ib.dlid) : 0;
+ ah_attr->sl        = (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel)
>> 28);
+ ah_attr->port_num      = (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24);
+ if (ah->av.ib.stat_rate)
+  ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
+ ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
 
  if (mlx4_ib_ah_grh_present(ah)) {
   ah_attr->ah_flags = IB_AH_GRH;
 
   ah_attr->grh.traffic_class =
-   (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20);
+   (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20);
   ah_attr->grh.flow_label =
-   be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
-  ah_attr->grh.hop_limit  = ah->av.hop_limit;
-  ah_attr->grh.sgid_index = ah->av.gid_index;
-  memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+   be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
+  ah_attr->grh.hop_limit  = ah->av.ib.hop_limit;
+  ah_attr->grh.sgid_index = ah->av.ib.gid_index;
+  memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
  }
 
  return 0;
@@ -108,7 +239,7 @@
 // Leo: temporary 
 int mlx4_ib_modify_ah( struct ib_ah *ibah, struct ib_ah_attr *ah_attr )
 {
- struct mlx4_av *av  = &to_mah(ibah)->av;
+ struct mlx4_av *av  = &to_mah(ibah)->av.ib;
  struct mlx4_dev *dev = to_mdev(ibah->pd->device)->dev;
 
  if (mlx4_is_barred(dev))
Index: hw/mlx4/kernel/bus/ib/main.c
===================================================================
--- hw/mlx4/kernel/bus/ib/main.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/main.c (working copy)
@@ -133,31 +133,21 @@
  return err;
 }
 
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
-         struct ib_port_attr *props)
+
+static enum rdma_transport_type
+mlx4_ib_port_get_transport(struct ib_device *device, u8 port_num)
 {
- struct ib_smp *in_mad  = NULL;
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct mlx4_dev *dev = to_mdev(device)->dev;
 
- if (mlx4_is_barred(ibdev->dma_device))
-  return -EFAULT;
- 
- in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad)
-  goto out;
+ return dev->caps.port_mask & (1 << (port_num - 1)) ?
+  RDMA_TRANSPORT_IB : RDMA_TRANSPORT_RDMAOE;
+}
 
- memset(props, 0, sizeof *props);
 
- init_query_mad(in_mad);
- in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
- in_mad->attr_mod = cpu_to_be32(port);
-
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad,
out_mad);
- if (err)
-  goto out;
-
+static void ib_link_query_port(struct ib_device *ibdev, u8 port,
+          struct ib_port_attr *props,
+          struct ib_smp *out_mad)
+{
  props->lid  = be16_to_cpup((__be16 *) (out_mad->data + 16));
  props->lmc  = out_mad->data[34] & 0x7;
  props->sm_lid  = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -177,7 +167,64 @@
  props->subnet_timeout = out_mad->data[51] & 0x1f;
  props->max_vl_num = out_mad->data[37] >> 4;
  props->init_type_reply = out_mad->data[41] >> 4;
+ props->transport= RDMA_TRANSPORT_IB;
+}
 
+static void eth_link_query_port(struct ib_device *ibdev, u8 port,
+    struct ib_port_attr *props,
+    struct ib_smp *out_mad)
+{
+
+ props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
+ props->pkey_tbl_len =
(u16)to_mdev(ibdev)->dev->caps.pkey_table_len[port];
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu  = out_mad->data[41] & 0xf;
+ //props->active_mtu = rdmaoe->mtu[port - 1];
+ props->active_mtu = 1500; //jyang:hardcoded
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
+ props->transport= RDMA_TRANSPORT_RDMAOE;
+
+ //props->state  = netif_running(ndev) &&  netif_oper_up(ndev) ?
+ //    IB_PORT_ACTIVE : IB_PORT_DOWN;
+ props->state  = IB_PORT_ACTIVE; //jyang: just hardcoded it now
+ props->phys_state = props->state;
+}
+
+
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+             struct ib_port_attr *props)
+{
+ struct ib_smp *in_mad  = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad)
+  goto out;
+
+ memset(props, 0, sizeof *props);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad,
out_mad);
+ if (err)
+  goto out;
+
+ mlx4_ib_port_get_transport(ibdev, port) == RDMA_TRANSPORT_IB ?
+  ib_link_query_port(ibdev, port, props, out_mad) :
+  eth_link_query_port(ibdev, port, props, out_mad);
+
 out:
  kfree(in_mad);
  kfree(out_mad);
@@ -522,6 +569,7 @@
  ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
  ibdev->ib_dev.query_device = mlx4_ib_query_device;
  ibdev->ib_dev.query_port = mlx4_ib_query_port;
+ ibdev->ib_dev.get_port_transport = mlx4_ib_port_get_transport;
  ibdev->ib_dev.query_gid_chunk = mlx4_ib_query_gid_chunk;
  ibdev->ib_dev.query_pkey_chunk = mlx4_ib_query_pkey_chunk;
  ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
Index: hw/mlx4/kernel/bus/ib/mlx4_ib.h
===================================================================
--- hw/mlx4/kernel/bus/ib/mlx4_ib.h (revision 2617)
+++ hw/mlx4/kernel/bus/ib/mlx4_ib.h (working copy)
@@ -165,14 +165,15 @@
 
 struct mlx4_ib_ah {
  struct ib_ah  ibah;
- struct mlx4_av  av;
+ union mlx4_ext_av   av;
 };
 
+
 enum {
  /*
   * Largest possible UD header: send with GRH and immediate data.
   */
- MLX4_IB_UD_HEADER_SIZE  = 72
+ MLX4_IB_UD_HEADER_SIZE  = 78 /* ETH(14) + GRH(40) + BTH(12) + DETH(8) + imm(4) */
 };
 
 struct mlx4_ib_sqp {
@@ -180,7 +181,10 @@
  int   pkey_index;
  u32   qkey;
  u32   send_psn;
- struct ib_ud_header ud_header;
+ union {
+  struct ib_ud_header ib;
+  struct eth_ud_header eth;
+ } hdr;
  u8   header_buf[MLX4_IB_UD_HEADER_SIZE];
 };
 
@@ -340,9 +344,14 @@
 int __init mlx4_ib_init(void);
 void __exit mlx4_ib_cleanup(void);
 
+int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah_attr,
+   u8 *mac, int *is_mcast);
+
+
 static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
 {
- return !!(ah->av.g_slid & 0x80);
+ return !!(ah->av.ib.g_slid & 0x80);
+
 }
 
 #endif /* MLX4_IB_H */
Index: hw/mlx4/kernel/bus/ib/qp.c
===================================================================
--- hw/mlx4/kernel/bus/ib/qp.c (revision 2617)
+++ hw/mlx4/kernel/bus/ib/qp.c (working copy)
@@ -46,10 +46,16 @@
 
 enum {
  MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
- MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
+ MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
+ MLX4_IB_LINK_TYPE_IB  = 0,
+ MLX4_IB_LINK_TYPE_ETH  = 1
 };
 
 enum {
+ MLX4_RDMAOE_ETHERTYPE = 0x8915
+};
+
+enum {
  MLX4_IB_MIN_SQ_STRIDE = 6
 };
 
@@ -65,6 +71,8 @@
  __constant_cpu_to_be32(MLX4_OPCODE_NOP)    /* [IB_WR_NOP]     */
 };
 
+extern inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac);
+
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
 {
  return container_of(mqp, struct mlx4_ib_sqp, qp);
@@ -724,6 +732,12 @@
 static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct
ib_ah_attr *ah,
     struct mlx4_qp_path *path, u8 port)
 {
+ int err;
+ int is_eth = rdma_port_get_transport(&dev->ib_dev, port) ==
+  RDMA_TRANSPORT_RDMAOE ? 1 : 0;
+ u8 mac[6];
+ int is_mcast;
+
  path->grh_mylmc     = ah->src_path_bits & 0x7f;
  path->rlid     = cpu_to_be16(ah->dlid);
  if (ah->static_rate) {
@@ -754,7 +768,21 @@
  path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
   ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
 
- return 0;
+ if (is_eth) {
+  if (!(ah->ah_flags & IB_AH_GRH))
+   return -1;
+
+  err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast);
+  if (err)
+   return err;
+
+  memcpy(path->dmac, mac, 6);
+  path->ackto = MLX4_IB_LINK_TYPE_ETH;
+  /* use index 0 into MAC table for RDMAoE */
+  path->grh_mylmc &= 0x80;
+ }
+
+    return 0;
 }
 
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
@@ -1146,79 +1174,132 @@
  return opcode;
 }
 
+
+
+
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr,
-       void *wqe)
+       void *wqe, unsigned *mlx_seg_len)
 {
  enum ib_wr_opcode opcode = to_wr_opcode(wr);
  struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
  struct mlx4_wqe_mlx_seg *mlx = wqe;
  struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx);
  struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av);
- __be16 pkey;
+ u16 pkey;
  int send_size;
  int header_size;
  int spc;
- u32 i;
+ u16 i;
+ struct ib_ud_header *ib = NULL;
+ struct eth_ud_header *eth = NULL;
+ struct ib_unpacked_grh *grh;
+ struct ib_unpacked_bth  *bth;
+ struct ib_unpacked_deth *deth;
+ u8 *tmp;
+ u8 mac[6];
 
  send_size = 0;
  for (i = 0; i < wr->num_ds; ++i)
   send_size += wr->ds_array[i].length;
 
- ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah),
&sqp->ud_header);
+ if (rdma_port_get_transport(sqp->qp.ibqp.device, sqp->qp.port) == RDMA_TRANSPORT_IB) {
 
- sqp->ud_header.lrh.service_level   =
-  (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28);
- sqp->ud_header.lrh.destination_lid = ah->av.dlid;
- sqp->ud_header.lrh.source_lid      = cpu_to_be16(ah->av.g_slid &
0x7f);
+  ib = &sqp->hdr.ib;
+  grh = &ib->grh;
+  bth = &ib->bth;
+  deth = &ib->deth;
+  ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), ib);
+  ib->lrh.service_level   =
+   (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28);
+  ib->lrh.destination_lid = ah->av.ib.dlid;
+  ib->lrh.source_lid      = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ } else {
+  eth = &sqp->hdr.eth;
+  grh = &eth->grh;
+  bth = &eth->bth;
+  deth = &eth->deth;
+  ib_rdmaoe_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), eth);
+ }
+
+ 
  if (mlx4_ib_ah_grh_present(ah)) {
-  sqp->ud_header.grh.traffic_class =
-   (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff);
-  sqp->ud_header.grh.flow_label    =
-   ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
-  sqp->ud_header.grh.hop_limit     = ah->av.hop_limit;
-  ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24),
-      ah->av.gid_index, &sqp->ud_header.grh.source_gid);
-  memcpy(sqp->ud_header.grh.destination_gid.raw,
-         ah->av.dgid, 16);
+  grh->traffic_class =
+   (u8)((be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff);
+  grh->flow_label    =
+   ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+  grh->hop_limit     = ah->av.ib.hop_limit;
+  ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24),
+      ah->av.ib.gid_index, &grh->source_gid);
+  memcpy(grh->destination_gid.raw,
+      ah->av.ib.dgid, 16);
  }
 
  mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 :
0) |
-      (sqp->ud_header.lrh.destination_lid ==
-       XIB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
-      (sqp->ud_header.lrh.service_level << 8));
- mlx->rlid   = sqp->ud_header.lrh.destination_lid;
 
+ if (ib) {
+  mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 :
0) |
+       (ib->lrh.destination_lid ==
+        IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
+       (ib->lrh.service_level << 8));
+  mlx->rlid   = ib->lrh.destination_lid;
+
+ }
+
  switch (opcode) {
  case IB_WR_SEND:
-  sqp->ud_header.bth.opcode  = IB_OPCODE_UD_SEND_ONLY;
-  sqp->ud_header.immediate_present = 0;
+  bth->opcode  = IB_OPCODE_UD_SEND_ONLY;
+  if (ib)
+   ib->immediate_present = 0;
+  else
+   eth->immediate_present = 0;
   break;
  case IB_WR_SEND_WITH_IMM:
-  sqp->ud_header.bth.opcode  = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
-  sqp->ud_header.immediate_present = 1;
-  sqp->ud_header.immediate_data    = wr->immediate_data;
+  bth->opcode  = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+  if (ib) {
+   ib->immediate_present = 1;
+   ib->immediate_data    = wr->immediate_data;
+  } else {
+   eth->immediate_present = 1;
+   eth->immediate_data    = wr->immediate_data;
+  }
   break;
  default:
   return -EINVAL;
  }
 
- sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
- if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
-  sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
- sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt &
IB_SEND_OPT_SOLICITED));
+ if (ib) {
+  ib->lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
+  if (ib->lrh.destination_lid == IB_LID_PERMISSIVE)
+   ib->lrh.source_lid = IB_LID_PERMISSIVE;
+ } else {
+  memcpy(eth->eth.dmac_h, ah->av.eth.mac_0_1, 2);
+  memcpy(eth->eth.dmac_h + 2, ah->av.eth.mac_2_5, 2);
+  memcpy(eth->eth.dmac_l, ah->av.eth.mac_2_5 + 2, 2);
+  rdma_get_ll_mac((struct in6_addr *)&grh->source_gid, mac);
+
+  tmp = mac;
+  memcpy(eth->eth.smac_h, tmp, 2);
+  memcpy(eth->eth.smac_l, tmp + 2, 4);
+  eth->eth.type = cpu_to_be16(MLX4_RDMAOE_ETHERTYPE);
+ }
+
+ bth->solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED));
+
  if (!sqp->qp.ibqp.qp_num)
   ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
  else
   ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index,
&pkey);
- sqp->ud_header.bth.pkey = pkey;
- sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp;
- sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) -
1));
- sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ?
-  cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ bth->pkey = pkey;
+ bth->destination_qpn = wr->dgrm.ud.remote_qp;
+ bth->psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ deth->qkey = wr->dgrm.ud.remote_qkey & 0x80000000 ?
+         cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey;
+ deth->source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
 
- header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
+ if (ib)
+  header_size = ib_ud_header_pack(ib, sqp->header_buf);
+ else
+  header_size = rdmaoe_ud_header_pack(eth, sqp->header_buf);
 
 #if 0
  {
@@ -1271,7 +1352,10 @@
   i = 2;
  }
 
- return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size,
16);
+ *mlx_seg_len =
+  ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+ return 0;
+
 }
 
 static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct
ib_cq *ib_cq)
@@ -1314,9 +1398,13 @@
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
         ib_send_wr_t *wr)
 {
+
  memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof
(struct mlx4_av));
  dseg->dqpn = wr->dgrm.ud.remote_qp;
  dseg->qkey = wr->dgrm.ud.remote_qkey;
+ dseg->vlan = to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av.eth.vlan;
+ memcpy(dseg->mac_0_1, to_mah((struct ib_ah
*)wr->dgrm.ud.h_av)->av.eth.mac_0_1, 6);
+
 }
 
 static void set_mlx_icrc_seg(void *dseg)
@@ -1398,7 +1486,7 @@
 int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr,
         ib_send_wr_t **bad_wr)
 {
- enum ib_wr_opcode opcode;
+ enum ib_wr_opcode opcode;// = to_wr_opcode(wr);
  struct mlx4_ib_qp *qp = to_mqp(ibqp);
  struct mlx4_dev *dev = to_mdev(ibqp->device)->dev;
  u8 *wqe /*, *wqe_start*/;
@@ -1525,16 +1613,14 @@
 
   case IB_QPT_SMI:
   case IB_QPT_GSI:
-   err = build_mlx_header(to_msqp(qp), wr, ctrl);
+   err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
    if (err < 0) {
     if (bad_wr)
      *bad_wr = wr;
     goto out;
    }
-   
-   wqe  += err;
-   size += err / 16;
-
+   wqe  += seglen;
+   size += seglen / 16;
    err = 0;
    break;
 
Index: hw/mlx4/kernel/bus/inc/cmd.h
===================================================================
--- hw/mlx4/kernel/bus/inc/cmd.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/cmd.h (working copy)
@@ -138,6 +138,7 @@
  MLX4_SET_PORT_MAC_TABLE = 0x2,
  MLX4_SET_PORT_VLAN_TABLE = 0x3,
  MLX4_SET_PORT_PRIO_MAP  = 0x4,
+ MLX4_SET_PORT_GID_TABLE = 0x5,
 };
 
 struct mlx4_dev;
Index: hw/mlx4/kernel/bus/inc/device.h
===================================================================
--- hw/mlx4/kernel/bus/inc/device.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/device.h (working copy)
@@ -208,8 +208,9 @@
  int   log_num_prios;
  int   num_fc_exch;
  enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
-    enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
-    int   reserved_fexch_mpts_base;   
+ u32   port_mask;
+ enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1];
+ int   reserved_fexch_mpts_base;   
  int   total_reserved_qps;
 };
 
@@ -343,6 +344,28 @@
  u8   dgid[16];
 };
 
+struct mlx4_eth_av {
+ __be32  port_pd;
+ u8  reserved1;
+ u8  smac_idx;
+ u16  reserved2;
+ u8  reserved3;
+ u8  gid_index;
+ u8  stat_rate;
+ u8  hop_limit;
+ __be32  sl_tclass_flowlabel;
+ u8  dgid[16];
+ u32  reserved4[2];
+ __be16  vlan;
+ u8  mac_0_1[2];
+ u8  mac_2_5[4];
+};
+
+union mlx4_ext_av {
+ struct mlx4_av  ib;
+ struct mlx4_eth_av eth;
+};
+
 #define MLX4_DEV_SIGNATURE 0xf1b34a6e
 
 struct mlx4_dev_params {
Index: hw/mlx4/kernel/bus/inc/ib_pack.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_pack.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/ib_pack.h (working copy)
@@ -39,6 +39,7 @@
 
 enum {
  IB_LRH_BYTES  = 8,
+ IB_ETH_BYTES  = 14,
  IB_GRH_BYTES  = 40,
  IB_BTH_BYTES  = 12,
  IB_DETH_BYTES = 8
@@ -212,6 +213,15 @@
  __be32       source_qpn;
 };
 
+struct ib_unpacked_eth {
+ u8 dmac_h[4];
+ u8 dmac_l[2];
+ u8 smac_h[2];
+ u8 smac_l[4];
+ __be16 type;
+};
+
+
 struct ib_ud_header {
  struct ib_unpacked_lrh  lrh;
  int                     grh_present;
@@ -222,6 +232,19 @@
  __be32           immediate_data;
 };
 
+
+
+struct eth_ud_header {
+ struct ib_unpacked_eth  eth;
+ int                     grh_present;
+ struct ib_unpacked_grh  grh;
+ struct ib_unpacked_bth  bth;
+ struct ib_unpacked_deth deth;
+ int              immediate_present;
+ __be32           immediate_data;
+};
+
+
 void ib_pack(const struct ib_field        *desc,
       int                           desc_len,
       void                         *structure,
@@ -236,10 +259,18 @@
          int         grh_present,
          struct ib_ud_header *header);
 
+void ib_rdmaoe_ud_header_init(int          payload_bytes,
+      int         grh_present,
+      struct eth_ud_header   *header);
+
 int ib_ud_header_pack(struct ib_ud_header *header,
         void                *buf);
 
 int ib_ud_header_unpack(void                *buf,
    struct ib_ud_header *header);
 
+int rdmaoe_ud_header_pack(struct eth_ud_header *header,
+         void                 *buf);
+
+
 #endif /* IB_PACK_H */
Index: hw/mlx4/kernel/bus/inc/ib_verbs.h
===================================================================
--- hw/mlx4/kernel/bus/inc/ib_verbs.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/ib_verbs.h (working copy)
@@ -53,6 +53,34 @@
 
 #include "ib_verbs_ex.h"
 
+/*
+ * IPv6 address structure
+ */
+
+struct in6_addr
+{
+ union 
+ {
+  __u8  u6_addr8[16];
+  __be16  u6_addr16[8];
+  __be32  u6_addr32[4];
+ } in6_u;
+#define s6_addr   in6_u.u6_addr8
+#define s6_addr16  in6_u.u6_addr16
+#define s6_addr32  in6_u.u6_addr32
+};
+
+
+struct sockaddr_in6 {
+ unsigned short int sin6_family;    /* AF_INET6 */
+ __be16   sin6_port;      /* Transport layer port # */
+ __be32   sin6_flowinfo;  /* IPv6 flow information */
+ struct in6_addr  sin6_addr;      /* IPv6 address */
+ __u32   sin6_scope_id;  /* scope id (new in RFC2553) */
+};
+
+#define AF_INET6 10 /* IP version 6   */
+
 enum rdma_node_type {
  /* IB values map to NodeInfo:NodeType. */
  RDMA_NODE_IB_CA  = 1,
@@ -63,7 +91,8 @@
 
 enum rdma_transport_type {
  RDMA_TRANSPORT_IB,
- RDMA_TRANSPORT_IWARP
+ RDMA_TRANSPORT_IWARP,
+ RDMA_TRANSPORT_RDMAOE
 };
 
 enum rdma_transport_type
@@ -231,6 +260,7 @@
  u8   active_width;
  u8   active_speed;
  u8                      phys_state;
+ enum rdma_transport_type transport;
 };
 
 enum ib_device_modify_flags {
@@ -633,6 +663,10 @@
  IB_WR_ATOMIC_CMP_AND_SWP,
  IB_WR_ATOMIC_FETCH_AND_ADD,
  IB_WR_LSO,
+ IB_WR_SEND_WITH_INV,
+ IB_WR_RDMA_READ_WITH_INV,
+ IB_WR_LOCAL_INV,
+ IB_WR_FAST_REG_MR,
  IB_WR_NOP
 };
 
@@ -920,6 +954,9 @@
  int             (*query_port)(struct ib_device *device,
        u8 port_num,
        struct ib_port_attr *port_attr);
+ enum rdma_transport_type   (*get_port_transport)(struct ib_device
*device,
+        u8 port_num);
+
  int             (*query_gid_chunk)(struct ib_device *device,
       u8 port_num, int index,
       union ib_gid gid[8], int size);
@@ -1127,6 +1164,11 @@
 int ib_query_port(struct ib_device *device,
     u8 port_num, struct ib_port_attr *port_attr);
 
+enum rdma_transport_type rdma_port_get_transport(struct ib_device
*device,
+       u8 port_num);
+int rdma_is_transport_supported(struct ib_device *device,
+    enum rdma_transport_type transport);
+
 int ib_query_gid_chunk(struct ib_device *device,
    u8 port_num, int index, union ib_gid gid[8], int size);
 
Index: hw/mlx4/kernel/bus/inc/qp.h
===================================================================
--- hw/mlx4/kernel/bus/inc/qp.h (revision 2617)
+++ hw/mlx4/kernel/bus/inc/qp.h (working copy)
@@ -113,7 +113,9 @@
  u8   snooper_flags;
  u8   reserved3[2];
  u8   counter_index;
- u8   reserved4[7];
+ u8   reserved4;
+ u8   dmac[6];
+
 };
 
 struct mlx4_qp_context {
@@ -213,7 +215,9 @@
  __be32   av[8];
  __be32   dqpn;
  __be32   qkey;
- __be32   reservd[2];
+ __be16   vlan;
+ u8   mac_0_1[2];
+ u8   mac_2_5[4];
 };
 
 #pragma warning( disable : 4200)
Index: hw/mlx4/kernel/bus/net/main.c
===================================================================
--- hw/mlx4/kernel/bus/net/main.c (revision 2617)
+++ hw/mlx4/kernel/bus/net/main.c (working copy)
@@ -139,7 +139,9 @@
  int count = 0;
 
  for (i = 0; i < dev->caps.num_ports; i++) {
-  if (dev->caps.port_type[i+1] == MLX4_PORT_TYPE_IB) {
+  if ((dev->caps.port_type[i+1] == MLX4_PORT_TYPE_IB) ||
+            (dev->caps.port_type[i+1] == MLX4_PORT_TYPE_ETH))
+        {
    count++;
   }
  }
@@ -170,6 +172,16 @@
  return FALSE;
 }
 
+static void mlx4_set_port_mask(struct mlx4_dev *dev)
+{
+ int i;
+
+ dev->caps.port_mask = 0;
+ for (i = 1; i <= dev->caps.num_ports; ++i)
+  if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB)
+   dev->caps.port_mask |= 1 << (i - 1);
+}
+
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap
*dev_cap)
 {
  int err;
@@ -309,6 +321,8 @@
    ++num_eth_ports;
  }
 
+ mlx4_set_port_mask(dev);
+
  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
Index: hw/mlx4/kernel/bus/net/port.c
===================================================================
--- hw/mlx4/kernel/bus/net/port.c (revision 2617)
+++ hw/mlx4/kernel/bus/net/port.c (working copy)
@@ -33,7 +33,9 @@
 
 #include "mlx4.h"
 #include "cmd.h"
+#include "public.h"
 
+extern NTSTATUS __create_child();
 
 void mlx4_init_mac_table(struct mlx4_dev *dev, u8 port)
 {
@@ -60,6 +62,10 @@
   table->refs[i] = 0;
  }
  table->max = 1 << dev->caps.log_num_vlans;
+ if(table->max > MLX4_MAX_VLAN_NUM)
+ {
+  table->max = MLX4_MAX_VLAN_NUM;
+ }
  table->total = 0;
 }
 
@@ -84,6 +90,52 @@
  return err;
 }
 
+static void mlx4_addrconf_ifid_eui48_win(u8 *eui, u64 mac)
+{
+    u8 *p = (u8*)&mac+2; //mac 6 bytes
+ memcpy(eui, p, 3);
+ memcpy(eui + 5, p + 3, 3);
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+}
+
+
+static int update_ipv6_gids_win(struct mlx4_dev *dev, int port, int
clear, u64 mac)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids, *tmpgids;
+ int err;
+
+ tmpgids = kzalloc(128 * sizeof *gids, GFP_ATOMIC);
+ if (!tmpgids)
+  return -ENOMEM;
+
+ if (!clear) {
+  mlx4_addrconf_ifid_eui48_win(&tmpgids[0].raw[8], cpu_to_be64(mac));
+  tmpgids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ }
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+  err = PTR_ERR(mailbox);
+  goto out;
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, tmpgids, 128 * sizeof *gids);
+
+ err = mlx4_cmd(dev, mailbox->dma.da, MLX4_SET_PORT_GID_TABLE << 8 |
port,
+         1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+out:
+ kfree(tmpgids);
+ return err;
+}
+
+
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int
*index)
 {
  struct mlx4_mac_table *table =
@@ -112,7 +164,7 @@
  }
  mlx4_dbg(dev, "Free mac index is %d\n", free);
 
- if (table->total == table->max) {
+ if (table->total == table->max || free < 0) {
   /* No free mac entries */
   err = -ENOSPC;
   goto out;
@@ -132,6 +184,20 @@
 
  *index = free;
  ++table->total;
+
+ //update port guid with mac address
+ update_ipv6_gids_win(dev, port, 0, mac);
+   
+ if(!InterlockedExchange(&dev->pdev->ib_hca_created, 1))
+ {
+     NTSTATUS status = STATUS_SUCCESS;
+  status = __create_child(dev->pdev->p_wdf_device, BUS_HARDWARE_IDS,
BUS_HARDWARE_DESCRIPTION, 0 );
+  if (!NT_SUCCESS(status)) {
+    mlx4_err(dev, "__create_child (ib)failed with 0x%x\n", status);
+    dev->pdev->ib_hca_created = FALSE;
+  }
+ }
+
 out:
  up(&table->mac_sem);
  return err;
@@ -207,7 +273,7 @@
   }
  }
 
- if (table->total == table->max) {
+ if (table->total == table->max || free < 0) {
   /* No free vlan entries */
   err = -ENOSPC;
   goto out;
Index: hw/mlx4/kernel/bus/net/SOURCES
===================================================================
--- hw/mlx4/kernel/bus/net/SOURCES (revision 2617)
+++ hw/mlx4/kernel/bus/net/SOURCES (working copy)
@@ -31,7 +31,7 @@
  srq.c   \
         port.c                  \
 
-INCLUDES=..;..\inc;..\..\inc;..\core\$O;..\..\..\..\..\inc;..\..\..\..\
..\inc\kernel;
+INCLUDES=..;..\inc;..\..\inc;..\..\..\inc;..\core\$O;..\..\..\..\..\inc
;..\..\..\..\..\inc\kernel;
 
 C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS
-D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS 
 #-DFORCE_LIVEFISH
Index: hw/mlx4/kernel/hca/av.c
===================================================================
--- hw/mlx4/kernel/hca/av.c (revision 2617)
+++ hw/mlx4/kernel/hca/av.c (working copy)
@@ -74,6 +74,7 @@
  p_ib_ah = p_ib_pd->device->create_ah(p_ib_pd, &ah_attr);
  if (IS_ERR(p_ib_ah)) {
   err = PTR_ERR(p_ib_ah);
+  status = errno_to_iberr(err);
   HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed (%d)\n",
err));
   goto err_create_ah;
  }
Index: hw/mlx4/kernel/hca/data.c
===================================================================
--- hw/mlx4/kernel/hca/data.c (revision 2617)
+++ hw/mlx4/kernel/hca/data.c (working copy)
@@ -339,6 +339,7 @@
    ibal_port_p->max_vls    = mthca_port_p->max_vl_num;
    ibal_port_p->sm_lid     = cl_ntoh16(mthca_port_p->sm_lid);
    ibal_port_p->sm_sl      = mthca_port_p->sm_sl;
+   ibal_port_p->transport  = mthca_port_p->transport;
    ibal_port_p->link_state = (mthca_port_p->state != 0) ?
(uint8_t)mthca_port_p->state : IB_LINK_DOWN;
    ibal_port_p->num_gids   = (uint16_t)mthca_port_p->gid_tbl_len;
    ibal_port_p->num_pkeys  = mthca_port_p->pkey_tbl_len;
Index: hw/mlx4/kernel/inc/l2w.h
===================================================================
--- hw/mlx4/kernel/inc/l2w.h (revision 2617)
+++ hw/mlx4/kernel/inc/l2w.h (working copy)
@@ -185,6 +185,8 @@
  DMA_ADAPTER  *    p_dma_adapter; /* HCA adapter object */
  DEVICE_OBJECT *    p_self_do;  /* mlx4_bus's FDO */
  DEVICE_OBJECT *    pdo;   /* mlx4_bus's PDO */
+ PVOID                           p_wdf_device;   /* wdf_device */
+ LONG       ib_hca_created;
  // mlx4_ib: various objects and info 
  struct ib_device *    ib_dev;
  // mlx4_net: various objects and info 
Index: inc/iba/ib_types.h
===================================================================
--- inc/iba/ib_types.h (revision 2617)
+++ inc/iba/ib_types.h (working copy)
@@ -9419,6 +9419,8 @@
  TO_LONG_PTR(ib_gid_t*, p_gid_table);
  TO_LONG_PTR(ib_net16_t*,p_pkey_table);
 
+ enum rdma_transport_type transport;
+
 } ib_port_attr_t;
 /*
 * SEE ALSO
Index: ulp/opensm/user/include/iba/ib_types.h
===================================================================
--- ulp/opensm/user/include/iba/ib_types.h (revision 2617)
+++ ulp/opensm/user/include/iba/ib_types.h (working copy)
@@ -8676,6 +8676,7 @@
  ib_gid_t    *p_gid_table;
  ib_net16_t    *p_pkey_table;
 
+ enum rdma_transport_type transport;
 } ib_port_attr_t;
 /*
 * SEE ALSO
Index: ulp/opensm/user/include/iba/ib_types_extended.h
===================================================================
--- ulp/opensm/user/include/iba/ib_types_extended.h (revision 2617)
+++ ulp/opensm/user/include/iba/ib_types_extended.h (working copy)
@@ -586,6 +586,7 @@
  TO_LONG_PTR(ib_gid_t*, p_gid_table);
  TO_LONG_PTR(ib_net16_t*,p_pkey_table);
 
+ enum rdma_transport_type transport;
 } ib_port_attr_t;
 /*
 * SEE ALSO

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20091202/a3bddac3/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: lle_mlx4_ofed.patch
Type: application/octet-stream
Size: 42086 bytes
Desc: lle_mlx4_ofed.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20091202/a3bddac3/attachment.obj>


More information about the ofw mailing list