[ofw] [ipoib] connected mode changes
Alex Estrin
alex.estrin at qlogic.com
Mon Dec 8 10:00:30 PST 2008
Hello,
Attached are IPoIB Connected Mode changes.
Here is a review of the key implementation details from previous emails, with some recent changes:
Connection:
Listening CEP associated with local endpoint.
A connection is established per endpoint. On the CM active side, QP creation and destruction are offloaded to a system thread.
A CREQ is sent along with the unicast ARP reply to the endpoint if that endpoint reported its C/M capabilities in the ARP request.
The host can accept a CREQ from the same endpoint. As a result, one or two RC QPs will be created per connection (this matches Linux behavior).
Both QPs are created bidirectional so the first established connection will be used for transmission.
Max payload MTU is set to 65520 (to match Linux ipoib cm MTU size).
Since there is no HW support for RC QP yet, the checksum offload flags are forced to: send - disabled, receive - bypass.
Send path:
Unicast TCP and UDP packets go through RC QP.
ARP, ICMP and multicast packets go through the UD QP; if necessary, the IP packet is fragmented and fitted into a chain of WRs.
Recv path:
endpoint recv queue attached to SRQ.
An SRQ is created per port; the SRQ queue size is calculated using data from the CA attributes query (we might need to come up with a more scalable value).
introduced new descriptor type that extends layout of UD receive descriptor.
implemented simple ARP filter (will go away).
reused existing filter for UDP/DHCP packets.
Common code changes:
ipoib_cm.c
-new file. Most of IB CM related code was put there.
ipoib_endpoint.c
Most receive buffers management functions implemented there.
ipoib_port.c
__build_send_desc() and __send_mgr_filter_ip() were reworked to handle RC/UD redirection and IP fragmentation logic.
LSO WR formatting was repackaged as a separate function.
Recv statistics update was optimized a bit for RC path.
inc\kernel\ip_packet.h
- added a few macros for IP header fragment flags handling.
ipoib_xfr_mgr.h
fixed and put to use ipoib hw addr fields handling routines.
ipoib_driver.h.
most global definitions moved here.
Some other changes:
added MiniportCancelSendPackets routine.
added ErrorLog messages for success/failed C/M initialization.
reduced some debug print noise by moving statistic OIDs and few other to higher level.
some minor code formatting, while trying to maintain a consistent project coding style.
Please review.
Known issues and limitations:
C/M is forced to stay disabled if LSO is enabled ( not sure how to merge it together since LSO is tied to UD ).
SID is misformatted (IETF bit) to match Linux implementation. (Linux PR was opened).
IP fragmentation of packets > 30k will fail.
Code was tested on 2003 x86, x64 and with Linux OFED 1.3.1
Thanks,
Alex.
Index: inc/kernel/ip_packet.h
===================================================================
--- inc/kernel/ip_packet.h (revision 1776)
+++ inc/kernel/ip_packet.h (working copy)
@@ -197,6 +197,7 @@
#define IP_PROT_TCP 6
#define IP_PROT_UDP 17
#define IP_PROT_IGMP 2
+#define IP_PROT_ICMP 1
#include <complib/cl_packon.h>
@@ -359,12 +360,26 @@
#define IP_HEADER_LENGTH(pIpHdr) \
( (ULONG)((pIpHdr->ver_hl & 0x0F) << 2) )
+/*
+ * Helpers for the 16-bit flags / fragment-offset word of the IP header
+ * (the `offset` member).  Every mask is passed through CL_HTON16 so that it
+ * is applied to the field exactly as it is stored in the header.
+ * The header pointer argument is parenthesised so that any pointer
+ * expression can be passed safely.
+ */
+
+/* Fragment offset (low 13 bits of the word), converted with cl_ntoh16. */
+#define IP_FRAGMENT_OFFSET(p_ip_hdr) \
+    ( cl_ntoh16( (p_ip_hdr)->offset & CL_HTON16(0x1fff) ) )
+
+/*
+ * TRUE when the 0x4000 ("don't fragment") bit is set.
+ * The masked value is compared against zero before narrowing: casting the
+ * 16-bit result straight to BOOLEAN would keep only its low 8 bits.
+ */
+#define IP_DONT_FRAGMENT(p_ip_hdr) \
+    ( (BOOLEAN)( ( (p_ip_hdr)->offset & CL_HTON16(0x4000) ) != 0 ) )
+
+/* TRUE when the 0x2000 ("more fragments") bit is set; see note above. */
+#define IP_MORE_FRAGMENTS( p_ip_hdr ) \
+    ( (BOOLEAN)( ( (p_ip_hdr)->offset & CL_HTON16(0x2000) ) != 0 ) )
+
+/* Sets the 0x2000 ("more fragments") bit in the header. */
+#define IP_SET_MORE_FRAGMENTS( p_ip_hdr ) \
+    ( (p_ip_hdr)->offset |= CL_HTON16(0x2000) )
+
+/* Clears the 0x2000 ("more fragments") bit, marking the last fragment. */
+#define IP_SET_LAST_FRAGMENT( p_ip_hdr ) \
+    ( (p_ip_hdr)->offset &= (CL_HTON16(~0x2000) ) )
+
#define TCP_HEADER_LENGTH(pTcpHdr) \
((pTcpHdr->offset & 0xF0) >> 2)
-#define PROTOCOL_TCP 6
+#define PROTOCOL_TCP IP_PROT_TCP
-
#define IGMP_V2_MEMBERSHIP_QUERY 0x11
#define IGMP_V2_MEMBERSHIP_REPORT 0x16
#define IGMP_V1_MEMBERSHIP_REPORT 0x12 // for backward compatibility with IGMPv1
Index: ulp/ipoib/kernel/ipoib_adapter.c
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.c (revision 1776)
+++ ulp/ipoib/kernel/ipoib_adapter.c (working copy)
@@ -1287,7 +1287,8 @@
ipoib_inc_recv_stat(
IN ipoib_adapter_t* const p_adapter,
IN const ip_stat_sel_t stat_sel,
- IN const size_t bytes OPTIONAL )
+ IN const size_t bytes OPTIONAL,
+ IN const size_t packets OPTIONAL )
{
IPOIB_ENTER( IPOIB_DBG_STAT );
@@ -1305,21 +1306,21 @@
case IP_STAT_UCAST_BYTES:
case IP_STAT_UCAST_FRAMES:
p_adapter->recv_stats.comp.success++;
- p_adapter->recv_stats.ucast.frames++;
+ p_adapter->recv_stats.ucast.frames += packets;
p_adapter->recv_stats.ucast.bytes += bytes;
break;
case IP_STAT_BCAST_BYTES:
case IP_STAT_BCAST_FRAMES:
p_adapter->recv_stats.comp.success++;
- p_adapter->recv_stats.bcast.frames++;
+ p_adapter->recv_stats.bcast.frames += packets;
p_adapter->recv_stats.bcast.bytes += bytes;
break;
case IP_STAT_MCAST_BYTES:
case IP_STAT_MCAST_FRAMES:
p_adapter->recv_stats.comp.success++;
- p_adapter->recv_stats.mcast.frames++;
+ p_adapter->recv_stats.mcast.frames += packets;
p_adapter->recv_stats.mcast.bytes += bytes;
break;
Index: ulp/ipoib/kernel/ipoib_adapter.h
===================================================================
--- ulp/ipoib/kernel/ipoib_adapter.h (revision 1776)
+++ ulp/ipoib/kernel/ipoib_adapter.h (working copy)
@@ -60,15 +60,20 @@
/*
* Macros
*/
-typedef enum {CSUM_DISABLED = 0, CSUM_ENABLED, CSUM_BYPASS} tCsumTypeEn;
+typedef enum
+{
+ CSUM_DISABLED = 0,
+ CSUM_ENABLED,
+ CSUM_BYPASS
+} csum_flag_t;
typedef struct _ipoib_params
{
int32_t rq_depth;
int32_t rq_low_watermark;
int32_t sq_depth;
- int32_t send_chksum_offload; //is actually of type tCsumTypeEn
- int32_t recv_chksum_offload; //is actually of type tCsumTypeEn
+ csum_flag_t send_chksum_offload;
+ csum_flag_t recv_chksum_offload;
uint32_t sa_timeout;
uint32_t sa_retry_cnt;
uint32_t recv_pool_ratio;
@@ -79,6 +84,9 @@
uint32_t mc_leave_rescan;
uint32_t guid_mask;
uint32_t bc_join_retry;
+ boolean_t cm_enabled;
+ uint32_t cm_payload_mtu;
+ uint32_t cm_xfer_block_size;
} ipoib_params_t;
/*
* FIELDS
@@ -389,7 +397,8 @@
ipoib_inc_recv_stat(
IN ipoib_adapter_t* const p_adapter,
IN const ip_stat_sel_t stat_sel,
- IN const size_t bytes OPTIONAL );
+ IN const size_t bytes OPTIONAL,
+ IN const size_t packets OPTIONAL );
NDIS_STATUS
Index: ulp/ipoib/kernel/ipoib_debug.h
===================================================================
--- ulp/ipoib/kernel/ipoib_debug.h (revision 1776)
+++ ulp/ipoib/kernel/ipoib_debug.h (working copy)
@@ -34,6 +34,9 @@
#ifndef _IPOIB_DEBUG_H_
#define _IPOIB_DEBUG_H_
+#if defined __MODULE__
+#undef __MODULE__
+#endif
#define __MODULE__ "[IPoIB]"
Index: ulp/ipoib/kernel/ipoib_driver.c
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.c (revision 1776)
+++ ulp/ipoib/kernel/ipoib_driver.c (working copy)
@@ -160,11 +160,12 @@
{NDIS_STRING_CONST("SaTimeout"), 1, IPOIB_OFFSET(sa_timeout), IPOIB_SIZE(sa_timeout), 1000, 250, UINT_MAX},
{NDIS_STRING_CONST("SaRetries"), 1, IPOIB_OFFSET(sa_retry_cnt), IPOIB_SIZE(sa_retry_cnt), 10, 1, UINT_MAX},
{NDIS_STRING_CONST("RecvRatio"), 1, IPOIB_OFFSET(recv_pool_ratio), IPOIB_SIZE(recv_pool_ratio), 1, 1, 10},
- {NDIS_STRING_CONST("PayloadMtu"), 1, IPOIB_OFFSET(payload_mtu), IPOIB_SIZE(payload_mtu), 2044, 512, 4092},
+ {NDIS_STRING_CONST("PayloadMtu"), 1, IPOIB_OFFSET(payload_mtu), IPOIB_SIZE(payload_mtu), 2044, 512, MAX_CM_PAYLOAD_MTU},
{NDIS_STRING_CONST("lso"), 0, IPOIB_OFFSET(lso), IPOIB_SIZE(lso), 0, 0, 1},
{NDIS_STRING_CONST("MCLeaveRescan"), 1, IPOIB_OFFSET(mc_leave_rescan), IPOIB_SIZE(mc_leave_rescan), 260, 1, 3600},
- {NDIS_STRING_CONST("BCJoinRetry"), 1, IPOIB_OFFSET(bc_join_retry), IPOIB_SIZE(bc_join_retry), 50, 0, 1000}
-
+ {NDIS_STRING_CONST("BCJoinRetry"), 1, IPOIB_OFFSET(bc_join_retry), IPOIB_SIZE(bc_join_retry), 50, 0, 1000},
+ {NDIS_STRING_CONST("CmEnabled"), 0, IPOIB_OFFSET(cm_enabled), IPOIB_SIZE(cm_enabled), FALSE, FALSE, TRUE}
+
};
#define IPOIB_NUM_REG_PARAMS (sizeof (HCARegTable) / sizeof(IPOIB_REG_ENTRY))
@@ -275,6 +276,11 @@
ipoib_shutdown(
IN PVOID adapter_context );
+void
+ipoib_cancel_xmit(
+ IN NDIS_HANDLE adapter_context,
+ IN PVOID cancel_id );
+
static void
ipoib_complete_query(
IN ipoib_adapter_t* const p_adapter,
@@ -370,7 +376,7 @@
characteristics.ReturnPacketHandler = ipoib_return_packet;
characteristics.SendPacketsHandler = ipoib_send_packets;
-
+ characteristics.CancelSendPacketsHandler = ipoib_cancel_xmit;
#ifdef NDIS51_MINIPORT
characteristics.PnPEventNotifyHandler = ipoib_pnp_notify;
characteristics.AdapterShutdownHandler = ipoib_shutdown;
@@ -597,7 +603,23 @@
// Adjusting the low watermark parameter
p_adapter->params.rq_low_watermark =
p_adapter->params.rq_depth / p_adapter->params.rq_low_watermark;
-
+
+ /* disable CM if LSO is active */
+ if( p_adapter->params.cm_enabled )
+ {
+ p_adapter->params.cm_enabled = !p_adapter->params.lso;
+ if( !p_adapter->params.cm_enabled )
+ {
+ NdisWriteErrorLogEntry( p_adapter->h_adapter,
+ EVENT_IPOIB_CONNECTED_MODE_ERR, 1, 0xbadc0de0 );
+ }
+ }
+ p_adapter->params.cm_payload_mtu =
+ min( MAX_CM_PAYLOAD_MTU, p_adapter->params.payload_mtu );
+ p_adapter->params.cm_xfer_block_size =
+ p_adapter->params.cm_payload_mtu + sizeof(eth_hdr_t);
+ p_adapter->params.payload_mtu =
+ min( DEFAULT_PAYLOAD_MTU, p_adapter->params.payload_mtu);
p_adapter->params.xfer_block_size = (sizeof(eth_hdr_t) + p_adapter->params.payload_mtu);
NdisReadNetworkAddress( &status, p_mac, p_len, h_config );
@@ -717,7 +739,9 @@
ib_api_status_t ib_status;
UINT medium_index;
ipoib_adapter_t *p_adapter;
-
+#if IPOIB_USE_DMA
+ ULONG max_phys_mapping;
+#endif
IPOIB_ENTER( IPOIB_DBG_INIT );
#ifdef _DEBUG_
@@ -760,8 +784,13 @@
NdisInterfacePNPBus );
#if IPOIB_USE_DMA
+ max_phys_mapping = p_adapter->params.cm_enabled ?
+ p_adapter->params.cm_xfer_block_size: p_adapter->params.xfer_block_size;
+ max_phys_mapping = p_adapter->params.lso ?
+ max(LARGE_SEND_OFFLOAD_SIZE, max_phys_mapping): max_phys_mapping;
status =
- NdisMInitializeScatterGatherDma( h_adapter, TRUE, p_adapter->params.xfer_block_size );
+ NdisMInitializeScatterGatherDma( h_adapter, TRUE, max_phys_mapping );
+
if( status != NDIS_STATUS_SUCCESS )
{
ipoib_destroy_adapter( p_adapter );
@@ -948,11 +977,18 @@
case OID_GEN_MAXIMUM_FRAME_SIZE:
IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_MAXIMUM_FRAME_SIZE\n", port_num) );
- info = p_adapter->params.payload_mtu;
+ if( p_adapter->params.cm_enabled )
+ {
+ info = p_adapter->params.cm_payload_mtu;
+ }
+ else
+ {
+ info = p_adapter->params.payload_mtu;
+ }
break;
case OID_GEN_LINK_SPEED:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_LINK_SPEED\n", port_num) );
cl_obj_lock( &p_adapter->obj );
info = p_adapter->port_rate;
@@ -962,14 +998,20 @@
case OID_GEN_TRANSMIT_BUFFER_SPACE:
IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE\n", port_num) );
- info = p_adapter->params.sq_depth * p_adapter->params.xfer_block_size;
+ if( p_adapter->params.cm_enabled )
+ info = p_adapter->params.sq_depth * p_adapter->params.cm_xfer_block_size;
+ else
+ info = p_adapter->params.sq_depth * p_adapter->params.xfer_block_size;
break;
case OID_GEN_RECEIVE_BUFFER_SPACE:
IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_TRANSMIT_BUFFER_SPACE "
"or OID_GEN_RECEIVE_BUFFER_SPACE\n", port_num) );
- info = p_adapter->params.rq_depth * p_adapter->params.xfer_block_size;
+ if( p_adapter->params.cm_enabled )
+ info = p_adapter->params.rq_depth * p_adapter->params.cm_xfer_block_size;
+ else
+ info = p_adapter->params.rq_depth * p_adapter->params.xfer_block_size;
break;
case OID_GEN_MAXIMUM_LOOKAHEAD:
@@ -983,7 +1025,10 @@
"OID_GEN_TRANSMIT_BLOCK_SIZE or "
"OID_GEN_RECEIVE_BLOCK_SIZE or "
"OID_GEN_MAXIMUM_TOTAL_SIZE\n", port_num) );
- info = p_adapter->params.xfer_block_size;
+ if( p_adapter->params.cm_enabled )
+ info = p_adapter->params.cm_xfer_block_size;
+ else
+ info = p_adapter->params.xfer_block_size;
break;
case OID_GEN_VENDOR_ID:
@@ -1044,7 +1089,7 @@
break;
case OID_GEN_MEDIA_CONNECT_STATUS:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_MEDIA_CONNECT_STATUS\n", port_num) );
cl_obj_lock( &p_adapter->obj );
switch( p_adapter->state )
@@ -1064,7 +1109,7 @@
break;
case IB_PNP_PORT_ACTIVE:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d returning NdisMediaStateConnected\n", port_num) );
info = NdisMediaStateConnected;
break;
@@ -1091,119 +1136,119 @@
/* Required General Statistics */
case OID_GEN_XMIT_OK:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_XMIT_OK\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_SUCCESS, &oid_info );
break;
case OID_GEN_RCV_OK:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_RCV_OK\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_SUCCESS, &oid_info );
break;
case OID_GEN_XMIT_ERROR:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_XMIT_ERROR\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_ERROR, &oid_info );
break;
case OID_GEN_RCV_ERROR:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_RCV_ERROR\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_ERROR, &oid_info );
break;
case OID_GEN_RCV_NO_BUFFER:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_RCV_NO_BUFFER\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_DROPPED, &oid_info );
break;
case OID_GEN_DIRECTED_BYTES_XMIT:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_DIRECTED_BYTES_XMIT\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_UCAST_BYTES, &oid_info );
break;
case OID_GEN_DIRECTED_FRAMES_XMIT:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_DIRECTED_FRAMES_XMIT\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_UCAST_FRAMES, &oid_info );
break;
case OID_GEN_MULTICAST_BYTES_XMIT:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_MULTICAST_BYTES_XMIT\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_MCAST_BYTES, &oid_info );
break;
case OID_GEN_MULTICAST_FRAMES_XMIT:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_MULTICAST_FRAMES_XMIT\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_MCAST_FRAMES, &oid_info );
break;
case OID_GEN_BROADCAST_BYTES_XMIT:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_BROADCAST_BYTES_XMIT\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_BCAST_BYTES, &oid_info );
break;
case OID_GEN_BROADCAST_FRAMES_XMIT:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_BROADCAST_FRAMES_XMIT\n", port_num) );
src_buf = NULL;
status = ipoib_get_send_stat( p_adapter, IP_STAT_BCAST_FRAMES, &oid_info );
break;
case OID_GEN_DIRECTED_BYTES_RCV:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_DIRECTED_BYTES_RCV\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_UCAST_BYTES, &oid_info );
break;
case OID_GEN_DIRECTED_FRAMES_RCV:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_DIRECTED_FRAMES_RCV\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_UCAST_FRAMES, &oid_info );
break;
case OID_GEN_MULTICAST_BYTES_RCV:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_MULTICAST_BYTES_RCV\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_MCAST_BYTES, &oid_info );
break;
case OID_GEN_MULTICAST_FRAMES_RCV:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_MULTICAST_FRAMES_RCV\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_MCAST_FRAMES, &oid_info );
break;
case OID_GEN_BROADCAST_BYTES_RCV:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_BROADCAST_BYTES_RCV\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_BCAST_BYTES, &oid_info );
break;
case OID_GEN_BROADCAST_FRAMES_RCV:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_GEN_BROADCAST_FRAMES_RCV\n", port_num) );
src_buf = NULL;
status = ipoib_get_recv_stat( p_adapter, IP_STAT_BCAST_FRAMES, &oid_info );
@@ -1211,34 +1256,34 @@
/* Required Ethernet operational characteristics */
case OID_802_3_PERMANENT_ADDRESS:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_802_3_PERMANENT_ADDRESS\n", port_num) );
src_buf = &p_adapter->mac;
buf_len = sizeof(p_adapter->mac);
break;
case OID_802_3_CURRENT_ADDRESS:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_802_3_CURRENT_ADDRESS\n", port_num) );
src_buf = &p_adapter->params.conf_mac;
buf_len = sizeof(p_adapter->params.conf_mac);
break;
case OID_802_3_MULTICAST_LIST:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_802_3_MULTICAST_LIST\n", port_num) );
src_buf = p_adapter->mcast_array;
buf_len = p_adapter->mcast_array_size * sizeof(mac_addr_t);
break;
case OID_802_3_MAXIMUM_LIST_SIZE:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_802_3_MAXIMUM_LIST_SIZE\n", port_num) );
info = MAX_MCAST;
break;
case OID_802_3_MAC_OPTIONS:
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received query for OID_802_3_MAC_OPTIONS\n", port_num) );
info = 0;
break;
@@ -1279,7 +1324,7 @@
case OID_802_3_XMIT_LATE_COLLISIONS:
case OID_PNP_CAPABILITIES:
status = NDIS_STATUS_NOT_SUPPORTED;
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received an unsupported oid of 0x%.8X!\n", port_num, oid) );
break;
@@ -1292,7 +1337,7 @@
#endif
default:
status = NDIS_STATUS_INVALID_OID;
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID,
("Port %d received an invalid oid of 0x%.8X!\n", port_num, oid) );
break;
}
@@ -1638,7 +1683,18 @@
buf_len = sizeof(ULONG);
port_num = p_adapter->guids.port_num;
+
+ cl_obj_lock( &p_adapter->obj );
+ if( p_adapter->state == IB_PNP_PORT_REMOVE )
+ {
+ *p_bytes_read = 0;
+ cl_obj_unlock( &p_adapter->obj );
+ return NDIS_STATUS_NOT_ACCEPTED;
+ }
+
+ cl_obj_unlock( &p_adapter->obj );
+
switch( oid )
{
/* Required General */
@@ -2520,7 +2576,20 @@
IPOIB_EXIT( IPOIB_DBG_OID );
}
+/*
+ * Cancel-send handler registered as CancelSendPacketsHandler.
+ * Hands the cancel request over to the adapter's port object; the call is
+ * ignored when the adapter context or its port pointer is NULL.
+ */
+void
+ipoib_cancel_xmit(
+    IN NDIS_HANDLE adapter_context,
+    IN PVOID cancel_id )
+{
+    ipoib_adapter_t* const p_adapter =
+        (ipoib_adapter_t* const )adapter_context;
+
+    /* Nothing to cancel without an adapter or a port. */
+    if( !p_adapter || !p_adapter->p_port )
+        return;
+
+    ipoib_port_cancel_xmit( p_adapter->p_port, cancel_id );
+}
+
static void
__ipoib_ats_reg_cb(
IN ib_reg_svc_rec_t *p_reg_svc_rec )
Index: ulp/ipoib/kernel/ipoib_driver.h
===================================================================
--- ulp/ipoib/kernel/ipoib_driver.h (revision 1776)
+++ ulp/ipoib/kernel/ipoib_driver.h (working copy)
@@ -48,23 +48,39 @@
#define MAX_BUNDLE_ID_LENGTH 32
/* MLX4 supports 4K MTU */
-#define IB_MTU 4096
+#define MAX_IB_MTU 4096
+#define DEFAULT_MTU 2048
/*
* Header length as defined by IPoIB spec:
* http://www.ietf.org/internet-drafts/draft-ietf-ipoib-ip-over-infiniband-04.txt
*/
-#define MAX_PAYLOAD_MTU (IB_MTU - sizeof(ipoib_hdr_t))
-
+#define MAX_UD_PAYLOAD_MTU (MAX_IB_MTU - sizeof(ipoib_hdr_t))
+#define DEFAULT_PAYLOAD_MTU (DEFAULT_MTU - sizeof(ipoib_hdr_t))
+#define MAX_CM_PAYLOAD_MTU (65520)
+#define MAX_WRS_PER_MSG (MAX_CM_PAYLOAD_MTU/MAX_UD_PAYLOAD_MTU)
/*
* Only the protocol type is sent as part of the UD payload
* since the rest of the Ethernet header is encapsulated in the
* various IB headers. We report out buffer space as if we
* transmit the ethernet headers.
*/
-#define MAX_XFER_BLOCK_SIZE (sizeof(eth_hdr_t) + MAX_PAYLOAD_MTU)
+#define MAX_XFER_BLOCK_SIZE (sizeof(eth_hdr_t) + MAX_UD_PAYLOAD_MTU)
+#define DATA_OFFSET (sizeof(eth_hdr_t) - sizeof(ipoib_hdr_t))
+#define IPOIB_CM_FLAG_RC (0x80)
+#define IPOIB_CM_FLAG_UC (0x40)
+#define IPOIB_CM_FLAG_SVCID (0x10) // OFED set IETF bit this way ( open OFED PR 1121 )
+#define MAX_SEND_SGE (30)
+
+/* Amount of physical memory to register. */
+#define MEM_REG_SIZE 0xFFFFFFFFFFFFFFFF
+
+/* Number of work completions to chain for send and receive polling. */
+#define MAX_SEND_WC 8
+#define MAX_RECV_WC 16
+
typedef struct _ipoib_globals
{
KSPIN_LOCK lock;
Index: ulp/ipoib/kernel/ipoib_endpoint.c
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.c (revision 1776)
+++ ulp/ipoib/kernel/ipoib_endpoint.c (working copy)
@@ -43,6 +43,7 @@
#include "ipoib_endpoint.tmh"
#endif
#include <complib/cl_atomic.h>
+#include <complib/cl_math.h>
static void
@@ -76,7 +77,73 @@
__endpt_resolve(
IN ipoib_endpt_t* const p_endpt );
+static void
+__endpt_cm_send_cb(
+ IN const ib_cq_handle_t h_cq,
+ IN void *cq_context );
+static void
+__endpt_cm_recv_cb(
+ IN const ib_cq_handle_t h_cq,
+ IN void *cq_context );
+static void
+__endpt_cm_buf_mgr_construct(
+ IN endpt_buf_mgr_t * const p_buf_mgr );
+static void
+__conn_reply_cb(
+ IN ib_cm_rep_rec_t *p_cm_rep );
+
+static void
+__conn_mra_cb(
+ IN ib_cm_mra_rec_t *p_mra_rec );
+
+static void
+__conn_rej_cb(
+ IN ib_cm_rej_rec_t *p_rej_rec );
+
+static void
+__conn_dreq_cb(
+ IN ib_cm_dreq_rec_t *p_dreq_rec );
+
+static cl_status_t
+__cm_recv_desc_ctor(
+ IN void* const p_object,
+ IN void* context,
+ OUT cl_pool_item_t** const pp_pool_item );
+
+static void
+__cm_recv_desc_dtor(
+ IN const cl_pool_item_t* const p_pool_item,
+ IN void *context );
+
+static NDIS_PACKET*
+__endpt_cm_get_ndis_pkt(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_cm_desc_t* const p_desc );
+
+static inline ipoib_cm_desc_t*
+__endpt_cm_buf_mgr_get_recv(
+ IN endpt_buf_mgr_t * const p_buf_mgr );
+
+static boolean_t
+__cm_recv_is_dhcp(
+ IN const ipoib_pkt_t* const p_ipoib );
+
+static ib_api_status_t
+__endpt_cm_recv_arp(
+ IN ipoib_port_t* const p_port,
+ IN const ipoib_pkt_t* const p_ipoib,
+ OUT eth_pkt_t* const p_eth,
+ IN ipoib_endpt_t* const p_src_endpt );
+
+static ib_api_status_t
+__endpt_cm_recv_udp(
+ IN ipoib_port_t* const p_port,
+ IN ib_wc_t* const p_wc,
+ IN const ipoib_pkt_t* const p_ipoib,
+ OUT eth_pkt_t* const p_eth,
+ IN ipoib_endpt_t* const p_src_endpt );
+
ipoib_endpt_t*
ipoib_endpt_create(
IN const ib_gid_t* const p_dgid,
@@ -102,6 +169,10 @@
status = cl_obj_init( &p_endpt->obj, CL_DESTROY_ASYNC,
__endpt_destroying, __endpt_cleanup, __endpt_free );
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
+ ("Created endpoint: [ %p ] DLID: %#x QPN: %#x \n",
+ p_endpt, cl_ntoh16(dlid), cl_ntoh32(qpn) ) );
+
p_endpt->dgid = *p_dgid;
p_endpt->dlid = dlid;
p_endpt->qpn = qpn;
@@ -218,7 +289,12 @@
ipoib_port_ref(p_port, ref_leave_mcast);
p_port->p_adapter->p_ifc->leave_mcast( p_endpt->h_mcast, ipoib_leave_mcast_cb );
}
-
+ else if( p_port->p_adapter->params.cm_enabled )
+ {
+ p_endpt->cm_flag = 0;
+ CL_ASSERT( endpt_cm_get_state( p_endpt ) == IPOIB_CM_DISCONNECTED );
+ }
+
cl_obj_unlock( p_obj );
IPOIB_EXIT( IPOIB_DBG_ENDPT );
@@ -269,6 +345,12 @@
return PARENT_STRUCT( p_endpt->rel.p_parent_obj, ipoib_port_t, obj );
}
+/*
+ * Exported wrapper around __endpt_parent(): returns the port object that
+ * owns the given endpoint.
+ */
+ipoib_port_t*
+ipoib_endpt_parent(
+    IN ipoib_endpt_t* const p_endpt )
+{
+    ipoib_port_t *p_owner_port = __endpt_parent( p_endpt );
+
+    return p_owner_port;
+}
/*
* This function is called with the port object's send lock held and
@@ -356,3 +438,706 @@
IPOIB_EXIT( IPOIB_DBG_ENDPT );
return NDIS_STATUS_SUCCESS;
}
+
+
+/*
+ * Puts a connected-mode buffer manager into its "constructed" state:
+ * both NDIS pool handles are cleared and the receive descriptor pool is
+ * constructed (not yet initialized).
+ */
+static void
+__endpt_cm_buf_mgr_construct(
+    IN endpt_buf_mgr_t * const p_buf_mgr )
+{
+    IPOIB_ENTER( IPOIB_DBG_INIT );
+
+    /* No NDIS pools have been allocated yet. */
+    p_buf_mgr->h_buffer_pool = NULL;
+    p_buf_mgr->h_packet_pool = NULL;
+
+    cl_qpool_construct( &p_buf_mgr->recv_pool );
+
+    IPOIB_EXIT( IPOIB_DBG_INIT );
+}
+
+/*
+ * Creates the per-port connected-mode receive resources: the pool of
+ * ipoib_cm_desc_t receive descriptors, the NDIS packet and buffer pools used
+ * for receive indication, and the array of NDIS_PACKET pointers.
+ *
+ * Returns IB_SUCCESS when the resources already exist or were all created,
+ * IB_INSUFFICIENT_MEMORY when the descriptor pool or the packet array cannot
+ * be allocated, and IB_INSUFFICIENT_RESOURCES when an NDIS pool cannot be
+ * allocated.  On failure everything created by this call is released again
+ * and the released pool handles are reset to NULL.
+ */
+ib_api_status_t
+endpt_cm_buf_mgr_init(
+    IN ipoib_port_t* const p_port )
+{
+    cl_status_t cl_status;
+    NDIS_STATUS ndis_status;
+    ib_api_status_t ib_status = IB_SUCCESS;
+
+    IPOIB_ENTER( IPOIB_DBG_INIT );
+
+    /* Nothing to do when the pools were already set up. */
+    if( p_port->cm_buf_mgr.pool_init )
+    {
+        IPOIB_EXIT( IPOIB_DBG_INIT );
+        return ib_status;
+    }
+
+    cl_qlist_init( &p_port->cm_buf_mgr.posted_list );
+
+    __endpt_cm_buf_mgr_construct( &p_port->cm_buf_mgr );
+
+    /*
+     * Receive queue depth: eight times the configured rq_depth, capped at
+     * half of the max_srq_wrs value reported in the CA attributes.
+     */
+    p_port->cm_recv_mgr.rq_depth =
+        min( (uint32_t)p_port->p_adapter->params.rq_depth * 8,
+        p_port->p_ca_attrs->max_srq_wrs/2 );
+    p_port->cm_recv_mgr.depth = 0;
+
+    /* Allocate the receive descriptors pool */
+    cl_status = cl_qpool_init( &p_port->cm_buf_mgr.recv_pool,
+        p_port->cm_recv_mgr.rq_depth ,
+        0,
+        0,
+        sizeof( ipoib_cm_desc_t ),
+        __cm_recv_desc_ctor,
+        __cm_recv_desc_dtor,
+        p_port );
+
+    if( cl_status != CL_SUCCESS )
+    {
+        NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+            EVENT_IPOIB_RECV_POOL, 1, cl_status );
+
+        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+            ("cl_qpool_init for cm recvs returned %#x\n", cl_status) );
+
+        return IB_INSUFFICIENT_MEMORY;
+    }
+
+    /* Allocate the NDIS buffer and packet pools for receive indication. */
+    NdisAllocatePacketPool( &ndis_status,
+        &p_port->cm_buf_mgr.h_packet_pool,
+        p_port->cm_recv_mgr.rq_depth,
+        PROTOCOL_RESERVED_SIZE_IN_PACKET );
+    if( ndis_status != NDIS_STATUS_SUCCESS )
+    {
+        NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+            EVENT_IPOIB_RECV_PKT_POOL, 1, ndis_status );
+
+        /* The exit trace is emitted once, at the common failure exit. */
+        IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+            ("NdisAllocatePacketPool returned %08X\n", ndis_status) );
+
+        ib_status = IB_INSUFFICIENT_RESOURCES;
+        goto pkt_pool_failed;
+    }
+
+    NdisAllocateBufferPool( &ndis_status,
+        &p_port->cm_buf_mgr.h_buffer_pool,
+        p_port->cm_recv_mgr.rq_depth );
+    if( ndis_status != NDIS_STATUS_SUCCESS )
+    {
+        NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+            EVENT_IPOIB_RECV_BUF_POOL, 1, ndis_status );
+
+        IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+            ("NdisAllocateBufferPool returned %08X\n", ndis_status) );
+
+        ib_status = IB_INSUFFICIENT_RESOURCES;
+        goto buf_pool_failed;
+    }
+
+    /* One NDIS_PACKET pointer slot per receive queue entry. */
+    p_port->cm_recv_mgr.recv_pkt_array =
+        cl_zalloc( sizeof(NDIS_PACKET*) * p_port->cm_recv_mgr.rq_depth );
+
+    if( !p_port->cm_recv_mgr.recv_pkt_array )
+    {
+        IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+            ("cl_zalloc for PNDIS_PACKET array failed.\n") );
+
+        ib_status = IB_INSUFFICIENT_MEMORY;
+        goto pkt_array_failed;
+    }
+
+    p_port->cm_buf_mgr.pool_init = TRUE;
+
+    IPOIB_EXIT( IPOIB_DBG_INIT );
+    return IB_SUCCESS;
+
+    /* Failure unwinding: release in reverse order of allocation. */
+pkt_array_failed:
+    if( p_port->cm_buf_mgr.h_buffer_pool )
+    {
+        NdisFreeBufferPool( p_port->cm_buf_mgr.h_buffer_pool );
+        p_port->cm_buf_mgr.h_buffer_pool = NULL;
+    }
+buf_pool_failed:
+    if( p_port->cm_buf_mgr.h_packet_pool )
+    {
+        NdisFreePacketPool( p_port->cm_buf_mgr.h_packet_pool );
+        p_port->cm_buf_mgr.h_packet_pool = NULL;
+    }
+pkt_pool_failed:
+    cl_qpool_destroy( &p_port->cm_buf_mgr.recv_pool );
+
+    IPOIB_EXIT( IPOIB_DBG_INIT );
+    return ib_status;
+}
+
+/*
+ * Returns every descriptor that is still on the port's posted list to the
+ * receive descriptor pool.  Does nothing when the pools were never
+ * initialized or the posted list is empty.
+ */
+void
+endpt_cm_buf_mgr_reset(
+    IN ipoib_port_t* const p_port )
+{
+    cl_list_item_t *p_posted;
+
+    if( !p_port->cm_buf_mgr.pool_init )
+        return;
+
+    if( !cl_qlist_count( &p_port->cm_buf_mgr.posted_list ) )
+        return;
+
+    /* Drain the posted list from the head until it reports its end. */
+    p_posted = cl_qlist_remove_head( &p_port->cm_buf_mgr.posted_list );
+    while( p_posted != cl_qlist_end( &p_port->cm_buf_mgr.posted_list ) )
+    {
+        cl_qpool_put( &p_port->cm_buf_mgr.recv_pool,
+            &( PARENT_STRUCT( p_posted, ipoib_cm_desc_t, list_item ))->item );
+
+        p_posted = cl_qlist_remove_head( &p_port->cm_buf_mgr.posted_list );
+    }
+}
+
+/*
+ * Releases the per-port connected-mode receive resources: descriptors still
+ * on the posted list are returned to the pool, then the NDIS_PACKET pointer
+ * array, the NDIS buffer and packet pools and the descriptor pool are freed.
+ * Does nothing when the pools were never initialized.
+ *
+ * Freed pointers and handles are reset to NULL so that no stale value
+ * remains in the port object after this call.
+ */
+void
+endpt_cm_buf_mgr_destroy(
+    IN ipoib_port_t* const p_port )
+{
+
+    IPOIB_ENTER(IPOIB_DBG_INIT );
+
+    CL_ASSERT( p_port );
+
+    /* Free the receive descriptors. */
+    if( !p_port->cm_buf_mgr.pool_init )
+    {
+        /* Keep the enter/exit trace balanced on the early-out path. */
+        IPOIB_EXIT( IPOIB_DBG_INIT );
+        return;
+    }
+
+    endpt_cm_buf_mgr_reset( p_port );
+
+    p_port->cm_buf_mgr.pool_init = FALSE;
+
+    if( p_port->cm_recv_mgr.recv_pkt_array )
+    {
+        cl_free( p_port->cm_recv_mgr.recv_pkt_array );
+        p_port->cm_recv_mgr.recv_pkt_array = NULL;
+    }
+
+    /* Destroy the receive packet and buffer pools. */
+    if( p_port->cm_buf_mgr.h_buffer_pool )
+    {
+        NdisFreeBufferPool( p_port->cm_buf_mgr.h_buffer_pool );
+        p_port->cm_buf_mgr.h_buffer_pool = NULL;
+    }
+    if( p_port->cm_buf_mgr.h_packet_pool )
+    {
+        NdisFreePacketPool( p_port->cm_buf_mgr.h_packet_pool );
+        p_port->cm_buf_mgr.h_packet_pool = NULL;
+    }
+
+    cl_qpool_destroy( &p_port->cm_buf_mgr.recv_pool );
+
+    IPOIB_EXIT( IPOIB_DBG_INIT );
+}
+
+/*
+ * Pool constructor for a connected-mode receive descriptor.
+ *
+ * p_object     - storage of the ipoib_cm_desc_t being constructed.
+ * context      - the owning ipoib_port_t (passed to cl_qpool_init).
+ * pp_pool_item - receives the descriptor's pool item on success.
+ *
+ * Allocates a non-paged receive buffer sized from cm_xfer_block_size
+ * (rounded up to BUF_ALIGN), registers it with the port's protection
+ * domain and prepares the single-segment receive work request.
+ *
+ * Returns CL_SUCCESS, or CL_INSUFFICIENT_MEMORY when the buffer cannot be
+ * allocated or registered.  On failure the descriptor's buffer pointer and
+ * memory-region handle are left NULL, which is the state the destructor's
+ * guards test for.
+ */
+static cl_status_t
+__cm_recv_desc_ctor(
+    IN void* const p_object,
+    IN void* context,
+    OUT cl_pool_item_t** const pp_pool_item )
+{
+    ipoib_cm_desc_t* p_desc;
+    ipoib_port_t* p_port;
+    ib_mr_create_t create_mr;
+    net32_t rkey;
+
+    CL_ASSERT( p_object );
+    CL_ASSERT( context );
+
+    p_desc = (ipoib_cm_desc_t*)p_object;
+    p_port = (ipoib_port_t*)context;
+
+#define BUF_ALIGN (16)
+
+    p_desc->alloc_buf_size =
+        ROUNDUP( p_port->p_adapter->params.cm_xfer_block_size, BUF_ALIGN );
+
+    p_desc->p_alloc_buf = (uint8_t *)ExAllocatePoolWithTag(
+        NonPagedPool, p_desc->alloc_buf_size, 'DOMC' );
+
+    if( p_desc->p_alloc_buf == NULL )
+    {
+        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+            ("Failed to allocate receive buffer size %d bytes.\n", p_desc->alloc_buf_size ) );
+        return CL_INSUFFICIENT_MEMORY;
+    }
+
+    /* Register the whole allocation for local write access. */
+    create_mr.vaddr = p_desc->p_alloc_buf;
+    create_mr.length = p_desc->alloc_buf_size;
+    create_mr.access_ctrl = IB_AC_LOCAL_WRITE;
+
+
+    if( p_port->p_adapter->p_ifc->reg_mem(
+        p_port->ib_mgr.h_pd,
+        &create_mr,
+        &p_desc->lkey,
+        &rkey,
+        &p_desc->h_mr ) != IB_SUCCESS )
+    {
+        IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+            ("Failed to create Memory Region size %d bytes.\n", p_desc->alloc_buf_size ) );
+        goto ctor_failed;
+    }
+
+    /*
+     * The usable receive area starts (BUF_ALIGN - sizeof(ipoib_hdr_t))
+     * bytes into the allocation and is shortened by the same amount.
+     */
+    p_desc->p_buf = p_desc->p_alloc_buf + (BUF_ALIGN - sizeof( ipoib_hdr_t));
+    p_desc->buf_size = p_desc->alloc_buf_size - (BUF_ALIGN - sizeof( ipoib_hdr_t));
+
+    /* Setup the local data segment. */
+    p_desc->local_ds[0].vaddr = (uint64_t)(uintn_t)p_desc->p_buf;
+    p_desc->local_ds[0].length = p_desc->buf_size;
+    p_desc->local_ds[0].lkey = p_desc->lkey;
+
+    /* Setup the work request. */
+    p_desc->wr.wr_id = (uintn_t)p_desc;
+    p_desc->wr.ds_array = p_desc->local_ds;
+    p_desc->wr.num_ds = 1;
+    p_desc->type = PKT_TYPE_CM_UCAST;
+
+    *pp_pool_item = &p_desc->item;
+    return CL_SUCCESS;
+
+ctor_failed:
+    ExFreePoolWithTag( p_desc->p_alloc_buf, 'DOMC' );
+    /* Do not leave a pointer to freed memory or an unset handle behind. */
+    p_desc->p_alloc_buf = NULL;
+    p_desc->h_mr = NULL;
+    return CL_INSUFFICIENT_MEMORY;
+}
+
+/*
+ * Pool destructor for a connected-mode receive descriptor.
+ *
+ * p_pool_item - pool item embedded in the ipoib_cm_desc_t being destroyed.
+ * context     - the owning ipoib_port_t.
+ *
+ * Deregisters the descriptor's memory region (if any) and then frees its
+ * receive buffer (if any).  A NULL item or context is ignored.
+ */
+static void
+__cm_recv_desc_dtor(
+    IN const cl_pool_item_t* const p_pool_item,
+    IN void *context )
+{
+    ipoib_cm_desc_t *p_cm_desc;
+    ipoib_port_t* p_owner;
+
+    if( !p_pool_item || !context )
+        return;
+
+    p_owner = (ipoib_port_t*)context;
+    p_cm_desc = PARENT_STRUCT( p_pool_item, ipoib_cm_desc_t, item );
+
+    /* Drop the registration before releasing the memory behind it. */
+    if( p_cm_desc->h_mr )
+    {
+        p_owner->p_adapter->p_ifc->dereg_mr( p_cm_desc->h_mr );
+    }
+
+    if( p_cm_desc->p_alloc_buf )
+    {
+        ExFreePoolWithTag( p_cm_desc->p_alloc_buf, 'DOMC' );
+    }
+}
+
+/*
+ * Wraps a completed connected-mode receive descriptor in an NDIS packet.
+ *
+ * A packet is taken from cm_buf_mgr.h_packet_pool and an NDIS buffer from
+ * cm_buf_mgr.h_buffer_pool.  The buffer describes the region that starts
+ * DATA_OFFSET bytes before p_desc->p_buf and is p_desc->len + DATA_OFFSET
+ * bytes long.  The port and descriptor are recorded in the packet so they
+ * can be recovered from it later.
+ *
+ * Returns the packet, or NULL when either allocation fails (a packet that
+ * was already allocated is freed again).
+ */
+static NDIS_PACKET*
+__endpt_cm_get_ndis_pkt(
+	IN ipoib_port_t* const p_port,
+	IN ipoib_cm_desc_t* const p_desc )
+{
+	NDIS_STATUS	ndis_status;
+	NDIS_PACKET	*p_pkt;
+	NDIS_BUFFER	*p_ndis_buf;
+
+	IPOIB_ENTER( IPOIB_DBG_RECV );
+
+	NdisDprAllocatePacketNonInterlocked( &ndis_status, &p_pkt,
+		p_port->cm_buf_mgr.h_packet_pool );
+	if( ndis_status != NDIS_STATUS_SUCCESS )
+	{
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("Failed to allocate NDIS_PACKET: %08x\n", ndis_status) );
+		return NULL;
+	}
+
+	/* Remember where the packet came from. */
+	IPOIB_PORT_FROM_PACKET( p_pkt ) = p_port;
+	IPOIB_RECV_FROM_PACKET( p_pkt ) = p_desc;
+
+	NdisAllocateBuffer( &ndis_status, &p_ndis_buf,
+		p_port->cm_buf_mgr.h_buffer_pool,
+		(void *)(p_desc->p_buf - DATA_OFFSET),
+		p_desc->len + DATA_OFFSET );
+
+	if( ndis_status == NDIS_STATUS_SUCCESS )
+	{
+		NdisChainBufferAtFront( p_pkt, p_ndis_buf );
+		NDIS_SET_PACKET_HEADER_SIZE( p_pkt, sizeof(eth_hdr_t) );
+
+		IPOIB_EXIT( IPOIB_DBG_RECV );
+		return p_pkt;
+	}
+
+	/* No buffer: give the packet back before reporting failure. */
+	IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+		("Failed to allocate NDIS_BUFFER: %08x\n", ndis_status) );
+	NdisDprFreePacketNonInterlocked( p_pkt );
+	return NULL;
+}
+
+/*
+ * Takes one receive descriptor out of the buffer manager's pool and appends
+ * it to posted_list.  A NULL result from cl_qpool_get() is passed straight
+ * back to the caller and nothing is added to the list.
+ */
+static inline ipoib_cm_desc_t*
+__endpt_cm_buf_mgr_get_recv(
+	IN endpt_buf_mgr_t * const p_buf_mgr )
+{
+	ipoib_cm_desc_t	*p_desc =
+		(ipoib_cm_desc_t*)cl_qpool_get( &p_buf_mgr->recv_pool );
+
+	if( p_desc == NULL )
+		return NULL;
+
+	cl_qlist_insert_tail( &p_buf_mgr->posted_list, &p_desc->list_item );
+	return p_desc;
+}
+
+/*
+ * Returns a single receive descriptor to the buffer manager: the descriptor
+ * is unlinked from posted_list and then put back into recv_pool.
+ */
+void
+endpt_cm_buf_mgr_put_recv(
+	IN endpt_buf_mgr_t * const p_buf_mgr,
+	IN ipoib_cm_desc_t* const p_desc )
+{
+	cl_qlist_t	* const p_posted = &p_buf_mgr->posted_list;
+	cl_qpool_t	* const p_pool = &p_buf_mgr->recv_pool;
+
+	IPOIB_ENTER(IPOIB_DBG_RECV );
+
+	/* Unlink from the posted list, then hand the descriptor to its pool. */
+	cl_qlist_remove_item( p_posted, &p_desc->list_item );
+	cl_qpool_put( p_pool, &p_desc->item );
+
+	IPOIB_EXIT( IPOIB_DBG_RECV );
+}
+
+/*
+ * Returns a whole list of receive descriptors to recv_pool in one call.
+ * Unlike endpt_cm_buf_mgr_put_recv(), this does not touch posted_list.
+ */
+void
+endpt_cm_buf_mgr_put_recv_list(
+	IN endpt_buf_mgr_t * const p_buf_mgr,
+	IN cl_qlist_t* const p_list )
+{
+	cl_qpool_t	* const p_pool = &p_buf_mgr->recv_pool;
+
+	cl_qpool_put_list( p_pool, p_list );
+}
+
+/*
+ * Drains p_done_list and builds one NDIS packet per descriptor, storing the
+ * packets in p_port->cm_recv_mgr.recv_pkt_array.
+ *
+ * Every packet is marked with TCP, UDP and IP checksum "succeeded" and with
+ * NDIS_STATUS_SUCCESS.  A descriptor for which no packet could be built is
+ * returned to the buffer manager and cm_recv_mgr.depth is decremented.
+ *
+ *	p_endpt		unused
+ *	p_bytes_recv	set to the sum of p_desc->len over the packets built
+ *
+ * Returns the number of packets placed in recv_pkt_array.
+ */
+uint32_t
+endpt_cm_recv_mgr_build_pkt_array(
+	IN ipoib_port_t* const p_port,
+	IN ipoib_endpt_t* const p_endpt,
+	IN cl_qlist_t* const p_done_list,
+	IN OUT uint32_t* p_bytes_recv )
+{
+	cl_list_item_t	*p_item;
+	ipoib_cm_desc_t	*p_desc;
+	NDIS_PACKET		*p_packet;
+	uint32_t		pkt_cnt = 0;
+
+	IPOIB_ENTER( IPOIB_DBG_RECV );
+	UNUSED_PARAM( p_endpt );
+
+	*p_bytes_recv = 0;
+
+	for( ;; )
+	{
+		p_item = cl_qlist_remove_head( p_done_list );
+		if( p_item == cl_qlist_end( p_done_list ) )
+			break;
+
+		/* The list item is cast directly to its descriptor. */
+		p_desc = (ipoib_cm_desc_t*)p_item;
+
+		p_packet = __endpt_cm_get_ndis_pkt( p_port, p_desc );
+		if( p_packet == NULL )
+		{
+			IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Failed to get Packet from descriptor\n" ) );
+			endpt_cm_buf_mgr_put_recv( &p_port->cm_buf_mgr, p_desc );
+			p_port->cm_recv_mgr.depth--;
+			continue;
+		}
+
+		/* Report all checksums as verified. */
+		p_desc->ndis_csum.Value = 0;
+		p_desc->ndis_csum.Receive.NdisPacketTcpChecksumSucceeded = TRUE;
+		p_desc->ndis_csum.Receive.NdisPacketUdpChecksumSucceeded = TRUE;
+		p_desc->ndis_csum.Receive.NdisPacketIpChecksumSucceeded = TRUE;
+		NDIS_PER_PACKET_INFO_FROM_PACKET( p_packet, TcpIpChecksumPacketInfo ) =
+			(void*)(uintn_t)p_desc->ndis_csum.Value;
+
+		NDIS_SET_PACKET_STATUS( p_packet, NDIS_STATUS_SUCCESS );
+
+		p_port->cm_recv_mgr.recv_pkt_array[pkt_cnt] = p_packet;
+		pkt_cnt++;
+		*p_bytes_recv += p_desc->len;
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_RECV );
+	return pkt_cnt;
+}
+/*
+ * Flushes and destroys the connected-mode queue pairs of an endpoint.
+ *
+ * When the endpoint has a receive QP, the QPs are moved to the error state,
+ * the receive CQ is polled until it is drained, and every completed
+ * descriptor is returned to the port's CM buffer manager (under recv_lock,
+ * decrementing cm_recv_mgr.depth).  The receive QP is then destroyed.  The
+ * send QP, if any, is destroyed afterwards.  Both handles are cleared.
+ *
+ * Failures of poll_cq/destroy_qp are traced; nothing is returned.
+ */
+void
+endpt_cm_flush_recv(
+	IN ipoib_port_t* const p_port,
+	IN ipoib_endpt_t* const p_endpt )
+{
+	ib_api_status_t ib_status = IB_SUCCESS;
+	ib_qp_mod_t mod_attr;
+	ib_wc_t wc[MAX_RECV_WC];
+	ib_wc_t *p_free_wc;
+	ib_wc_t *p_done_wc;
+	ib_wc_t *p_wc;
+	ipoib_cm_desc_t *p_desc;
+	size_t i;
+
+	IPOIB_ENTER( IPOIB_DBG_RECV );
+
+	CL_ASSERT( p_endpt );
+
+	if( p_endpt->conn.h_recv_qp )
+	{
+		cl_memclr( &mod_attr, sizeof( mod_attr ) );
+		mod_attr.req_state = IB_QPS_ERROR;
+
+		/*
+		 * The send QP is optional (it is NULL-checked again below), so
+		 * only modify it when it actually exists.
+		 */
+		if( p_endpt->conn.h_send_qp )
+		{
+			p_port->p_adapter->p_ifc->modify_qp(
+				p_endpt->conn.h_send_qp, &mod_attr );
+		}
+		p_port->p_adapter->p_ifc->modify_qp( p_endpt->conn.h_recv_qp, &mod_attr );
+
+		/* Chain the local work completions into a free list. */
+		for( i = 0; i < MAX_RECV_WC; i++ )
+			wc[i].p_next = &wc[i + 1];
+		wc[MAX_RECV_WC - 1].p_next = NULL;
+
+		do
+		{
+			p_free_wc = wc;
+			/* Never walk a stale done list if the poll leaves it untouched. */
+			p_done_wc = NULL;
+			ib_status =
+				p_port->p_adapter->p_ifc->poll_cq( p_endpt->conn.h_recv_cq,
+				&p_free_wc, &p_done_wc );
+			if( ib_status != IB_SUCCESS &&
+				ib_status != IB_NOT_FOUND )
+			{
+				/* connection CQ failed */
+				IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Poll Recv CQ failed status %#x\n", ib_status ) );
+				break;
+			}
+
+			/* Give every completed descriptor back to the pool. */
+			cl_spinlock_acquire( &p_port->recv_lock );
+			for( p_wc = p_done_wc; p_wc; p_wc = p_wc->p_next )
+			{
+				p_desc = (ipoib_cm_desc_t *)(uintn_t)p_wc->wr_id;
+				endpt_cm_buf_mgr_put_recv( &p_port->cm_buf_mgr, p_desc );
+				p_port->cm_recv_mgr.depth--;
+			}
+			cl_spinlock_release( &p_port->recv_lock );
+
+			/* An exhausted free list means more completions may be pending. */
+		} while( !p_free_wc );
+
+		ib_status = p_port->p_adapter->p_ifc->destroy_qp( p_endpt->conn.h_recv_qp, NULL );
+		if( ib_status != IB_SUCCESS )
+		{
+			IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Destroy Recv QP failed status %#x\n", ib_status ) );
+		}
+		p_endpt->conn.h_recv_qp = NULL;
+	}
+
+	if( p_endpt->conn.h_send_qp )
+	{
+		ib_status = p_port->p_adapter->p_ifc->destroy_qp( p_endpt->conn.h_send_qp, NULL );
+		if( ib_status != IB_SUCCESS )
+		{
+			IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+				("Destroy Send QP failed status %#x\n", ib_status ) );
+		}
+		p_endpt->conn.h_send_qp = NULL;
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_RECV );
+}
+
+/*
+ * Sorts a list of receive work completions of a connected-mode endpoint.
+ *
+ * Each completion's wr_id is the ipoib_cm_desc_t that was posted.
+ *  - Failed or flushed completions are counted as errors, unlinked from the
+ *    buffer manager's posted_list and queued on p_bad_list.
+ *  - Successful completions are checked by type: ARP and IP packets must
+ *    meet a minimum length, ARP packets go through __endpt_cm_recv_arp()
+ *    and UDP packets through __endpt_cm_recv_udp().  A packet that fails
+ *    these checks is handled like a failed completion.
+ *  - Everything else gets an Ethernet header (type copied from the IPoIB
+ *    header, source = endpoint MAC, destination = adapter MAC) written in
+ *    front of the payload, its length saved in p_desc->len, and is queued
+ *    on p_done_list.
+ *
+ * Returns the number of work completions examined.
+ */
+int32_t
+endpt_cm_recv_mgr_filter(
+	IN ipoib_endpt_t* const p_endpt,
+	IN ib_wc_t* const p_done_wc_list,
+	OUT cl_qlist_t* const p_done_list,
+	OUT cl_qlist_t* const p_bad_list )
+{
+	ib_api_status_t ib_status;
+	ipoib_cm_desc_t *p_desc;
+	ib_wc_t *p_wc;
+	ipoib_pkt_t *p_ipoib;
+	eth_pkt_t *p_eth;
+	ipoib_port_t* p_port;
+	int32_t recv_cnt;
+
+	IPOIB_ENTER( IPOIB_DBG_RECV );
+
+	p_port = ipoib_endpt_parent( p_endpt );
+
+	for( p_wc = p_done_wc_list, recv_cnt = 0; p_wc; p_wc = p_wc->p_next )
+	{
+		p_desc = (ipoib_cm_desc_t *)(uintn_t)p_wc->wr_id;
+		recv_cnt++;
+		if( p_wc->status != IB_WCS_SUCCESS )
+		{
+			if( p_wc->status != IB_WCS_WR_FLUSHED_ERR )
+			{
+
+				IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Failed completion %s (vendor specific %#x)\n",
+					p_port->p_adapter->p_ifc->get_wc_status_str( p_wc->status ),
+					(int)p_wc->vendor_specific) );
+			}
+			else
+			{
+				IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_RECV,
+					("Flushed completion %s\n",
+					p_port->p_adapter->p_ifc->get_wc_status_str( p_wc->status )) );
+			}
+
+			ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0, 0 );
+
+			cl_qlist_remove_item( &p_port->cm_buf_mgr.posted_list,&p_desc->list_item );
+			cl_qlist_insert_tail( p_bad_list, &p_desc->item.list_item );
+			continue;
+		}
+
+		/* Successful completion
+		 Setup the ethernet/ip/arp header and queue descriptor for report. */
+		ib_status = IB_SUCCESS;
+		p_ipoib = (ipoib_pkt_t *)((uint8_t*)p_desc->p_buf );
+		/* The Ethernet view starts DATA_OFFSET bytes before the IPoIB data. */
+		p_eth = (eth_pkt_t *)((uint8_t*)p_desc->p_buf - DATA_OFFSET );
+
+		switch( p_ipoib->hdr.type )
+		{
+		case ETH_PROT_TYPE_ARP:
+			if( p_wc->length < (sizeof(ipoib_hdr_t) + sizeof(ipoib_arp_pkt_t)) )
+			{
+				IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Received ARP packet too short\n") );
+				ib_status = IB_ERROR;
+				break;
+			}
+			ib_status =
+				__endpt_cm_recv_arp( p_port, p_ipoib, p_eth, p_endpt );
+			break;
+		case ETH_PROT_TYPE_IP:
+			if( p_wc->length < (sizeof(ipoib_hdr_t) + sizeof(ip_hdr_t)) )
+			{
+				IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+					("Received IP packet too short\n") );
+				ib_status = IB_ERROR;
+				break;
+			}
+			if( p_ipoib->type.ip.hdr.prot == IP_PROT_UDP )
+			{
+				ib_status =
+					__endpt_cm_recv_udp( p_port, p_wc, p_ipoib, p_eth, p_endpt );
+			}
+
+			break;
+		default:
+			/* Other protocol types are passed up unfiltered. */
+			break;
+		}
+
+		if( ib_status != IB_SUCCESS )
+		{
+			ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0, 0 );
+			/*
+			 * Unlink from posted_list exactly as the failed-completion
+			 * path above does, so that every descriptor on p_bad_list is
+			 * in the same state when the caller disposes of the list.
+			 */
+			cl_qlist_remove_item( &p_port->cm_buf_mgr.posted_list,
+				&p_desc->list_item );
+			cl_qlist_insert_tail( p_bad_list, &p_desc->item.list_item );
+			continue;
+		}
+
+		p_eth->hdr.type = p_ipoib->hdr.type;
+		p_eth->hdr.src = p_endpt->mac;
+		p_eth->hdr.dst = p_port->p_adapter->mac;
+
+		/* save payload length */
+		p_desc->len = p_wc->length;
+
+		cl_qlist_insert_tail( p_done_list, &p_desc->item.list_item );
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_RECV );
+	return recv_cnt;
+}
+
+/*
+ * Posts receive descriptors to the port's shared receive queue.
+ *
+ * Descriptors are pulled from the CM receive pool while the pool holds more
+ * than one item, linked into a single work-request chain (newest first) and
+ * posted with one post_srq_recv() call.  cm_recv_mgr.depth is incremented
+ * per descriptor taken.  When the post fails, every work request from the
+ * reported failing one onwards is returned to the buffer manager and depth
+ * is decremented again.
+ *
+ * Returns IB_SUCCESS when nothing was posted, otherwise the status of
+ * post_srq_recv().
+ */
+ib_api_status_t
+endpt_cm_post_recv(
+	IN ipoib_port_t* const p_port )
+{
+	ib_api_status_t	ib_status = IB_SUCCESS;
+	ipoib_cm_desc_t	*p_chain = NULL;
+	ipoib_cm_desc_t	*p_desc;
+	ib_recv_wr_t	*p_failed_wr = NULL;
+
+	IPOIB_ENTER( IPOIB_DBG_RECV );
+
+	while( cl_qpool_count( &p_port->cm_buf_mgr.recv_pool ) > 1 )
+	{
+		p_desc = __endpt_cm_buf_mgr_get_recv( &p_port->cm_buf_mgr );
+		if( p_desc == NULL )
+		{
+			IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_RECV,
+				("Out of receive descriptors! Endpt recv queue depth 0x%x\n",
+				p_port->cm_recv_mgr.depth ) );
+			break;
+		}
+
+		/* Push onto the front of the chain; the first one ends it. */
+		p_desc->wr.p_next = ( p_chain != NULL ) ? &p_chain->wr : NULL;
+		p_chain = p_desc;
+
+		p_port->cm_recv_mgr.depth++;
+	}
+
+	if( p_chain == NULL )
+	{
+		IPOIB_EXIT( IPOIB_DBG_RECV );
+		return( ib_status );
+	}
+
+	ib_status = p_port->p_adapter->p_ifc->post_srq_recv(
+		p_port->ib_mgr.h_srq, &p_chain->wr, &p_failed_wr );
+
+	if( ib_status != IB_SUCCESS )
+	{
+		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("ip_post_recv returned %s\n",
+			p_port->p_adapter->p_ifc->get_err_str( ib_status )) );
+
+		/* Return the work requests that were not posted to the pool. */
+		while( p_failed_wr != NULL )
+		{
+			p_desc = PARENT_STRUCT( p_failed_wr, ipoib_cm_desc_t, wr );
+			p_failed_wr = p_failed_wr->p_next;
+			endpt_cm_buf_mgr_put_recv( &p_port->cm_buf_mgr, p_desc );
+			p_port->cm_recv_mgr.depth--;
+		}
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_RECV );
+	return( ib_status );
+}
+
+/*
+ * Converts a received IPoIB ARP payload into its Ethernet form.
+ *
+ * The packet is rejected with IB_ERROR unless it carries the InfiniBand
+ * hardware type, an IPoIB-sized hardware address and the IP protocol type.
+ * Otherwise the Ethernet ARP fields are filled in: hardware type and size,
+ * sender MAC/IP (MAC taken from the source endpoint) and target MAC/IP
+ * (MAC taken from the port's local endpoint).
+ *
+ * NOTE: the caller derives p_eth and p_ipoib from the same receive buffer,
+ * so the assignments below are kept in their original order.
+ */
+static ib_api_status_t
+__endpt_cm_recv_arp(
+	IN ipoib_port_t* const p_port,
+	IN const ipoib_pkt_t* const p_ipoib,
+	OUT eth_pkt_t* const p_eth,
+	IN ipoib_endpt_t* const p_src_endpt )
+{
+	const ipoib_arp_pkt_t	* const p_ib_arp = &p_ipoib->type.arp;
+	arp_pkt_t				* const p_arp = &p_eth->type.arp;
+
+	if( p_ib_arp->hw_type != ARP_HW_TYPE_IB )
+		return IB_ERROR;
+	if( p_ib_arp->hw_size != sizeof(ipoib_hw_addr_t) )
+		return IB_ERROR;
+	if( p_ib_arp->prot_type != ETH_PROT_TYPE_IP )
+		return IB_ERROR;
+
+	p_arp->hw_type = ARP_HW_TYPE_ETH;
+	p_arp->hw_size = sizeof(mac_addr_t);
+	p_arp->src_hw = p_src_endpt->mac;
+	p_arp->src_ip = p_ib_arp->src_ip;
+	p_arp->dst_hw = p_port->p_local_endpt->mac;
+	p_arp->dst_ip = p_ib_arp->dst_ip;
+
+	return IB_SUCCESS;
+}
+
+/*
+ * Filters a received UDP packet.
+ *
+ * Returns IB_ERROR when the completion is too short to hold the IPoIB, IP
+ * and UDP headers.  DHCP packets are handed to ipoib_recv_dhcp() and its
+ * status is returned; any other UDP packet yields IB_SUCCESS unchanged.
+ */
+static ib_api_status_t
+__endpt_cm_recv_udp(
+	IN ipoib_port_t* const p_port,
+	IN ib_wc_t* const p_wc,
+	IN const ipoib_pkt_t* const p_ipoib,
+	OUT eth_pkt_t* const p_eth,
+	IN ipoib_endpt_t* const p_src_endpt )
+{
+	if( p_wc->length <
+		(sizeof(ipoib_hdr_t) + sizeof(ip_hdr_t) + sizeof(udp_hdr_t)) )
+	{
+		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("Received UDP packet too short\n") );
+		return IB_ERROR;
+	}
+
+	/* Only DHCP needs extra processing. */
+	if( !__cm_recv_is_dhcp( p_ipoib ) )
+		return IB_SUCCESS;
+
+	return ipoib_recv_dhcp(
+		p_port, p_ipoib, p_eth, p_src_endpt, p_port->p_local_endpt );
+}
+
+/*
+ * Tells whether a UDP packet is DHCP traffic, judged by its port pair:
+ * client port -> server port, or server port -> client port.
+ */
+static boolean_t
+__cm_recv_is_dhcp(
+	IN const ipoib_pkt_t* const p_ipoib )
+{
+	/* Client talking to server. */
+	if( p_ipoib->type.ip.prot.udp.hdr.dst_port == DHCP_PORT_SERVER &&
+		p_ipoib->type.ip.prot.udp.hdr.src_port == DHCP_PORT_CLIENT )
+	{
+		return TRUE;
+	}
+
+	/* Server talking to client. */
+	if( p_ipoib->type.ip.prot.udp.hdr.dst_port == DHCP_PORT_CLIENT &&
+		p_ipoib->type.ip.prot.udp.hdr.src_port == DHCP_PORT_SERVER )
+	{
+		return TRUE;
+	}
+
+	return FALSE;
+}
Index: ulp/ipoib/kernel/ipoib_endpoint.h
===================================================================
--- ulp/ipoib/kernel/ipoib_endpoint.h (revision 1776)
+++ ulp/ipoib/kernel/ipoib_endpoint.h (working copy)
@@ -46,6 +46,57 @@
#include "ipoib_debug.h"
+/*
+ * Receive buffer manager for connected mode (one per port, reached as
+ * p_port->cm_buf_mgr).
+ */
+typedef struct _endpt_buf_mgr
+{
+ /* Pool of receive descriptors (ipoib_cm_desc_t). */
+ cl_qpool_t recv_pool;
+ /* Pool that NDIS packets for received data are allocated from. */
+ NDIS_HANDLE h_packet_pool;
+ /* Pool that NDIS buffers for received data are allocated from. */
+ NDIS_HANDLE h_buffer_pool;
+ /* Descriptors currently taken out of recv_pool, linked by list_item. */
+ cl_qlist_t posted_list;
+ /* NOTE(review): presumably set once the pools are initialized - confirm. */
+ boolean_t pool_init;
+} endpt_buf_mgr_t;
+
+/* Receive bookkeeping for connected mode (reached as p_port->cm_recv_mgr). */
+typedef struct _endpt_recv_mgr
+{
+ /* Incremented per descriptor taken for posting, decremented on return. */
+ int32_t depth;
+ /* NOTE(review): presumably the configured receive queue depth - confirm. */
+ int32_t rq_depth;
+ /* Array that receives the NDIS packets built from completed descriptors. */
+ NDIS_PACKET **recv_pkt_array;
+
+} endpt_recv_mgr_t;
+
+
+/*
+ * Connection state of an endpoint, stored in endpt_conn_t.state and
+ * accessed through endpt_cm_set_state()/endpt_cm_get_state().
+ * NOTE(review): the transitions between these states are driven by code
+ * outside this header - confirm the meaning of each value there.
+ */
+typedef enum _cm_state
+{
+ /* First enumerator (value 0). */
+ IPOIB_CM_DISCONNECTED,
+ IPOIB_CM_INIT,
+ IPOIB_CM_CONNECT,
+ IPOIB_CM_CONNECTED,
+ IPOIB_CM_LISTEN,
+ IPOIB_CM_DREP_SENT,
+ IPOIB_CM_DREQ_SENT,
+ IPOIB_CM_REJ_RECVD,
+ IPOIB_CM_DESTROY
+} cm_state_t;
+
+/*
+ * Per-connection private data kept in endpt_conn_t.private_data.
+ * NOTE(review): presumably exchanged with the peer in CM messages, with
+ * both fields in network byte order as their types suggest - confirm.
+ */
+typedef struct _cm_private_data
+{
+ /* UD queue pair number. */
+ ib_net32_t ud_qpn;
+ /* Receive MTU. */
+ ib_net32_t recv_mtu;
+} cm_private_data_t;
+
+/* Connected-mode state of one endpoint (ipoib_endpt_t.conn). */
+typedef struct _endpt_conn
+{
+ /* Service ID used for the connection. */
+ ib_net64_t service_id;
+ cm_private_data_t private_data;
+ /* Send-side QP handle; NULL when absent. */
+ ib_qp_handle_t h_send_qp;
+ /* Receive-side QP handle; NULL when absent. */
+ ib_qp_handle_t h_recv_qp;
+ /* NOTE(review): presumably the QP currently used for sends - confirm. */
+ ib_qp_handle_t h_work_qp;
+ ib_cq_handle_t h_send_cq;
+ /* Completion queue polled for receive completions. */
+ ib_cq_handle_t h_recv_cq;
+ ib_listen_handle_t h_cm_listen;
+ /* Current connection state; read and written with interlocked operations. */
+ cm_state_t state;
+
+} endpt_conn_t;
+
typedef struct _ipoib_endpt
{
cl_obj_t obj;
@@ -53,13 +104,18 @@
cl_map_item_t mac_item;
cl_fmap_item_t gid_item;
cl_map_item_t lid_item;
+ cl_fmap_item_t conn_item;
+ LIST_ENTRY list_item;
ib_query_handle_t h_query;
ib_mcast_handle_t h_mcast;
mac_addr_t mac;
ib_gid_t dgid;
net16_t dlid;
net32_t qpn;
+ uint8_t cm_flag;
ib_av_handle_t h_av;
+ endpt_conn_t conn;
+
ib_al_ifc_t *p_ifc;
boolean_t is_in_use;
boolean_t is_mcast_listener;
@@ -103,6 +159,9 @@
* expired
* Flag to indicate that the endpoint should be flushed.
*
+* connection
+* for connected mode endpoints
+*
* p_ifc
* Reference to transport functions, can be used
* while endpoint is not attached to port yet.
@@ -154,5 +213,43 @@
ipoib_endpt_queue(
IN ipoib_endpt_t* const p_endpt );
+struct _ipoib_port *
+ipoib_endpt_parent(
+ IN ipoib_endpt_t* const p_endpt );
+/*
+ * Atomically replaces the endpoint's connection state with `state` and
+ * returns the value InterlockedExchange() reports, i.e. the state that was
+ * stored before the call.
+ */
+inline cm_state_t
+endpt_cm_set_state(
+	IN ipoib_endpt_t* const p_endpt,
+	IN cm_state_t state )
+{
+	volatile LONG	* const p_state = (volatile LONG *)&p_endpt->conn.state;
+	LONG			prev_state;
+
+	prev_state = InterlockedExchange( p_state, (LONG)state );
+	return (cm_state_t)prev_state;
+}
+
+/*
+ * Atomically reads the endpoint's connection state.
+ *
+ * The read is done with a compare-exchange whose comparand and replacement
+ * are both IPOIB_CM_DISCONNECTED, so the stored value is never altered.
+ */
+inline cm_state_t
+endpt_cm_get_state(
+	IN ipoib_endpt_t* const p_endpt )
+{
+	volatile LONG	* const p_state = (volatile LONG *)&p_endpt->conn.state;
+	LONG			cur_state;
+
+	cur_state = InterlockedCompareExchange(
+		p_state, IPOIB_CM_DISCONNECTED, IPOIB_CM_DISCONNECTED );
+	return( cm_state_t )cur_state;
+}
+
+ib_api_status_t
+endpt_cm_create_qp(
+ IN ipoib_endpt_t* const p_endpt,
+ IN ib_qp_handle_t* const p_h_qp );
+
+ib_api_status_t
+ipoib_endpt_connect(
+ IN ipoib_endpt_t* const p_endpt );
+
+int32_t
+endpt_cm_recv_mgr_filter(
+ IN ipoib_endpt_t* const p_endpt,
+ IN ib_wc_t* const p_done_wc_list,
+ OUT cl_qlist_t* const p_done_list,
+ OUT cl_qlist_t* const p_bad_list );
+
#endif /* _IPOIB_ENDPOINT_H_ */
Index: ulp/ipoib/kernel/ipoib_log.mc
===================================================================
--- ulp/ipoib/kernel/ipoib_log.mc (revision 1776)
+++ ulp/ipoib/kernel/ipoib_log.mc (working copy)
@@ -316,3 +316,19 @@
%2: Pkey index not found for partition , change switch pkey configuration.
.
+MessageId=0x005C
+Facility=IPoIB
+Severity=Error
+SymbolicName=EVENT_IPOIB_CONNECTED_MODE_ERR
+Language=English
+%2: Connected Mode failed to initialize, disabled. Interface will use default UD QP transport.
+.
+
+MessageId=0x005D
+Facility=IPoIB
+Severity=Informational
+SymbolicName=EVENT_IPOIB_CONNECTED_MODE_UP
+Language=English
+%2: Connected Mode initialized and operational.
+.
+
Index: ulp/ipoib/kernel/ipoib_port.c
===================================================================
--- ulp/ipoib/kernel/ipoib_port.c (revision 1776)
+++ ulp/ipoib/kernel/ipoib_port.c (working copy)
@@ -33,6 +33,7 @@
+#include "ipoib_endpoint.h"
#include "ipoib_port.h"
#include "ipoib_adapter.h"
#include "ipoib_debug.h"
@@ -45,14 +46,6 @@
#include <offload.h>
-/* Amount of physical memory to register. */
-#define MEM_REG_SIZE 0xFFFFFFFFFFFFFFFF
-
-/* Number of work completions to chain for send and receive polling. */
-#define MAX_SEND_WC 8
-#define MAX_RECV_WC 16
-
-
ib_gid_t bcast_mgid_template = {
0xff, /* multicast field */
0x12, /* scope (to be filled in) */
@@ -102,7 +95,15 @@
__port_free(
IN cl_obj_t* const p_obj );
+static ib_api_status_t
+__port_query_ca_attrs(
+ IN ipoib_port_t* const p_port,
+ IN ib_ca_attr_t** pp_ca_attrs );
+static void
+__srq_async_event_cb(
+IN ib_async_event_rec_t *p_event_rec );
+
/******************************************************************************
*
* IB resource manager operations
@@ -341,15 +342,44 @@
IN const ib_cq_handle_t h_cq,
IN void *cq_context );
-static NDIS_STATUS GetLsoHeaderSize(
- IN ipoib_port_t* const pPort,
- IN PNDIS_BUFFER CurrBuffer,
- IN LsoData *pLsoData,
- OUT uint16_t *pSize,
- OUT INT *IndexOfData,
- IN ipoib_hdr_t *ipoib_hdr
- );
+static NDIS_STATUS
+GetLsoHeaderSize(
+ IN ipoib_port_t* const pPort,
+ IN PNDIS_BUFFER CurrBuffer,
+ IN LsoData *pLsoData,
+ OUT uint16_t *pSize,
+ OUT INT *IndexOfData,
+ IN ipoib_hdr_t *ipoib_hdr );
+static NDIS_STATUS
+__build_lso_desc(
+ IN ipoib_port_t* const p_port,
+ IN OUT ipoib_send_desc_t* const p_desc,
+ IN ULONG mss,
+ IN int32_t hdr_idx );
+
+static NDIS_STATUS
+__send_fragments(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_send_desc_t* const p_desc,
+ IN eth_hdr_t* const p_eth_hdr,
+ IN ip_hdr_t* const p_ip_hdr,
+ IN uint32_t buf_len,
+ IN NDIS_BUFFER* p_ndis_buf );
+
+static void
+__update_fragment_ip_hdr(
+IN ip_hdr_t* const p_ip_hdr,
+IN uint16_t fragment_size,
+IN uint16_t fragment_offset,
+IN BOOLEAN more_fragments );
+
+static void
+__copy_ip_options(
+IN uint8_t* p_buf,
+IN uint8_t* p_options,
+IN uint32_t options_len,
+IN BOOLEAN copy_all );
/******************************************************************************
*
* Endpoint manager operations
@@ -516,7 +546,7 @@
*/
static void* GetIpPayloadPtr(const ip_hdr_t* const p_ip_hdr)
{
- return (void*)((uint8_t*)p_ip_hdr + 4*(p_ip_hdr->ver_hl & 0xf));
+ return (void*)((uint8_t*)p_ip_hdr + IP_HEADER_LENGTH(p_ip_hdr));
}
/******************************************************************************
@@ -742,6 +772,14 @@
__endpt_mgr_remove_all( p_port );
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ endpt_cm_buf_mgr_destroy( p_port );
+ ipoib_port_srq_destroy( p_port );
+ p_port->endpt_mgr.thread_is_done = 1;
+ cl_event_signal( &p_port->endpt_mgr.event );
+ }
+
ipoib_port_resume( p_port );
IPOIB_EXIT( IPOIB_DBG_INIT );
@@ -794,7 +832,10 @@
cl_spinlock_destroy( &p_port->recv_lock );
cl_obj_deinit( p_obj );
-
+ if( p_port->p_ca_attrs )
+ {
+ cl_free ( p_port->p_ca_attrs );
+ }
cl_free( p_port );
IPOIB_EXIT( IPOIB_DBG_INIT );
@@ -831,8 +872,6 @@
uint64_t vaddr;
net32_t rkey;
ib_qp_attr_t qp_attr;
- ib_ca_attr_t * p_ca_attr;
- uint32_t attr_size;
IPOIB_ENTER( IPOIB_DBG_INIT );
@@ -850,6 +889,13 @@
return status;
}
+ status = __port_query_ca_attrs( p_port, &p_port->p_ca_attrs );
+ if( status != IB_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Query CA attributes failed\n" ) );
+ return status;
+ }
/* Allocate the PD. */
status = p_port->p_adapter->p_ifc->alloc_pd(
p_port->ib_mgr.h_ca, IB_PDT_UD, p_port, &p_port->ib_mgr.h_pd );
@@ -905,50 +951,22 @@
qp_create.rq_sge = 2; /* To support buffers spanning pages. */
qp_create.h_rq_cq = p_port->ib_mgr.h_recv_cq;
qp_create.sq_depth = p_port->p_adapter->params.sq_depth;
-
- //Figure out the right number of SGE entries for sends.
- /* Get the size of the CA attribute structure. */
- status = p_port->p_adapter->p_ifc->query_ca( p_port->ib_mgr.h_ca, NULL, &attr_size );
- if( status != IB_INSUFFICIENT_MEMORY )
- {
- IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
- ("ib_query_ca failed with status %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) );
- return status;
- }
- /* Allocate enough space to store the attribute structure. */
- p_ca_attr = cl_malloc( attr_size );
- if( !p_ca_attr )
- {
- IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
- ("cl_malloc failed to allocate p_ca_attr!\n") );
- return IB_INSUFFICIENT_RESOURCES;
- }
-
- /* Query the CA attributes. */
- status = p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca, p_ca_attr, &attr_size );
- if( status != IB_SUCCESS )
- {
- cl_free( p_ca_attr );
-
- IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
- ("ib_query_ca failed with status %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) );
- return status;
- }
#define UD_QP_USED_SGE 3
- qp_create.sq_sge = MAX_SEND_SGE < p_ca_attr->max_sges ? MAX_SEND_SGE : (p_ca_attr->max_sges - UD_QP_USED_SGE);
- if (!p_ca_attr->ipoib_csum) {
- //checksum is not supported by device
- //user must specify BYPASS to explicitly cancel checksum calculation
+ qp_create.sq_sge = MAX_SEND_SGE < p_port->p_ca_attrs->max_sges ?
+ MAX_SEND_SGE : ( p_port->p_ca_attrs->max_sges - UD_QP_USED_SGE );
+ if ( !p_port->p_ca_attrs->ipoib_csum )
+ {
+ /* checksum is not supported by device
+ user must specify BYPASS to explicitly cancel checksum calculation */
if (p_port->p_adapter->params.send_chksum_offload == CSUM_ENABLED)
p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED;
if (p_port->p_adapter->params.recv_chksum_offload == CSUM_ENABLED)
p_port->p_adapter->params.recv_chksum_offload = CSUM_DISABLED;
}
- cl_free( p_ca_attr );
-
+
qp_create.h_sq_cq = p_port->ib_mgr.h_send_cq;
- qp_create.sq_signaled = TRUE;
+ qp_create.sq_signaled = FALSE;
status = p_port->p_adapter->p_ifc->create_qp(
p_port->ib_mgr.h_pd, &qp_create, p_port,
__qp_event, &p_port->ib_mgr.h_qp );
@@ -998,11 +1016,185 @@
return status;
}
+ status = ipoib_port_srq_init( p_port );
+ if( status != IB_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("ipoib_port_srq_init failed %s\n",
+ p_port->p_adapter->p_ifc->get_err_str( status )) );
+ /* disable further CM initialization */
+ p_port->p_adapter->params.cm_enabled = FALSE;
+
+ NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+ EVENT_IPOIB_CONNECTED_MODE_ERR, 1, 0xbadc0de1 );
+
+ }
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ status = endpt_cm_buf_mgr_init( p_port );
+ if( status != IB_SUCCESS )
+ {
+ IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("CM Init buf mgr failed status %#x\n", status ) );
+ ipoib_port_srq_destroy( p_port );
+ p_port->p_adapter->params.cm_enabled = FALSE;
+
+ NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+ EVENT_IPOIB_CONNECTED_MODE_ERR, 1, 0xbadc0de2 );
+ }
+ else
+ {
+ /* now we can adjust csum capabilities */
+ p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED;
+ p_port->p_adapter->params.recv_chksum_offload = CSUM_BYPASS;
+ }
+
+ }
IPOIB_EXIT( IPOIB_DBG_INIT );
return IB_SUCCESS;
}
+/*
+ * Asynchronous event callback registered for the port's shared receive
+ * queue.  The event context is the ipoib_port_t.
+ *
+ * Every event is traced.  IB_AE_SRQ_CATAS_ERROR and
+ * IB_AE_SRQ_QP_LAST_WQE_REACHED additionally mark the adapter as hung;
+ * IB_AE_SRQ_LIMIT_REACHED and unknown events are only traced.
+ */
+static void
+__srq_async_event_cb(
+IN ib_async_event_rec_t *p_event_rec )
+{
+	ipoib_port_t	*p_port = (ipoib_port_t *)p_event_rec->context;
+	const char		*p_event_name;
+	boolean_t		mark_hung;
+
+	switch( p_event_rec->code )
+	{
+	case IB_AE_SRQ_LIMIT_REACHED:
+		p_event_name = "IB_AE_SRQ_LIMIT_REACHED";
+		mark_hung = FALSE;
+		break;
+
+	case IB_AE_SRQ_CATAS_ERROR:
+		p_event_name = "IB_AE_SRQ_CATAS_ERROR";
+		mark_hung = TRUE;
+		break;
+
+	case IB_AE_SRQ_QP_LAST_WQE_REACHED:
+		p_event_name = "IB_AE_SRQ_QP_LAST_WQE_REACHED";
+		mark_hung = TRUE;
+		break;
+
+	default:
+		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("ASYNC EVENT CODE ARRIVED %d(%#x)\n",
+			p_event_rec->code, p_event_rec->code ) );
+		return;
+	}
+
+	IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+		("SRQ ASYNC EVENT CODE %d: %s\n",
+		p_event_rec->code, p_event_name ) );
+
+	if( mark_hung )
+	{
+		/* SRQ is in err state, must reinitialize */
+		p_port->p_adapter->hung = TRUE;
+	}
+}
+
+/*
+ * Creates the port's shared receive queue for connected mode.
+ *
+ * Does nothing and returns IB_SUCCESS when connected mode is disabled in
+ * the adapter parameters.  The SRQ is sized from the adapter's rq_depth
+ * and capped by the CA attributes; on success its handle is stored in
+ * p_port->ib_mgr.h_srq.  On failure an error log entry is written and the
+ * create_srq() status is returned.
+ */
+ib_api_status_t
+ipoib_port_srq_init(
+	IN ipoib_port_t* const p_port )
+{
+	ib_api_status_t	ib_status;
+	ib_srq_handle_t	h_srq;
+	ib_srq_attr_t	srq_attr;
+
+	IPOIB_ENTER( IPOIB_DBG_INIT );
+
+	if( !p_port->p_adapter->params.cm_enabled )
+		return IB_SUCCESS;
+
+	/* Never ask for more than the CA supports. */
+	srq_attr.max_wr =
+		min( (uint32_t)p_port->p_adapter->params.rq_depth * 8,
+		p_port->p_ca_attrs->max_srq_wrs/2 );
+	srq_attr.max_sge = min( 2, p_port->p_ca_attrs->max_srq_sges );
+	srq_attr.srq_limit = 10;
+
+	ib_status = p_port->p_adapter->p_ifc->create_srq(
+		p_port->ib_mgr.h_pd, &srq_attr, p_port,
+		__srq_async_event_cb, &h_srq );
+
+	if( ib_status == IB_SUCCESS )
+	{
+		p_port->ib_mgr.h_srq = h_srq;
+
+		IPOIB_EXIT( IPOIB_DBG_INIT );
+		return ib_status;
+	}
+
+	NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+		EVENT_IPOIB_CREATE_QP, 1, ib_status );
+	IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+		("ib_create_srq failed status %s\n",
+		p_port->p_adapter->p_ifc->get_err_str( ib_status )) );
+	return ib_status;
+}
+
+/* __port_query_ca_attrs()
+ * Queries the attributes of the port's channel adapter.
+ *
+ * query_ca() is called twice: first without a buffer to learn the required
+ * size, then with a zeroed buffer of that size.
+ *
+ * pp_ca_attrs - receives a pointer to the allocated attribute buffer on
+ *               success, NULL on any failure.
+ *
+ * returns IB_SUCCESS only when *pp_ca_attrs is valid.
+ * The returned memory must be released by the caller with cl_free().
+ */
+static ib_api_status_t
+__port_query_ca_attrs(
+	IN ipoib_port_t* const p_port,
+	IN ib_ca_attr_t** pp_ca_attrs )
+{
+	ib_api_status_t ib_status;
+	uint32_t attr_size = 0;
+	ib_ca_attr_t* p_ca_attrs;
+
+	*pp_ca_attrs = NULL;
+
+	/* Size probe: the expected answer is IB_INSUFFICIENT_MEMORY. */
+	ib_status =
+		p_port->p_adapter->p_ifc->query_ca( p_port->ib_mgr.h_ca, NULL , &attr_size );
+	if( ib_status != IB_INSUFFICIENT_MEMORY )
+	{
+		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("ib_query_ca failed status %s\n",
+			p_port->p_adapter->p_ifc->get_err_str( ib_status )) );
+		/*
+		 * Never report success without attributes: callers dereference
+		 * the returned pointer as soon as this function succeeds.
+		 */
+		if( ib_status == IB_SUCCESS )
+			ib_status = IB_ERROR;
+		goto done;
+	}
+	CL_ASSERT( attr_size );
+
+	p_ca_attrs = cl_zalloc( attr_size );
+	if ( p_ca_attrs == NULL )
+	{
+		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("Allocate %d bytes failed for CA Attributes\n", attr_size ));
+		ib_status = IB_INSUFFICIENT_MEMORY;
+		goto done;
+	}
+
+	ib_status =
+		p_port->p_adapter->p_ifc->query_ca( p_port->ib_mgr.h_ca, p_ca_attrs , &attr_size );
+	if ( ib_status != IB_SUCCESS )
+	{
+		IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("CA attributes query failed\n") );
+		cl_free ( p_ca_attrs );
+		goto done;
+	}
+
+	*pp_ca_attrs = p_ca_attrs;
+done:
+	return ib_status;
+}
+
+/*
+ * Destroys the port's shared receive queue, if one exists, and clears the
+ * handle.  A destroy failure is only caught by an assertion.
+ */
+void
+ipoib_port_srq_destroy(
+	IN ipoib_port_t* const p_port )
+{
+	ib_api_status_t	status;
+
+	if( !p_port->ib_mgr.h_srq )
+		return;
+
+	status =
+		p_port->p_adapter->p_ifc->destroy_srq( p_port->ib_mgr.h_srq, NULL );
+	CL_ASSERT( status == IB_SUCCESS );
+	p_port->ib_mgr.h_srq = NULL;
+}
+
static void
__ib_mgr_destroy(
IN ipoib_port_t* const p_port )
@@ -1172,7 +1364,10 @@
{
ipoib_recv_desc_t *p_desc;
ipoib_port_t *p_port;
+
+#if IPOIB_INLINE_RECV
uint32_t ds0_len;
+#endif
IPOIB_ENTER( IPOIB_DBG_ALLOC );
@@ -1231,6 +1426,7 @@
p_desc->local_ds[0].vaddr = cl_get_physaddr( p_desc->p_buf );
p_desc->local_ds[0].length = sizeof(ipoib_pkt_t) + sizeof(ib_grh_t);
p_desc->local_ds[0].lkey = p_port->ib_mgr.lkey;
+ p_desc->wr.num_ds = 1;
#endif /* IPOIB_INLINE_RECV */
*pp_pool_item = &p_desc->item;
@@ -1472,8 +1668,8 @@
cl_perf_stop( &p_port->p_adapter->perf, GetRecv );
if( !p_next )
{
- IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_RECV,
- ("Out of receive descriptors! recv queue depath 0x%x\n",p_port->recv_mgr.depth) );
+ IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_RECV,
+ ("Out of receive descriptors! recv queue depth 0x%x\n",p_port->recv_mgr.depth) );
break;
}
@@ -1547,10 +1743,33 @@
/* Get the port and descriptor from the packet. */
p_port = IPOIB_PORT_FROM_PACKET( p_packet );
- p_desc = IPOIB_RECV_FROM_PACKET( p_packet );
+ p_desc = (ipoib_recv_desc_t *)IPOIB_RECV_FROM_PACKET( p_packet );
cl_spinlock_acquire( &p_port->recv_lock );
+ /* Get descriptor from the packet. */
+ if( p_desc->type == PKT_TYPE_CM_UCAST )
+ {
+ NDIS_BUFFER *p_buf;
+
+ /* Unchain the NDIS buffer. */
+ NdisUnchainBufferAtFront( p_packet, &p_buf );
+ CL_ASSERT( p_buf );
+ /* Return the NDIS packet and NDIS buffer to their pools. */
+ NdisDprFreePacketNonInterlocked( p_packet );
+ NdisFreeBuffer( p_buf );
+
+ endpt_cm_buf_mgr_put_recv( &p_port->cm_buf_mgr, (ipoib_cm_desc_t *)p_desc );
+ status = endpt_cm_post_recv( p_port );
+ if( status != IB_SUCCESS )
+ {
+ IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Post Recv QP failed\n" ) );
+ }
+ cl_spinlock_release( &p_port->recv_lock );
+ return;
+ }
+
cl_perf_start( ReturnPutRecv );
__buf_mgr_put_recv( p_port, p_desc, p_packet );
cl_perf_stop( &p_port->p_adapter->perf, ReturnPutRecv );
@@ -1754,7 +1973,7 @@
while( shortage-- > 1 )
{
__buf_mgr_put_recv( p_port,
- IPOIB_RECV_FROM_PACKET( p_port->recv_mgr.recv_pkt_array[shortage] ),
+ (ipoib_recv_desc_t *)IPOIB_RECV_FROM_PACKET( p_port->recv_mgr.recv_pkt_array[shortage] ),
p_port->recv_mgr.recv_pkt_array[shortage] );
}
cl_spinlock_release( &p_port->recv_lock );
@@ -1909,12 +2128,12 @@
(*pp_src )->mac.addr[0], (*pp_src )->mac.addr[1],
(*pp_src )->mac.addr[2], (*pp_src )->mac.addr[3],
(*pp_src )->mac.addr[4], (*pp_src )->mac.addr[5]) );
- (*pp_src)->qpn = p_wc->recv.ud.remote_qp;
+// (*pp_src)->qpn = p_wc->recv.ud.remote_qp;
}
if( *pp_src && *pp_dst )
{
- IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_RECV,
+ IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_RECV,
("Recv:\n"
"\tsrc MAC: %02X-%02X-%02X-%02X-%02X-%02X\n"
"\tdst MAC: %02X-%02X-%02X-%02X-%02X-%02X\n",
@@ -1968,14 +2187,14 @@
("Failed completion %s (vendor specific %#x)\n",
p_port->p_adapter->p_ifc->get_wc_status_str( p_wc->status ),
(int)p_wc->vendor_specific) );
- ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0 );
+ ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0, 0 );
}
else
{
- IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_RECV,
+ IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_RECV,
("Flushed completion %s\n",
p_port->p_adapter->p_ifc->get_wc_status_str( p_wc->status )) );
- ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_DROPPED, 0 );
+ ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_DROPPED, 0, 0 );
}
cl_qlist_insert_tail( p_bad_list, &p_desc->item.list_item );
/* Dereference the port object on behalf of the failed receive. */
@@ -1989,7 +2208,7 @@
{
IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("Received ETH packet < min size\n") );
- ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0 );
+ ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0, 0 );
cl_qlist_insert_tail( p_bad_list, &p_desc->item.list_item );
ipoib_port_deref( p_port, ref_recv_inv_len );
continue;
@@ -1998,16 +2217,17 @@
if((len - sizeof(ipoib_hdr_t)) > p_port->p_adapter->params.payload_mtu)
{
IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
- ("Received ETH packet > payload MTU (%d)\n",
+ ("Received ETH packet len %d > payload MTU (%d)\n",
+ (len - sizeof(ipoib_hdr_t)),
p_port->p_adapter->params.payload_mtu) );
- ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0 );
+ ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0, 0 );
cl_qlist_insert_tail( p_bad_list, &p_desc->item.list_item );
ipoib_port_deref( p_port, ref_recv_inv_len );
continue;
}
/* Successful completion. Get the receive information. */
- p_desc->ndis_csum.Value = ( (p_wc->recv.ud.recv_opt & IB_RECV_OPT_CSUM_MASK ) >> 8 );
+ p_desc->ndis_csum.Value = ( ( p_wc->recv.ud.recv_opt & IB_RECV_OPT_CSUM_MASK ) >> 8 );
cl_perf_start( GetRecvEndpts );
__recv_get_endpts( p_port, p_desc, p_wc, &p_src, &p_dst );
cl_perf_stop( &p_port->p_adapter->perf, GetRecvEndpts );
@@ -2125,7 +2345,7 @@
if( status != IB_SUCCESS )
{
/* Update stats. */
- ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0 );
+ ipoib_inc_recv_stat( p_port->p_adapter, IP_STAT_ERROR, 0, 0 );
cl_qlist_insert_tail( p_bad_list, &p_desc->item.list_item );
/* Dereference the port object on behalf of the failed receive. */
ipoib_port_deref( p_port, ref_recv_filter );
@@ -2158,7 +2378,7 @@
}
cl_qlist_insert_tail( p_done_list, &p_desc->item.list_item );
- ipoib_inc_recv_stat( p_port->p_adapter,ip_stat , len );
+ ipoib_inc_recv_stat( p_port->p_adapter, ip_stat, len, 1 );
}
}
@@ -2373,6 +2593,7 @@
ib_gid_t gid;
mac_addr_t mac;
ipoib_hw_addr_t null_hw = {0};
+ uint8_t cm_capable = 0;
IPOIB_ENTER( IPOIB_DBG_RECV );
@@ -2407,6 +2628,8 @@
return IB_INVALID_SETTING;
}
+ cm_capable = ipoib_addr_get_flags( &p_ib_arp->src_hw );
+
/*
* If we don't have a source, lookup the endpoint specified in the payload.
*/
@@ -2442,10 +2665,8 @@
}
else if( ipoib_is_voltaire_router_gid( &(*pp_src)->dgid ) )
{
- if( (*pp_src)->qpn !=
- (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) &&
- p_wc->recv.ud.remote_qp !=
- (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) )
+ if( (*pp_src)->qpn != ipoib_addr_get_qpn( &p_ib_arp->src_hw ) &&
+ p_wc->recv.ud.remote_qp != ipoib_addr_get_qpn( &p_ib_arp->src_hw ) )
{
/* Out of date! Destroy the endpoint and replace it. */
__endpt_mgr_remove( p_port, *pp_src );
@@ -2483,7 +2704,7 @@
* Create the endpoint.
*/
*pp_src = ipoib_endpt_create( &p_ib_arp->src_hw.gid,
- p_wc->recv.ud.remote_lid, (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) );
+ p_wc->recv.ud.remote_lid, ipoib_addr_get_qpn( &p_ib_arp->src_hw ) );
if( !*pp_src )
{
@@ -2506,11 +2727,44 @@
cl_obj_unlock( &p_port->obj );
}
+ (*pp_src)->cm_flag = cm_capable;
+
CL_ASSERT( !cl_memcmp(
&(*pp_src)->dgid, &p_ib_arp->src_hw.gid, sizeof(ib_gid_t) ) );
CL_ASSERT( ipoib_is_voltaire_router_gid( &(*pp_src)->dgid ) ||
- (*pp_src)->qpn ==
- (p_ib_arp->src_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) );
+ (*pp_src)->qpn == ipoib_addr_get_qpn( &p_ib_arp->src_hw ) );
+
+ if( p_port->p_adapter->params.cm_enabled &&
+ p_ib_arp->op == ARP_OP_REQ &&
+ cm_capable == IPOIB_CM_FLAG_RC )
+ {
+ /* If we got an ARP request with the RC flag set,
+ save the SID for the connect REQ that is sent along with the ARP reply
+ once the requestor's path is resolved. */
+ if( endpt_cm_get_state( (*pp_src) ) == IPOIB_CM_DISCONNECTED )
+ {
+ (*pp_src)->cm_flag = cm_capable;
+ ipoib_addr_set_sid(
+ &(*pp_src)->conn.service_id,
+ ipoib_addr_get_qpn( &p_ib_arp->src_hw ) );
+ }
+ }
+
+#if DBG
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
+ (" ARP %s from ENDPT[%p] state %d CM cap: %d QPN: %#x MAC: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ ((p_ib_arp->op == ARP_OP_REQ )? "REQUEST" : "REPLY"),
+ *pp_src, endpt_cm_get_state( *pp_src ),
+ ((cm_capable == IPOIB_CM_FLAG_RC)? 1: 0),
+ cl_ntoh32( ipoib_addr_get_qpn( &p_ib_arp->src_hw ) ),
+ (*pp_src)->mac.addr[0], (*pp_src)->mac.addr[1],
+ (*pp_src)->mac.addr[2], (*pp_src)->mac.addr[3],
+ (*pp_src)->mac.addr[4], (*pp_src)->mac.addr[5] ));
+ }
+#endif
+
/* Now swizzle the data. */
p_arp->hw_type = ARP_HW_TYPE_ETH;
p_arp->hw_size = sizeof(mac_addr_t);
@@ -2549,8 +2803,7 @@
{
p_arp->dst_hw = p_dst->mac;
p_arp->dst_ip = p_ib_arp->dst_ip;
- CL_ASSERT( p_dst->qpn ==
- (p_ib_arp->dst_hw.flags_qpn & CL_HTON32(0x00FFFFFF)) );
+ CL_ASSERT( p_dst->qpn == ipoib_addr_get_qpn( &p_ib_arp->dst_hw ) );
}
}
else /* we got ARP reqeust */
@@ -2646,7 +2899,7 @@
if( status != NDIS_STATUS_SUCCESS )
{
- ipoib_inc_recv_stat( p_port->p_adapter, type, 0 );
+ ipoib_inc_recv_stat( p_port->p_adapter, type, 0, 0 );
/* Return the receive descriptor to the pool. */
__buf_mgr_put_recv( p_port, p_desc, NULL );
IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_RECV,
@@ -2669,7 +2922,10 @@
}
chksum.Value = 0;
- switch (p_port->p_adapter->params.recv_chksum_offload) {
+ switch( p_port->p_adapter->params.recv_chksum_offload )
+ {
+ default:
+ CL_ASSERT( FALSE );
case CSUM_DISABLED:
NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, TcpIpChecksumPacketInfo ) =
(void*)(uintn_t)chksum.Value;
@@ -2689,12 +2945,8 @@
NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, TcpIpChecksumPacketInfo ) =
(void*)(uintn_t)chksum.Value;
break;
- default:
- ASSERT(FALSE);
- NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, TcpIpChecksumPacketInfo ) =
- (void*)(uintn_t)chksum.Value;
}
- ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len );
+ ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len, 1 );
IPOIB_EXIT( IPOIB_DBG_RECV );
return IB_SUCCESS;
@@ -2851,6 +3103,7 @@
cl_perf_start( FilterArp );
status = __send_mgr_filter_arp(
p_port, p_eth_hdr, p_buf, buf_len, p_desc );
+ p_desc->send_dir = SEND_UD_QP;
cl_perf_stop( &p_port->p_adapter->perf, FilterArp );
break;
@@ -2859,6 +3112,8 @@
* The IPoIB spec doesn't define how to send non IP or ARP packets.
* Just send the payload and hope for the best.
*/
+
+ p_desc->send_dir = SEND_UD_QP;
cl_perf_start( SendGen );
status = __send_gen( p_port, p_desc, 0 );
cl_perf_stop( &p_port->p_adapter->perf, SendGen );
@@ -2914,11 +3169,11 @@
NdisQueryPacketLength( p_desc->p_pkt, &tot_len );
/* Setup the work request. */
- p_desc->local_ds[1].vaddr = cl_get_physaddr(
+ p_desc->send_wr[0].local_ds[1].vaddr = cl_get_physaddr(
((uint8_t*)p_desc->p_buf) + sizeof(eth_hdr_t) );
- p_desc->local_ds[1].length = tot_len - sizeof(eth_hdr_t);
- p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;
- p_desc->wr.num_ds = 2;
+ p_desc->send_wr[0].local_ds[1].length = tot_len - sizeof(eth_hdr_t);
+ p_desc->send_wr[0].local_ds[1].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].wr.num_ds = 2;
/* Copy the packet. */
NdisCopyFromPacketToPacketSafe( p_packet, bytes_copied, tot_len,
@@ -3010,18 +3265,18 @@
CL_ASSERT( i == 0 );
if( offset < PAGE_SIZE )
{
- p_desc->local_ds[j].lkey = p_port->ib_mgr.lkey;
- p_desc->local_ds[j].vaddr = (page_array[i] << PAGE_SHIFT);
+ p_desc->send_wr[0].local_ds[j].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].local_ds[j].vaddr = (page_array[i] << PAGE_SHIFT);
/* Add the byte offset since we're on the 1st page. */
- p_desc->local_ds[j].vaddr += offset;
+ p_desc->send_wr[0].local_ds[j].vaddr += offset;
if( offset + buf_len > PAGE_SIZE )
{
- p_desc->local_ds[j].length = PAGE_SIZE - offset;
- buf_len -= p_desc->local_ds[j].length;
+ p_desc->send_wr[0].local_ds[j].length = PAGE_SIZE - offset;
+ buf_len -= p_desc->send_wr[0].local_ds[j].length;
}
else
{
- p_desc->local_ds[j].length = buf_len;
+ p_desc->send_wr[0].local_ds[j].length = buf_len;
buf_len = 0;
}
/* This data segment is done. Move to the next. */
@@ -3037,25 +3292,25 @@
/* Finish this MDL */
while( buf_len )
{
- p_desc->local_ds[j].lkey = p_port->ib_mgr.lkey;
- p_desc->local_ds[j].vaddr = (page_array[i] << PAGE_SHIFT);
+ p_desc->send_wr[0].local_ds[j].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].local_ds[j].vaddr = (page_array[i] << PAGE_SHIFT);
/* Add the first page's offset if we're on the first page. */
if( i == 0 )
- p_desc->local_ds[j].vaddr += MmGetMdlByteOffset( p_mdl );
+ p_desc->send_wr[0].local_ds[j].vaddr += MmGetMdlByteOffset( p_mdl );
if( i == 0 && (MmGetMdlByteOffset( p_mdl ) + buf_len) > PAGE_SIZE )
{
/* Buffers spans pages. */
- p_desc->local_ds[j].length =
+ p_desc->send_wr[0].local_ds[j].length =
PAGE_SIZE - MmGetMdlByteOffset( p_mdl );
- buf_len -= p_desc->local_ds[j].length;
+ buf_len -= p_desc->send_wr[0].local_ds[j].length;
/* This page is done. Move to the next. */
i++;
}
else
{
/* Last page of the buffer. */
- p_desc->local_ds[j].length = buf_len;
+ p_desc->send_wr[0].local_ds[j].length = buf_len;
buf_len = 0;
}
/* This data segment is done. Move to the next. */
@@ -3076,7 +3331,7 @@
}
/* Set the number of data segments. */
- p_desc->wr.num_ds = j;
+ p_desc->send_wr[0].wr.num_ds = j;
IPOIB_EXIT( IPOIB_DBG_SEND );
return IB_SUCCESS;
@@ -3088,7 +3343,7 @@
__send_gen(
IN ipoib_port_t* const p_port,
IN ipoib_send_desc_t* const p_desc,
- IN INT lso_data_index)
+ IN INT lso_data_index )
{
ib_api_status_t status;
SCATTER_GATHER_LIST *p_sgl;
@@ -3109,16 +3364,21 @@
}
/* Remember that one of the DS entries is reserved for the IPoIB header. */
- if( ( p_sgl->NumberOfElements >= MAX_SEND_SGE &&
- p_sgl->Elements[0].Length > sizeof(eth_hdr_t)) ||
- ( p_sgl->NumberOfElements > MAX_SEND_SGE &&
- p_sgl->Elements[0].Length <= sizeof(eth_hdr_t)) )
+ if( ( p_sgl->NumberOfElements >= MAX_SEND_SGE ||
+ p_sgl->Elements[0].Length < sizeof(eth_hdr_t)) )
{
- IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,
- ("Too many buffers to fit in WR ds_array. Copying data.\n") );
- cl_perf_start( SendCopy );
- status = __send_copy( p_port, p_desc );
- cl_perf_stop( &p_port->p_adapter->perf, SendCopy );
+
+ IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_SEND,
+ ("Too many buffers %d to fit in WR ds_array[%d] \
+ Or buffer[0] length %d < Eth header. Copying data.\n",
+ p_sgl->NumberOfElements, MAX_SEND_SGE, p_sgl->Elements[0].Length ) );
+ status = NDIS_STATUS_RESOURCES;
+ if( !p_port->p_adapter->params.cm_enabled )
+ {
+ cl_perf_start( SendCopy );
+ status = __send_copy( p_port, p_desc );
+ cl_perf_stop( &p_port->p_adapter->perf, SendCopy );
+ }
IPOIB_EXIT( IPOIB_DBG_SEND );
return status;
}
@@ -3128,7 +3388,8 @@
* or part of it.
*/
i = 0;
- if (lso_data_index) { //we have an LSO packet
+ if( lso_data_index )
+ { /* we have an LSO packet */
i = lso_data_index;
j = 0;
}
@@ -3140,11 +3401,11 @@
}
else
{
- p_desc->local_ds[j].vaddr =
+ p_desc->send_wr[0].local_ds[j].vaddr =
p_sgl->Elements[i].Address.QuadPart + offset;
- p_desc->local_ds[j].length =
+ p_desc->send_wr[0].local_ds[j].length =
p_sgl->Elements[i].Length - offset;
- p_desc->local_ds[j].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].local_ds[j].lkey = p_port->ib_mgr.lkey;
i++;
j++;
break;
@@ -3153,15 +3414,15 @@
/* Now fill in the rest of the local data segments. */
while( i < p_sgl->NumberOfElements )
{
- p_desc->local_ds[j].vaddr = p_sgl->Elements[i].Address.QuadPart;
- p_desc->local_ds[j].length = p_sgl->Elements[i].Length;
- p_desc->local_ds[j].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].local_ds[j].vaddr = p_sgl->Elements[i].Address.QuadPart;
+ p_desc->send_wr[0].local_ds[j].length = p_sgl->Elements[i].Length;
+ p_desc->send_wr[0].local_ds[j].lkey = p_port->ib_mgr.lkey;
i++;
j++;
}
/* Set the number of data segments. */
- p_desc->wr.num_ds = j;
+ p_desc->send_wr[0].wr.num_ds = j;
IPOIB_EXIT( IPOIB_DBG_SEND );
return NDIS_STATUS_SUCCESS;
@@ -3179,6 +3440,7 @@
{
NDIS_STATUS status;
ip_hdr_t *p_ip_hdr;
+ uint32_t ip_packet_len;
PERF_DECLARE( QueryIp );
PERF_DECLARE( SendTcp );
@@ -3217,43 +3479,76 @@
return NDIS_STATUS_BUFFER_TOO_SHORT;
}
- if( p_ip_hdr->offset ||
- p_ip_hdr->prot != IP_PROT_UDP )
+ switch( p_ip_hdr->prot )
{
- /* Check if this packet is IGMP */
- if ( p_ip_hdr->prot == IP_PROT_IGMP )
- {
- /*
- In igmp packet I saw that iph arrive in 2 NDIS_BUFFERs:
- 1. iph
- 2. ip options
- So to get the IGMP packet we need to skip the ip options NDIS_BUFFER
- */
- size_t iph_size_in_bytes = (p_ip_hdr->ver_hl & 0xf) * 4;
- size_t iph_options_size = iph_size_in_bytes - buf_len;
- buf_len -= sizeof(ip_hdr_t);//without ipheader
+ case IP_PROT_UDP:
- /*
- Could be a case that arrived igmp packet not from type IGMPv2 ,
- but IGMPv1 or IGMPv3.
- We anyway pass it to __send_mgr_filter_igmp_v2().
- */
- __send_mgr_filter_igmp_v2(p_port, p_ip_hdr, iph_options_size, p_buf, buf_len);
+ cl_perf_start( FilterUdp );
+ status = __send_mgr_filter_udp(
+ p_port, p_ip_hdr, p_buf, (buf_len - sizeof(ip_hdr_t)), p_desc );
+ cl_perf_stop( &p_port->p_adapter->perf, FilterUdp );
+ if( status == NDIS_STATUS_PENDING )
+ { /* not DHCP packet, keep going */
+ if( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )
+ p_desc->send_dir = SEND_UD_QP;
+ else
+ p_desc->send_dir = SEND_RC_QP;
+ break;
}
- /* Not a UDP packet. */
- cl_perf_start( SendTcp );
- status = __send_gen( p_port, p_desc,0 );
- cl_perf_stop( &p_port->p_adapter->perf, SendTcp );
- IPOIB_EXIT( IPOIB_DBG_SEND );
return status;
+
+ case IP_PROT_TCP:
+ p_desc->send_dir = SEND_RC_QP;
+ break;
+ case IP_PROT_IGMP:
+ /*
+ For IGMP packets the IP header was observed to arrive in 2 NDIS_BUFFERs:
+ 1. iph
+ 2. ip options
+ So to reach the IGMP payload we need to skip the ip options NDIS_BUFFER.
+ The packet may turn out to be IGMPv1 or IGMPv3
+ rather than IGMPv2.
+ We pass it to __send_mgr_filter_igmp_v2() regardless.
+ */
+ status =
+ __send_mgr_filter_igmp_v2(p_port, p_ip_hdr,
+ (size_t)( IP_HEADER_LENGTH(p_ip_hdr) - buf_len ),
+ p_buf, (buf_len - sizeof(ip_hdr_t)) );
+ if( status != NDIS_STATUS_SUCCESS )
+ return status;
+ p_desc->send_dir = SEND_UD_QP;
+ break;
+ case IP_PROT_ICMP:
+ p_desc->send_dir = SEND_UD_QP;
+ default:
+ break;
}
+
+ if( !p_port->p_adapter->params.cm_enabled )
+ {
+ p_desc->send_dir = SEND_UD_QP;
+ goto send_gen;
+ }
+ else if( endpt_cm_get_state( p_desc->p_endpt ) != IPOIB_CM_CONNECTED )
+ {
+ p_desc->send_dir = SEND_UD_QP;
+ }
- buf_len -= sizeof(ip_hdr_t);
+ if( p_desc->send_dir == SEND_UD_QP )
+ {
+ ip_packet_len = cl_ntoh16( p_ip_hdr->length );
+ if( ip_packet_len > p_port->p_adapter->params.payload_mtu )
+ {
+ status = __send_fragments( p_port, p_desc, (eth_hdr_t* const)p_eth_hdr,
+ (ip_hdr_t* const)p_ip_hdr, (uint32_t)buf_len, p_buf );
+ return status;
+ }
+ }
- cl_perf_start( FilterUdp );
- status = __send_mgr_filter_udp(
- p_port, p_ip_hdr, p_buf, buf_len, p_desc );
- cl_perf_stop( &p_port->p_adapter->perf, FilterUdp );
+send_gen:
+ cl_perf_start( SendTcp );
+ status = __send_gen( p_port, p_desc,0 );
+ cl_perf_stop( &p_port->p_adapter->perf, SendTcp );
IPOIB_EXIT( IPOIB_DBG_SEND );
return status;
@@ -3440,11 +3735,7 @@
p_udp_hdr->dst_port != DHCP_PORT_CLIENT) )
{
/* Not a DHCP packet. */
- cl_perf_start( SendUdp );
- status = __send_gen( p_port, p_desc,0 );
- cl_perf_stop( &p_port->p_adapter->perf, SendUdp );
- IPOIB_EXIT( IPOIB_DBG_SEND );
- return status;
+ return NDIS_STATUS_PENDING;
}
buf_len -= sizeof(udp_hdr_t);
@@ -3654,10 +3945,11 @@
}
/* no chksum for udp */
p_desc->p_buf->ip.prot.udp.hdr.chksum = 0;
- p_desc->local_ds[1].vaddr = cl_get_physaddr( p_desc->p_buf );
- p_desc->local_ds[1].length = sizeof(ip_hdr_t) + sizeof(udp_hdr_t) + sizeof(dhcp_pkt_t);
- p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;
- p_desc->wr.num_ds = 2;
+ p_desc->send_wr[0].local_ds[1].vaddr = cl_get_physaddr( p_desc->p_buf );
+ p_desc->send_wr[0].local_ds[1].length = sizeof(ip_hdr_t) + sizeof(udp_hdr_t) + sizeof(dhcp_pkt_t);
+ p_desc->send_wr[0].local_ds[1].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].wr.num_ds = 2;
+ p_desc->send_dir = SEND_UD_QP;
IPOIB_EXIT( IPOIB_DBG_SEND );
return NDIS_STATUS_SUCCESS;
}
@@ -3732,32 +4024,101 @@
p_ib_arp->hw_size = sizeof(ipoib_hw_addr_t);
p_ib_arp->prot_size = p_arp->prot_size;
p_ib_arp->op = p_arp->op;
- p_ib_arp->src_hw.flags_qpn = p_port->ib_mgr.qpn;
+
+ ipoib_addr_set_qpn( &p_ib_arp->src_hw, p_port->ib_mgr.qpn );
+
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ ipoib_addr_set_flags( &p_ib_arp->src_hw, IPOIB_CM_FLAG_RC );
+ }
+
ib_gid_set_default( &p_ib_arp->src_hw.gid,
p_port->p_adapter->guids.port_guid.guid );
p_ib_arp->src_ip = p_arp->src_ip;
if( cl_memcmp( &p_arp->dst_hw, &null_hw, sizeof(mac_addr_t) ) )
{
/* Get the endpoint referenced by the dst_hw address. */
+ net32_t qpn = 0;
status = __endpt_mgr_get_gid_qpn( p_port, p_arp->dst_hw,
- &p_ib_arp->dst_hw.gid, &p_ib_arp->dst_hw.flags_qpn );
+ &p_ib_arp->dst_hw.gid, &qpn );
if( status != NDIS_STATUS_SUCCESS )
{
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
("Failed lookup of destination HW address\n") );
return status;
}
+ ipoib_addr_set_qpn( &p_ib_arp->dst_hw, qpn );
+
+ if( p_arp->op == ARP_OP_REP &&
+ p_port->p_adapter->params.cm_enabled &&
+ p_desc->p_endpt->cm_flag == IPOIB_CM_FLAG_RC )
+ {
+ cm_state_t cm_state;
+ cm_state =
+ ( cm_state_t )InterlockedCompareExchange( (volatile LONG *)&p_desc->p_endpt->conn.state,
+ IPOIB_CM_CONNECT, IPOIB_CM_DISCONNECTED );
+ switch( cm_state )
+ {
+ case IPOIB_CM_DISCONNECTED:
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
+ ("ARP REPLY pending Endpt[%p] QPN %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+ p_desc->p_endpt,
+ cl_ntoh32( ipoib_addr_get_qpn( &p_ib_arp->dst_hw )),
+ p_desc->p_endpt->mac.addr[0], p_desc->p_endpt->mac.addr[1],
+ p_desc->p_endpt->mac.addr[2], p_desc->p_endpt->mac.addr[3],
+ p_desc->p_endpt->mac.addr[4], p_desc->p_endpt->mac.addr[5] ) );
+ ipoib_addr_set_sid( &p_desc->p_endpt->conn.service_id,
+ ipoib_addr_get_qpn( &p_ib_arp->dst_hw ) );
+
+ ExFreeToNPagedLookasideList(
+ &p_port->buf_mgr.send_buf_list, p_desc->p_buf );
+ cl_qlist_insert_tail( &p_port->send_mgr.pending_list,
+ IPOIB_LIST_ITEM_FROM_PACKET( p_desc->p_pkt ) );
+ NdisInterlockedInsertTailList( &p_port->endpt_mgr.pending_conns,
+ &p_desc->p_endpt->list_item,
+ &p_port->endpt_mgr.conn_lock );
+ cl_event_signal( &p_port->endpt_mgr.event );
+ return NDIS_STATUS_PENDING;
+
+ case IPOIB_CM_CONNECT:
+ /* queue ARP REP packet until connected */
+ ExFreeToNPagedLookasideList(
+ &p_port->buf_mgr.send_buf_list, p_desc->p_buf );
+ cl_qlist_insert_tail( &p_port->send_mgr.pending_list,
+ IPOIB_LIST_ITEM_FROM_PACKET( p_desc->p_pkt ) );
+ return NDIS_STATUS_PENDING;
+ default:
+ break;
+ }
+ }
}
else
{
cl_memclr( &p_ib_arp->dst_hw, sizeof(ipoib_hw_addr_t) );
}
+
+#if DBG
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
+ (" ARP SEND to ENDPT[%p] State: %d flag: %#x, QPN: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+ p_desc->p_endpt,
+ endpt_cm_get_state( p_desc->p_endpt ),
+ p_desc->p_endpt->cm_flag,
+ cl_ntoh32( ipoib_addr_get_qpn( &p_ib_arp->dst_hw )),
+ p_desc->p_endpt->mac.addr[0], p_desc->p_endpt->mac.addr[1],
+ p_desc->p_endpt->mac.addr[2], p_desc->p_endpt->mac.addr[3],
+ p_desc->p_endpt->mac.addr[4], p_desc->p_endpt->mac.addr[5] ));
+ }
+#endif
+
p_ib_arp->dst_ip = p_arp->dst_ip;
- p_desc->local_ds[1].vaddr = cl_get_physaddr( p_ib_arp );
- p_desc->local_ds[1].length = sizeof(ipoib_arp_pkt_t);
- p_desc->local_ds[1].lkey = p_port->ib_mgr.lkey;
- p_desc->wr.num_ds = 2;
+ p_desc->send_wr[0].local_ds[1].vaddr = cl_get_physaddr( p_ib_arp );
+ p_desc->send_wr[0].local_ds[1].length = sizeof(ipoib_arp_pkt_t);
+ p_desc->send_wr[0].local_ds[1].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].wr.num_ds = 2;
+ p_desc->send_wr[0].wr.p_next = NULL;
IPOIB_EXIT( IPOIB_DBG_SEND );
return NDIS_STATUS_SUCCESS;
@@ -3794,7 +4155,7 @@
return NDIS_STATUS_BUFFER_TOO_SHORT;
}
- IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_SEND,
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_SEND,
("Ethernet header:\n"
"\tsrc MAC: %02X-%02X-%02X-%02X-%02X-%02X\n"
"\tdst MAC: %02X-%02X-%02X-%02X-%02X-%02X\n"
@@ -3881,26 +4242,17 @@
PNDIS_PACKET_EXTENSION PktExt;
PNDIS_TCP_IP_CHECKSUM_PACKET_INFO pChecksumPktInfo; //NDIS 5.1
ULONG mss;
- LsoData TheLsoData;
- INT IndexOfData = 0;
- ULONG PhysBufCount;
- ULONG PacketLength;
- PNDIS_BUFFER FirstBuffer;
- uint16_t lso_header_size;
-
PERF_DECLARE( SendMgrFilter );
IPOIB_ENTER( IPOIB_DBG_SEND );
/* Format the send descriptor. */
- cl_perf_start( SendMgrFilter );
PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(p_desc->p_pkt);
- pChecksumPktInfo = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo];
+ pChecksumPktInfo =
+ (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo];
mss = PtrToUlong(PktExt->NdisPacketInfo[TcpLargeSendPacketInfo]);
- //TODO: optimization: we already got total length from NdisGetFirstBufferFromPacketSafe before
- NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL, &FirstBuffer,(PUINT)&PacketLength);
/* Format the send descriptor. */
hdr_idx = cl_atomic_inc( &p_port->hdr_idx );
@@ -3909,49 +4261,27 @@
p_port->hdr[hdr_idx].type = p_eth_hdr->type;
p_port->hdr[hdr_idx].resv = 0;
- if (mss)
+ p_desc->send_wr[0].local_ds[0].vaddr = cl_get_physaddr( &p_port->hdr[hdr_idx] );
+ p_desc->send_wr[0].local_ds[0].length = sizeof(ipoib_hdr_t);
+ p_desc->send_wr[0].local_ds[0].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[0].wr.send_opt = 0;
+
+ if( p_port->p_adapter->params.lso && mss )
{
- memset(&TheLsoData, 0, sizeof TheLsoData );
- status = GetLsoHeaderSize(
- p_port,
- FirstBuffer,
- &TheLsoData,
- &lso_header_size,
- &IndexOfData,
- &p_port->hdr[hdr_idx]
-
- );
- if ((status != NDIS_STATUS_SUCCESS ) ||
- (TheLsoData.FullBuffers != TheLsoData.UsedBuffers)) {
- ASSERT(FALSE);
-
- IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("<-- Throwing this packet\n"));
-
- //NdisReleaseSpinLock(&Port->SendLock);
- //MP_ASSERT_NDIS_PACKET_TYPE(Packet);
- //SendComplete(Port, Packet, NDIS_STATUS_INVALID_PACKET);
- //NdisAcquireSpinLock(&Port->SendLock);
- //IPOIB_PRINT_EXIT
+ status = __build_lso_desc( p_port, p_desc, mss, hdr_idx );
+ if( status != NDIS_STATUS_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("__build_lso_desc returned 0x%08X.\n", status) );
return status;
}
- ASSERT(lso_header_size > 0);
- p_desc->wr.dgrm.ud.mss = mss;
- p_desc->wr.dgrm.ud.header = TheLsoData.LsoBuffers[0].pData;
- p_desc->wr.dgrm.ud.hlen = lso_header_size;
- // Tell NDIS how much we will send.
- PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] = UlongToPtr(PacketLength);
- p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM | IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED;
- __send_gen(p_port, p_desc, IndexOfData);
- p_desc->wr.wr_type = WR_LSO;
- } else {
-
- /* Setup the first local data segment (used for the IPoIB header). */
- p_desc->local_ds[0].vaddr = cl_get_physaddr( &p_port->hdr[hdr_idx] );
- p_desc->local_ds[0].length = sizeof(ipoib_hdr_t);
- p_desc->local_ds[0].lkey = p_port->ib_mgr.lkey;
-
+ }
+ else
+ {
+ uint32_t i;
+ cl_perf_start( SendMgrFilter );
status = __send_mgr_filter(
- p_port, p_eth_hdr, p_buf, buf_len, p_desc);
+ p_port, p_eth_hdr, p_buf, buf_len, p_desc );
cl_perf_stop( &p_port->p_adapter->perf, SendMgrFilter );
if( status != NDIS_STATUS_SUCCESS )
{
@@ -3959,49 +4289,140 @@
("__send_mgr_filter returned 0x%08X.\n", status) );
return status;
}
- p_desc->wr.wr_type = WR_SEND;
- p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED;
- }
+ if( p_desc->send_dir == SEND_UD_QP )
+ {
+ p_desc->send_qp = p_port->ib_mgr.h_qp; // UD QP
+ for( i = 0; i < p_desc->num_wrs; i++ )
+ {
+ p_desc->send_wr[i].wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;
+ p_desc->send_wr[i].wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;
+ p_desc->send_wr[i].wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;
+ p_desc->send_wr[i].wr.dgrm.ud.pkey_index = p_port->pkey_index;
+ p_desc->send_wr[i].wr.dgrm.ud.rsvd = NULL;
+ p_desc->send_wr[i].wr.send_opt = 0;
-
- /* Setup the work request. */
- p_desc->wr.p_next = NULL;
- p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;
-
- if(p_port->p_adapter->params.send_chksum_offload &&
- (pChecksumPktInfo->Transmit.NdisPacketChecksumV4 || pChecksumPktInfo->Transmit.NdisPacketChecksumV6))
- {
- // Set transimition checksum offloading
- if (pChecksumPktInfo->Transmit.NdisPacketIpChecksum)
+ if( p_port->p_adapter->params.send_chksum_offload &&
+ ( pChecksumPktInfo->Transmit.NdisPacketChecksumV4 ||
+ pChecksumPktInfo->Transmit.NdisPacketChecksumV6 ))
+ {
+ // Set transmit checksum offloading
+ if( pChecksumPktInfo->Transmit.NdisPacketIpChecksum )
+ {
+ p_desc->send_wr[i].wr.send_opt |= IB_SEND_OPT_TX_IP_CSUM;
+ }
+ if( pChecksumPktInfo->Transmit.NdisPacketTcpChecksum )
+ {
+ p_desc->send_wr[i].wr.send_opt |= IB_SEND_OPT_TX_TCP_UDP_CSUM;
+ }
+ }
+ }
+ }
+ else // RC QP
{
- p_desc->wr.send_opt |= IB_SEND_OPT_TX_IP_CSUM;
+ CL_ASSERT( p_desc->send_dir == SEND_RC_QP );
+ p_desc->send_qp = p_desc->p_endpt->conn.h_work_qp;
}
- if(pChecksumPktInfo->Transmit.NdisPacketTcpChecksum ||
- pChecksumPktInfo->Transmit.NdisPacketUdpChecksum )
- {
- p_desc->wr.send_opt |= IB_SEND_OPT_TX_TCP_UDP_CSUM;
+ for( i = 0; i < p_desc->num_wrs; i++ )
+ {
+ p_desc->send_wr[i].wr.wr_type = WR_SEND;
+ p_desc->send_wr[i].wr.wr_id = (uintn_t)p_desc->p_pkt;
+ p_desc->send_wr[i].wr.ds_array = &p_desc->send_wr[i].local_ds[0];
+ if( i )
+ {
+ p_desc->send_wr[i-1].wr.p_next = &p_desc->send_wr[i].wr;
+ }
}
+ p_desc->send_wr[p_desc->num_wrs - 1].wr.send_opt |= IB_SEND_OPT_SIGNALED;
+ p_desc->send_wr[p_desc->num_wrs - 1].wr.p_next = NULL;
}
-
- p_desc->wr.ds_array = p_desc->local_ds;
- p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt1->qpn;
- p_desc->wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;
- p_desc->wr.dgrm.ud.h_av = p_desc->p_endpt1->h_av;
- p_desc->wr.dgrm.ud.pkey_index = p_port->pkey_index;
- p_desc->wr.dgrm.ud.rsvd = NULL;
-
/* Store context in our reserved area of the packet. */
IPOIB_PORT_FROM_PACKET( p_desc->p_pkt ) = p_port;
- IPOIB_ENDPT_FROM_PACKET( p_desc->p_pkt ) = p_desc->p_endpt1;
+ IPOIB_ENDPT_FROM_PACKET( p_desc->p_pkt ) = p_desc->p_endpt;
IPOIB_SEND_FROM_PACKET( p_desc->p_pkt ) = p_desc->p_buf;
IPOIB_EXIT( IPOIB_DBG_SEND );
return NDIS_STATUS_SUCCESS;
}
+static NDIS_STATUS /* Formats p_desc->send_wr[0] as a single WR_LSO work request aimed at the port's UD QP. */
+__build_lso_desc(
+ IN ipoib_port_t* const p_port, /* port supplying the UD QP handle, qkey, pkey index and hdr[] slot */
+ IN OUT ipoib_send_desc_t* const p_desc, /* descriptor to fill; its p_pkt and p_endpt are read here */
+ IN ULONG mss, /* value stored into the WR's ud.mss field */
+ IN int32_t hdr_idx ) /* index of the p_port->hdr[] entry handed to GetLsoHeaderSize */
+{
+ NDIS_STATUS status;
+ PNDIS_PACKET_EXTENSION PktExt;
+ LsoData TheLsoData;
+ INT IndexOfData = 0;
+ ULONG PhysBufCount;
+ ULONG PacketLength;
+ PNDIS_BUFFER FirstBuffer;
+ uint16_t lso_header_size;
+ IPOIB_ENTER( IPOIB_DBG_SEND );
+
+ NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL,
+ &FirstBuffer,(PUINT)&PacketLength); /* only FirstBuffer and PacketLength are used below */
+ PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(p_desc->p_pkt);
+
+ memset(&TheLsoData, 0, sizeof TheLsoData );
+ status = GetLsoHeaderSize(
+ p_port,
+ FirstBuffer,
+ &TheLsoData,
+ &lso_header_size,
+ &IndexOfData,
+ &p_port->hdr[hdr_idx] );
+
+ if ((status != NDIS_STATUS_SUCCESS ) ||
+ (TheLsoData.FullBuffers != TheLsoData.UsedBuffers)) /* helper failed, or its two buffer counters disagree */
+ {
+ ASSERT(FALSE);
+
+ IPOIB_PRINT(TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("<-- Throwing this packet\n"));
+
+ //NdisReleaseSpinLock(&Port->SendLock);
+ //MP_ASSERT_NDIS_PACKET_TYPE(Packet);
+ //SendComplete(Port, Packet, NDIS_STATUS_INVALID_PACKET);
+ //NdisAcquireSpinLock(&Port->SendLock);
+ //IPOIB_PRINT_EXIT
+ if( status == NDIS_STATUS_SUCCESS ) /* counter mismatch without an error code: still report a failure */
+ {
+ status = NDIS_STATUS_INVALID_PACKET;
+ }
+ return status; /* NOTE(review): this early return does not pass through IPOIB_EXIT */
+ }
+ ASSERT(lso_header_size > 0);
+ // Tell NDIS how much we will send: the full packet length goes back into the packet's TcpLargeSendPacketInfo slot.
+ PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] = UlongToPtr(PacketLength);
+
+ p_desc->send_wr[0].wr.dgrm.ud.mss = mss;
+ p_desc->send_wr[0].wr.dgrm.ud.header = TheLsoData.LsoBuffers[0].pData; /* header pointer comes from the first LSO buffer */
+ p_desc->send_wr[0].wr.dgrm.ud.hlen = lso_header_size;
+ p_desc->send_wr[0].wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn; /* UD addressing is taken from the destination endpoint and the port */
+ p_desc->send_wr[0].wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;
+ p_desc->send_wr[0].wr.dgrm.ud.h_av = p_desc->p_endpt->h_av;
+ p_desc->send_wr[0].wr.dgrm.ud.pkey_index = p_port->pkey_index;
+ p_desc->send_wr[0].wr.dgrm.ud.rsvd = NULL;
+
+ p_desc->send_wr[0].wr.wr_id = (uintn_t)p_desc->p_pkt; /* the packet pointer doubles as the work request id */
+ p_desc->send_wr[0].wr.ds_array = p_desc->send_wr[0].local_ds;
+ p_desc->send_wr[0].wr.wr_type = WR_LSO;
+ p_desc->send_wr[0].wr.send_opt =
+ (IB_SEND_OPT_TX_IP_CSUM | IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED; /* plain assignment: replaces any earlier send_opt value */
+
+ p_desc->send_wr[0].wr.p_next = NULL; /* LSO uses exactly one WR, so the chain ends here */
+ p_desc->send_qp = p_port->ib_mgr.h_qp;
+ p_desc->send_dir = SEND_UD_QP;
+ status = __send_gen(p_port, p_desc, IndexOfData ); /* fills local_ds[] starting at SGL element IndexOfData; presumably the first payload element - confirm against GetLsoHeaderSize */
+
+ IPOIB_EXIT( IPOIB_DBG_SEND );
+ return status; /* result of __send_gen */
+}
+
static inline void
__process_failed_send(
IN ipoib_port_t* const p_port,
@@ -4015,8 +4436,8 @@
p_desc->p_pkt, status );
ipoib_inc_send_stat( p_port->p_adapter, IP_STAT_ERROR, 0 );
/* Deref the endpoint. */
- if( p_desc->p_endpt1 )
- ipoib_endpt_deref( p_desc->p_endpt1 );
+ if( p_desc->p_endpt )
+ ipoib_endpt_deref( p_desc->p_endpt );
if( p_desc->p_buf )
{
@@ -4041,6 +4462,7 @@
eth_hdr_t *p_eth_hdr;
NDIS_BUFFER *p_buf;
UINT buf_len;
+ ib_send_wr_t *p_wr_failed;
PERF_DECLARE( GetEthHdr );
PERF_DECLARE( BuildSendDesc );
@@ -4076,8 +4498,10 @@
for( i = 0; i < num_packets; i++ )
{
desc.p_pkt = p_packet_array[i];
- desc.p_endpt1 = NULL;
+ desc.p_endpt = NULL;
desc.p_buf = NULL;
+ desc.send_qp = NULL;
+ desc.num_wrs = 1;
/* Get the ethernet header so we can find the endpoint. */
cl_perf_start( GetEthHdr );
@@ -4101,7 +4525,7 @@
ip_hdr_t *p_ip_hdr;
NDIS_BUFFER *p_ip_hdr_buf;
UINT ip_hdr_buf_len;
-
+
// Extract the ip hdr
if(buf_len >= sizeof(ip_hdr_t)+ sizeof(eth_hdr_t))
{
@@ -4139,7 +4563,7 @@
p_eth_hdr->dst.addr[3] = ((unsigned char*)&p_ip_hdr->dst_ip)[1];
}
h_end:
- status = __send_mgr_queue( p_port, p_eth_hdr, &desc.p_endpt1 );
+ status = __send_mgr_queue( p_port, p_eth_hdr, &desc.p_endpt );
cl_perf_stop( &p_port->p_adapter->perf, SendMgrQueue );
if( status == NDIS_STATUS_PENDING )
{
@@ -4171,6 +4595,11 @@
cl_perf_stop( &p_port->p_adapter->perf, BuildSendDesc );
if( status != NDIS_STATUS_SUCCESS )
{
+ if( status == NDIS_STATUS_PENDING )
+ {
+ ipoib_endpt_deref( desc.p_endpt );
+ continue;
+ }
cl_perf_start( ProcessFailedSends );
__process_failed_send( p_port, &desc, status );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
@@ -4179,7 +4608,7 @@
/* Post the WR. */
cl_perf_start( PostSend );
- ib_status = p_port->p_adapter->p_ifc->post_send( p_port->ib_mgr.h_qp, &desc.wr, NULL );
+ ib_status = p_port->p_adapter->p_ifc->post_send( desc.send_qp, &desc.send_wr[0].wr, &p_wr_failed );
cl_perf_stop( &p_port->p_adapter->perf, PostSend );
if( ib_status != IB_SUCCESS )
{
@@ -4213,6 +4642,7 @@
eth_hdr_t *p_eth_hdr;
NDIS_BUFFER *p_buf;
UINT buf_len;
+ ib_send_wr_t *p_wr_failed;
PERF_DECLARE( GetEndpt );
PERF_DECLARE( BuildSendDesc );
@@ -4248,8 +4678,10 @@
desc.p_pkt = IPOIB_PACKET_FROM_LIST_ITEM(
cl_qlist_remove_head( &p_port->send_mgr.pending_list ) );
- desc.p_endpt1 = NULL;
+ desc.p_endpt = NULL;
desc.p_buf = NULL;
+ desc.send_qp = NULL;
+ desc.num_wrs = 1;
/* Get the ethernet header so we can find the endpoint. */
status = __send_mgr_get_eth_hdr(
@@ -4263,11 +4695,11 @@
}
cl_perf_start( GetEndpt );
- status = __endpt_mgr_ref( p_port, p_eth_hdr->dst, &desc.p_endpt1 );
+ status = __endpt_mgr_ref( p_port, p_eth_hdr->dst, &desc.p_endpt );
cl_perf_stop( &p_port->p_adapter->perf, GetEndpt );
if( status == NDIS_STATUS_PENDING )
{
- CL_ASSERT(desc.p_endpt1 == NULL);
+ CL_ASSERT(desc.p_endpt == NULL);
cl_qlist_insert_head( &p_port->send_mgr.pending_list,
IPOIB_LIST_ITEM_FROM_PACKET( desc.p_pkt ) );
break;
@@ -4275,7 +4707,7 @@
else if( status != NDIS_STATUS_SUCCESS )
{
ASSERT( status == NDIS_STATUS_NO_ROUTE_TO_DESTINATION );
- CL_ASSERT(desc.p_endpt1 == NULL);
+ CL_ASSERT(desc.p_endpt == NULL);
if( ETH_IS_MULTICAST( p_eth_hdr->dst.addr ) )
{
@@ -4305,6 +4737,12 @@
cl_perf_stop( &p_port->p_adapter->perf, BuildSendDesc );
if( status != NDIS_STATUS_SUCCESS )
{
+ if( status == NDIS_STATUS_PENDING )
+ {
+ /* ARP REPLY packet queued */
+ ipoib_endpt_deref( desc.p_endpt );
+ continue;
+ }
cl_perf_start( ProcessFailedSends );
__process_failed_send( p_port, &desc, status );
cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends );
@@ -4313,7 +4751,7 @@
/* Post the WR. */
cl_perf_start( PostSend );
- ib_status = p_port->p_adapter->p_ifc->post_send( p_port->ib_mgr.h_qp, &desc.wr, NULL );
+ ib_status = p_port->p_adapter->p_ifc->post_send( desc.send_qp, &desc.send_wr[0].wr, &p_wr_failed );
cl_perf_stop( &p_port->p_adapter->perf, PostSend );
if( ib_status != IB_SUCCESS )
{
@@ -4498,18 +4936,126 @@
IPOIB_EXIT( IPOIB_DBG_INIT );
}
+static void /* forward declaration: __endpt_mgr_init hands this routine to cl_thread_init before its definition appears */
+__endpt_cm_mgr_thread(
+IN void* p_context );
static ib_api_status_t
__endpt_mgr_init(
IN ipoib_port_t* const p_port )
{
IPOIB_ENTER( IPOIB_DBG_INIT );
- UNUSED_PARAM( p_port );
+
+ if( p_port->p_adapter->params.cm_enabled ) /* the state below is initialized only when connected mode is enabled */
+ {
+ cl_fmap_init( &p_port->endpt_mgr.conn_endpts, __gid_cmp ); /* conn_endpts map, initialized with __gid_cmp */
+
+ NdisInitializeListHead( &p_port->endpt_mgr.pending_conns ); /* list drained by __endpt_cm_mgr_thread under conn_lock */
+ NdisAllocateSpinLock( &p_port->endpt_mgr.conn_lock );
+ cl_event_init( &p_port->endpt_mgr.event, FALSE ); /* event __endpt_cm_mgr_thread waits on; NOTE(review): return value is ignored */
+
+ NdisInitializeListHead( &p_port->endpt_mgr.remove_conns ); /* list drained by __endpt_cm_mgr_thread under remove_lock */
+ NdisAllocateSpinLock( &p_port->endpt_mgr.remove_lock );
+
+ cl_thread_init( &p_port->endpt_mgr.h_thread,
+ __endpt_cm_mgr_thread,
+ ( const void *)p_port,
+ "CmEndPtMgr" ); /* NOTE(review): return value is ignored; this function returns IB_SUCCESS regardless */
+ }
+
IPOIB_EXIT( IPOIB_DBG_INIT );
return IB_SUCCESS;
}
+static void /* Thread routine: drains the port's pending_conns and remove_conns lists each time the endpt_mgr event fires. */
+__endpt_cm_mgr_thread(
+IN void* p_context ) /* the ipoib_port_t* passed to cl_thread_init */
+{
+ ib_api_status_t ib_status;
+ LIST_ENTRY *p_item;
+ ipoib_endpt_t *p_endpt;
+ ipoib_port_t *p_port =( ipoib_port_t *)p_context;
+
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
+ ("Starting Port [%d] Endpt CM thread \n", p_port->port_num ) );
+ while( !p_port->endpt_mgr.thread_is_done ) /* keep servicing until thread_is_done becomes non-zero */
+ {
+ cl_event_wait_on( &p_port->endpt_mgr.event, EVENT_NO_TIMEOUT, FALSE ); /* wait with no timeout until the event is signaled */
+
+ while( ( p_item = NdisInterlockedRemoveHeadList(
+ &p_port->endpt_mgr.pending_conns,
+ &p_port->endpt_mgr.conn_lock) ) != NULL ) /* pass 1: endpoints queued for an active connect */
+ {
+
+ p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, list_item );
+ if( p_port->endpt_mgr.thread_is_done ) /* stop requested mid-drain: reset the endpoint state and skip the connect */
+ {
+ endpt_cm_set_state( p_endpt, IPOIB_CM_DISCONNECTED );
+ continue;
+ }
+
+ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
+ ("Endpt[%p] CONNECT REQ to MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+ p_endpt,
+ p_endpt->mac.addr[0], p_endpt->mac.addr[1],
+ p_endpt->mac.addr[2], p_endpt->mac.addr[3],
+ p_endpt->mac.addr[4], p_endpt->mac.addr[5] ) );
+
+ if( !p_endpt->conn.h_send_qp ) /* NOTE(review): an endpoint that already has h_send_qp gets no action in this pass - confirm intended */
+ {
+ ib_status = endpt_cm_create_qp( p_endpt, &p_endpt->conn.h_send_qp );
+ if( ib_status != IB_SUCCESS )
+ {
+ IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Endpt [%p ] CM create QP failed status %#x\n", p_endpt, ib_status ) );
+ }
+ else
+ {
+ ib_status = ipoib_endpt_connect( p_endpt ); /* attempted only after the QP was created */
+ if( ib_status != IB_SUCCESS && ib_status != IB_PENDING )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Endpt [ %p ] conn REQ failed status %#x\n", p_endpt, ib_status ) );
+ }
+ }
+ if( ib_status != IB_SUCCESS && ib_status != IB_PENDING ) /* either step failed: DESTROY -> flush receives -> DISCONNECTED */
+ {
+ endpt_cm_set_state( p_endpt, IPOIB_CM_DESTROY );
+ endpt_cm_flush_recv( p_port, p_endpt );
+ endpt_cm_set_state( p_endpt, IPOIB_CM_DISCONNECTED );
+ }
+ }
+
+ }//while( p_item != NULL )
+
+ while( ( p_item = NdisInterlockedRemoveHeadList(
+ &p_port->endpt_mgr.remove_conns,
+ &p_port->endpt_mgr.remove_lock ) ) != NULL ) /* pass 2: endpoints queued for removal */
+ {
+ p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, list_item );
+
+ endpt_cm_set_state( p_endpt, IPOIB_CM_DESTROY );
+
+ IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_INIT,
+ ("\nDESTROYING Endpt[%p] MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+ p_endpt,
+ p_endpt->mac.addr[0], p_endpt->mac.addr[1],
+ p_endpt->mac.addr[2], p_endpt->mac.addr[3],
+ p_endpt->mac.addr[4], p_endpt->mac.addr[5] ) );
+ endpt_cm_flush_recv( p_port, p_endpt );
+ endpt_cm_set_state( p_endpt, IPOIB_CM_DISCONNECTED );
+ cl_obj_destroy( &p_endpt->obj ); /* the endpoint object is destroyed after its state is reset */
+ }
+ }
+
+ p_port->endpt_mgr.thread_is_done++; /* bumped once more on the way out; presumably read by whoever requested the stop - confirm */
+ NdisFreeSpinLock( &p_port->endpt_mgr.conn_lock ); /* NOTE(review): remove_lock is not freed here - confirm it is released elsewhere */
+
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,
+ (" Port [%d] Endpt thread is done\n", p_port->port_num ) );
+}
+
static void
__endpt_mgr_destroy(
IN ipoib_port_t* const p_port )
@@ -4518,7 +5064,10 @@
CL_ASSERT( cl_is_qmap_empty( &p_port->endpt_mgr.mac_endpts ) );
CL_ASSERT( cl_is_qmap_empty( &p_port->endpt_mgr.lid_endpts ) );
CL_ASSERT( cl_is_fmap_empty( &p_port->endpt_mgr.gid_endpts ) );
- UNUSED_PARAM( p_port );
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ CL_ASSERT( cl_is_fmap_empty( &p_port->endpt_mgr.conn_endpts ) );
+ }
IPOIB_EXIT( IPOIB_DBG_INIT );
}
@@ -4552,15 +5101,17 @@
IN ipoib_port_t* const p_port )
{
cl_map_item_t *p_item;
+ cl_fmap_item_t *p_fmap_item;
ipoib_endpt_t *p_endpt;
cl_qlist_t mc_list;
+ cl_qlist_t conn_list;
uint32_t local_exist = 0;
IPOIB_ENTER( IPOIB_DBG_ENDPT );
cl_qlist_init( &mc_list );
-
+ cl_qlist_init( &conn_list );
cl_obj_lock( &p_port->obj );
/* Wait for all readers to complete. */
while( p_port->endpt_rdr )
@@ -4583,8 +5134,8 @@
if( p_port->p_local_endpt )
{
- cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts,
- &p_port->p_local_endpt->gid_item );
+ ipoib_port_cancel_listen( p_port, p_port->p_local_endpt );
+
cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts,
&p_port->p_local_endpt->mac_item );
cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts,
@@ -4616,8 +5167,26 @@
cl_qlist_insert_tail(
&mc_list, &p_endpt->mac_item.pool_item.list_item );
}
- else if( p_endpt->h_av )
+ /* destroy connected endpoints if any */
+ else if( p_port->p_adapter->params.cm_enabled &&
+ endpt_cm_get_state( p_endpt ) != IPOIB_CM_DISCONNECTED )
{
+ p_fmap_item = cl_fmap_get( &p_port->endpt_mgr.conn_endpts, &p_endpt->dgid );
+ if( p_fmap_item != cl_fmap_end( &p_port->endpt_mgr.conn_endpts ) )
+ {
+ cl_fmap_remove_item( &p_port->endpt_mgr.conn_endpts,
+ &p_endpt->conn_item );
+ }
+ cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts,
+ &p_endpt->mac_item );
+ cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts,
+ &p_endpt->gid_item );
+
+ cl_qlist_insert_tail(
+ &conn_list, &p_endpt->mac_item.pool_item.list_item );
+ }
+ if( p_endpt->h_av )
+ {
/* Destroy the AV for all other endpoints. */
p_port->p_adapter->p_ifc->destroy_av( p_endpt->h_av );
p_endpt->h_av = NULL;
@@ -4636,6 +5205,12 @@
#endif
cl_obj_unlock( &p_port->obj );
+ while( cl_qlist_count( &conn_list ) )
+ {
+ endpt_cm_destroy_conn( p_port,
+ PARENT_STRUCT( cl_qlist_remove_head( &conn_list ),
+ ipoib_endpt_t, mac_item.pool_item.list_item ) );
+ }
if(cl_qlist_count( &mc_list ) - local_exist)
{
@@ -4673,6 +5248,8 @@
IN ipoib_port_t* const p_port,
IN ipoib_endpt_t* const p_endpt )
{
+ cl_fmap_item_t* p_fmap_item;
+
IPOIB_ENTER( IPOIB_DBG_ENDPT );
/* This function must be called from the recieve path */
@@ -4690,7 +5267,16 @@
* in the LID map if the GID has the same subnet prefix as us.
*/
cl_fmap_remove_item( &p_port->endpt_mgr.gid_endpts, &p_endpt->gid_item );
-
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ p_fmap_item = cl_fmap_get( &p_port->endpt_mgr.conn_endpts, &p_endpt->dgid );
+
+ if( p_fmap_item != cl_fmap_end( &p_port->endpt_mgr.conn_endpts ) )
+ {
+ cl_fmap_remove_item( &p_port->endpt_mgr.conn_endpts,
+ &p_endpt->conn_item );
+ }
+ }
if( p_endpt->dlid )
{
cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts,
@@ -4699,7 +5285,7 @@
cl_obj_unlock( &p_port->obj );
- cl_obj_destroy( &p_endpt->obj );
+ endpt_cm_destroy_conn( p_port, p_endpt );
IPOIB_EXIT( IPOIB_DBG_ENDPT );
}
@@ -4848,7 +5434,7 @@
cl_obj_lock( &p_port->obj );
- IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
+ IPOIB_PRINT( TRACE_LEVEL_VERBOSE, IPOIB_DBG_ENDPT,
("Look for :\t MAC: %02X-%02X-%02X-%02X-%02X-%02X\n",
mac.addr[0], mac.addr[1], mac.addr[2],
mac.addr[3], mac.addr[4], mac.addr[5]) );
@@ -5102,6 +5688,7 @@
IN const mac_addr_t mac )
{
cl_map_item_t *p_item;
+ cl_fmap_item_t *p_fmap_item;
ipoib_endpt_t *p_endpt;
uint64_t key;
@@ -5130,6 +5717,17 @@
cl_fmap_remove_item(
&p_port->endpt_mgr.gid_endpts, &p_endpt->gid_item );
+ if( p_port->p_adapter->params.cm_enabled )
+ {
+ p_fmap_item = cl_fmap_get( &p_port->endpt_mgr.conn_endpts, &p_endpt->dgid );
+
+ if( p_fmap_item != cl_fmap_end( &p_port->endpt_mgr.conn_endpts ) )
+ {
+ cl_fmap_remove_item( &p_port->endpt_mgr.conn_endpts,
+ &p_endpt->conn_item );
+ }
+ }
+
if( p_endpt->dlid )
{
cl_qmap_remove_item(
@@ -5137,7 +5735,9 @@
}
cl_obj_unlock( &p_port->obj );
- cl_obj_destroy( &p_endpt->obj );
+
+ endpt_cm_destroy_conn( p_port, p_endpt );
+
#if DBG
cl_atomic_dec( &p_port->ref[ref_endpt_track] );
IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
@@ -5296,8 +5896,8 @@
cl_memclr( &av_attr, sizeof(ib_av_attr_t) );
av_attr.port_num = p_port->port_num;
av_attr.sl = 0;
- IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ENDPT,
- ("<__endpt_mgr_add_local>: av_attr.dlid = p_port_info->base_lid = %d\n",p_port_info->base_lid));
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT,
+ (" av_attr.dlid = p_port_info->base_lid = %d\n", cl_ntoh16( p_port_info->base_lid ) ));
av_attr.dlid = p_port_info->base_lid;
av_attr.static_rate = p_port->ib_mgr.rate;
av_attr.path_bits = 0;
@@ -5760,7 +6360,7 @@
cl_obj_lock( &p_port->obj );
}
- if(! p_port->p_local_endpt)
+ if( !p_port->p_local_endpt )
{
ib_port_info_t port_info;
cl_memclr(&port_info, sizeof(port_info));
@@ -5853,6 +6453,20 @@
return;
}
+ if( p_port->p_adapter->params.cm_enabled &&
+ !p_port->p_local_endpt->conn.h_cm_listen )
+ {
+ if( ipoib_port_listen( p_port ) != IB_SUCCESS )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Port CM Listen failed\n" ) );
+ /*keep going with UD only */
+ p_port->p_adapter->params.cm_enabled = FALSE;
+
+ NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter,
+ EVENT_IPOIB_CONNECTED_MODE_ERR, 1, 0xbadc0de3 );
+ }
+ }
/* garbage collector timer is needed when link is active */
gc_due_time.QuadPart = -(int64_t)(((uint64_t)p_port->p_adapter->params.mc_leave_rescan * 2000000) * 10);
KeSetTimerEx(&p_port->gc_timer,gc_due_time,
@@ -6277,7 +6891,7 @@
}
IpHdr = (ip_hdr_t UNALIGNED*)pSrc;
IpHeaderLen = (uint16_t)IP_HEADER_LENGTH(IpHdr);
- ASSERT(IpHdr->prot == PROTOCOL_TCP);
+ ASSERT(IpHdr->prot == IP_PROT_TCP);
if (CurrLength < IpHeaderLen) {
IPOIB_PRINT(TRACE_LEVEL_VERBOSE, IPOIB_DBG_ERROR, ("Error processing packets\n"));
return status;
@@ -6328,7 +6942,7 @@
TcpHdr = (tcp_hdr_t UNALIGNED *)pSrc;
TcpHeaderLen = TCP_HEADER_LENGTH(TcpHdr);
- ASSERT(TcpHeaderLen == 20);
+ //ASSERT(TcpHeaderLen == 20);
if (CurrLength < TcpHeaderLen) {
//IPOIB_PRINT(TRACE_LEVEL_VERBOSE, ETH, ("Error porcessing packets\n"));
@@ -6442,4 +7056,451 @@
__port_do_mcast_garbage(p_port);
}
+/*
+* Externally visible entry point for endpoint lookup by GID.
+* Forwards both arguments to __endpt_mgr_get_by_gid and returns its result
+* unchanged; no locking or reference handling is done here.
+*/
+ipoib_endpt_t*
+ipoib_endpt_get_by_gid(
+ IN ipoib_port_t* const p_port,
+ IN const ib_gid_t* const p_gid )
+{
+ return __endpt_mgr_get_by_gid( p_port, p_gid );
+}
+/*
+* Externally visible entry point for endpoint lookup by LID.
+* Forwards both arguments to __endpt_mgr_get_by_lid and returns its result
+* unchanged; no locking or reference handling is done here.
+*/
+ipoib_endpt_t*
+ipoib_endpt_get_by_lid(
+ IN ipoib_port_t* const p_port,
+ IN const net16_t lid )
+{
+ return __endpt_mgr_get_by_lid( p_port, lid );
+}
+
+/*
+* Externally visible wrapper around __recv_dhcp.
+* All five arguments are passed through in the same order and the status
+* returned by __recv_dhcp is handed back to the caller as is.
+*/
+ib_api_status_t
+ipoib_recv_dhcp(
+ IN ipoib_port_t* const p_port,
+ IN const ipoib_pkt_t* const p_ipoib,
+ OUT eth_pkt_t* const p_eth,
+ IN ipoib_endpt_t* const p_src,
+ IN ipoib_endpt_t* const p_dst )
+{
+ return __recv_dhcp(
+ p_port, p_ipoib, p_eth, p_src,p_dst );
+}
+
+/*
+* Cancel every queued (not yet posted) send packet whose NDIS cancel ID
+* matches cancel_id.
+*
+* p_port:    port whose send_mgr.pending_list is scanned.
+* cancel_id: cancel ID to match against NDIS_GET_PACKET_CANCEL_ID.
+*
+* Matching packets are moved to a private list while send_lock is held and
+* are completed with NDIS_STATUS_REQUEST_ABORTED only after the lock has been
+* released, so NdisMSendComplete is never called under the spinlock.
+*/
+void
+ipoib_port_cancel_xmit(
+	IN				ipoib_port_t* const		p_port,
+	IN				PVOID					cancel_id )
+{
+	cl_list_item_t		*p_item;
+	cl_list_item_t		*p_next;
+	NDIS_PACKET*		p_packet;
+	PVOID				packet_id;
+	cl_qlist_t			cancel_list;
+
+	IPOIB_ENTER( IPOIB_DBG_SEND );
+
+	cl_qlist_init( &cancel_list );
+
+	cl_spinlock_acquire( &p_port->send_lock );
+
+	for( p_item = cl_qlist_head( &p_port->send_mgr.pending_list );
+		p_item != cl_qlist_end( &p_port->send_mgr.pending_list );
+		p_item = p_next )
+	{
+		/*
+		 * Fetch the successor before the item can be unlinked: once the
+		 * item has been appended to cancel_list its next pointer refers to
+		 * cancel_list's end marker, which never compares equal to the end
+		 * of pending_list.
+		 */
+		p_next = cl_qlist_next( p_item );
+
+		p_packet = IPOIB_PACKET_FROM_LIST_ITEM( p_item );
+		packet_id = NDIS_GET_PACKET_CANCEL_ID( p_packet );
+		if( packet_id == cancel_id )
+		{
+			cl_qlist_remove_item( &p_port->send_mgr.pending_list, p_item );
+			NDIS_SET_PACKET_STATUS( p_packet, NDIS_STATUS_REQUEST_ABORTED );
+			cl_qlist_insert_tail( &cancel_list, IPOIB_LIST_ITEM_FROM_PACKET( p_packet ) );
+		}
+	}
+	cl_spinlock_release( &p_port->send_lock );
+
+	/* Complete the cancelled packets outside of the lock. */
+	while( ( p_item = cl_qlist_remove_head( &cancel_list ) )
+		!= cl_qlist_end( &cancel_list ) )
+	{
+		p_packet = IPOIB_PACKET_FROM_LIST_ITEM( p_item );
+		NdisMSendComplete( p_port->p_adapter->h_adapter,
+			p_packet, NDIS_STATUS_REQUEST_ABORTED );
+	}
+
+	IPOIB_EXIT( IPOIB_DBG_SEND );
+}
+
+/*
+* Build a path record for an endpoint the caller has already resolved.
+* Same record construction as ipoib_mac_to_path, minus the endpoint lookup.
+*
+* p_port:  port supplying the local endpoint and the broadcast group record.
+* p_endpt: remote endpoint; its GID/LID become the destination of the path.
+* p_path:  path record to fill in.
+*
+* Returns STATUS_INVALID_PARAMETER when the port has no local endpoint or
+* p_endpt is NULL, STATUS_SUCCESS otherwise. The port object lock is held
+* while the record is populated.
+*/
+NTSTATUS
+ipoib_endpt_get_path(
+	IN				ipoib_port_t* const		p_port,
+	IN				ipoib_endpt_t* const	p_endpt,
+	OUT				ib_path_rec_t*			p_path )
+{
+	/* NOTE(review): svc_level is extracted below but never stored in p_path - confirm intended. */
+	uint8_t			svc_level;
+	uint8_t			hops;
+	net32_t			flow_label;
+
+	IPOIB_ENTER( IPOIB_DBG_ENDPT );
+
+	cl_obj_lock( &p_port->obj );
+
+	if( !p_port->p_local_endpt || !p_endpt )
+	{
+		cl_obj_unlock( &p_port->obj );
+		IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+			("No local endpoint.\n") );
+		return STATUS_INVALID_PARAMETER;
+	}
+
+	/* Reserved and fixed-value fields. */
+	p_path->resv0 = 0;
+	p_path->resv1 = 0;
+	p_path->resv2 = 0;
+	p_path->preference = 0;
+	p_path->num_path = 1;
+
+	/* Addressing: remote endpoint is the destination, local endpoint the source. */
+	p_path->dgid = p_endpt->dgid;
+	p_path->dlid = p_endpt->dlid;
+	p_path->sgid = p_port->p_local_endpt->dgid;
+	p_path->slid = p_port->p_local_endpt->dlid;
+
+	/* Everything else is inherited from the broadcast group record. */
+	ib_member_get_sl_flow_hop( p_port->ib_mgr.bcast_rec.sl_flow_hop,
+		&svc_level, &flow_label, &hops );
+	ib_path_rec_set_hop_flow_raw( p_path, hops, flow_label, FALSE );
+
+	p_path->tclass = p_port->ib_mgr.bcast_rec.tclass;
+	p_path->pkey = p_port->ib_mgr.bcast_rec.pkey;
+	p_path->mtu = p_port->ib_mgr.bcast_rec.mtu;
+	p_path->rate = p_port->ib_mgr.bcast_rec.rate;
+
+	/* A path whose source and destination LID match gets a packet lifetime of 0. */
+	p_path->pkt_life = ( p_path->slid == p_path->dlid ) ?
+		0 : p_port->ib_mgr.bcast_rec.pkt_life;
+
+	cl_obj_unlock( &p_port->obj );
+
+	IPOIB_EXIT( IPOIB_DBG_ENDPT );
+	return STATUS_SUCCESS;
+}
+
+/*
+* Split an IP packet into IP fragments, one work request (WR) per fragment,
+* filling p_desc->send_wr[] in order.
+* Put all fragments into separate WR and chain together.
+* The last WR will be set to generate CQ Event.
+* lookaside buffer is used for ipoib and ip headers attached to each WR.
+* Buffer will be released on last WR send completion.
+* (WR chaining, CQ signalling and the buffer release are not done in this
+* function - presumably handled by the caller / completion path.)
+*
+* p_port:     supplies the lkey, the send lookaside list and params.payload_mtu.
+* p_desc:     send descriptor; p_pkt provides the scatter/gather list, and
+*             p_buf, send_wr[] and num_wrs are written here.
+* p_eth_hdr:  Ethernet header; its type field is copied into every IPoIB header.
+* p_ip_hdr:   original IP header, used as the template for each fragment.
+* buf_len:    length of the buffer p_ip_hdr points into - appears to cover the
+*             IP header and any payload sharing that buffer; TODO confirm.
+* p_ndis_buf: NDIS buffer holding the IP header; the following buffer is
+*             queried when the IP options are not in the same buffer.
+*
+* Returns NDIS_STATUS_INVALID_PACKET if the packet has "don't fragment" set,
+* NDIS_STATUS_FAILURE on SGL / options-buffer problems,
+* NDIS_STATUS_RESOURCES if the lookaside allocation fails or the fragments
+* do not fit in MAX_WRS_PER_MSG work requests, NDIS_STATUS_SUCCESS otherwise.
+*/
+static NDIS_STATUS
+__send_fragments(
+IN ipoib_port_t* const p_port,
+IN ipoib_send_desc_t* const p_desc,
+IN eth_hdr_t* const p_eth_hdr,
+IN ip_hdr_t* const p_ip_hdr,
+IN uint32_t buf_len,
+IN NDIS_BUFFER* p_ndis_buf )
+{
+ uint32_t ds_idx = 0;
+ uint32_t wr_idx = 0;
+ uint32_t sgl_idx = 2; //skip eth hdr, ip hdr
+ uint32_t options_len = 0;
+ uint8_t* p_options = NULL;
+ uint8_t* p_buf;
+ uint32_t frag_offset = 0;
+ uint32_t next_sge;
+ uint32_t wr_size = 0;
+ uint32_t ip_hdr_len = IP_HEADER_LENGTH( p_ip_hdr );
+ uint32_t total_ip_len = cl_ntoh16( p_ip_hdr->length );
+
+ SCATTER_GATHER_LIST *p_sgl;
+
+ IPOIB_ENTER( IPOIB_DBG_SEND );
+
+ /* NOTE(review): this return skips the matching IPOIB_EXIT trace. */
+ if( IP_DONT_FRAGMENT(p_ip_hdr) )
+ return NDIS_STATUS_INVALID_PACKET;
+
+ p_sgl = NDIS_PER_PACKET_INFO_FROM_PACKET( p_desc->p_pkt, ScatterGatherListPacketInfo );
+ if( !p_sgl )
+ {
+ ASSERT( p_sgl );
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to get SGL from packet.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ /* Also rejects a first SG element too short to hold the Ethernet header,
+ * although the message only mentions the element count. */
+ if( ( p_sgl->NumberOfElements > MAX_SEND_SGE ||
+ p_sgl->Elements[0].Length < sizeof(eth_hdr_t)) )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Too many SG Elements in packet.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ p_buf = (uint8_t *)
+ ExAllocateFromNPagedLookasideList( &p_port->buf_mgr.send_buf_list );
+ if( !p_buf )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to allocate lookaside buffer.\n") );
+ return NDIS_STATUS_RESOURCES;
+ }
+ /* NOTE(review): the error returns further down do not free this buffer;
+ * presumably the caller's failure path releases p_desc->p_buf - confirm. */
+ p_desc->p_buf = (send_buf_t*)p_buf;
+
+ /* build first fragment WR */
+ ((ipoib_hdr_t*)p_buf)->type = p_eth_hdr->type;
+ ((ipoib_hdr_t*)p_buf)->resv = 0;
+ p_desc->send_wr[0].local_ds[0].vaddr = cl_get_physaddr( (void *)p_buf );
+ p_desc->send_wr[0].local_ds[0].length = sizeof( ipoib_hdr_t );
+ p_desc->send_wr[0].local_ds[0].lkey = p_port->ib_mgr.lkey;
+
+ p_buf += sizeof( ipoib_hdr_t );
+
+ if( buf_len < ip_hdr_len )
+ { /* ip options in a separate buffer */
+ CL_ASSERT( buf_len == sizeof( ip_hdr_t ) );
+ NdisGetNextBuffer( p_ndis_buf, &p_ndis_buf );
+ if( !p_ndis_buf )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to get IP options buffer.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ NdisQueryBufferSafe( p_ndis_buf, &p_options, &options_len, NormalPagePriority );
+ if( !p_options )
+ {
+ IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
+ ("Failed to query IP options buffer address.\n") );
+ return NDIS_STATUS_FAILURE;
+ }
+ cl_memcpy( p_buf, p_ip_hdr, sizeof( ip_hdr_t ) );
+ /* NOTE(review): options_len is the whole length of the next NDIS buffer;
+ * this assumes that buffer holds nothing but the IP options - confirm. */
+ if( p_options && options_len )
+ {
+ cl_memcpy( &p_buf[sizeof(ip_hdr_t)], p_options, options_len );
+ }
+ wr_size = buf_len + options_len;
+ /* the options occupy their own SG element, so skip it as well */
+ sgl_idx++;
+ }
+ else
+ { /*options probably in the same buffer */
+ /* copies the header plus whatever payload shares its buffer */
+ cl_memcpy( p_buf, p_ip_hdr, buf_len );
+ options_len = ip_hdr_len - sizeof( ip_hdr_t );
+ if( options_len )
+ {
+ p_options = p_buf + sizeof( ip_hdr_t );
+ }
+ frag_offset += ( buf_len - ip_hdr_len );
+ wr_size = buf_len;
+ }
+ ++ds_idx;
+
+ /* second data segment of the first WR: the copied IP header (and payload) */
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].vaddr = cl_get_physaddr( p_buf );
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].length = wr_size;
+
+ /* count how much data can be put into the first WR beside IP header.
+ * other protocols headers possibly supplied in subsequent buffers.
+ */
+ /* NOTE(review): the bare "sgl_idx" init expression has no effect. If every
+ * SG element fits, the loop ends without the else branch, so the first WR's
+ * num_ds and IP header are never finalized - presumably callers only get
+ * here for packets larger than payload_mtu; confirm. */
+ for( sgl_idx; sgl_idx < p_sgl->NumberOfElements; sgl_idx++ )
+ {
+ next_sge = p_sgl->Elements[sgl_idx].Length;
+
+ /* add sgl if it can fit into the same WR
+ * Note: so far not going to split large SGE between WRs,
+ * so first fragment could be a smaller size.
+ */
+ if( next_sge <= ( p_port->p_adapter->params.payload_mtu - wr_size ) )
+ {
+ ++ds_idx;
+ wr_size += next_sge;
+ frag_offset += next_sge;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].vaddr =
+ p_sgl->Elements[sgl_idx].Address.QuadPart;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].length = next_sge;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].lkey = p_port->ib_mgr.lkey;
+ }
+ else
+ {
+ /* fix ip hdr for the first fragment and move on */
+ __update_fragment_ip_hdr( (ip_hdr_t* const)p_buf,
+ (uint16_t)wr_size, IP_FRAGMENT_OFFSET(p_ip_hdr), TRUE );
+
+ p_desc->send_wr[wr_idx].wr.num_ds = ds_idx + 1;
+ /* step past the first fragment's header (and any payload copied with it) */
+ p_buf += ip_hdr_len;
+ p_buf += (( buf_len > ip_hdr_len ) ? ( buf_len - ip_hdr_len ): 0);
+ /* original offset is in 8-byte units; frag_offset is kept in bytes */
+ frag_offset += ( (IP_FRAGMENT_OFFSET(p_ip_hdr)) << 3 );
+ ++wr_idx;
+ ds_idx = 0;
+ break;
+ }
+ }
+ total_ip_len -= wr_size;
+ wr_size = 0;
+
+ /* Remaining SG elements: split across as many further WRs as needed. */
+ for( sgl_idx, wr_idx; sgl_idx < p_sgl->NumberOfElements; sgl_idx++ )
+ {
+ uint32_t seg_len;
+ uint64_t next_sgl_addr;
+
+ /* NOTE(review): checked once per SG element, while wr_idx can advance
+ * several times inside the while loop below - confirm the bound holds. */
+ if( wr_idx >= ( MAX_WRS_PER_MSG - 1 ) )
+ return NDIS_STATUS_RESOURCES;
+
+ next_sge = p_sgl->Elements[sgl_idx].Length;
+ next_sgl_addr = p_sgl->Elements[sgl_idx].Address.QuadPart;
+
+ while( next_sge )
+ {
+ if( ds_idx == 0 )
+ { /* new ipoib + ip header */
+ ((ipoib_hdr_t*)p_buf)->type = p_eth_hdr->type;
+ ((ipoib_hdr_t*)p_buf)->resv = 0;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].vaddr = cl_get_physaddr( p_buf );
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].lkey = p_port->ib_mgr.lkey;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].length = sizeof( ipoib_hdr_t );
+ p_buf += sizeof( ipoib_hdr_t );
+ ++ds_idx;
+
+ cl_memcpy( p_buf, p_ip_hdr, sizeof( ip_hdr_t ) );
+ if( p_options && options_len )
+ {
+ /* copy ip options if needed */
+ __copy_ip_options( &p_buf[sizeof(ip_hdr_t)],
+ p_options, options_len, FALSE );
+ }
+ wr_size = ip_hdr_len;
+ }
+ if( ds_idx == 1 )
+ {
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].length = ip_hdr_len;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].vaddr = cl_get_physaddr( p_buf );
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].lkey = p_port->ib_mgr.lkey;
+ ++ds_idx;
+ }
+
+ /* take as much of this SG element as still fits into the current WR */
+ seg_len = ( next_sge > ( p_port->p_adapter->params.payload_mtu - wr_size ) )?
+ ( p_port->p_adapter->params.payload_mtu - wr_size ) : next_sge;
+
+ /* NOTE(review): ds_idx is not checked against MAX_SEND_SGE here. */
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].vaddr = next_sgl_addr;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].length = seg_len;
+ p_desc->send_wr[wr_idx].local_ds[ds_idx].lkey = p_port->ib_mgr.lkey;
+ ++ds_idx;
+
+ wr_size += seg_len;
+ total_ip_len -= seg_len;
+
+ if( wr_size >= p_port->p_adapter->params.payload_mtu || total_ip_len == 0 )
+ { /* fix ip hdr for that fragment */
+ /* NOTE(review): frag_offset >> 3 truncates unless every non-final
+ * fragment carries a multiple of 8 payload bytes - confirm. */
+ __update_fragment_ip_hdr( (ip_hdr_t* const)p_buf, (uint16_t)wr_size,
+ ((uint16_t)(frag_offset >> 3 )),
+ (BOOLEAN)(( total_ip_len > 0 ) || IP_MORE_FRAGMENTS( p_ip_hdr)) );
+ p_desc->send_wr[wr_idx].wr.num_ds = ds_idx;
+ if( total_ip_len > 0 )
+ {
+ /* more data left: start the next WR with a fresh header slot */
+ ++wr_idx;
+ frag_offset += (wr_size - ip_hdr_len);
+ wr_size = 0;
+ ds_idx = 0;
+ p_buf += ip_hdr_len;
+ }
+ }
+ next_sge -= seg_len;
+ if( next_sge > 0 )
+ {
+ next_sgl_addr += seg_len;
+ }
+ }
+ }
+ /* wr_idx is the index of the last WR used, i.e. the number of extra WRs */
+ p_desc->num_wrs += wr_idx;
+
+ IPOIB_EXIT( IPOIB_DBG_SEND );
+ return NDIS_STATUS_SUCCESS;
+}
+
+/*
+* Patch the IP header of one fragment in place.
+*
+* p_ip_hdr:        header to rewrite.
+* fragment_size:   value for the length field, in bytes (host order; stored
+*                  with cl_hton16).
+* fragment_offset: value for the offset field, in 8-byte units (host order;
+*                  stored with cl_hton16).
+* more_fragments:  TRUE applies IP_SET_MORE_FRAGMENTS, FALSE applies
+*                  IP_SET_LAST_FRAGMENT.
+*
+* The checksum field is cleared and then recomputed with ipchksum over
+* IP_HEADER_LENGTH( p_ip_hdr ) bytes, so it must be done last.
+*/
+static void
+__update_fragment_ip_hdr(
+IN ip_hdr_t* const p_ip_hdr,
+IN uint16_t fragment_size,
+IN uint16_t fragment_offset,
+IN BOOLEAN more_fragments )
+{
+
+ p_ip_hdr->length = cl_hton16( fragment_size ); // bytes
+ /* the whole offset field is overwritten before the flag macro is applied */
+ p_ip_hdr->offset = cl_hton16( fragment_offset ); // 8-byte units
+ if( more_fragments )
+ {
+ IP_SET_MORE_FRAGMENTS( p_ip_hdr );
+ }
+ else
+ {
+ IP_SET_LAST_FRAGMENT( p_ip_hdr );
+ }
+ p_ip_hdr->chksum = 0;
+ p_ip_hdr->chksum = ipchksum( (uint16_t*)p_ip_hdr, IP_HEADER_LENGTH(p_ip_hdr) );
+}
+
+/*
+* Copy IP options into the header of a fragment.
+*
+* p_buf:       destination; exactly options_len bytes are written.
+* p_options:   options area of the original IP header.
+* options_len: size of the options area in bytes.
+* copy_all:    TRUE  - duplicate the options area verbatim;
+*              FALSE - keep only options whose "copied" flag (0x80 in the
+*                      option type octet) is set, as RFC 791 requires for
+*                      fragments other than the first.
+*
+* In filtered mode the unused tail of the destination is zero filled, which
+* reads as "end of option list". Parsing stops at the first malformed option
+* (missing length octet, length below 2, or length running past
+* options_len), so bad input can neither loop forever nor cause an
+* out-of-bounds read or write.
+*/
+static void
+__copy_ip_options(
+IN uint8_t* p_buf,
+IN uint8_t* p_options,
+IN uint32_t options_len,
+IN BOOLEAN copy_all )
+{
+	uint32_t	option_length;
+	uint32_t	remaining;
+	uint32_t	total_length = 0;	/* bytes consumed from p_options */
+	uint32_t	copied_length = 0;	/* bytes produced into p_buf */
+	uint8_t*	p_src = p_options;
+	uint8_t*	p_dst = p_buf;
+
+	if( p_options == NULL || options_len == 0 )
+		return;
+	if( copy_all )
+	{
+		cl_memcpy( p_dst, p_src, options_len );
+		return;
+	}
+
+	while( total_length < options_len )
+	{
+		if( ( *p_src ) == 0 ) // end of options list
+			break;
+
+		if( ( *p_src ) == 0x1 ) // no op: single octet, never copied
+		{
+			p_src++;
+			total_length++;
+			continue;
+		}
+
+		/* Every other option carries a length octet covering type + length + data. */
+		remaining = options_len - total_length;
+		if( remaining < 2 )
+			break;
+		option_length = *(p_src + 1);
+		if( option_length < 2 || option_length > remaining )
+			break;
+
+		if( *p_src & 0x80 )
+		{
+			/*from RFC791:
+			 * This option may be used between options, for example, to align
+			 * the beginning of a subsequent option on a 32 bit boundary.
+			 *
+			 * Alignment is optional, so pad only while the output stays no
+			 * longer than the input consumed so far; that keeps the result
+			 * within options_len whatever follows.
+			 */
+			if( copied_length % 4 )
+			{
+				uint32_t align = 4 - (copied_length % 4);
+				if( copied_length + align <= total_length )
+				{
+					cl_memset( p_dst, 0x1, (size_t)align );
+					p_dst += align;
+					copied_length += align;
+				}
+			}
+			cl_memcpy( p_dst, p_src, option_length );
+			p_dst += option_length;
+			copied_length += option_length;
+		}
+		total_length += option_length;
+		p_src += option_length;
+	}
+
+	CL_ASSERT( copied_length <= options_len );
+
+	/* padding the rest */
+	if( options_len > copied_length )
+	{
+		cl_memclr( p_dst, ( options_len - copied_length ) );
+	}
+	return;
+}
Index: ulp/ipoib/kernel/ipoib_port.h
===================================================================
--- ulp/ipoib/kernel/ipoib_port.h (revision 1776)
+++ ulp/ipoib/kernel/ipoib_port.h (working copy)
@@ -42,8 +42,8 @@
#include <complib/cl_qmap.h>
#include <complib/cl_fleximap.h>
#include <ip_packet.h>
+#include "ipoib_xfr_mgr.h"
#include "ipoib_endpoint.h"
-#include "ipoib_xfr_mgr.h"
/*
@@ -51,11 +51,6 @@
*/
#define IPOIB_INLINE_RECV 1
-
-/* Max send data segment list size. */
-#define MAX_SEND_SGE 30 //TODO optimize this value
-
-
/*
* Invalid pkey index
*/
@@ -75,7 +70,7 @@
#define IPOIB_ENDPT_FROM_PACKET( P ) \
(((ipoib_endpt_t**)P->MiniportReservedEx)[1])
#define IPOIB_RECV_FROM_PACKET( P ) \
- (((ipoib_recv_desc_t**)P->MiniportReservedEx)[1])
+ (((void **)P->MiniportReservedEx)[1])
#define IPOIB_SEND_FROM_PACKET( P ) \
(((send_buf_t**)P->MiniportReservedEx)[2])
#define IPOIB_PACKET_FROM_LIST_ITEM( I ) \
@@ -92,6 +87,7 @@
ib_cq_handle_t h_send_cq;
ib_qp_handle_t h_qp;
ib_query_handle_t h_query;
+ ib_srq_handle_t h_srq;
net32_t qpn;
ib_mr_handle_t h_mr;
@@ -188,7 +184,7 @@
ipoib_hdr_t hdr;
union _payload
{
- uint8_t data[MAX_PAYLOAD_MTU];
+ uint8_t data[MAX_UD_PAYLOAD_MTU];
ipoib_arp_pkt_t arp;
ip_pkt_t ip;
@@ -275,7 +271,7 @@
*/
typedef union _send_buf
{
- uint8_t data[MAX_PAYLOAD_MTU];
+ uint8_t data[MAX_UD_PAYLOAD_MTU];
ipoib_arp_pkt_t arp;
ip_pkt_t ip;
@@ -326,11 +322,29 @@
{
PKT_TYPE_UCAST,
PKT_TYPE_BCAST,
- PKT_TYPE_MCAST
+ PKT_TYPE_MCAST,
+ PKT_TYPE_CM_UCAST
} ipoib_pkt_type_t;
+/*
+* Receive descriptor for the connected-mode path. The accompanying patch
+* description states that it extends the layout of the UD receive
+* descriptor (ipoib_recv_desc_t).
+* NOTE(review): the p_alloc_buf/alloc_buf_size vs. p_buf/buf_size split looks
+* like "raw allocation" vs. "usable region" - confirm against ipoib_endpoint.c.
+*/
+typedef struct _ipoib_cm_desc
+{
+ cl_pool_item_t item; /* Must be first. */
+ uint32_t len;
+ ipoib_pkt_type_t type;
+ ib_recv_wr_t wr;
+ ib_local_ds_t local_ds[2];
+ cl_list_item_t list_item;
+ uint8_t* p_alloc_buf;
+ uint8_t* p_buf;
+ uint32_t alloc_buf_size;
+ uint32_t buf_size;
+ net32_t lkey;
+ ib_mr_handle_t h_mr;
+ NDIS_TCP_IP_CHECKSUM_PACKET_INFO ndis_csum;
+} ipoib_cm_desc_t;
+
typedef struct _ipoib_recv_desc
{
cl_pool_item_t item; /* Must be first. */
@@ -373,16 +387,27 @@
* The pool item is always first to allow casting form a cl_pool_item_t or
* cl_list_item_t to the descriptor.
*********/
+/*
+* One send work request bundled with its own data segment array.
+* A send descriptor embeds an array of these so that a single packet can be
+* described by several work requests.
+*/
+typedef struct __ipoib_send_wr
+{
+ ib_send_wr_t wr;
+ ib_local_ds_t local_ds[MAX_SEND_SGE]; /* Must be last. */
+} ipoib_send_wr_t;
+/*
+* Which queue pair a send is directed to: the datagram (UD) QP or a
+* connected (RC) QP.
+*/
+typedef enum __send_dir
+{
+ SEND_UD_QP = 1,
+ SEND_RC_QP = 2
+} send_dir_t;
typedef struct _ipoib_send_desc
{
NDIS_PACKET *p_pkt;
- ipoib_endpt_t *p_endpt1;
+ ipoib_endpt_t *p_endpt;
send_buf_t *p_buf;
- ib_send_wr_t wr;
- ipoib_hdr_t pkt_hdr;
- ib_local_ds_t local_ds[MAX_SEND_SGE]; /* Must be last. */
+ ib_qp_handle_t send_qp;
+ send_dir_t send_dir;
+ uint32_t num_wrs;
+ ipoib_send_wr_t send_wr[MAX_WRS_PER_MSG];
} ipoib_send_desc_t;
/*
@@ -461,7 +486,14 @@
cl_qmap_t mac_endpts;
cl_fmap_t gid_endpts;
cl_qmap_t lid_endpts;
-
+ cl_fmap_t conn_endpts;
+ LIST_ENTRY pending_conns;
+ LIST_ENTRY remove_conns;
+ NDIS_SPIN_LOCK conn_lock;
+ NDIS_SPIN_LOCK remove_lock;
+ cl_thread_t h_thread;
+ cl_event_t event;
+ uint32_t thread_is_done;
} ipoib_endpt_mgr_t;
/*
* FIELDS
@@ -474,6 +506,9 @@
* lid_endpts
* Map of enpoints, keyed by LID. Only enpoints on the same subnet
* are inserted in the LID map.
+*
+* conn_endpts
+* Map of connected endpts, keyed by remote gid.
*********/
@@ -506,8 +541,11 @@
ipoib_endpt_mgr_t endpt_mgr;
+ endpt_buf_mgr_t cm_buf_mgr;
+ endpt_recv_mgr_t cm_recv_mgr;
+
ipoib_endpt_t *p_local_endpt;
-
+ ib_ca_attr_t *p_ca_attrs;
#if DBG
atomic32_t ref[ref_array_size];
#endif
@@ -632,6 +670,12 @@
IN ipoib_port_t * p_port,
IN int type);
+NTSTATUS
+ipoib_endpt_get_path(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_endpt_t* const p_endpt,
+ OUT ib_path_rec_t* p_path );
+
#if DBG
// This function is only used to monitor send failures
static inline VOID NdisMSendCompleteX(
@@ -649,4 +693,91 @@
#define NdisMSendCompleteX NdisMSendComplete
#endif
+ipoib_endpt_t*
+ipoib_endpt_get_by_gid(
+ IN ipoib_port_t* const p_port,
+ IN const ib_gid_t* const p_gid );
+
+ipoib_endpt_t*
+ipoib_endpt_get_by_lid(
+ IN ipoib_port_t* const p_port,
+ IN const net16_t lid );
+
+ib_api_status_t
+ipoib_port_srq_init(
+ IN ipoib_port_t* const p_port );
+
+void
+ipoib_port_srq_destroy(
+ IN ipoib_port_t* const p_port );
+
+ib_api_status_t
+ipoib_port_listen(
+ IN ipoib_port_t* const p_port );
+
+ib_api_status_t
+ipoib_port_cancel_listen(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_endpt_t* const p_endpt );
+
+ib_api_status_t
+endpt_cm_buf_mgr_init(
+ IN ipoib_port_t* const p_port );
+
+void
+endpt_cm_buf_mgr_destroy(
+ IN ipoib_port_t* const p_port );
+
+void
+endpt_cm_buf_mgr_reset(
+ IN ipoib_port_t* const p_port );
+
+void
+endpt_cm_buf_mgr_put_recv(
+ IN endpt_buf_mgr_t * const p_buf_mgr,
+ IN ipoib_cm_desc_t* const p_desc );
+
+void
+endpt_cm_buf_mgr_put_recv_list(
+ IN endpt_buf_mgr_t * const p_buf_mgr,
+ IN cl_qlist_t* const p_list );
+
+uint32_t
+endpt_cm_recv_mgr_build_pkt_array(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_endpt_t* const p_endpt,
+ IN cl_qlist_t* const p_done_list,
+ IN OUT uint32_t* p_bytes_recv );
+
+ib_api_status_t
+endpt_cm_post_recv(
+ IN ipoib_port_t* const p_port );
+
+void
+endpt_cm_destroy_conn(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_endpt_t* const p_endpt );
+
+void
+endpt_cm_disconnect(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_endpt_t* const p_endpt );
+void
+endpt_cm_flush_recv(
+ IN ipoib_port_t* const p_port,
+ IN ipoib_endpt_t* const p_endpt );
+
+ib_api_status_t
+ipoib_recv_dhcp(
+ IN ipoib_port_t* const p_port,
+ IN const ipoib_pkt_t* const p_ipoib,
+ OUT eth_pkt_t* const p_eth,
+ IN ipoib_endpt_t* const p_src,
+ IN ipoib_endpt_t* const p_dst );
+
+void
+ipoib_port_cancel_xmit(
+ IN ipoib_port_t* const p_port,
+ IN PVOID cancel_id );
+
#endif /* _IPOIB_PORT_H_ */
Index: ulp/ipoib/kernel/ipoib_xfr_mgr.h
===================================================================
--- ulp/ipoib/kernel/ipoib_xfr_mgr.h (revision 1776)
+++ ulp/ipoib/kernel/ipoib_xfr_mgr.h (working copy)
@@ -97,7 +97,7 @@
ipoib_addr_get_flags(
IN const ipoib_hw_addr_t* const p_addr )
{
- return (uint8_t)(cl_ntoh32( p_addr->flags_qpn ) >> 24);
+ return (uint8_t)( p_addr->flags_qpn & 0x000000ff);
}
static inline void
@@ -105,15 +105,15 @@
IN ipoib_hw_addr_t* const p_addr,
IN const uint8_t flags )
{
- p_addr->flags_qpn &= cl_ntoh32( 0xFFFFFF00 );
- p_addr->flags_qpn |= cl_ntoh32( flags );
+ p_addr->flags_qpn &= ( 0xFFFFFF00 );
+ p_addr->flags_qpn |= ( flags );
}
static inline net32_t
ipoib_addr_get_qpn(
IN const ipoib_hw_addr_t* const p_addr )
{
- return cl_ntoh32( cl_ntoh32( p_addr->flags_qpn ) >> 8 );
+ return( ( p_addr->flags_qpn ) & 0xffffff00 );
}
static inline void
@@ -121,10 +121,19 @@
IN ipoib_hw_addr_t* const p_addr,
IN const net32_t qpn )
{
- p_addr->flags_qpn = cl_ntoh32( (cl_ntoh32(
- p_addr->flags_qpn ) & 0x000000FF ) | (cl_ntoh32( qpn ) << 8) );
+ p_addr->flags_qpn &= ( 0x000000FF );
+ p_addr->flags_qpn |= qpn ;
}
+/*
+* Compose a 64-bit service ID: the QPN is placed in the upper 32 bits and
+* IPOIB_CM_FLAG_SVCID is OR-ed into the result.
+*/
+static inline void
+ipoib_addr_set_sid(
+	IN				net64_t* const				p_sid,
+	IN		const	net32_t						qpn )
+{
+	*p_sid = ( ( (net64_t)qpn ) << 32 ) | IPOIB_CM_FLAG_SVCID;
+}
/****f* IPOIB/ipoib_mac_from_sst_guid
* NAME
Index: ulp/ipoib/kernel/netipoib-xp32.inf
===================================================================
--- ulp/ipoib/kernel/netipoib-xp32.inf (revision 1776)
+++ ulp/ipoib/kernel/netipoib-xp32.inf (working copy)
@@ -138,7 +138,7 @@
HKR, Ndi\Params\PayloadMtu, Type, 0, "dword"
HKR, Ndi\Params\PayloadMtu, Default, 0, "2044"
HKR, Ndi\Params\PayloadMtu, Min, 0, "512"
-HKR, Ndi\Params\PayloadMtu, Max, 0, "4092"
+HKR, Ndi\Params\PayloadMtu, Max, 0, "65520"
HKR, Ndi\Params\MCLeaveRescan, ParamDesc, 0, %MC_RESCAN_STR%
HKR, Ndi\Params\MCLeaveRescan, Type, 0, "dword"
@@ -161,6 +161,12 @@
HKR, Ndi\Params\BCJoinRetry, Min, 0, "0"
HKR, Ndi\Params\BCJoinRetry, Max, 0, "1000"
+HKR, Ndi\Params\CmEnabled, ParamDesc, 0, %CONNECTED_MODE_STR%
+HKR, Ndi\Params\CmEnabled, Type, 0, "enum"
+HKR, Ndi\Params\CmEnabled, Default, 0, "0"
+HKR, Ndi\Params\CmEnabled, Optional, 0, "0"
+HKR, Ndi\Params\CmEnabled\enum, "0", 0, %DISABLED_STR%
+HKR, Ndi\Params\CmEnabled\enum, "1", 0, %ENABLED_STR%
[IpoibService]
DisplayName = %IpoibServiceDispName%
@@ -261,4 +267,5 @@
ENABLED_IF_STR = "Enabled (if supported by HW)"
ENABLED_STR = "Enabled"
DISABLED_STR = "Disabled"
-BYPASS_STR = "Bypass"
\ No newline at end of file
+BYPASS_STR = "Bypass"
+CONNECTED_MODE_STR = "Connected mode"
Index: ulp/ipoib/kernel/netipoib.inx
===================================================================
--- ulp/ipoib/kernel/netipoib.inx (revision 1776)
+++ ulp/ipoib/kernel/netipoib.inx (working copy)
@@ -141,7 +141,7 @@
HKR, Ndi\Params\PayloadMtu, Type, 0, "dword"
HKR, Ndi\Params\PayloadMtu, Default, 0, "2044"
HKR, Ndi\Params\PayloadMtu, Min, 0, "512"
-HKR, Ndi\Params\PayloadMtu, Max, 0, "4092"
+HKR, Ndi\Params\PayloadMtu, Max, 0, "65520"
HKR, Ndi\Params\MCLeaveRescan, ParamDesc, 0, %MC_RESCAN_STR%
HKR, Ndi\Params\MCLeaveRescan, Type, 0, "dword"
@@ -164,6 +164,12 @@
HKR, Ndi\Params\BCJoinRetry, Min, 0, "0"
HKR, Ndi\Params\BCJoinRetry, Max, 0, "1000"
+HKR, Ndi\Params\CmEnabled, ParamDesc, 0, %CONNECTED_MODE_STR%
+HKR, Ndi\Params\CmEnabled, Type, 0, "enum"
+HKR, Ndi\Params\CmEnabled, Default, 0, "0"
+HKR, Ndi\Params\CmEnabled, Optional, 0, "0"
+HKR, Ndi\Params\CmEnabled\enum, "0", 0, %DISABLED_STR%
+HKR, Ndi\Params\CmEnabled\enum, "1", 0, %ENABLED_STR%
[IpoibService]
DisplayName = %IpoibServiceDispName%
@@ -268,4 +274,5 @@
ENABLED_IF_STR = "Enabled (if supported by HW)"
ENABLED_STR = "Enabled"
DISABLED_STR = "Disabled"
-BYPASS_STR = "Bypass"
\ No newline at end of file
+BYPASS_STR = "Bypass"
+CONNECTED_MODE_STR = "Connected mode"
\ No newline at end of file
Index: ulp/ipoib/kernel/SOURCES
===================================================================
--- ulp/ipoib/kernel/SOURCES (revision 1776)
+++ ulp/ipoib/kernel/SOURCES (working copy)
@@ -23,7 +23,8 @@
ipoib_adapter.c \
ipoib_endpoint.c \
ipoib_port.c \
- ipoib_ibat.c
+ ipoib_ibat.c \
+ ipoib_cm.c
INCLUDES=..;..\..\..\inc;..\..\..\inc\kernel;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20081208/e15b4a24/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ipoib_cm_trunk.diff
Type: application/octet-stream
Size: 163185 bytes
Desc: ipoib_cm_trunk.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20081208/e15b4a24/attachment.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ip_packet.diff
Type: application/octet-stream
Size: 1196 bytes
Desc: ip_packet.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20081208/e15b4a24/attachment-0001.obj>
More information about the ofw
mailing list