[ofw] Patch for librdmacm incl. RSockets functionality

Schmitt, Hubert Hubert.Schmitt at oce.com
Thu Jan 10 01:51:30 PST 2013


Hi everyone,
 
Attached please find a patch for review and discussion (based on rev. 3419 of https://beany.openfabrics.org/svnrepo/ofw/gen1/trunk/) which contains a port of Sean Hefty's RSockets protocol to Windows. Many thanks to Sean at this point for his cooperation and support.
 
What I've done is merge the librdmacm sources from the current Linux OFED with the respective OFW sources, resulting in a librdmacm.dll that acts as a Winsock base transport provider (similar to the ulp/wsd component). In contrast to WSD or ND, the RSockets provider allows socket-based RDMA communication not only between Windows hosts, but also between Windows and Linux.
 
For further comments and documentation please look at ulp\librdmacm\RSocket.txt.
 
This patch is contributed in the name and on behalf of Océ Printing Systems GmbH. Use of the patch is governed in principle by the terms of the BSD license as noted in ulp\librdmacm\RSocket.txt.
 
Thanks and Regards,
Hubert
 
===================================================================

 
Index: etc/user/gtod.c
===================================================================
--- etc/user/gtod.c (revision 3419)
+++ etc/user/gtod.c (working copy)
@@ -57,11 +57,12 @@
  ptv->tv_usec = (long) (ll - ((LONGLONG)(ptv->tv_sec) * 10000000)) / 10;
 }
 
-
-// static __inline
-int gettimeofday(struct timeval *ptv, void *ignored)
+static /*__inline*/ int gettimeofday(struct timeval *ptv, void *ignored)
 {
  static int QueryCounter = 2;
+ static LARGE_INTEGER Frequency = {10000000,0}; /* prevent division by 0 */
+ static LARGE_INTEGER Offset; /* counter offset for right time*/
+ static LARGE_INTEGER LastCounter;
  FILETIME CurrentTime;
  UNREFERENCED_PARAMETER(ignored);     
 
@@ -69,9 +70,6 @@
 
  if(QueryCounter)
  {
-  static LARGE_INTEGER Frequency;
-  static LARGE_INTEGER Offset; /* counter offset for right time*/
-  static LARGE_INTEGER LastCounter;
   LARGE_INTEGER Time;
   LARGE_INTEGER Counter;
  
@@ -80,13 +78,15 @@
  
   if(QueryCounter == 2)
   {
-   QueryCounter = 1;
-   if(!QueryPerformanceFrequency(&Frequency))
+   if(QueryPerformanceFrequency(&Frequency))
    {
+    QueryCounter = 1; // But now the Frequency is valid!
+   }
+   else
+   {
     QueryCounter = 0;
     Frequency.QuadPart = 10000000; /* prevent division by 0 */
    }
- 
    /* get time as a large integer */
    Counter.HighPart &= 0x7FL; /* Clear high bits to prevent overflow */
    Offset.LowPart = CurrentTime.dwLowDateTime;
Index: inc/user/linux/_fcntl.h
===================================================================
--- inc/user/linux/_fcntl.h (revision 0)
+++ inc/user/linux/_fcntl.h (working copy)
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+
+#ifndef __FCNTL_H_
+#define __FCNTL_H_
+
+#include <fcntl.h>
+
+#define F_GETFL         3       /* get file->f_flags */
+#define F_SETFL         4       /* set file->f_flags */
+
+#endif /* __FCNTL_H_ */
Index: tools/dirs
===================================================================
--- tools/dirs (revision 3419)
+++ tools/dirs (working copy)
@@ -6,4 +6,5 @@
  part_man \
  infiniband-diags \
  qlgcvnic_config \
- ndinstall
+ ndinstall \
+ rsinstall
Index: tools/rsinstall/dirs
===================================================================
--- tools/rsinstall/dirs (revision 0)
+++ tools/rsinstall/dirs (working copy)
@@ -0,0 +1,2 @@
+DIRS=\
+ user
Index: tools/rsinstall/user/makefile
===================================================================
--- tools/rsinstall/user/makefile (revision 0)
+++ tools/rsinstall/user/makefile (working copy)
@@ -0,0 +1,8 @@
+#
+# DO NOT EDIT THIS FILE!!!  Edit .\sources. if you want to add a new source
+# file to this component.  This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+MINIMUM_NT_TARGET_VERSION=0x502
+
+!INCLUDE ..\..\..\inc\openib.def
Index: tools/rsinstall/user/rsinstall.c
===================================================================
--- tools/rsinstall/user/rsinstall.c (revision 0)
+++ tools/rsinstall/user/rsinstall.c (working copy)
@@ -0,0 +1,724 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+
+/*
+ * Module Name: rsinstall.c
+ * Description: This module installs/removes a Winsock service provider for InfiniBand.
+ * Usage:
+ * To install the service provider
+ *  rsinstall -i
+ * To remove the service provider
+ *  rsinstall -r
+ */
+
+#include "rdma/rwinsock.h"
+#include <ws2spi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Initialize the LSP's provider path for the InfiniBand Service Provider dll */
+static const WCHAR provider_path[] = L"%SYSTEMROOT%\\system32\\librdmacm.dll";
+static const WCHAR provider_prefix[] =L" RSockets for InfiniBand"; //includes one whitespace
+static const char provider_name[] = VER_PROVIDER ;//L"%VER_PROVIDER% RSockets for InfiniBand"; //(VER_PROVIDER ## WINDIR);
+static const char openib_key_name[] = IB_COMPANYNAME;
+
+#ifdef PERFMON_ENABLED
+#include "LoadPerf.h"
+#include "rdma/rs_regpath.h"
+
+typedef struct _pm_symbol_def
+{
+ DWORD name_def; /* numeric value emitted in the generated "#define" line */
+ CHAR name_str[40]; /* symbol name used in the generated header and .ini file */
+ CHAR name_desc[40]; /* text written as the symbol's _NAME entry in the .ini file */
+ CHAR help_desc[256]; /* text written as the symbol's _HELP entry in the .ini file */
+
+} pm_symbol_def_t;
+
+static pm_symbol_def_t  _pm_symbols[]= /* entry 0 is also written to the [objects] section of the .ini file */
+{
+ { RS_PM_OBJ,
+ "RS_PM_OBJ",
+ "IB RSockets",
+ "InfiniBand RSockets Provider."
+ },
+ { RS_PM_COUNTER(BYTES_SEND),
+ "RS_PM_BYTES_TX_SEC",
+ "Send bytes/sec",
+ "Send bytes/second, excluding RDMA Write."
+ },
+ { RS_PM_COUNTER(BYTES_RECV),
+ "RS_PM_BYTES_RX_SEC",
+ "Recv bytes/sec",
+ "Receive bytes/second, excluding RDMA Read."
+ },
+ { RS_PM_COUNTER(BYTES_WRITE),
+ "RS_PM_RDMA_WR_SEC",
+ "RDMA Write bytes/sec",
+ "RDMA Write bytes/second."
+ },
+ { RS_PM_COUNTER(BYTES_READ),
+ "RS_PM_RDMA_RD_SEC",
+ "RDMA Read bytes/sec",
+ "RDMA Read bytes/second."
+ },
+ { RS_PM_COUNTER(BYTES_TOTAL),
+ "RS_PM_BYTES_SEC",
+ "Total bytes/sec",
+ "Total bytes transmitted per second, including send, "
+ "receive, RDMA Write, and RDMA Read."
+ },
+ { RS_PM_COUNTER(COMP_SEND),
+ "RS_PM_SEND_COMPLETIONS_SEC",
+ "Send Completions/sec",
+ "Send and RDMA Write Completions/sec."
+ },
+ { RS_PM_COUNTER(COMP_RECV),
+ "RS_PM_RECV_COMPLETIONS_SEC",
+ "Recv Completions/sec",
+ "Recv and RDMA Read Completions/sec."
+ },
+ { RS_PM_COUNTER(COMP_TOTAL),
+ "RS_PM_COMPLETIONS_SEC",
+ "Total Completions/sec",
+ "Total Completions processed per second."
+ },
+ { RS_PM_COUNTER(INTR_TOTAL),
+ "RS_PM_COMPLETIONS_INTR",
+ "Total Interrupts/sec",
+ "Completion Queue events per second."
+ }
+};
+
+#define RS_PM_NUM_SYMBOLS  (sizeof(_pm_symbols)/sizeof(pm_symbol_def_t))
+#define RS_PM_LANGUAGE "009" /* good for English */
+
+/* Builds "<header><temp dir><file>" in a zeroed process-heap buffer; returns
+ * NULL on failure.  The caller releases the result with HeapFree(). */
+static CHAR *
+_RSGenerateFileName(char *header, char *file )
+{
+ DWORD size1, size;
+ CHAR *full_file_name;
+ int header_len = header == NULL ? 0 : strlen(header);
+ /* a zero-length query yields the buffer size needed, including the NUL */
+ size = GetTempPath(0, NULL);
+ if (size == 0)
+ {
+  fprintf( stderr, "GetTempPath  failed\n" );
+  return NULL;
+ }
+ size1 = size + strlen(file) + header_len;
+ full_file_name = HeapAlloc (GetProcessHeap (), HEAP_ZERO_MEMORY, size1);
+ if ( full_file_name == NULL )
+ {
+  fprintf( stderr, "HeapAlloc failed\n" );
+  return NULL;
+ }
+ size1 = GetTempPath(size, full_file_name + header_len);
+ if (size != size1 + 1)
+ {
+  fprintf( stderr, "Very strange, GetTempPath  returned something different\n" );
+  HeapFree (GetProcessHeap (), 0, full_file_name);
+  return NULL;
+ }
+ if (header_len != 0)
+  memcpy(full_file_name, header, header_len);
+ strcat(full_file_name, file);
+ return full_file_name;
+}
+
+
+/* Writes the perfmon symbol header and the .ini file describing _pm_symbols
+ * into the temp directory (names come from _RSGenerateFileName).  Returns
+ * ERROR_SUCCESS, or a Win32 error code if a name or file cannot be created. */
+static DWORD
+_RSPerfmonIniFilesGenerate( void )
+{
+ FILE *f_handle;
+ DWORD num;
+ DWORD ret = ERROR_SUCCESS;
+ char *ibsp_pm_sym_file = NULL;
+ char *ibsp_pm_ini_file = NULL;
+
+ /* create ".h" file first */
+ ibsp_pm_sym_file = _RSGenerateFileName(NULL, RS_PM_SYM_H_FILE);
+ if( !ibsp_pm_sym_file )
+ {
+  fprintf( stderr, "_RSGenerateFileName  failed\n" );
+  ret = ERROR_NOT_ENOUGH_MEMORY;
+  goto Cleanup;
+ }
+
+ f_handle = fopen( ibsp_pm_sym_file, "w+" );
+ if( !f_handle )
+ {
+  fprintf( stderr, "Create Header file %s failed\n", ibsp_pm_sym_file );
+  ret = ERROR_FILE_INVALID;
+  goto Cleanup;
+ }
+
+ fprintf(
+  f_handle, "/* %s Generated by program */ \r\n", ibsp_pm_sym_file );
+ for( num = 0; num < RS_PM_NUM_SYMBOLS; num++ )
+ {
+  fprintf( f_handle, "#define\t%s\t%d\r\n",
+   _pm_symbols[num].name_str, _pm_symbols[num].name_def );
+ }
+
+ fflush( f_handle );
+ fclose( f_handle );
+
+ /* create 'ini' file next */
+ ibsp_pm_ini_file = _RSGenerateFileName(NULL, RS_PM_INI_FILE);
+ if( !ibsp_pm_ini_file )
+ {
+  fprintf( stderr, "_RSGenerateFileName  failed\n" );
+  ret = ERROR_NOT_ENOUGH_MEMORY;
+  goto Cleanup;
+ }
+ f_handle = fopen( ibsp_pm_ini_file, "w+" );
+
+ if( !f_handle )
+ {
+  fprintf( stderr, "Create INI file %s  failed\n", ibsp_pm_ini_file );
+  ret = ERROR_FILE_INVALID;
+  goto Cleanup;
+ }
+ 
+ fprintf( f_handle, "[info]\r\ndrivername=" RS_PM_SUBKEY_NAME
+  "\r\nsymbolfile=%s\r\n\r\n", ibsp_pm_sym_file );
+ fprintf( f_handle,"[languages]\r\n" RS_PM_LANGUAGE
+  "=language" RS_PM_LANGUAGE "\r\n\r\n" );
+
+ fprintf( f_handle, 
+  "[objects]\r\n%s_" RS_PM_LANGUAGE "_NAME=%s\r\n\r\n[text]\r\n",
+  _pm_symbols[0].name_str, _pm_symbols[0].name_desc );
+ 
+ for( num = 0; num < RS_PM_NUM_SYMBOLS; num++ )
+ {
+  fprintf( f_handle,"%s_" RS_PM_LANGUAGE "_NAME=%s\r\n",
+   _pm_symbols[num].name_str, _pm_symbols[num].name_desc );
+  fprintf( f_handle,"%s_" RS_PM_LANGUAGE "_HELP=%s\r\n",
+   _pm_symbols[num].name_str, _pm_symbols[num].help_desc );
+ }
+
+ fflush( f_handle );
+ fclose( f_handle );
+
+Cleanup:
+ if ( ibsp_pm_sym_file )
+ {
+  HeapFree (GetProcessHeap (), 0, ibsp_pm_sym_file);
+ }
+ if ( ibsp_pm_ini_file )
+ {
+  HeapFree (GetProcessHeap (), 0, ibsp_pm_ini_file);
+ }
+ return ret;
+}
+
+
+/* Deletes the perfmon symbol header and .ini file from the temp directory.
+ * Failures are only reported on stderr; nothing is returned to the caller. */
+static void
+_RSPerfmonIniFilesRemove( void )
+{
+ char *ibsp_pm_sym_file = NULL;
+ char *ibsp_pm_ini_file = NULL;
+
+ ibsp_pm_sym_file = _RSGenerateFileName(NULL, RS_PM_SYM_H_FILE);
+ if( !ibsp_pm_sym_file )
+ {
+  fprintf( stderr, "_RSGenerateFileName  failed\n" );
+  goto Cleanup;
+ }
+ ibsp_pm_ini_file = _RSGenerateFileName(NULL, RS_PM_INI_FILE);
+ if( !ibsp_pm_ini_file )
+ {
+  fprintf( stderr, "_RSGenerateFileName  failed\n" );
+  goto Cleanup;
+ }
+
+ if( !DeleteFile( ibsp_pm_ini_file ) )
+ {
+  fprintf( stderr, "Delete file %s failed status %d\n",
+   ibsp_pm_ini_file, GetLastError() );
+ }
+ if( !DeleteFile( ibsp_pm_sym_file ) )
+ {
+  fprintf( stderr,"Delete file %s failed status %d\n",
+   ibsp_pm_sym_file, GetLastError() );
+ }
+
+Cleanup:
+ if ( ibsp_pm_sym_file )
+ {
+  HeapFree (GetProcessHeap (), 0, ibsp_pm_sym_file);
+ }
+ if ( ibsp_pm_ini_file )
+ {
+  HeapFree (GetProcessHeap (), 0, ibsp_pm_ini_file);
+ }
+}
+
+
+/* Creates the perfmon and event-log registry keys; returns the RegCreateKeyEx status */
+static LONG
+_RSPerfmonRegisterKeys( void )
+{
+ LONG reg_status;
+ HKEY pm_hkey;
+ DWORD typesSupp = 7;
+
+ reg_status = RegCreateKeyEx( HKEY_LOCAL_MACHINE,
+  RS_PM_REGISTRY_PATH RS_PM_SUBKEY_PERF, 0, NULL,
+  REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, &pm_hkey, NULL );
+
+ if( reg_status != ERROR_SUCCESS )
+ {
+  fprintf( stderr,
+   "_RSPerfmonRegisterKeys Create Key %s failed with %d\n",
+   RS_PM_REGISTRY_PATH RS_PM_SUBKEY_PERF, reg_status );
+  return reg_status;
+ }
+
+ /* create/assign values to the key; the RegSetValueEx results are not checked */
+ RegSetValueExW( pm_hkey, L"Library", 0, REG_EXPAND_SZ,
+  (LPBYTE)provider_path, sizeof(provider_path) );
+
+ RegSetValueEx( pm_hkey, TEXT("Open"), 0, REG_SZ,
+  (LPBYTE)TEXT("RSPmOpen"), sizeof(TEXT("RSPmOpen")) );
+
+ RegSetValueEx( pm_hkey, TEXT("Collect"), 0, REG_SZ,
+  (LPBYTE)TEXT("RSPmCollectData"), sizeof(TEXT("RSPmCollectData")) );
+
+ RegSetValueEx( pm_hkey, TEXT("Close"), 0, REG_SZ,
+  (LPBYTE)TEXT("RSPmClose"), sizeof(TEXT("RSPmClose")) );
+
+ RegFlushKey( pm_hkey );
+ RegCloseKey( pm_hkey );
+
+ reg_status = RegCreateKeyEx( HKEY_LOCAL_MACHINE,
+  RS_PM_EVENTLOG_PATH, 0, NULL,
+  REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, &pm_hkey, NULL );
+
+ if( reg_status != ERROR_SUCCESS )
+ {
+  fprintf(stderr, "Create EventLog Key failed with %d\n", reg_status );
+  return reg_status;
+ }
+
+ /* create/assign values to the event-log key; results are not checked either */
+ RegSetValueExW( pm_hkey, L"EventMessageFile", 0, REG_EXPAND_SZ,\
+  (LPBYTE)provider_path, sizeof(provider_path) );
+
+ RegSetValueEx( pm_hkey, TEXT("TypesSupported"), 0, REG_DWORD,
+  (LPBYTE)&typesSupp, sizeof(typesSupp) );
+
+ RegFlushKey( pm_hkey );
+ RegCloseKey( pm_hkey );
+
+ return reg_status;
+}
+
+
+/* Try to destroy Performance Register Keys.  Every key is attempted even if
+ * an earlier delete fails; the status of the last RegDeleteKeyEx is returned.
+ * Registry calls return their error code directly, so that is what is logged. */
+static LONG
+_RSPerfmonDeregisterKeys( void )
+{
+ LONG reg_status;
+
+ /* NOTE(review): both WOW64 view flags are passed at once - confirm intent */
+ reg_status = RegDeleteKeyEx( HKEY_LOCAL_MACHINE,
+  RS_PM_REGISTRY_PATH RS_PM_SUBKEY_PERF,
+  (KEY_WOW64_32KEY | KEY_WOW64_64KEY), 0 );
+ if( reg_status != ERROR_SUCCESS )
+ {
+  fprintf( stderr,
+   "_RSPerfmonDeregisterKeys Remove SubKey failed with %ld\n",
+   reg_status );
+ }
+
+ reg_status = RegDeleteKeyEx( HKEY_LOCAL_MACHINE,
+  RS_PM_REGISTRY_PATH, (KEY_WOW64_32KEY | KEY_WOW64_64KEY), 0 );
+ if( reg_status != ERROR_SUCCESS )
+ {
+  fprintf( stderr,
+   "_RSPerfmonDeregisterKeys Remove SubKey failed with %ld\n",
+   reg_status );
+ }
+
+ reg_status = RegDeleteKeyEx( HKEY_LOCAL_MACHINE,
+  RS_PM_EVENTLOG_PATH, (KEY_WOW64_32KEY | KEY_WOW64_64KEY), 0 );
+ if( reg_status != ERROR_SUCCESS )
+ {
+  fprintf( stderr,
+   "_RSPerfmonDeregisterKeys Remove SubKey failed with %ld\n",
+   reg_status );
+ }
+
+ return reg_status;
+}
+
+
+/*
+ * Tries to register the performance counter definitions with the PerfMon
+ * application by loading the generated .ini file (the API is the one the
+ * lodctr.exe utility calls).  Returns ERROR_SUCCESS or the failing status.
+ */
+static DWORD
+_RSPerfmonRegisterCounters( void )
+{
+ DWORD status;
+ char *ibsp_pm_ini_file = NULL;
+
+ ibsp_pm_ini_file = _RSGenerateFileName("unused ", RS_PM_INI_FILE);
+ if( !ibsp_pm_ini_file )
+ {
+  fprintf( stderr, "_RSGenerateFileName  failed\n" );
+  status = ERROR_NOT_ENOUGH_MEMORY;
+  goto Cleanup;
+ }
+
+ /*
+  * format commandline string, as per SDK :
+  * Pointer to a null-terminated string that consists of one or more
+  * arbitrary letters, a space, and then the name of the initialization
+  * file.
+  */
+ status = LoadPerfCounterTextStrings( ibsp_pm_ini_file, TRUE );
+ if( status != ERROR_SUCCESS )
+ {
+  /* the call returns its error code; GetLastError() need not match it */
+  fprintf( stderr,
+   "IBSPPerfmonRegisterCounters install failed status %d\n", status );
+ }
+Cleanup:
+ if ( ibsp_pm_ini_file )
+ {
+  HeapFree (GetProcessHeap (), 0, ibsp_pm_ini_file);
+ }
+
+ return status;
+}
+
+
+/*
+ * Tries to unregister the performance counter definitions from the
+ * PerfMon application; returns the UnloadPerfCounterTextStrings status.
+ * API externally called by lodctr.exe/unlodctr.exe utilities.
+ */
+static DWORD
+_RSPerfmonDeregisterCounters( void )
+{
+ DWORD status;
+
+ /*
+  * format commandline string, as per SDK :
+  * Pointer to a null-terminated string that consists of one or more
+  * arbitrary letters, a space, and then the name of the application
+  * (here "unused " followed by RS_PM_SUBKEY_NAME).
+  */
+ status = UnloadPerfCounterTextStrings(
+  TEXT("unused ") TEXT(RS_PM_SUBKEY_NAME), TRUE );
+ if( status != ERROR_SUCCESS )
+ {
+  fprintf( stderr,
+   "IBSPPerfmonDeregisterCounters remove failed status %d\n",
+   status );
+ }
+ return status;
+}
+
+#endif /* PERFMON_ENABLED */
+
+
+/*
+ * Function: usage
+ *   Description: Prints the command line synopsis and option list to stdout.
+ */
+static void
+usage (char *progname)
+{
+ printf ("usage: %s [-i [path] | -r [name] | -l]\n", progname);
+ printf ("    -i [path]  Install the service provider,\n"
+   "               optionally specify full pathname of DLL\n"
+   "    -r         Remove the %s service provider\n"
+   "    -r <name>  Remove the specified service provider\n"
+   "    -l         List service providers\n",VER_PROVIDER);
+}
+
+
+/* Function: print_providers
+ *   Description: prints the catalog entry id and protocol name of every
+ *     entry in the Winsock catalog; errors are reported with printf.
+*/
+static void print_providers(void)
+{
+ WSAPROTOCOL_INFOW *protocol_info;
+ unsigned int protocol_count;
+ unsigned int i;
+ DWORD protocol_size;
+ INT err_no;
+ int rc;
+
+ /* Find the size of the buffer: WSAENOBUFS is the tolerated outcome here */
+ protocol_size = 0;
+ rc = WSCEnumProtocols (NULL, NULL, &protocol_size, &err_no);
+ if (rc == SOCKET_ERROR && err_no != WSAENOBUFS) {
+  printf("WSCEnumProtocols() returned error (%d)\n", err_no);
+  return;
+ }
+
+ /* Allocate the buffer */
+ protocol_info = HeapAlloc (GetProcessHeap (), HEAP_ZERO_MEMORY, protocol_size);
+ if (protocol_info == NULL) {
+  printf("HeapAlloc() failed\n");
+  return;
+ }
+
+ /* Enumerate the catalog for real */
+ rc = WSCEnumProtocols (NULL, protocol_info, &protocol_size, &err_no);
+ if (rc == SOCKET_ERROR) {
+  printf("WSCEnumProtocols returned error for real enumeration (%d)\n",
+    err_no);
+  HeapFree (GetProcessHeap (), 0, protocol_info);
+  return;
+ }
+
+ protocol_count = rc; /* the non-error return value is used as the entry count */
+
+ for (i = 0; i < protocol_count; i++) {
+  printf ("%010d - %S\n", protocol_info[i].dwCatalogEntryId,
+    protocol_info[i].szProtocol);
+ }
+
+ HeapFree (GetProcessHeap (), 0, protocol_info);
+
+ return;
+}
+
+/*
+ * Function: install_provider
+ *   Description: installs the service provider as a base provider for
+ *   AF_INET / SOCK_STREAM / IPPROTO_TCP, using the DLL at szProviderPath.
+ *   Nothing is installed if the provider name cannot be converted to WCHAR.
+ */
+static void install_provider(LPWSTR szProviderPath)
+{
+ int rc;
+ INT err_no;
+ WSAPROTOCOL_INFOW provider;
+ size_t converted;
+ size_t st_len;
+
+ ZeroMemory(&provider, sizeof(provider));
+
+ /* Setup the values in PROTOCOL_INFO */
+ provider.dwServiceFlags1 = 
+  XP1_GUARANTEED_DELIVERY | 
+  XP1_GUARANTEED_ORDER | 
+  XP1_MESSAGE_ORIENTED |
+  XP1_GRACEFUL_CLOSE;
+ provider.dwServiceFlags2 = 0; /* Reserved */
+ provider.dwServiceFlags3 = 0; /* Reserved */
+ provider.dwServiceFlags4 = 0; /* Reserved */
+// SAN provider only: provider.dwProviderFlags = PFL_HIDDEN;
+ provider.ProviderId = rsProviderGuid; /* Service Provider ID provided by vendor. */
+ provider.dwCatalogEntryId = 0;
+ provider.ProtocolChain.ChainLen = 1; /* Base Protocol Service Provider */
+ provider.iVersion = 2; /* don't know what it is */
+ provider.iAddressFamily = AF_INET;
+ provider.iMaxSockAddr = 16;
+ provider.iMinSockAddr = 16;
+ provider.iSocketType = SOCK_STREAM;
+ provider.iProtocol = IPPROTO_TCP;
+ provider.iProtocolMaxOffset = 0;
+ provider.iNetworkByteOrder = BIGENDIAN;
+ provider.iSecurityScheme = SECURITY_PROTOCOL_NONE;
+ provider.dwMessageSize = 0xFFFFFFFF; /* IB supports 32-bit lengths for data transfers on RC */
+ provider.dwProviderReserved = 0;
+
+ st_len = strlen(provider_name);
+ converted = mbstowcs(provider.szProtocol, provider_name, st_len); //do not count \0
+ if (converted != st_len) {
+  /* a partial name would end up in the Winsock catalog - give up instead */
+  printf("<install_provider> Can't convert string %s to WCHAR\n",provider_name);
+  printf("Converted %d from %d\n", (int)converted, (int)st_len);
+  return;
+ }
+ wcscpy( provider.szProtocol + st_len, provider_prefix);
+ wprintf(L"Provider protocol = %s\n", provider.szProtocol);
+ wprintf(L"Provider path     = %s\n", szProviderPath);
+
+ rc = WSCInstallProvider(
+  (LPGUID)&rsProviderGuid, szProviderPath, &provider, 1, &err_no );
+ if( rc == SOCKET_ERROR )
+ {
+  if( err_no == WSANO_RECOVERY )
+   printf("The provider is already installed\n");
+  else
+   printf("install_provider: WSCInstallProvider failed: %d\n", err_no);
+ }
+}
+
+/*
+ * Function: remove_provider
+ *   Description: removes our provider.  provider_name is not used: the
+ *   provider is always identified by rsProviderGuid.
+ */
+static void remove_provider( const char* const provider_name )
+{
+ int rc;
+ int err_no;
+
+ (void)provider_name; /* kept for the callers' sake, see above */
+ /* Remove from the catalog */
+ rc = WSCDeinstallProvider((LPGUID)&rsProviderGuid, &err_no);
+ if (rc == SOCKET_ERROR) {
+  printf ("WSCDeinstallProvider failed: %d\n", err_no);
+ }
+
+#if 0 //def _WIN64
+ /* Remove from the 32-bit catalog too! */
+ rc = WSCDeinstallProvider32((LPGUID)&rsProviderGuid, &err_no);
+ if (rc == SOCKET_ERROR) {
+  printf ("WSCDeinstallProvider32 failed: %d\n", err_no);
+ }
+#endif /* _WIN64 */
+}
+
+/* Function: main
+ *
+ *  Description:
+ *    Parse the command line arguments and call the install, remove or list
+ *    routine.  Winsock is started first and cleaned up again on every path
+ *    that leaves after a successful WSAStartup.
+ *    Returns -1 on Winsock startup or argument-format errors, 0 otherwise.
+ */
+int __cdecl main (int argc, char *argv[])
+{
+ WSADATA wsd;
+ size_t  rc, st_len;
+ WCHAR   arg_provider_path[MAX_PATH];
+
+ /* Load Winsock */
+ if (WSAStartup (MAKEWORD (2, 2), &wsd) != 0) {
+  printf ("InstallSP: Unable to load Winsock: %d\n", GetLastError ());
+  return -1;
+ }
+
+ /* Confirm that the WinSock DLL supports 2.2. Note that if the
+  * DLL supports versions greater than 2.2 in addition to 2.2, it
+  * will still return 2.2 in wVersion since that is the version we
+  * requested. */
+ if (LOBYTE (wsd.wVersion) != 2 || HIBYTE (wsd.wVersion) != 2) {
+
+  /* Tell the user that we could not find a usable WinSock DLL. */
+  WSACleanup ();
+  printf
+   ("InstallSP: Unable to find a usable version of Winsock DLL\n");
+  return -1;
+ }
+ /* An option is exactly two characters: '-' or '/' followed by a letter. */
+ if (argc < 2 || strlen (argv[1]) != 2
+  || (argv[1][0] != '-' && argv[1][0] != '/')) {
+  usage (argv[0]);
+  WSACleanup ();
+  return -1;
+ }
+ switch (tolower ((unsigned char) argv[1][1])) {
+
+ case 'i':
+  /* Install the Infiniband Service Provider */
+  if( argc == 2 ) {
+   wcscpy_s(arg_provider_path, sizeof(arg_provider_path)/2, provider_path);
+  }
+  else {
+   st_len = strlen( argv[2] );
+   if (st_len >= sizeof(arg_provider_path)/2)
+    st_len = sizeof(arg_provider_path)/2 - 1;
+   rc = mbstowcs(arg_provider_path, argv[2], st_len); //do not count \0
+   arg_provider_path[st_len] = '\0';
+   // We can't use there mbstowcs_s 
+   if (rc != st_len) {
+    printf("Can't convert string \"%s\" to WCHAR\n", argv[2]);
+    printf("Converted %d from %d\n", (int)rc, (int)st_len);
+    wcscpy_s(arg_provider_path, sizeof(arg_provider_path)/2, provider_path);
+   }
+  }
+  install_provider( arg_provider_path );
+
+#ifdef PERFMON_ENABLED
+  _RSPerfmonIniFilesGenerate();
+  if ( _RSPerfmonRegisterKeys() == ERROR_SUCCESS )
+    _RSPerfmonRegisterCounters();
+#endif
+  break;
+
+ case 'r':
+  /* Remove the service provider */
+  if( argc == 2 )
+   remove_provider( openib_key_name );
+  else
+   remove_provider( argv[2] );
+#ifdef PERFMON_ENABLED
+  _RSPerfmonIniFilesRemove();
+  if ( _RSPerfmonDeregisterCounters() == ERROR_SUCCESS )
+   _RSPerfmonDeregisterKeys();
+#endif
+  break;
+
+ case 'l':
+  /* List existing providers */
+  print_providers();
+  break;
+ 
+ default:
+  usage (argv[0]);
+  break;
+ }
+
+ WSACleanup ();
+
+ return 0;
+}
Index: tools/rsinstall/user/rsinstall.exe.manifest
===================================================================
--- tools/rsinstall/user/rsinstall.exe.manifest (revision 0)
+++ tools/rsinstall/user/rsinstall.exe.manifest (working copy)
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+    <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+    <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+        <security>
+            <requestedPrivileges>
+                <requestedExecutionLevel level="requireAdministrator" uiAccess="false"/>
+            </requestedPrivileges>
+        </security>
+    </trustInfo>
+</assembly>   
\ No newline at end of file
Index: tools/rsinstall/user/rsinstall.rc
===================================================================
--- tools/rsinstall/user/rsinstall.rc (revision 0)
+++ tools/rsinstall/user/rsinstall.rc (working copy)
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <oib_ver.h>
+
+#define VER_FILETYPE    VFT_APP
+#define VER_FILESUBTYPE    VFT2_UNKNOWN
+
+#ifdef DBG
+#define VER_FILEDESCRIPTION_STR  "RSockets for InfiniBand installer (Debug)"
+#else
+#define VER_FILEDESCRIPTION_STR  "RSockets for InfiniBand installer"
+#endif
+
+#define VER_INTERNALNAME_STR  "rsinstall.exe"
+#define VER_ORIGINALFILENAME_STR "rsinstall.exe"
+
+#include <common.ver>
Index: tools/rsinstall/user/RsInstall.sln
===================================================================
--- tools/rsinstall/user/RsInstall.sln (revision 0)
+++ tools/rsinstall/user/RsInstall.sln (working copy)
@@ -0,0 +1,21 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "InstallSP", "InstallSP.vcproj", "{B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}"
+ ProjectSection(ProjectDependencies) = postProject
+ EndProjectSection
+EndProject
+Global
+ GlobalSection(SolutionConfiguration) = preSolution
+  Debug = Debug
+  Release = Release
+ EndGlobalSection
+ GlobalSection(ProjectConfiguration) = postSolution
+  {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Debug.ActiveCfg = Debug|Win32
+  {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Debug.Build.0 = Debug|Win32
+  {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Release.ActiveCfg = Release|Win32
+  {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Release.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ EndGlobalSection
+ GlobalSection(ExtensibilityAddIns) = postSolution
+ EndGlobalSection
+EndGlobal
Index: tools/rsinstall/user/SOURCES
===================================================================
--- tools/rsinstall/user/SOURCES (revision 0)
+++ tools/rsinstall/user/SOURCES (working copy)
@@ -0,0 +1,30 @@
+TARGETNAME=rsinstall
+TARGETPATH=..\..\..\bin\user\obj$(BUILD_ALT_DIR)
+TARGETTYPE=PROGRAM
+UMTYPE=console
+USE_MSVCRT=1
+
+INCLUDES=..\..\..\inc;\
+ ..\..\..\inc\user;\
+ ..\..\..\ulp\librdmacm\include;\
+ $(PLATFORM_SDK_PATH)\include;
+
+SOURCES= \
+ rsinstall.rc \
+ rsinstall.c
+
+USER_C_FLAGS=$(USER_C_FLAGS)
+# -DPERFMON_ENABLED
+
+TARGETLIBS=\
+ $(SDK_LIB_PATH)\ws2_32.lib \
+ $(SDK_LIB_PATH)\LoadPerf.lib 
+
+MSC_WARNING_LEVEL= /W3
+
+LINKER_FLAGS=$(LINKER_FLAGS)
+
+SXS_APPLICATION_MANIFEST=rsinstall.exe.manifest
+SXS_ASSEMBLY_VERSION=1.0
+SXS_ASSEMBLY_NAME=rsinstall.exe
+SXS_ASSEMBLY_LANGUAGE=0000
Index: ulp/librdmacm/AUTHORS
===================================================================
--- ulp/librdmacm/AUTHORS (revision 3419)
+++ ulp/librdmacm/AUTHORS (working copy)
@@ -1 +1,2 @@
 Sean Hefty  <sean.hefty at intel.com>
+Hubert Schmitt <hubert.schmitt at oce.com>
Index: ulp/librdmacm/COPYING
===================================================================
--- ulp/librdmacm/COPYING (revision 3419)
+++ ulp/librdmacm/COPYING (working copy)
@@ -1,11 +1,11 @@
 Copyright (c) 2008 Intel Corporation.  All rights reserved.
+Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
 
-This software is available to you under the OpenFabrics.org BSD license
-below:
+This software is available to you under the BSD license below:
 
-     Redistribution and use in source and binary forms, with or
-     without modification, are permitted provided that the following
-     conditions are met:
+    Redistribution and use in source and binary forms, with or
+    without modification, are permitted provided that the following
+    conditions are met:
 
       - Redistributions of source code must retain the above
         copyright notice, this list of conditions and the following
@@ -16,11 +16,21 @@
         disclaimer in the documentation and/or other materials
         provided with the distribution.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+      - Neither the name Oce Printing Systems GmbH nor the names
+        of the authors may be used to endorse or promote products
+        derived from this software without specific prior written
+        permission.
+
+    THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+    WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+    MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+    NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+    OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+    OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+    THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+    OF SUCH DAMAGE. 
Index: ulp/librdmacm/examples/rstream/makefile
===================================================================
--- ulp/librdmacm/examples/rstream/makefile (revision 0)
+++ ulp/librdmacm/examples/rstream/makefile (working copy)
@@ -0,0 +1,7 @@
+#
+# DO NOT EDIT THIS FILE!!!  Edit .\sources. if you want to add a new source
+# file to this component.  This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+
+!INCLUDE ..\..\..\..\inc\openib.def
Index: ulp/librdmacm/examples/rstream/makefile.inc
===================================================================
--- ulp/librdmacm/examples/rstream/makefile.inc (revision 0)
+++ ulp/librdmacm/examples/rstream/makefile.inc (working copy)
@@ -0,0 +1,8 @@
+Custom_target:
+!if "$(BUILD_PASS)" == "PASS2" || "$(BUILD_PASS)" == "ALL"
+
+!endif
+
+
+
+!INCLUDE ..\..\..\..\inc\mod_ver.def
Index: ulp/librdmacm/examples/rstream/rstream.c
===================================================================
--- ulp/librdmacm/examples/rstream/rstream.c (revision 0)
+++ ulp/librdmacm/examples/rstream/rstream.c (working copy)
@@ -0,0 +1,749 @@
+/*
+ * Copyright (c) 2011-2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+ 
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <_errno.h>
+#include "../../../../etc/user/gtod.c" // gettimeofday()
+#include "getopt.c"
+#include <sys/types.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <_fcntl.h>
+#include <unistd.h>
+
+#include "..\src\openib_osd.h"
+#include <rdma/rdma_cma.h>
+#include <rdma/rwinsock.h>
+
+#define MSG_DONTWAIT 0x80
+
+struct test_size_param {
+ int size;
+ int option;
+};
+
+static struct test_size_param test_size[] = {
+ { 1 <<  6, 0 },
+ { 1 <<  7, 1 }, { (1 <<  7) + (1 <<  6), 1},
+ { 1 <<  8, 1 }, { (1 <<  8) + (1 <<  7), 1},
+ { 1 <<  9, 1 }, { (1 <<  9) + (1 <<  8), 1},
+ { 1 << 10, 1 }, { (1 << 10) + (1 <<  9), 1},
+ { 1 << 11, 1 }, { (1 << 11) + (1 << 10), 1},
+ { 1 << 12, 0 }, { (1 << 12) + (1 << 11), 1},
+ { 1 << 13, 1 }, { (1 << 13) + (1 << 12), 1},
+ { 1 << 14, 1 }, { (1 << 14) + (1 << 13), 1},
+ { 1 << 15, 1 }, { (1 << 15) + (1 << 14), 1},
+ { 1 << 16, 0 }, { (1 << 16) + (1 << 15), 1},
+ { 1 << 17, 1 }, { (1 << 17) + (1 << 16), 1},
+ { 1 << 18, 1 }, { (1 << 18) + (1 << 17), 1},
+ { 1 << 19, 1 }, { (1 << 19) + (1 << 18), 1},
+ { 1 << 20, 0 }, { (1 << 20) + (1 << 19), 1},
+ { 1 << 21, 1 }, { (1 << 21) + (1 << 20), 1},
+ { 1 << 22, 1 }, { (1 << 22) + (1 << 21), 1},
+};
+#define TEST_CNT (sizeof test_size / sizeof test_size[0])
+
+enum rs_optimization {
+ opt_mixed,
+ opt_latency,
+ opt_bandwidth
+};
+
+static int rs, lrs;
+static int use_rs = 1;
+static int use_async = 0;
+static int verify = 0;
+static int flags = 0; //MSG_DONTWAIT;
+static int poll_timeout = 0;
+static int custom;
+static enum rs_optimization optimization;
+static int size_option;
+static int iterations = 1;
+static int transfer_size = 1000;
+static int transfer_count = 1000;
+static int buffer_size;
+static char test_name[10] = "custom";
+static char *port = "7471";
+static char *dst_addr;
+static char *src_addr;
+static struct timeval start, end;
+static void *buf;
+
+#define rs_socket(f,t,p)          use_rs ? WSASocket(f,t,p,rsGetProtocolInfo(NULL),0,0) : socket(f,t,p)
+#define rs_bind(s,a,l)            bind(s,a,l)
+#define rs_listen(s,b)            listen(s,b)
+#define rs_connect(s,a,l)         connect(s,a,l)
+#define rs_accept(s,a,l)          accept(s,a,l)
+#define rs_shutdown(s,h)          shutdown(s,h)
+#define rs_close(s)               closesocket(s)
+#define rs_recv(s,b,l,f)          recv(s,b,l,f)
+#define rs_send(s,b,l,f)          send(s,b,l,f)
+#define rs_recvfrom(s,b,l,f,a,al) recvfrom(s,b,l,f,a,al)
+#define rs_sendto(s,b,l,f,a,al)   sendto(s,b,l,f,a,al)
+#define rs_select(n,rf,wf,ef,t)   select(n,rf,wf,ef,t)
+#define rs_ioctlsocket(s,c,p)     ioctlsocket(s,c,p)
+#define rs_setsockopt(s,l,n,v,ol) setsockopt(s,l,n,v,ol)
+#define rs_getsockopt(s,l,n,v,ol) getsockopt(s,l,n,v,ol)
+
+static void size_str(char *str, size_t ssize, long long size)
+{
+ long long base, fraction = 0;
+ char mag;
+
+ if (size >= (1 << 30)) {
+  base = 1 << 30;
+  mag = 'g';
+ } else if (size >= (1 << 20)) {
+  base = 1 << 20;
+  mag = 'm';
+ } else if (size >= (1 << 10)) {
+  base = 1 << 10;
+  mag = 'k';
+ } else {
+  base = 1;
+  mag = '\0';
+ }
+
+ if (size / base < 10)
+  fraction = (size % base) * 10 / base;
+ if (fraction) {
+  _snprintf(str, ssize, "%lld.%lld%c", size / base, fraction, mag);
+ } else {
+  _snprintf(str, ssize, "%lld%c", size / base, mag);
+ }
+}
+
+static void cnt_str(char *str, size_t ssize, long long cnt)
+{
+ if (cnt >= 1000000000)
+  _snprintf(str, ssize, "%lldb", cnt / 1000000000);
+ else if (cnt >= 1000000)
+  _snprintf(str, ssize, "%lldm", cnt / 1000000);
+ else if (cnt >= 1000)
+  _snprintf(str, ssize, "%lldk", cnt / 1000);
+ else
+  _snprintf(str, ssize, "%lld", cnt);
+}
+
+/* Print one result row for the finished test; columns follow the header printed by run(). */
+static void show_perf(void)
+{
+ char str[32];
+ float usec;
+ long long bytes;
+ /* scale in double: the tv_sec difference times 1000000 can overflow a 32-bit long */
+ usec = (float)((double)(end.tv_sec - start.tv_sec) * 1000000. + (end.tv_usec - start.tv_usec));
+ bytes = (long long) iterations * transfer_count * transfer_size * 2;
+ /* name size transfers iterations bytes seconds Gb/sec usec/xfer */
+ printf("%-10s", test_name);
+ size_str(str, sizeof str, transfer_size);
+ printf("%-8s", str);
+ cnt_str(str, sizeof str, transfer_count);
+ printf("%-8s", str);
+ cnt_str(str, sizeof str, iterations);
+ printf("%-8s", str);
+ size_str(str, sizeof str, bytes);
+ printf("%-8s", str);
+ printf("%8.2fs%10.2f%11.2f\n",
+  usec / 1000000., (bytes * 8) / (1000. * usec),
+  (usec / iterations) / (transfer_count * 2));
+}
+
+static int size_to_count(int size)
+{
+ if (size >= 1000000)
+  return 100;
+ else if (size >= 100000)
+  return 1000;
+ else if (size >= 10000)
+  return 10000;
+ else if (size >= 1000)
+  return 100000;
+ else
+  return 1000000;
+}
+
+static void init_latency_test(int size)
+{
+ char sstr[5];
+
+ size_str(sstr, sizeof sstr, size);
+ _snprintf(test_name, sizeof test_name, "%s_lat", sstr);
+ transfer_count = 1;
+ transfer_size = size;
+ iterations = size_to_count(transfer_size);
+}
+
+static void init_bandwidth_test(int size)
+{
+ char sstr[5];
+
+ size_str(sstr, sizeof sstr, size);
+ _snprintf(test_name, sizeof test_name, "%s_bw", sstr);
+ iterations = 1;
+ transfer_size = size;
+ transfer_count = size_to_count(transfer_size);
+}
+
+static void format_buf(void *buf, int size)
+{
+ uint8_t *array = buf;
+ static uint8_t data;
+ int i;
+
+ for (i = 0; i < size; i++)
+  array[i] = data++;
+}
+
+static int verify_buf(void *buf, int size)
+{
+ static long long total_bytes;
+ uint8_t *array = buf;
+ static uint8_t data;
+ int i;
+
+ for (i = 0; i < size; i++, total_bytes++) {
+  if (array[i] != data++) {
+   printf("data verification failed byte %lld\n", total_bytes);
+   return -1;
+  }
+ }
+ return 0;
+}
+
+/* poll() emulation for one socket via select(): returns 0 once a condition
+ * requested in fds->events is signalled, otherwise select()'s error result. */
+static int do_poll(struct pollfd *fds)
+{
+ int ret;
+ int nfds;
+ fd_set readfds, writefds, exceptfds;
+ struct timeval timeout;
+
+ do {
+  /* select() overwrites the fd_sets, so rebuild them on every pass */
+  nfds = 0;
+  FD_ZERO(&readfds);
+  FD_ZERO(&writefds);
+  FD_ZERO(&exceptfds);
+  if (fds->events & (POLLIN | POLLHUP)) {
+   FD_SET(fds->fd, &readfds);
+   nfds++;
+  }
+  if (fds->events & POLLOUT) {
+   FD_SET(fds->fd, &writefds);
+   nfds++;
+  }
+  if (fds->events & ~(POLLIN | POLLOUT)) {
+   FD_SET(fds->fd, &exceptfds);
+   nfds++;
+  }
+  /* poll_timeout is in milliseconds; keep the sub-second remainder */
+  timeout.tv_sec  = poll_timeout / 1000;
+  timeout.tv_usec = (poll_timeout % 1000) * 1000;
+
+  ret = rs_select(nfds,
+   FD_ISSET(fds->fd, &readfds  ) ? &readfds   : NULL,
+   FD_ISSET(fds->fd, &writefds ) ? &writefds  : NULL,
+   FD_ISSET(fds->fd, &exceptfds) ? &exceptfds : NULL,
+   poll_timeout < 0 ? NULL : &timeout);
+ } while (!ret);
+
+ /* any positive count means the socket is ready; errors pass through */
+ return ret > 0 ? 0 : ret;
+}
+
+static int send_xfer(int size)
+{
+ struct pollfd fds;
+ int offset, ret;
+ 
+ if (verify)
+  format_buf(buf, size);
+
+ if (use_async) {
+  fds.fd = rs;
+  fds.events = POLLOUT;
+ }
+
+ for (offset = 0; offset < size; ) {
+  if (use_async) {
+   ret = do_poll(&fds);
+   if (ret)
+    return ret;
+  }
+
+  ret = (int)rs_send(rs, (char *)buf + offset, size - offset, flags);
+  if (ret > 0) {
+   offset += ret;
+  } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
+   perror("rsend");
+   return ret;
+  }
+ }
+ return 0;
+}
+
+static int recv_xfer(int size)
+{
+ struct pollfd fds;
+ int offset, ret;
+ 
+ if (use_async) {
+  fds.fd = rs;
+  fds.events = POLLIN;
+ }
+
+ for (offset = 0; offset < size; ) {
+  if (use_async) {
+   ret = do_poll(&fds);
+   if (ret)
+    return ret;
+  }
+
+  ret = (int)rs_recv(rs, (char *)buf + offset, size - offset, flags);
+  if (ret > 0) {
+   offset += ret;
+  } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
+   perror("rrecv");
+   return ret;
+  }
+ }
+
+ if (verify) {
+  ret = verify_buf(buf, size);
+  if (ret)
+   return ret;
+ }
+ return 0;
+}
+
+static int sync_test(void)
+{
+ int ret;
+
+ ret = dst_addr ? send_xfer(16) : recv_xfer(16);
+ if (ret)
+  return ret;
+
+ return dst_addr ? recv_xfer(16) : send_xfer(16);
+}
+
+static int run_test(void)
+{
+ int ret, i, t;
+
+ ret = sync_test();
+ if (ret)
+  goto out;
+
+ gettimeofday(&start, NULL);
+ for (i = 0; i < iterations; i++) {
+  for (t = 0; t < transfer_count; t++) {
+   ret = dst_addr ? send_xfer(transfer_size) :
+      recv_xfer(transfer_size);
+   if (ret)
+    goto out;
+  }
+  for (t = 0; t < transfer_count; t++) {
+   ret = dst_addr ? recv_xfer(transfer_size) :
+      send_xfer(transfer_size);
+   if (ret)
+    goto out;
+  }
+ }
+ gettimeofday(&end, NULL);
+ show_perf();
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void set_options(int rs)
+{
+ int val;
+
+ if (buffer_size) {
+  rs_setsockopt(rs, SOL_SOCKET, SO_SNDBUF, (void *) &buffer_size,
+         sizeof buffer_size);
+  rs_setsockopt(rs, SOL_SOCKET, SO_RCVBUF, (void *) &buffer_size,
+         sizeof buffer_size);
+ } else {
+  val = 1 << 19;
+  rs_setsockopt(rs, SOL_SOCKET, SO_SNDBUF, (void *) &val, sizeof val);
+  rs_setsockopt(rs, SOL_SOCKET, SO_RCVBUF, (void *) &val, sizeof val);
+ }
+
+ val = 1;
+ rs_setsockopt(rs, IPPROTO_TCP, TCP_NODELAY, (void *) &val, sizeof(val));
+
+ val = 1;
+ if (flags & MSG_DONTWAIT)
+  rs_ioctlsocket(rs, FIONBIO, (u_long *)&val);
+
+ if (use_rs) {
+  /* Inline size based on experimental data */
+  if (optimization == opt_latency) {
+   val = 384;
+   rs_setsockopt(rs, SOL_RDMA, RDMA_INLINE, (char *)&val, sizeof val);
+  } else if (optimization == opt_bandwidth) {
+   val = 0;
+   rs_setsockopt(rs, SOL_RDMA, RDMA_INLINE, (char *)&val, sizeof val);
+  }
+ }
+}
+
+static int server_listen(void)
+{
+ struct addrinfo hints, *res;
+ int val, ret;
+
+ memset(&hints, 0, sizeof hints);
+ hints.ai_flags  = RAI_PASSIVE;
+ hints.ai_family = AF_INET;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+ 
+  ret = getaddrinfo(src_addr, port, &hints, &res);
+ if (ret) {
+  perror("getaddrinfo");
+  return ret;
+ }
+
+ lrs = (int)(rs_socket(res->ai_family, res->ai_socktype, res->ai_protocol));
+ if (lrs < 0) {
+  perror("rsocket");
+  ret = lrs;
+  goto free;
+ }
+
+ val = 1;
+ ret = rs_setsockopt(lrs, SOL_SOCKET, SO_REUSEADDR, (char *)&val, sizeof val);
+ if (ret) {
+  perror("rsetsockopt SO_REUSEADDR");
+  goto close;
+ }
+
+ ret = rs_bind(lrs, res->ai_addr, res->ai_addrlen);
+ if (ret) {
+  perror("rbind");
+  goto close;
+ }
+
+ ret = rs_listen(lrs, 1);
+ if (ret)
+  perror("rlisten");
+
+close:
+ if (ret)
+  rs_close(lrs);
+free:
+ freeaddrinfo(res);
+ return ret;
+}
+
+static int server_connect(void)
+{
+ struct pollfd fds;
+ int ret = 0;
+
+ set_options(lrs);
+ do {
+  if (use_async) {
+   fds.fd = lrs;
+   fds.events = POLLIN;
+
+   ret = do_poll(&fds);
+   if (ret) {
+    perror("rpoll");
+    return ret;
+   }
+  }
+
+  rs = (int)(rs_accept(lrs, NULL, 0));
+ } while (rs < 0 && (errno == EAGAIN || errno == EWOULDBLOCK));
+
+ if (rs < 0) {
+  ret = rs;
+  perror("raccept");
+  return ret;
+ }
+ set_options(rs);
+
+ return ret;
+}
+
+static int client_connect(void)
+{
+ struct addrinfo hints, *res;
+ struct pollfd fds;
+ int ret, err;
+ socklen_t len;
+ 
+ memset(&hints, 0, sizeof hints);
+ hints.ai_flags  = RAI_PASSIVE;
+ hints.ai_family = AF_INET;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+ 
+  ret = getaddrinfo(dst_addr, port, &hints, &res);
+ if (ret) {
+  perror("getaddrinfo");
+  return ret;
+ }
+
+ rs = (int)(rs_socket(res->ai_family, res->ai_socktype, res->ai_protocol));
+ if (rs < 0) {
+  perror("rsocket");
+  ret = rs;
+  goto free;
+ }
+
+ set_options(rs);
+ /* TODO: bind client to src_addr */
+
+ ret = rs_connect(rs, res->ai_addr, res->ai_addrlen);
+ if (ret && (errno != EINPROGRESS)) {
+  perror("rconnect");
+  goto close;
+ }
+
+ if (ret && (errno == EINPROGRESS)) {
+  fds.fd = rs;
+  fds.events = POLLOUT;
+  ret = do_poll(&fds);
+  if (ret)
+   goto close;
+
+  len = sizeof err;
+  ret = rs_getsockopt(rs, SOL_SOCKET, SO_ERROR, (char *)&err, &len);
+  if (ret)
+   goto close;
+  if (err) {
+   ret = -1;
+   errno = err;
+   perror("async rconnect");
+  }
+ }
+
+close:
+ if (ret)
+  rs_close(rs);
+free:
+ freeaddrinfo(res);
+ return ret;
+}
+
+static int run(void)
+{
+ int i, ret = 0;
+ DWORD dwBytesReturned = 0;
+ 
+ buf = malloc(!custom ? test_size[TEST_CNT - 1].size : transfer_size);
+ if (!buf) {
+  perror("malloc");
+  return -1;
+ }
+ if (!dst_addr) {
+  ret = server_listen();
+  if (ret)
+   goto free;
+ }
+ printf("%-10s%-8s%-8s%-8s%-8s%8s %10s%13s\n",
+        "name", "bytes", "xfers", "iters", "total", "time", "Gb/sec", "usec/xfer");
+ if (!custom) {
+  optimization = opt_latency;
+  ret = dst_addr ? client_connect() : server_connect();
+  if (ret)
+   goto free;
+
+  for (i = 0; i < TEST_CNT; i++) {
+   if (test_size[i].option > size_option)
+    continue;
+   init_latency_test(test_size[i].size);
+   run_test();
+  }
+  rs_shutdown(rs, SHUT_RDWR);
+  rs_close(rs);
+
+  optimization = opt_bandwidth;
+  ret = dst_addr ? client_connect() : server_connect();
+  if (ret)
+   goto free;
+
+  for (i = 0; i < TEST_CNT; i++) {
+   if (test_size[i].option > size_option)
+    continue;
+   init_bandwidth_test(test_size[i].size);
+   run_test();
+  }
+ } else {
+  ret = dst_addr ? client_connect() : server_connect();
+  if (ret)
+   goto free;
+
+  ret = run_test();
+ }
+
+ rs_shutdown(rs, SHUT_RDWR);
+ rs_close(rs);
+free:
+ free(buf);
+ return ret;
+}
+
+static int set_test_opt(char *optarg)
+{
+ if (strlen(optarg) == 1) {
+  switch (optarg[0]) {
+  case 's':
+   use_rs = 0;
+   break;
+/* async not yet supported
+ *  case 'a':
+ *   use_async = 1;
+ *   break;
+ */
+  case 'b':
+   flags &= ~MSG_DONTWAIT;
+   break;
+  case 'n':
+   flags |= MSG_DONTWAIT;
+   break;
+  case 'v':
+   verify = 1;
+   break;  default:
+   return -1;
+  }
+ } else {
+  if (!_strnicmp("socket", optarg, 6)) {
+   use_rs = 0;
+  } /* async not yet supported
+     * else if (!_strnicmp("async", optarg, 5)) {
+     * use_async = 1;
+     * }
+     */
+    else if (!_strnicmp("block", optarg, 5)) {
+   flags &= ~MSG_DONTWAIT;
+  } else if (!_strnicmp("nonblock", optarg, 8)) {
+   flags |= MSG_DONTWAIT;
+  } else if (!_strnicmp("verify", optarg, 6)) {
+   verify = 1;
+  } else {
+   return -1;
+  }
+ }
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int op, ret;
+ WSADATA wsaData;
+
+    if (0 != (ret = WSAStartup(0x202,&wsaData)) ) {
+        fprintf(stderr, "WSAStartup failed with error %d\n",ret);
+  ret = -1;
+        goto out;
+    }
+ while ((op = getopt(argc, argv, "s:b:B:I:C:S:p:T:")) != -1) {
+  switch (op) {
+  case 's':
+   dst_addr = optarg;
+   break;
+  case 'b':
+   src_addr = optarg;
+   break;
+  case 'B':
+   buffer_size = atoi(optarg);
+   break;
+  case 'I':
+   custom = 1;
+   iterations = atoi(optarg);
+   break;
+  case 'C':
+   custom = 1;
+   transfer_count = atoi(optarg);
+   break;
+  case 'S':
+   if (!_strnicmp("all", optarg, 3)) {
+    size_option = 1;
+   } else {
+    custom = 1;
+    transfer_size = atoi(optarg);
+   }
+   break;
+  case 'p':
+   port = optarg;
+   break;
+  case 'T':
+   if (!set_test_opt(optarg))
+    break;
+   /* invalid option - fall through */
+  default:
+   printf("usage: %s\n", argv[0]);
+   printf("\t[-s server_address]\n");
+   printf("\t[-b bind_address]\n");
+   printf("\t[-B buffer_size]\n");
+   printf("\t[-I iterations]\n");
+   printf("\t[-C transfer_count]\n");
+   printf("\t[-S transfer_size or all]\n");
+   printf("\t[-p port_number]\n");
+   printf("\t[-T test_option]\n");
+   printf("\t    s|sockets - use standard tcp/ip sockets\n");
+//   printf("\t    a|async - asynchronous operation (use poll)\n");
+   printf("\t    b|blocking - use blocking calls\n");
+   printf("\t    n|nonblocking - use nonblocking calls\n");
+   printf("\t    v|verify - verify data\n");
+   exit(1);
+  }
+ }
+ if (!(flags & MSG_DONTWAIT))
+  poll_timeout = -1;
+
+ ret = run();
+
+out:
+    WSACleanup();
+
+ return ret;
+}
Index: ulp/librdmacm/examples/rstream/rstream.rc
===================================================================
--- ulp/librdmacm/examples/rstream/rstream.rc (revision 0)
+++ ulp/librdmacm/examples/rstream/rstream.rc (working copy)
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2005 Mellanox Technologies.  All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <oib_ver.h>
+
+#define VER_FILETYPE    VFT_APP
+#define VER_FILESUBTYPE    VFT2_UNKNOWN
+
+#ifdef DBG
+#define VER_FILEDESCRIPTION_STR  "(R)Socket Test (Debug)"
+#else
+#define VER_FILEDESCRIPTION_STR  "(R)Socket Test "
+#endif
+
+#define VER_INTERNALNAME_STR  "rstream.exe"
+#define VER_ORIGINALFILENAME_STR "rstream.exe"
+
+#include <common.ver>
Index: ulp/librdmacm/examples/rstream/SOURCES
===================================================================
--- ulp/librdmacm/examples/rstream/SOURCES (revision 0)
+++ ulp/librdmacm/examples/rstream/SOURCES (working copy)
@@ -0,0 +1,32 @@
+TARGETNAME=rstream
+TARGETPATH=..\..\..\..\bin\user\obj$(BUILD_ALT_DIR)
+TARGETTYPE=PROGRAM
+UMTYPE=console
+USE_MSVCRT=1
+NTTARGETFILES=Custom_target
+
+C_DEFINES=$(C_DEFINES) /D__WIN__ 
+
+SOURCES=rstream.rc \
+ rstream.c
+
+INCLUDES= ..; \
+   ..\..\..\..\inc; \
+   ..\..\..\..\inc\user; \
+   ..\..\..\..\inc\user\linux; \
+   ..\..\include; \
+   ..\..\..\libibverbs\include; \
+   ..\..\..\..\hw\mlx4\user\hca; \
+   ..\..\..\..\inc\complib; \
+   ..\..\..\..\core\complib\user\$(O); \
+   ..\..\..\..\core\al\user\$(O); \
+   ..\..\..\..\etc\user;
+
+RCOPTIONS=/I..\..\win\include
+
+TARGETLIBS= \
+   $(DDK_LIB_PATH)\Ws2_32.lib \
+   $(TARGETPATH)\*\complib.lib \
+   $(TARGETPATH)\*\ibal.lib
+
+MSC_WARNING_LEVEL= /W3
Index: ulp/librdmacm/include/rdma/rdma_cma.h
===================================================================
--- ulp/librdmacm/include/rdma/rdma_cma.h (revision 3419)
+++ ulp/librdmacm/include/rdma/rdma_cma.h (working copy)
@@ -1,8 +1,8 @@
 /*
  * Copyright (c) 2005-2009 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
  *
- * This software is available to you under the OpenFabrics.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
@@ -17,14 +17,24 @@
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
  */
 
 #pragma once
@@ -678,7 +688,10 @@
 /* Option details */
 enum
 {
- RDMA_OPTION_ID_TOS = 0 /* uint8_t: RFC 2474 */
+ RDMA_OPTION_ID_TOS = 0,  /* uint8_t: RFC 2474 */
+ RDMA_OPTION_ID_REUSEADDR = 1,   /* int: ~SO_REUSEADDR */
+ RDMA_OPTION_ID_AFONLY  = 2,   /* int: ~IPV6_V6ONLY */
+ RDMA_OPTION_IB_PATH  = 1  /* struct ibv_path_data[] */
 };
 
 /**
Index: ulp/librdmacm/include/rdma/rdma_verbs.h
===================================================================
--- ulp/librdmacm/include/rdma/rdma_verbs.h (revision 3419)
+++ ulp/librdmacm/include/rdma/rdma_verbs.h (working copy)
@@ -1,297 +1,297 @@
-/*
- * Copyright (c) 2010 Intel Corporation.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#if !defined(RDMA_VERBS_H)
-#define RDMA_VERBS_H
-
-#include <assert.h>
-#include <infiniband/verbs.h>
-#include <rdma/rdma_cma.h>
-#include <errno.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static __inline int rdma_seterrno(int ret)
-{
- if (ret) {
-  errno = ret;
-  ret = -1;
- }
- return ret;
-}
-
-/*
- * Memory registration helpers.
- */
-static __inline struct ibv_mr *
-rdma_reg_msgs(struct rdma_cm_id *id, void *addr, size_t length)
-{
- return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE);
-}
-
-static __inline struct ibv_mr *
-rdma_reg_read(struct rdma_cm_id *id, void *addr, size_t length)
-{
- return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE |
-          IBV_ACCESS_REMOTE_READ);
-}
-
-static __inline struct ibv_mr *
-rdma_reg_write(struct rdma_cm_id *id, void *addr, size_t length)
-{
- return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE |
-          IBV_ACCESS_REMOTE_WRITE);
-}
-
-static __inline int
-rdma_dereg_mr(struct ibv_mr *mr)
-{
- return rdma_seterrno(ibv_dereg_mr(mr));
-}
-
-
-/*
- * Vectored send, receive, and RDMA operations.
- * Support multiple scatter-gather entries.
- */
-static __inline int
-rdma_post_recvv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
-  int nsge)
-{
- struct ibv_recv_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
-
- return rdma_seterrno(ibv_post_recv(id->qp, &wr, &bad));
-}
-
-static __inline int
-rdma_post_sendv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
-  int nsge, int flags)
-{
- struct ibv_send_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
- wr.opcode = IBV_WR_SEND;
- wr.send_flags = flags;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-static __inline int
-rdma_post_readv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
-  int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_send_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
- wr.opcode = IBV_WR_RDMA_READ;
- wr.send_flags = flags;
- wr.wr.rdma.remote_addr = remote_addr;
- wr.wr.rdma.rkey = rkey;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-static __inline int
-rdma_post_writev(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
-   int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_send_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
- wr.opcode = IBV_WR_RDMA_WRITE;
- wr.send_flags = flags;
- wr.wr.rdma.remote_addr = remote_addr;
- wr.wr.rdma.rkey = rkey;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-/*
- * Simple send, receive, and RDMA calls.
- */
-static __inline int
-rdma_post_recv(struct rdma_cm_id *id, void *context, void *addr,
-        size_t length, struct ibv_mr *mr)
-{
- struct ibv_sge sge;
-
- assert((addr >= mr->addr) &&
-     (((uint8_t) addr + length) <= ((uint8_t) mr->addr + mr->length)));
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr->lkey;
-
- return rdma_post_recvv(id, context, &sge, 1);
-}
-
-static __inline int
-rdma_post_send(struct rdma_cm_id *id, void *context, void *addr,
-        size_t length, struct ibv_mr *mr, int flags)
-{
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr ? mr->lkey : 0;
-
- return rdma_post_sendv(id, context, &sge, 1, flags);
-}
-
-static __inline int
-rdma_post_read(struct rdma_cm_id *id, void *context, void *addr,
-        size_t length, struct ibv_mr *mr, int flags,
-        uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr->lkey;
-
- return rdma_post_readv(id, context, &sge, 1, flags, remote_addr, rkey);
-}
-
-static __inline int
-rdma_post_write(struct rdma_cm_id *id, void *context, void *addr,
-  size_t length, struct ibv_mr *mr, int flags,
-  uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr ? mr->lkey : 0;
-
- return rdma_post_writev(id, context, &sge, 1, flags, remote_addr, rkey);
-}
-
-static __inline int
-rdma_post_ud_send(struct rdma_cm_id *id, void *context, void *addr,
-    size_t length, struct ibv_mr *mr, int flags,
-    struct ibv_ah *ah, uint32_t remote_qpn)
-{
- struct ibv_send_wr wr, *bad;
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr ? mr->lkey : 0;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = &sge;
- wr.num_sge = 1;
- wr.opcode = IBV_WR_SEND;
- wr.send_flags = flags;
- wr.wr.ud.ah = ah;
- wr.wr.ud.remote_qpn = remote_qpn;
- wr.wr.ud.remote_qkey = RDMA_UDP_QKEY;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-// Comment out until patch to automatically create CQs
-static __inline int
-rdma_get_send_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
-{
- struct ibv_cq *cq;
- void *context;
- int ret;
-
- ret = ibv_poll_cq(id->send_cq, 1, wc);
- if (ret)
-  goto out;
-
- ret = ibv_req_notify_cq(id->send_cq, 0);
- if (ret)
-  return rdma_seterrno(ret);
-
- while (!(ret = ibv_poll_cq(id->send_cq, 1, wc))) {
-  ret = ibv_get_cq_event(id->send_cq_channel, &cq, &context);
-  if (ret)
-   return rdma_seterrno(ret);
-
-  assert(cq == id->send_cq && context == id);
-  ibv_ack_cq_events(id->send_cq, 1);
- }
-out:
- return (ret < 0) ? rdma_seterrno(ret) : ret;
-}
-
-static __inline int
-rdma_get_recv_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
-{
- struct ibv_cq *cq;
- void *context;
- int ret;
-
- ret = ibv_poll_cq(id->recv_cq, 1, wc);
- if (ret)
-  goto out;
-
- ret = ibv_req_notify_cq(id->recv_cq, 0);
- if (ret)
-  return rdma_seterrno(ret);
-
- while (!(ret = ibv_poll_cq(id->recv_cq, 1, wc))) {
-  ret = ibv_get_cq_event(id->recv_cq_channel, &cq, &context);
-  if (ret)
-   return rdma_seterrno(ret);
-
-  assert(cq == id->recv_cq && context == id);
-  ibv_ack_cq_events(id->recv_cq, 1);
- }
-out:
- return (ret < 0) ? rdma_seterrno(ret) : ret;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* RDMA_CMA_H */
+/*
+ * Copyright (c) 2010 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(RDMA_VERBS_H)
+#define RDMA_VERBS_H
+
+#include <assert.h>
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <errno.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static __inline int rdma_seterrno(int ret)
+{
+ if (ret) {
+  errno = ret;
+  ret = -1;
+ }
+ return ret;
+}
+
+/*
+ * Memory registration helpers.
+ */
+static __inline struct ibv_mr *
+rdma_reg_msgs(struct rdma_cm_id *id, void *addr, size_t length)
+{
+ return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE);
+}
+
+static __inline struct ibv_mr *
+rdma_reg_read(struct rdma_cm_id *id, void *addr, size_t length)
+{
+ return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE |
+          IBV_ACCESS_REMOTE_READ);
+}
+
+static __inline struct ibv_mr *
+rdma_reg_write(struct rdma_cm_id *id, void *addr, size_t length)
+{
+ return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE |
+          IBV_ACCESS_REMOTE_WRITE);
+}
+
+static __inline int
+rdma_dereg_mr(struct ibv_mr *mr)
+{
+ return rdma_seterrno(ibv_dereg_mr(mr));
+}
+
+
+/*
+ * Vectored send, receive, and RDMA operations.
+ * Support multiple scatter-gather entries.
+ */
+static __inline int
+rdma_post_recvv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+  int nsge)
+{
+ struct ibv_recv_wr wr, *bad;
+
+ wr.wr_id = (uintptr_t) context;
+ wr.next = NULL;
+ wr.sg_list = sgl;
+ wr.num_sge = nsge;
+
+ return rdma_seterrno(ibv_post_recv(id->qp, &wr, &bad));
+}
+
+static __inline int
+rdma_post_sendv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+  int nsge, int flags)
+{
+ struct ibv_send_wr wr, *bad;
+
+ wr.wr_id = (uintptr_t) context;
+ wr.next = NULL;
+ wr.sg_list = sgl;
+ wr.num_sge = nsge;
+ wr.opcode = IBV_WR_SEND;
+ wr.send_flags = flags;
+
+ return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
+}
+
+static __inline int
+rdma_post_readv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+  int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
+{
+ struct ibv_send_wr wr, *bad;
+
+ wr.wr_id = (uintptr_t) context;
+ wr.next = NULL;
+ wr.sg_list = sgl;
+ wr.num_sge = nsge;
+ wr.opcode = IBV_WR_RDMA_READ;
+ wr.send_flags = flags;
+ wr.wr.rdma.remote_addr = remote_addr;
+ wr.wr.rdma.rkey = rkey;
+
+ return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
+}
+
+static __inline int
+rdma_post_writev(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+   int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
+{
+ struct ibv_send_wr wr, *bad;
+
+ wr.wr_id = (uintptr_t) context;
+ wr.next = NULL;
+ wr.sg_list = sgl;
+ wr.num_sge = nsge;
+ wr.opcode = IBV_WR_RDMA_WRITE;
+ wr.send_flags = flags;
+ wr.wr.rdma.remote_addr = remote_addr;
+ wr.wr.rdma.rkey = rkey;
+
+ return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
+}
+
+/*
+ * Simple send, receive, and RDMA calls.
+ */
+static __inline int
+rdma_post_recv(struct rdma_cm_id *id, void *context, void *addr,
+        size_t length, struct ibv_mr *mr)
+{
+ struct ibv_sge sge;
+ /* Debug-only check that [addr, addr + length) lies inside mr's range. */
+ assert((addr >= mr->addr) &&
+     (((uint8_t *) addr + length) <= ((uint8_t *) mr->addr + mr->length)));
+ sge.addr = (uint64_t) (uintptr_t) addr;
+ sge.length = (uint32_t) length;
+ sge.lkey = mr->lkey;
+ /* Post the buffer as a receive with a single scatter-gather entry. */
+ return rdma_post_recvv(id, context, &sge, 1);
+}
+
+static __inline int
+rdma_post_send(struct rdma_cm_id *id, void *context, void *addr,
+        size_t length, struct ibv_mr *mr, int flags)
+{
+ struct ibv_sge sge;
+
+ sge.addr = (uint64_t) (uintptr_t) addr;
+ sge.length = (uint32_t) length;
+ sge.lkey = mr ? mr->lkey : 0;
+
+ return rdma_post_sendv(id, context, &sge, 1, flags);
+}
+
+static __inline int
+rdma_post_read(struct rdma_cm_id *id, void *context, void *addr,
+        size_t length, struct ibv_mr *mr, int flags,
+        uint64_t remote_addr, uint32_t rkey)
+{
+ struct ibv_sge sge;
+
+ sge.addr = (uint64_t) (uintptr_t) addr;
+ sge.length = (uint32_t) length;
+ sge.lkey = mr->lkey;
+
+ return rdma_post_readv(id, context, &sge, 1, flags, remote_addr, rkey);
+}
+
+static __inline int
+rdma_post_write(struct rdma_cm_id *id, void *context, void *addr,
+  size_t length, struct ibv_mr *mr, int flags,
+  uint64_t remote_addr, uint32_t rkey)
+{
+ struct ibv_sge sge;
+
+ sge.addr = (uint64_t) (uintptr_t) addr;
+ sge.length = (uint32_t) length;
+ sge.lkey = mr ? mr->lkey : 0;
+
+ return rdma_post_writev(id, context, &sge, 1, flags, remote_addr, rkey);
+}
+
+static __inline int
+rdma_post_ud_send(struct rdma_cm_id *id, void *context, void *addr,
+    size_t length, struct ibv_mr *mr, int flags,
+    struct ibv_ah *ah, uint32_t remote_qpn)
+{
+ struct ibv_send_wr wr, *bad;
+ struct ibv_sge sge;
+
+ sge.addr = (uint64_t) (uintptr_t) addr;
+ sge.length = (uint32_t) length;
+ sge.lkey = mr ? mr->lkey : 0;
+
+ wr.wr_id = (uintptr_t) context;
+ wr.next = NULL;
+ wr.sg_list = &sge;
+ wr.num_sge = 1;
+ wr.opcode = IBV_WR_SEND;
+ wr.send_flags = flags;
+ wr.wr.ud.ah = ah;
+ wr.wr.ud.remote_qpn = remote_qpn;
+ wr.wr.ud.remote_qkey = RDMA_UDP_QKEY;
+
+ return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
+}
+
+// Comment out until patch to automatically create CQs
+static __inline int
+rdma_get_send_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
+{
+ struct ibv_cq *cq;
+ void *context;
+ int ret;
+
+ ret = ibv_poll_cq(id->send_cq, 1, wc);
+ if (ret)
+  goto out;
+
+ ret = ibv_req_notify_cq(id->send_cq, 0);
+ if (ret)
+  return rdma_seterrno(ret);
+
+ while (!(ret = ibv_poll_cq(id->send_cq, 1, wc))) {
+  ret = ibv_get_cq_event(id->send_cq_channel, &cq, &context);
+  if (ret)
+   return rdma_seterrno(ret);
+
+  assert(cq == id->send_cq && context == id);
+  ibv_ack_cq_events(id->send_cq, 1);
+ }
+out:
+ return (ret < 0) ? rdma_seterrno(ret) : ret;
+}
+
+static __inline int
+rdma_get_recv_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
+{
+ struct ibv_cq *cq;
+ void *context;
+ int ret;
+
+ ret = ibv_poll_cq(id->recv_cq, 1, wc);
+ if (ret)
+  goto out;
+
+ ret = ibv_req_notify_cq(id->recv_cq, 0);
+ if (ret)
+  return rdma_seterrno(ret);
+
+ while (!(ret = ibv_poll_cq(id->recv_cq, 1, wc))) {
+  ret = ibv_get_cq_event(id->recv_cq_channel, &cq, &context);
+  if (ret)
+   return rdma_seterrno(ret);
+
+  assert(cq == id->recv_cq && context == id);
+  ibv_ack_cq_events(id->recv_cq, 1);
+ }
+out:
+ return (ret < 0) ? rdma_seterrno(ret) : ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RDMA_VERBS_H */
Index: ulp/librdmacm/include/rdma/rs_regpath.h
===================================================================
--- ulp/librdmacm/include/rdma/rs_regpath.h (revision 0)
+++ ulp/librdmacm/include/rdma/rs_regpath.h (working copy)
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+
+#ifndef _RS_REGPATH_H_
+#define _RS_REGPATH_H_
+
+/* these definitions are common for installSP and WSD projects */
+#define RS_PM_REGISTRY_PATH \
+ TEXT("SYSTEM\\CurrentControlSet\\Services\\RSockets\\")
+#define RS_PM_EVENTLOG_PATH \
+ TEXT("SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\RSockets")
+#define RS_PM_SUBKEY_NAME  TEXT("RSockets")
+#define RS_PM_SUBKEY_PERF  TEXT("Performance")
+#define RS_PM_INI_FILE  "rs_perfcounters.ini"
+#define RS_PM_SYM_H_FILE  "rs_perfini.h"
+
+enum RS_PM_COUNTERS
+{
+ BYTES_SEND = 0,
+ BYTES_RECV,
+ BYTES_WRITE,
+ BYTES_READ,
+ BYTES_TOTAL,
+ COMP_SEND,
+ COMP_RECV,
+ COMP_TOTAL,
+ INTR_TOTAL,
+ RS_PM_NUM_COUNTERS
+};
+
+/* counter symbol names: the object is 0, counter X maps to the even value 2 * (X + 1) */
+#define RS_PM_OBJ      0
+#define RS_PM_COUNTER( X )   (((X) + 1) * 2)
+
+#endif /* _RS_REGPATH_H_ */
Index: ulp/librdmacm/include/rdma/rsocket.h
===================================================================
--- ulp/librdmacm/include/rdma/rsocket.h (revision 0)
+++ ulp/librdmacm/include/rdma/rsocket.h (working copy)
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+
+#if !defined(RSOCKET_H)
+#define RSOCKET_H
+
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <ws2spi.h>
+
+typedef unsigned int nfds_t; // Under Linux from poll.h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+__declspec(dllexport)
+int rsocket(int domain, int type, int protocol);
+__declspec(dllexport)
+int rbind(int socket, const struct sockaddr *addr, socklen_t addrlen);
+__declspec(dllexport)
+int rlisten(int socket, int backlog);
+__declspec(dllexport)
+int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen);
+SOCKET WSPAPI WSPAccept(
+ SOCKET    socket,
+ struct sockaddr *addr,
+ LPINT    addrlen,
+ LPCONDITIONPROC  lpfnCondition,
+ DWORD_PTR   dwCallbackData,
+ LPINT    lpErrno
+);
+__declspec(dllexport)
+int rconnect(int socket, const struct sockaddr *addr, socklen_t addrlen);
+__declspec(dllexport)
+int rshutdown(int socket, int how);
+__declspec(dllexport)
+int rclose(int socket);
+__declspec(dllexport)
+ssize_t rrecv(int socket, void *buf, size_t len, int flags);
+__declspec(dllexport)
+ssize_t rrecvfrom(int socket, void *buf, size_t len, int flags,
+    struct sockaddr *src_addr, socklen_t *addrlen);
+__declspec(dllexport)
+ssize_t rrecvmsg(int socket, struct msghdr *msg, int flags);
+__declspec(dllexport)
+ssize_t rsend(int socket, const void *buf, size_t len, int flags);
+__declspec(dllexport)
+ssize_t rsendto(int socket, const void *buf, size_t len, int flags,
+  const struct sockaddr *dest_addr, socklen_t addrlen);
+__declspec(dllexport)
+ssize_t rsendmsg(int socket, const struct msghdr *msg, int flags);
+__declspec(dllexport)
+ssize_t rread(int socket, void *buf, size_t count);
+__declspec(dllexport)
+ssize_t rreadv(int socket, const struct iovec *iov, int iovcnt);
+__declspec(dllexport)
+ssize_t rwrite(int socket, const void *buf, size_t count);
+__declspec(dllexport)
+ssize_t rwritev(int socket, const struct iovec *iov, int iovcnt);
+__declspec(dllexport)
+int rpoll(struct pollfd *fds, nfds_t nfds, int timeout);
+__declspec(dllexport)
+int rselect(int nfds, fd_set *readfds, fd_set *writefds,
+     fd_set *exceptfds, struct timeval *timeout);
+__declspec(dllexport)
+int rgetpeername(int socket, struct sockaddr *addr, socklen_t *addrlen);
+__declspec(dllexport)
+int rgetsockname(int socket, struct sockaddr *addr, socklen_t *addrlen);
+
+#ifndef SOL_RDMA
+#define SOL_RDMA 0x10000
+enum {
+ RDMA_SQSIZE,
+ RDMA_RQSIZE,
+ RDMA_INLINE
+};
+#endif
+
+__declspec(dllexport)
+int rsetsockopt(int socket, int level, int optname,
+  const void *optval, socklen_t optlen);
+__declspec(dllexport)
+int rgetsockopt(int socket, int level, int optname,
+  void *optval, socklen_t *optlen);
+__declspec(dllexport)
+int rfcntl(int socket, int cmd, ... /* arg */ );
+__declspec(dllexport)
+int rioctlsocket(int socket, long cmd, u_long* argp);
+
+int rsGetStatus ( __out LPRS_STATUS *lppStatusBuffer );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RSOCKET_H */
Index: ulp/librdmacm/include/rdma/rwinsock.h
===================================================================
--- ulp/librdmacm/include/rdma/rwinsock.h (revision 0)
+++ ulp/librdmacm/include/rdma/rwinsock.h (working copy)
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+
+#if !defined(RWINSOCK_H)
+#define RWINSOCK_H
+
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const GUID rsProviderGuid = { //D478E78B-A803-4a25-A5E4-83BFB7EAF4A7
+ 0xd478e78b,
+ 0xa803,
+ 0x4a25,
+ { 0xa5, 0xe4, 0x83, 0xbf, 0xb7, 0xea, 0xf4, 0xa7 }
+};
+
+static WSAPROTOCOL_INFO rsProtocolInfo = {0};
+
+/**
+ * \brief   Get RSockets Winsock provider's WSAPROTOCOL_INFO structure.
+ *
+ * \param lpStatus Pointer to status variable to be returned. Can be NULL if not required.
+ *
+ * \return   Pointer to the RSockets Winsock provider's WSAPROTOCOL_INFO structure
+ *     (NULL if the provider is not found, in which case *lpStatus stays ERROR_SUCCESS, or if an error occurred).
+ */
+static LPWSAPROTOCOL_INFO rsGetProtocolInfo (LPINT lpStatus)
+{
+ int                Status   = ERROR_SUCCESS;
+ LPWSAPROTOCOL_INFO lpProtocolBuffer = NULL;
+ LPWSAPROTOCOL_INFO lpReturn   = NULL; 
+ DWORD              BufferLength  = 0;
+ DWORD              i;
+
+ WSAEnumProtocols (NULL, NULL, &BufferLength); // Should always return the BufferLength
+
+ if (NULL == (lpProtocolBuffer = (LPWSAPROTOCOL_INFO)malloc (BufferLength)))
+ {
+  Status = ERROR_NOT_ENOUGH_MEMORY;
+  goto cleanup;
+ }
+
+ if (SOCKET_ERROR == WSAEnumProtocols (NULL, lpProtocolBuffer, &BufferLength))
+ {
+  Status = WSAGetLastError();
+  goto cleanup;
+ }
+ 
+ for (i = 0; i < BufferLength / sizeof(*lpProtocolBuffer); i++)
+ {
+  if (0 == memcmp (&lpProtocolBuffer[i].ProviderId, &rsProviderGuid, sizeof(rsProviderGuid)))
+  {
+   rsProtocolInfo = lpProtocolBuffer[i];
+   lpReturn  = &rsProtocolInfo;
+   break;
+  }
+ }
+
+ cleanup:
+ if (lpProtocolBuffer)
+  free (lpProtocolBuffer);
+   
+ if (lpStatus)
+  *lpStatus = Status;
+
+ return lpReturn;
+}
+
+#ifndef SOL_RDMA
+#define SOL_RDMA 0x10000 // for getsockopt + setsockopt
+enum {
+ RDMA_SQSIZE,
+ RDMA_RQSIZE,
+ RDMA_INLINE
+};
+#endif /* SOL_RDMA */
+
+typedef struct {
+ struct sockaddr src_addr;
+ struct sockaddr dst_addr;
+ char   state[16];
+} RS_STATUS, *LPRS_STATUS;
+
+typedef struct {
+ char szLine[128];
+} RS_TRACE_OUT, *LPRS_TRACE_OUT;
+
+/*
+ * IOCTL code definition to get RS_STATUS information via WSAIoctl():
+ */
+#define IOC_VENDOR_OFA  0x0FA0000
+#define SIO_RS_GET_STATUS (IOC_OUT | IOC_VENDOR | IOC_VENDOR_OFA | 1)
+#define SIO_RS_GET_TRACE (IOC_OUT | IOC_VENDOR | IOC_VENDOR_OFA | 2)
+
+#endif /* RWINSOCK_H */
Index: ulp/librdmacm/RSocket.txt
===================================================================
--- ulp/librdmacm/RSocket.txt (revision 0)
+++ ulp/librdmacm/RSocket.txt (working copy)
@@ -0,0 +1,105 @@
+
+ Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+
+ This software is available to you under the BSD license below:
+
+     Redistribution and use in source and binary forms, with or
+     without modification, are permitted provided that the following
+     conditions are met:
+
+      - Redistributions of source code must retain the above
+        copyright notice, this list of conditions and the following
+        disclaimer.
+
+      - Redistributions in binary form must reproduce the above
+        copyright notice, this list of conditions and the following
+        disclaimer in the documentation and/or other materials
+        provided with the distribution.
+
+      - Neither the name Oce Printing Systems GmbH nor the names
+        of the authors may be used to endorse or promote products
+        derived from this software without specific prior written
+        permission.
+
+ THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ OF SUCH DAMAGE. 
+
+#############################################################################
+
+
+GENERAL
+=======
+The RSockets protocol provides socket-based RDMA communication between
+Windows nodes (similar to WSD or ND) as well as between Windows and Linux nodes.
+
+The RSockets functionality is contained within the librdmacm.dll which now
+is capable of acting as a Winsock base transport provider.
+
+For now the librdmacm.dll still exports the direct rsocket calls
+(rsocket, rbind, rrecv etc.) as well. So application developers can
+alternatively circumvent Winsock and call those functions directly
+(by including rsocket.h instead of rwinsock.h).
+Aside from a slight performance gain, this might be useful in case of
+quickly porting a Linux app to Windows(?).
+But beware of using both access methods concurrently in the same application!
+
+
+INSTALLATION
+============
+Installation of that Winsock provider (i.e. registration of the
+librdmacm.dll in the Windows Registry) can be done with the rsinstall
+tool (see tools/rsinstall) which works similarly to wsdinstall for the
+WSD Winsock provider.
+For a list of available options, just call rsinstall.exe without
+parameters. (Note that rsinstall.exe requires administrative privileges
+to run properly!)
+
+
+USAGE
+=====
+Usage of the RSocket provider at application level is quite simple,
+as demonstrated by the rstream tool (see ulp/librdmacm/examples/rstream)
+which is also a port from Linux OFED. In contrast to a 'normal' Winsock
+application there are just two essential differences:
+
+- The ulp\librdmacm\include\rdma\rwinsock.h header has to be included
+  instead of winsock2.h. (Nonetheless it's still necessary to call
+  WSAStartup() and WSACleanup() during initialization and shutdown of
+  your application, respectively).
+
+- Instead of calling socket() for socket creation, a WSASocket() has to
+  be performed with a WSAPROTOCOL_INFO structure selecting the appropriate
+  Winsock provider. For convenience there is a little helper function
+  rsGetProtocolInfo() implemented in rwinsock.h which provides this structure
+  based on the provider's GUID (static variable 'rsProviderGuid' which is
+  also contained in rwinsock.h).
+
+
+RESTRICTIONS
+============
+Generally there are the same restrictions for socket applications as
+described in the Linux RSockets man page (e.g. no UDP / SOCK_DGRAM).
+Moreover the following restrictions apply:
+
+- The MSG_DONTWAIT flag is not supported when calling WSASocket().
+  Instead, to configure a socket for non-blocking operation,
+  ioctlsocket(FIONBIO) can be used.
+
+- Overlapped operation is currently not supported, i.e. a WSASocket() with
+  the WSA_FLAG_OVERLAPPED flag set will be rejected with a WSAEINVAL error.
+
+- The WSAPoll() function (in Windows Vista and later) is not supported,
+  hence the select() function has to be used instead.
+
+- IPv6 should work, but has not been tested yet.
Index: ulp/librdmacm/src/addrinfo.cpp
===================================================================
--- ulp/librdmacm/src/addrinfo.cpp (revision 3419)
+++ ulp/librdmacm/src/addrinfo.cpp (working copy)
@@ -1,11 +1,8 @@
 /*
  * Copyright (c) 2010 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
  *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
+ * This software is available to you under the BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
@@ -20,14 +17,24 @@
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
  */
 
 #if HAVE_CONFIG_H
@@ -47,7 +54,7 @@
 {
  WSADATA wsadata;
 
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
  if (addr_ref++) {
   goto out;
  }
@@ -57,16 +64,16 @@
  }
 
 out:
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
 }
 
 static void ucma_shutdown(void)
 {
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
  if (--addr_ref == 0) {
   WSACleanup();
  }
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
 }
 
 static void ucma_convert_to_ai(struct addrinfo *ai, struct rdma_addrinfo *rai)
Index: ulp/librdmacm/src/cma.cpp
===================================================================
--- ulp/librdmacm/src/cma.cpp (revision 3419)
+++ ulp/librdmacm/src/cma.cpp (working copy)
@@ -1,8 +1,8 @@
 /*
  * Copyright (c) 2005-2009 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
  *
- * This software is available to you under the OpenIB.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
@@ -17,21 +17,31 @@
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
  */
 
 #include <windows.h>
 #include <winsock2.h>
+#include "openib_osd.h"
 #include <stdio.h>
 #include <iphlpapi.h>
-
 #include <rdma/rdma_cma.h>
 #include <rdma/rdma_verbs.h>
 #include <infiniband/verbs.h>
@@ -88,6 +98,7 @@
  int     port_cnt;
  uint8_t    max_initiator_depth;
  uint8_t    max_responder_resources;
+ int     max_qpsize;
 };
 
 struct cma_event {
@@ -100,6 +111,59 @@
 static int cma_dev_cnt;
 static DWORD ref;
 
+/*
+ * Translate an errno-style error code into the closest Winsock error code
+ * and store it with WSASetLastError().
+ *
+ * err: errno-style code; 0 clears the last Winsock error.
+ *
+ * Codes that have no entry below (e.g. EBADMSG, EIDRM, ENODATA, ENOLINK,
+ * ENOMSG, ENOSR, ENOSTR, EOTHER, EOVERFLOW, EOWNERDEAD, ETIME, ETXTBSY)
+ * still reset the last error to 0.
+ */
+void wsa_setlasterror(int err)
+{
+ int wsa_err = 0;
+
+ switch (err) {
+  case 0:             break;
+  /*
+   * Classic CRT errno values. Without these entries a failing call that
+   * reports e.g. EINVAL would leave the Winsock last error at 0.
+   */
+  case EINTR:    {wsa_err = WSAEINTR;   break;}
+  case EBADF:    {wsa_err = WSAEBADF;   break;}
+  case EACCES:   {wsa_err = WSAEACCES;   break;}
+  case EFAULT:   {wsa_err = WSAEFAULT;   break;}
+  case EINVAL:   {wsa_err = WSAEINVAL;   break;}
+  case EMFILE:   {wsa_err = WSAEMFILE;   break;}
+  /* Socket-related errno values. */
+  case EADDRINUSE:  {wsa_err = WSAEADDRINUSE;  break;}
+  case EADDRNOTAVAIL:  {wsa_err = WSAEADDRNOTAVAIL; break;}
+  case EAFNOSUPPORT:  {wsa_err = WSAEAFNOSUPPORT;  break;}
+  case EALREADY:   {wsa_err = WSAEALREADY;   break;}
+  case ECANCELED:   {wsa_err = WSAECANCELLED;  break;}
+  case ECONNABORTED:  {wsa_err = WSAECONNABORTED;  break;}
+  case ECONNREFUSED:  {wsa_err = WSAECONNREFUSED;  break;}
+  case ECONNRESET:  {wsa_err = WSAECONNRESET;  break;}
+  case EDESTADDRREQ:  {wsa_err = WSAEDESTADDRREQ;  break;}
+  case EHOSTUNREACH:  {wsa_err = WSAEHOSTUNREACH;  break;}
+  case EINPROGRESS:  {wsa_err = WSAEINPROGRESS;  break;}
+  case EISCONN:   {wsa_err = WSAEISCONN;   break;}
+  case ELOOP:    {wsa_err = WSAELOOP;   break;}
+  case EMSGSIZE:   {wsa_err = WSAEMSGSIZE;   break;}
+  case ENETDOWN:   {wsa_err = WSAENETDOWN;   break;}
+  case ENETRESET:   {wsa_err = WSAENETRESET;  break;}
+  case ENETUNREACH:  {wsa_err = WSAENETUNREACH;  break;}
+  case ENOBUFS:   {wsa_err = WSAENOBUFS;   break;}
+  case ENOPROTOOPT:  {wsa_err = WSAENOPROTOOPT;  break;}
+  case ENOTCONN:   {wsa_err = WSAENOTCONN;   break;}
+  case ENOTRECOVERABLE: {wsa_err = WSANO_RECOVERY;  break;}
+  case ENOTSOCK:   {wsa_err = WSAENOTSOCK;   break;}
+  case ENOTSUP:   {wsa_err = WSAEINVAL;   break;} /* no exact Winsock equivalent */
+  case EOPNOTSUPP:  {wsa_err = WSAEOPNOTSUPP;  break;}
+  case EPROTO:   {wsa_err = WSAENOPROTOOPT;  break;} /* no exact Winsock equivalent */
+  case EPROTONOSUPPORT: {wsa_err = WSAEPROTONOSUPPORT; break;}
+  case EPROTOTYPE:  {wsa_err = WSAEPROTOTYPE;  break;}
+  case ETIMEDOUT:   {wsa_err = WSAETIMEDOUT;  break;}
+  case ENOMEM:   {wsa_err = WSA_NOT_ENOUGH_MEMORY; break;}
+  case EAGAIN:
+  case EWOULDBLOCK:  {wsa_err = WSAEWOULDBLOCK;  break;}
+  default: ;
+ }
+ WSASetLastError(wsa_err);
+}
+
 static int ucma_acquire(void)
 {
  struct ibv_device **dev_list = NULL;
@@ -107,7 +171,7 @@
  struct ibv_device_attr attr;
  int i, ret, dev_cnt;
 
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
  if (ref++) {
   goto out;
  }
@@ -153,13 +217,14 @@
   }
 
   cma_dev->port_cnt = attr.phys_port_cnt;
+  cma_dev->max_qpsize = attr.max_qp_wr;
   cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
   cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
  }
  ibv_free_device_list(dev_list);
  cma_dev_cnt = dev_cnt;
 out:
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
  return 0;
 
 err4:
@@ -174,7 +239,7 @@
  ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
 err1:
  ref--;
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
  return ret;
 }
 
@@ -182,7 +247,7 @@
 {
  int i;
 
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
  if (--ref == 0) {
   for (i = 0; i < cma_dev_cnt; i++) {
    ibv_dealloc_pd(cma_dev_array[i].pd);
@@ -192,7 +257,7 @@
   cma_dev_cnt = 0;
   ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
  }
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
 }
 
 __declspec(dllexport)
@@ -329,9 +394,9 @@
 
  id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
 
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
  id_priv->state = cma_destroying;
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
 
  if (id->ps == RDMA_PS_TCP) {
   id->ep.connect->CancelOverlappedRequests();
@@ -502,13 +567,20 @@
  return ret;
 }
 
-static int ucma_complete(struct cma_id_private *id_priv)
+static int ucma_complete_priv(struct cma_id_private *id_priv)
 {
+ return ucma_complete(&id_priv->id);
+}
+
+int ucma_complete(struct rdma_cm_id *id)
+{
+ struct cma_id_private *id_priv;
  int ret;
 
- if (!id_priv->sync) {
+ id_priv = container_of(id, struct cma_id_private, id);
+
+ if (!id_priv->sync)
   return 0;
- }
 
  if (id_priv->id.event) {
   rdma_ack_cm_event(id_priv->id.event);
@@ -516,11 +588,18 @@
  }
 
  ret = rdma_get_cm_event(id_priv->id.channel, &id_priv->id.event);
- if (ret) {
+ if (ret)
   return ret;
+
+ if (id_priv->id.event->status) {
+  if (id_priv->id.event->event == RDMA_CM_EVENT_REJECTED)
+   ret = ERR(ECONNREFUSED);
+  else if (id_priv->id.event->status < 0)
+   ret = ERR(-id_priv->id.event->status);
+  else
+   ret = ERR(-id_priv->id.event->status);
  }
-
- return id_priv->id.event->status;
+ return ret;
 }
 
 __declspec(dllexport)
@@ -561,12 +640,17 @@
   }
  }
 
- RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr));
+ if (((struct sockaddr_in *)&id->route.addr.dst_addr)->sin_port == 0)
+ {
+  // port = 0 => Assume that entire dst_addr hasn't been set yet
+  RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr));
+ }
+
  id_priv->state = cma_addr_resolve;
 
  id_priv->refcnt++;
  CompEntryPost(&id->comp_entry);
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
 }
 
 __declspec(dllexport)
@@ -594,7 +678,7 @@
 
  id_priv->refcnt++;
  CompEntryPost(&id->comp_entry);
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
 }
 
 static int ucma_modify_qp_init(struct cma_id_private *id_priv, struct ibv_qp *qp)
@@ -648,10 +732,10 @@
  if (id->recv_cq_channel)
   ibv_destroy_comp_channel(id->recv_cq_channel);
 
- if (id->send_cq)
+ if (id->send_cq && id->send_cq != id->recv_cq)
   ibv_destroy_cq(id->send_cq);
 
- if (id->send_cq_channel)
+ if (id->send_cq_channel && id->send_cq_channel != id->recv_cq_channel)
   ibv_destroy_comp_channel(id->send_cq_channel);
 }
 
@@ -832,7 +916,7 @@
   return ibvw_wv_errno(hr);
  }
 
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
 }
 
 static int ucma_get_request(struct cma_id_private *listen, int index)
@@ -841,7 +925,7 @@
  HRESULT hr;
  int ret;
 
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
  if (listen->state != cma_listening) {
   ret = ibvw_wv_errno(WV_INVALID_PARAMETER);
   goto err1;
@@ -873,14 +957,14 @@
   id_priv->refcnt--;
   goto err2;
  }
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
 
  return 0;
 
 err2:
  InterlockedDecrement(&listen->refcnt);
 err1:
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
  if (id_priv != NULL) {
   rdma_destroy_id(&id_priv->id);
  }
@@ -1011,7 +1095,7 @@
   return ibvw_wv_errno(hr);
  }
 
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
 }
 
 __declspec(dllexport)
@@ -1053,7 +1137,7 @@
   return ibvw_wv_errno(hr);
  }
 
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
 }
 
 __declspec(dllexport)
@@ -1179,7 +1263,7 @@
 
  id_priv = event->id_priv;
 
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
  switch (id_priv->state) {
  case cma_get_request:
   listen = (struct cma_id_private *) id_priv->id.context;
@@ -1190,7 +1274,7 @@
   }
 
   listen->req_list[id_priv->index] = NULL;
-  LeaveCriticalSection(&lock);
+  fastlock_release(&lock);
   return ucma_process_conn_req(event);
  case cma_addr_resolve:
   event->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1216,7 +1300,7 @@
   InterlockedDecrement(&id_priv->refcnt);
   ret = ECANCELED;
  }
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
 
  return ret;
 }
@@ -1240,6 +1324,8 @@
 
   ret = CompChannelPoll(&channel->channel, &entry);
   if (ret) {
+   if (ret == WAIT_TIMEOUT)
+    ret = ERR(EWOULDBLOCK);
    delete evt;
    return ret;
   }
@@ -1473,3 +1559,11 @@
  }
  return -1;
 }
+
+/*
+ * Return the max_qpsize value recorded for the device that `id` is
+ * associated with (taken from the private structure embedding `id`).
+ */
+int ucma_max_qpsize(struct rdma_cm_id *id)
+{
+ struct cma_id_private *priv = container_of(id, struct cma_id_private, id);
+
+ return priv->cma_dev->max_qpsize;
+}
Index: ulp/librdmacm/src/cma.h
===================================================================
--- ulp/librdmacm/src/cma.h (revision 3419)
+++ ulp/librdmacm/src/cma.h (working copy)
@@ -1,9 +1,9 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2008-2009 Intel Corp.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
  *
- * This software is available to you under the OpenIB.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
@@ -18,24 +18,60 @@
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
  */
 
 #ifndef CMA_H
 #define CMA_H
 
-extern CRITICAL_SECTION lock;
-extern HANDLE heap;
+#include <complib/cl_spinlock.h>
+#include <rdma/rdma_verbs.h>
 
+/*
+ * Fast synchronization for low contention locking.
+ */
+#define fastlock_t    cl_spinlock_t
+#define fastlock_init(lock)  cl_spinlock_init(lock)
+#define fastlock_destroy(lock) cl_spinlock_destroy(lock)
+#define fastlock_acquire(lock) cl_spinlock_acquire(lock)
+#define fastlock_release(lock) cl_spinlock_release(lock)
+
+extern fastlock_t lock;
+extern HANDLE     heap;
+
+#define TRACE(fmt, ...) Trace(__FUNCTION__": " fmt "\n", __VA_ARGS__)
+
+void Trace(const char* fmt, ...);
 void ucma_cleanup();
+int ucma_max_qpsize(struct rdma_cm_id *id);
+int ucma_complete(struct rdma_cm_id *id);
+void wsa_setlasterror(int err);
 
+/*
+ * Record `err` through rdma_seterrno() and, when that call yields a
+ * non-zero result, also hand `err` to wsa_setlasterror().
+ * Returns whatever rdma_seterrno() returned.
+ */
+static __inline int ERR(int err)
+{
+ const int status = rdma_seterrno(err);
+
+ if (status != 0)
+  wsa_setlasterror(err);
+
+ return status;
+}
+ 
 __inline void* __cdecl operator new(size_t size)
 {
  return HeapAlloc(heap, 0, size);
@@ -46,4 +82,13 @@
  HeapFree(heap, 0, pObj);
 }
 
+#ifndef SYSCONFDIR
+#define SYSCONFDIR "/etc"
+#endif
+#ifndef RDMADIR
+#define RDMADIR "rdma"
+#endif
+#define RDMA_CONF_DIR  SYSCONFDIR "/" RDMADIR
+#define RS_CONF_DIR RDMA_CONF_DIR "/rsocket"
+
 #endif /* CMA_H */
Index: ulp/librdmacm/src/cma_exports.src
===================================================================
--- ulp/librdmacm/src/cma_exports.src (revision 3419)
+++ ulp/librdmacm/src/cma_exports.src (working copy)
@@ -30,4 +30,30 @@
 rdma_freeaddrinfo
 rdma_get_request
 rdmaw_wsa_errno
+rsocket
+rbind
+rlisten
+raccept
+rconnect
+rshutdown
+rclose
+rrecv
+rrecvfrom
+rrecvmsg
+rsend
+rsendto
+rsendmsg
+rread
+rreadv
+rwrite
+rwritev
+rgetpeername
+rgetsockname
+rsetsockopt
+rgetsockopt
+rfcntl
+rioctlsocket
+rselect
+rpoll
+WSPStartup
 #endif
Index: ulp/librdmacm/src/cma_main.cpp
===================================================================
--- ulp/librdmacm/src/cma_main.cpp (revision 3419)
+++ ulp/librdmacm/src/cma_main.cpp (working copy)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
  *
- * This software is available to you under the OpenIB.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
@@ -17,22 +17,1173 @@
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
  */
-
-#include <windows.h>
+ 
+#include "openib_osd.h" 
 #include "cma.h"
+#include "rdma/rwinsock.h"
+#include "rdma/rsocket.h"
+#include <complib/cl_debug.h>
+#include "../../../etc/user/gtod.c" // getimeofday()
 
-CRITICAL_SECTION lock;
-HANDLE heap;
+#include <strsafe.h>
+#include <stdio.h>
+#include <stdlib.h>
 
+#define MAX_TRACE_INDEX 4096 // Has to be a power of 2
+
+/* One slot of the in-memory trace ring buffer filled by Trace(). */
+typedef struct {
+ FILETIME  fTime;      /* system time taken when the entry was recorded */
+ char*   pszFormat;  /* format string pointer as passed to Trace(); not copied */
+ ULONG64  qwArgs[4];  /* the four variadic values captured by Trace() */
+} RS_TRACE, *LPRS_TRACE;
+
+/*
+ * Globals used across files
+ */
+CRITICAL_SECTION    gCriticalSection;   // Critical section to protect startup/cleanup
+WSPUPCALLTABLE      gMainUpCallTable;   // Winsock upcall table
+WSAPROTOCOL_INFOW gProtocolInfo;
+BOOL    gDetached = FALSE;  // Indicates if process is detaching from DLL
+fastlock_t   lock;
+fastlock_t   mut;
+HANDLE    heap;
+
+/*
+ * Globals local to this file
+ */
+static const WCHAR *Description    = L"OpenFabrics RSockets for InfiniBand";
+static int           gStartupCount = 0;      // Global startup count (for every WSPStartup call)
+static uint32_t   iTrace        = 0;
+static RS_TRACE  rsTrace[MAX_TRACE_INDEX] = {0};
+static fastlock_t     TraceLock;
+
+/*
+ * Append one entry to the in-memory trace ring buffer (rsTrace).
+ *
+ * fmt : printf-style format string. Only the pointer is stored, so the
+ *       string has to stay valid for as long as the entry may be read
+ *       (the TRACE() macro builds it from string literals).
+ * ... : values referenced by fmt; four 64-bit values are captured.
+ *
+ * The write position wraps with a bit mask, which is why MAX_TRACE_INDEX
+ * has to be a power of 2.
+ *
+ * NOTE(review): four values are always fetched with va_arg(), even when
+ * the caller passed fewer or narrower arguments. This appears to rely on
+ * the platform's variadic calling convention -- confirm before porting.
+ */
+void Trace (const char* fmt, ...)
+{
+ const size_t nArgs = sizeof(rsTrace[0].qwArgs) / sizeof(rsTrace[0].qwArgs[0]);
+ va_list  argptr;
+ size_t   i;
+
+ fastlock_acquire(&TraceLock);
+
+ GetSystemTimeAsFileTime(&rsTrace[iTrace].fTime);
+ rsTrace[iTrace].pszFormat = (char *)fmt;
+
+ va_start(argptr, fmt);
+ for (i = 0; i < nArgs; i++)
+  rsTrace[iTrace].qwArgs[i] = va_arg(argptr, uint64_t);
+ va_end(argptr);
+
+ /* Advance to the next slot, wrapping around at the end of the ring. */
+ iTrace = (iTrace + 1) & (MAX_TRACE_INDEX-1);
+
+ fastlock_release(&TraceLock);
+}
+
+/**
+ * \brief     Get current RSockets trace information for the calling process
+ *       (by calling librdmacm.dll directly).
+ *
+ * \param lppTraceBuffer Pointer to a buffer with an array of RS_TRACE information entries
+ *       to be allocated and returned. The caller is responsible for
+ *       deallocating that buffer via free() when it is no longer needed.
+ *       The buffer always has room for MAX_TRACE_INDEX entries; the
+ *       recorded lines are stored contiguously, oldest first, in
+ *       entries [0 .. return value - 1] and the rest stays zeroed.
+ *
+ * \return     The number of RS_TRACE entries contained in the trace buffer
+ *       returned by *lppTraceBuffer (0 if lppTraceBuffer is NULL or
+ *       the allocation failed).
+ *
+ * NOTE(review): the stored format string is emitted through "%s", i.e. it
+ * is not expanded with the captured arguments -- confirm whether that is
+ * intended.
+ */
+static uint32_t rsGetTrace ( __out LPRS_TRACE_OUT *lppTraceBuffer )
+{
+ uint32_t i, e, count = 0;
+ struct timeval tvTime;
+
+ if (NULL == lppTraceBuffer)
+  return 0;
+
+ *lppTraceBuffer = (LPRS_TRACE_OUT)malloc(MAX_TRACE_INDEX * sizeof(**lppTraceBuffer));
+ if (NULL == *lppTraceBuffer)
+  return 0;
+
+ memset( *lppTraceBuffer, 0, MAX_TRACE_INDEX * sizeof(**lppTraceBuffer) );
+
+ fastlock_acquire(&TraceLock);
+ /* Walk the ring once, starting at the next write position (oldest slot). */
+ for (
+   i = 0, e = iTrace;
+   i < MAX_TRACE_INDEX;
+   i++, e = (e+1) & (MAX_TRACE_INDEX - 1)
+  )
+ {
+  if (rsTrace[e].pszFormat)
+  {
+   FileTimeToTimeval(&rsTrace[e].fTime, &tvTime);
+   /*
+    * Store at index 'count' (not 'i') so that unused ring slots do
+    * not leave gaps in front of the lines handed to the caller.
+    */
+   sprintf_s(
+           (*lppTraceBuffer)[count].szLine,
+    sizeof((*lppTraceBuffer)[0].szLine),
+    "%010d:%010d %s",
+    tvTime.tv_sec, tvTime.tv_usec,
+    rsTrace[e].pszFormat,
+    rsTrace[e].qwArgs[0], rsTrace[e].qwArgs[1], rsTrace[e].qwArgs[2], rsTrace[e].qwArgs[3]
+   );
+   cl_dbg_out("%s", (*lppTraceBuffer)[count].szLine);
+   count++;
+  }
+ }
+ fastlock_release(&TraceLock);
+
+ return count;
+}
+
+/*
+ * SPI Function Implementation
+ */
+
+/*
+ * Function: WSPCleanup
+ *
+ * Description:
+ *    Decrement the startup count that WSPStartup incremented.
+ *
+ *    lpErrno receives WSANOTINITIALISED when no matching WSPStartup call
+ *    is outstanding.
+ *
+ *    Returns NO_ERROR on success (or when the process is already
+ *    detaching from the DLL), SOCKET_ERROR otherwise.
+ */
+int WSPAPI 
+WSPCleanup(
+        LPINT lpErrno  
+        )
+{
+    int rc = SOCKET_ERROR;
+
+    //
+    // Process is detaching: nothing to do. Return directly so that the
+    // critical section, which has not been entered yet, is not released.
+    //
+    if ( gDetached )
+        return NO_ERROR;
+
+    //
+    // Grab the DLL global critical section
+    //
+    EnterCriticalSection( &gCriticalSection );
+
+    // Verify WSPStartup has been called
+    if ( 0 == gStartupCount ) {
+        *lpErrno = WSANOTINITIALISED;
+        goto cleanup;
+    }
+
+    // Decrement the global entry count
+    gStartupCount--;
+
+TRACE("StartupCount decremented to %d", gStartupCount);
+
+    // Nothing is freed here when the count drops to zero.
+    rc = NO_ERROR;
+
+cleanup:
+    LeaveCriticalSection( &gCriticalSection );
+
+    return rc;
+}
+
+/*
+ * Function: WSPSocket
+ *
+ * Description:
+ *    Creates an rsocket and wraps it in a Winsock socket handle obtained
+ *    through the WPUCreateSocketHandle upcall. The rsocket descriptor is
+ *    stored as the context of that handle.
+ *
+ *    Only af == AF_INET, type == SOCK_STREAM, protocol == IPPROTO_TCP and
+ *    dwFlags == 0 are accepted. lpProtocolInfo and g are not used.
+ *
+ *    Returns the new socket handle, or INVALID_SOCKET with *lpErrno set
+ *    to a Winsock error code.
+ */
+SOCKET WSPAPI 
+WSPSocket(
+        int                 af,
+        int                 type,
+        int                 protocol,
+        LPWSAPROTOCOL_INFOW lpProtocolInfo,
+        GROUP               g,
+        DWORD               dwFlags,
+        LPINT               lpErrno
+        )
+{
+    int     rs        = (int)INVALID_SOCKET;
+    SOCKET  winSocket = INVALID_SOCKET;
+    int     err;
+
+TRACE("af=%d  type=%d  protocol=%d  flags=0x%X", af, type, protocol, dwFlags);
+
+    if (af != AF_INET) {
+        *lpErrno = WSAEAFNOSUPPORT;
+        return INVALID_SOCKET;
+    }
+
+    if (type != SOCK_STREAM) {
+        *lpErrno = WSAEPROTOTYPE;
+        return INVALID_SOCKET;
+    }
+
+    if (protocol != IPPROTO_TCP) {
+        *lpErrno = WSAEPROTONOSUPPORT;
+        return INVALID_SOCKET;
+    }
+
+    if (dwFlags != 0) {
+        *lpErrno = WSAEINVAL;
+        return INVALID_SOCKET;
+    }
+
+    //
+    // Create the rsocket
+    //
+    rs = rsocket( af, type, protocol );
+    if ( INVALID_SOCKET == rs ) {
+        *lpErrno = WSAGetLastError();
+        return INVALID_SOCKET;
+    }
+
+    winSocket = gMainUpCallTable.lpWPUCreateSocketHandle(
+            gProtocolInfo.dwCatalogEntryId,
+            rs, // __in  DWORD_PTR dwContext
+            lpErrno
+    );
+    if ( INVALID_SOCKET == winSocket ) {
+        //
+        // Keep the error reported by the upcall: closing the rsocket may
+        // change the thread's last error, which must not be reported.
+        //
+        err = *lpErrno;
+        rclose( rs );
+        *lpErrno = err;
+        return INVALID_SOCKET;
+    }
+
+TRACE("  New socket handle = %d", winSocket);
+
+    return winSocket;
+}
+
+/*
+ * WSPBind: bind the rsocket behind Winsock handle `s` to a local address.
+ *
+ * s       : socket handle; its context value is passed to rbind().
+ * name    : local address to bind to.
+ * namelen : size of `name` in bytes.
+ * lpErrno : receives the Winsock error code on failure.
+ *
+ * Returns SOCKET_ERROR if the handle context lookup fails, otherwise the
+ * result of rbind().
+ */
+int WSPAPI
+WSPBind(
+ __in   SOCKET s,
+ __in   const struct sockaddr* name,
+ __in   int namelen,
+ __out  LPINT lpErrno
+)
+{
+ struct sockaddr_in * name_in  = (struct sockaddr_in *)name;
+ struct sockaddr_in6* name_in6 = (struct sockaddr_in6*)name;
+
+ DWORD_PTR rs = INVALID_SOCKET;
+ int      ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ ret = rbind((int)rs, name, namelen);
+ if (SOCKET_ERROR == ret)
+  *lpErrno = WSAGetLastError();
+
+TRACE("Socket = %d:", s);
+ if (AF_INET == name->sa_family) {
+  TRACE("  Addr = %d.%d.%d.%d",
+   name_in->sin_addr.S_un.S_un_b.s_b1,
+   name_in->sin_addr.S_un.S_un_b.s_b2,
+   name_in->sin_addr.S_un.S_un_b.s_b3,
+   name_in->sin_addr.S_un.S_un_b.s_b4
+  );
+  /* The port is stored in network byte order; convert it for the trace. */
+  TRACE("  Port = %d,   Returning %d", ntohs(name_in->sin_port), ret);
+ } else {
+  /* Every other address family is traced as an IPv6 address. */
+  TRACE("  Addr = %d:%d:%d:%d:%d:%d:%d:%d",
+   name_in6->sin6_addr.u.Word[0],
+   name_in6->sin6_addr.u.Word[1],
+   name_in6->sin6_addr.u.Word[2],
+   name_in6->sin6_addr.u.Word[3],
+   name_in6->sin6_addr.u.Word[4],
+   name_in6->sin6_addr.u.Word[5],
+   name_in6->sin6_addr.u.Word[6],
+   name_in6->sin6_addr.u.Word[7]
+  );
+  TRACE("  Port = %d ,  Returning %d", ntohs(name_in6->sin6_port), ret);
+ }
+ return ret;
+}
+
+/*
+ * WSPListen: forward a listen request for Winsock handle `s` to rlisten().
+ *
+ * The handle's context value is used as the rsocket descriptor. On an
+ * rlisten() failure *lpErrno is set from WSAGetLastError().
+ */
+int WSPAPI
+WSPListen(
+ __in   SOCKET s,
+ __in   int backlog,
+ __out  LPINT lpErrno
+)
+{
+ DWORD_PTR context = INVALID_SOCKET;
+ int       status;
+
+ /* Look up the context value attached to the socket handle. */
+ status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &context, lpErrno);
+ if (status == SOCKET_ERROR)
+  return status;
+
+ status = rlisten((int)context, backlog);
+ if (status == SOCKET_ERROR)
+  *lpErrno = WSAGetLastError();
+
+ TRACE("Socket = %d: Backlog=%d, Returning %d", s, backlog, status);
+
+ return status;
+}
+
+/*
+ * WSPConnect: connect the rsocket behind Winsock handle `s` to `name`.
+ *
+ * s            : socket handle; its context value is passed to rconnect().
+ * name/namelen : remote address and its size in bytes.
+ * lpCalleeData : if not NULL, its length is set to 0 (no callee data).
+ * lpCallerData, lpSQOS, lpGQOS : not used.
+ * lpErrno      : receives the Winsock error code on failure.
+ *
+ * Returns SOCKET_ERROR if the handle context lookup fails, otherwise the
+ * result of rconnect().
+ */
+int WSPAPI
+WSPConnect(
+ __in   SOCKET s,
+ __in   const struct sockaddr* name,
+ __in   int namelen,
+ __in   LPWSABUF lpCallerData,
+ __out  LPWSABUF lpCalleeData,
+ __in   LPQOS lpSQOS,
+ __in   LPQOS lpGQOS,
+ __out  LPINT lpErrno
+)
+{
+ struct sockaddr_in * name_in  = (struct sockaddr_in *)name;
+ struct sockaddr_in6* name_in6 = (struct sockaddr_in6*)name;
+
+ DWORD_PTR rs = INVALID_SOCKET;
+ int      ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ ret = rconnect((int)rs, name, namelen);
+ if (SOCKET_ERROR == ret)
+  *lpErrno = WSAGetLastError();
+
+ if (lpCalleeData)
+  lpCalleeData->len = 0;
+
+TRACE("Socket = %d:", s);
+ if (AF_INET == name->sa_family) {
+  TRACE("  Addr = %d.%d.%d.%d",
+   name_in->sin_addr.S_un.S_un_b.s_b1,
+   name_in->sin_addr.S_un.S_un_b.s_b2,
+   name_in->sin_addr.S_un.S_un_b.s_b3,
+   name_in->sin_addr.S_un.S_un_b.s_b4
+  );
+  /* The port is stored in network byte order; convert it for the trace. */
+  TRACE("  Port = %d,   Returning %d", ntohs(name_in->sin_port), ret);
+ } else {
+  /* Every other address family is traced as an IPv6 address. */
+  TRACE("  Addr = %d:%d:%d:%d:%d:%d:%d:%d",
+   name_in6->sin6_addr.u.Word[0],
+   name_in6->sin6_addr.u.Word[1],
+   name_in6->sin6_addr.u.Word[2],
+   name_in6->sin6_addr.u.Word[3],
+   name_in6->sin6_addr.u.Word[4],
+   name_in6->sin6_addr.u.Word[5],
+   name_in6->sin6_addr.u.Word[6],
+   name_in6->sin6_addr.u.Word[7]
+  );
+  TRACE("  Port = %d ,  Returning %d", ntohs(name_in6->sin6_port), ret);
+ }
+
+ return ret;
+}
+
+/*
+ * WSPShutdown: forward a shutdown request for Winsock handle `s` to
+ * rshutdown().
+ *
+ * The handle's context value is used as the rsocket descriptor. On an
+ * rshutdown() failure *lpErrno is set from WSAGetLastError().
+ */
+int WSPAPI
+WSPShutdown(
+ __in   SOCKET s,
+ __in   int how,
+ __out  LPINT lpErrno
+)
+{
+ DWORD_PTR context = INVALID_SOCKET;
+ int       status;
+
+ /* Look up the context value attached to the socket handle. */
+ status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &context, lpErrno);
+ if (status == SOCKET_ERROR)
+  return status;
+
+ status = rshutdown((int)context, how);
+ if (status == SOCKET_ERROR)
+  *lpErrno = WSAGetLastError();
+
+ TRACE("Socket = %d: how=%d, Returning %d", s, how, status);
+
+ return status;
+}
+
+/*
+ * WSPCloseSocket: close the rsocket behind Winsock handle `s` and then
+ * release the handle itself through the WPUCloseSocketHandle upcall.
+ *
+ * s       : socket handle; its context value is passed to rclose().
+ * lpErrno : receives the Winsock error code on failure.
+ *
+ * Returns SOCKET_ERROR if the context lookup, rclose() or the handle
+ * release fails. The handle is released even when rclose() fails.
+ */
+int WSPAPI
+WSPCloseSocket(
+ __in   SOCKET s,
+ __out  LPINT lpErrno
+)
+{
+ DWORD_PTR rs = INVALID_SOCKET;
+ int       rsRet;
+ int       rsErr = 0;
+ int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ /* Remember an rclose() failure; the upcall below overwrites *lpErrno. */
+ rsRet = rclose((int)rs);
+ if (SOCKET_ERROR == rsRet)
+  rsErr = WSAGetLastError();
+
+ ret = gMainUpCallTable.lpWPUCloseSocketHandle(s, lpErrno);
+
+ /* Report the rclose() failure unless the handle release failed too. */
+ if (SOCKET_ERROR != ret && SOCKET_ERROR == rsRet) {
+  *lpErrno = rsErr;
+  ret      = SOCKET_ERROR;
+ }
+
+ /* 's' is an integer handle, so it must not be formatted with %s. */
+TRACE("Socket handle %d closed", s);
+
+ return ret;
+}
+
+/*
+ * WSPRecv: receive data on Winsock handle `s` into the given buffers by
+ * calling rrecv() once per buffer.
+ *
+ * lpBuffers / dwBufferCount : buffers to fill, in order.
+ * lpNumberOfBytesRecvd      : receives the total number of bytes stored.
+ * lpFlags                   : flags passed to rrecv() (read only here).
+ * lpOverlapped, lpCompletionRoutine, lpThreadId : not used.
+ * lpErrno                   : receives the Winsock error code on failure.
+ *
+ * Receiving stops at the first buffer that is not filled completely, so
+ * the received bytes are always contiguous across the buffer array and a
+ * later rrecv() call cannot block after data has already been obtained.
+ * An error is only reported when no data at all was received; otherwise
+ * the bytes received so far are returned as a success.
+ */
+int WSPAPI
+WSPRecv(
+ __in    SOCKET        s,
+ __inout LPWSABUF       lpBuffers,
+ __in    DWORD        dwBufferCount,
+ __out   LPDWORD        lpNumberOfBytesRecvd,
+ __inout LPDWORD        lpFlags,
+ __in    LPWSAOVERLAPPED      lpOverlapped,
+ __in    LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+ __in    LPWSATHREADID      lpThreadId,
+ __out   LPINT        lpErrno
+)
+{
+ DWORD_PTR rs  = INVALID_SOCKET;
+ DWORD     i;
+ DWORD     dwNumberOfBytesRecvd = 0;
+ int       len = 0;
+ int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ for (i = 0; i < dwBufferCount; i++) {
+  len = (int)rrecv((int)rs, lpBuffers[i].buf, lpBuffers[i].len, *lpFlags);
+  if (len < 0) {
+   /* Data already received must not be dropped because of a later error. */
+   if (0 == dwNumberOfBytesRecvd) {
+    *lpErrno = WSAGetLastError();
+    ret      = SOCKET_ERROR;
+   }
+   break;
+  }
+  if (0 == len)
+   break;
+
+  dwNumberOfBytesRecvd += len;
+
+  /* Short read: no more data available right now, do not touch further buffers. */
+  if ((DWORD)len < lpBuffers[i].len)
+   break;
+ }
+
+ *lpNumberOfBytesRecvd = dwNumberOfBytesRecvd;
+
+ return ret;
+}
+
+/*
+ * WSPSend: send the given buffers on Winsock handle `s` by calling
+ * rsend() once per buffer.
+ *
+ * lpBuffers / dwBufferCount : buffers to send, in order.
+ * lpNumberOfBytesSent       : receives the total number of bytes sent.
+ * dwFlags                   : flags passed to rsend().
+ * lpOverlapped, lpCompletionRoutine, lpThreadId : not used.
+ * lpErrno                   : receives the Winsock error code on failure.
+ *
+ * Sending stops at the first buffer that is not sent completely, because
+ * continuing with the next buffer would leave a hole in the byte stream.
+ * An error is only reported when nothing at all was sent; otherwise the
+ * number of bytes sent so far is returned as a success.
+ */
+int WSPAPI
+WSPSend(
+ __in  SOCKET        s,
+ __in  LPWSABUF        lpBuffers,
+ __in  DWORD         dwBufferCount,
+ __out LPDWORD        lpNumberOfBytesSent,
+ __in  DWORD         dwFlags,
+ __in  LPWSAOVERLAPPED      lpOverlapped,
+ __in  LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+ __in  LPWSATHREADID       lpThreadId,
+ __out LPINT         lpErrno
+)
+{
+ DWORD_PTR rs  = INVALID_SOCKET;
+ DWORD     i;
+ DWORD     dwNumberOfBytesSent = 0;
+ int       len;
+ int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ for (i = 0; i < dwBufferCount; i++) {
+  len = (int)rsend((int)rs, lpBuffers[i].buf, lpBuffers[i].len, dwFlags);
+  if (len < 0) {
+   /* Bytes already sent have to be reported to the caller. */
+   if (0 == dwNumberOfBytesSent) {
+    *lpErrno = WSAGetLastError();
+    ret      = SOCKET_ERROR;
+   }
+   break;
+  }
+
+  dwNumberOfBytesSent += len;
+
+  /* Short send: the rest of this buffer is still pending, so stop here. */
+  if ((DWORD)len < lpBuffers[i].len)
+   break;
+ }
+ *lpNumberOfBytesSent = dwNumberOfBytesSent;
+
+ return ret;
+}
+
+/*
+ * Function: WSPGetSockOpt
+ *
+ * Description:
+ *    Looks up the rsocket stored as context of socket handle s and
+ *    forwards the request to rgetsockopt(). On failure the error code is
+ *    stored in *lpErrno.
+ */
+int WSPAPI
+WSPGetSockOpt(
+ __in     SOCKET s,
+ __in     int level,
+ __in     int optname,
+ __out    char* optval,
+ __inout  LPINT optlen,
+ __out    LPINT lpErrno
+)
+{
+ DWORD_PTR rsock = INVALID_SOCKET;
+ int       rc;
+
+ rc = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rsock, lpErrno);
+ if (SOCKET_ERROR != rc) {
+  rc = rgetsockopt((int)rsock, level, optname, optval, optlen);
+  if (SOCKET_ERROR == rc)
+   *lpErrno = WSAGetLastError();
+ }
+
+ return rc;
+}
+
+/*
+ * Function: WSPSetSockOpt
+ *
+ * Description:
+ *    Looks up the rsocket stored as context of socket handle s and
+ *    forwards the request to rsetsockopt(). On failure the error code is
+ *    stored in *lpErrno.
+ */
+int WSPAPI
+WSPSetSockOpt(
+ __in     SOCKET s,
+ __in     int level,
+ __in     int optname,
+ __in     const char* optval,
+ __in     int optlen,
+ __out    LPINT lpErrno
+)
+{
+ DWORD_PTR rsock = INVALID_SOCKET;
+ int       rc;
+
+ rc = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rsock, lpErrno);
+ if (SOCKET_ERROR != rc) {
+  rc = rsetsockopt((int)rsock, level, optname, optval, optlen);
+  if (SOCKET_ERROR == rc)
+   *lpErrno = WSAGetLastError();
+ }
+
+ return rc;
+}
+
+/* Returns non-zero if fd is among the first fd_count entries of set. */
+static int
+rsSelectContains(const fd_set *set, SOCKET fd)
+{
+ u_int i;
+
+ for (i = 0; i < set->fd_count; i++) {
+  if (set->fd_array[i] == fd)
+   return 1;
+ }
+ return 0;
+}
+
+/*
+ * Appends the rsocket contexts of all socket handles in src (may be NULL)
+ * to dst and raises *maxfd to the largest context seen.
+ * Returns 0, or SOCKET_ERROR with the error code in *lpErrno.
+ */
+static int
+rsSelectImport(const fd_set *src, fd_set *dst, int *maxfd, LPINT lpErrno)
+{
+ u_int     i;
+ DWORD_PTR rs;
+
+ for (i = 0; src && i < src->fd_count; i++) {
+  if (!src->fd_array[i])
+   continue;
+
+  /* The caller's set may hold more entries than our fd_set can take */
+  if (dst->fd_count >= FD_SETSIZE) {
+   *lpErrno = WSAEINVAL;
+   return SOCKET_ERROR;
+  }
+
+  rs = INVALID_SOCKET;
+  if (SOCKET_ERROR == gMainUpCallTable.lpWPUQuerySocketHandleContext(
+       src->fd_array[i],
+      &rs,
+       lpErrno
+      ))
+   return SOCKET_ERROR;
+
+  dst->fd_array[dst->fd_count++] = (SOCKET)rs;
+  if ((int)rs > *maxfd)
+   *maxfd = (int)rs;
+ }
+ return 0;
+}
+
+/*
+ * Reduces user (may be NULL) to those socket handles whose rsocket context
+ * is contained in ready. The remaining handles are packed at the front of
+ * the array. Returns the number of handles kept.
+ */
+static int
+rsSelectExport(fd_set *user, const fd_set *ready)
+{
+ u_int     i;
+ u_int     kept = 0;
+ int       err;
+ DWORD_PTR rs;
+
+ if (!user)
+  return 0;
+
+ for (i = 0; i < user->fd_count; i++) {
+  if (!user->fd_array[i])
+   continue;
+
+  rs = INVALID_SOCKET;
+  if (SOCKET_ERROR == gMainUpCallTable.lpWPUQuerySocketHandleContext(
+       user->fd_array[i],
+      &rs,
+      &err
+      ))
+   continue;
+
+  if (rsSelectContains(ready, (SOCKET)rs))
+   user->fd_array[kept++] = user->fd_array[i];
+ }
+ user->fd_count = kept;
+
+ return (int)kept;
+}
+
+/*
+ * Function: WSPSelect
+ *
+ * Description:
+ *    Translates the socket handles of the given sets into their rsocket
+ *    contexts, calls rselect() on them and reduces the caller's sets to
+ *    the handles reported ready. The nfds argument is ignored; the value
+ *    passed to rselect() is derived from the rsocket contexts.
+ *
+ *    NOTE(review): assumes that rselect() leaves the rsockets that are
+ *    ready in the fd_sets passed to it - confirm against rselect().
+ *
+ * Returns:
+ *    Number of ready handles in all sets, 0 on timeout, or SOCKET_ERROR
+ *    with the error code in *lpErrno.
+ */
+int WSPAPI
+WSPSelect(
+ __in     int nfds,
+ __inout  fd_set* readfds,
+ __inout  fd_set* writefds,
+ __inout  fd_set* exceptfds,
+ __in     const struct timeval* timeout,
+ __out    LPINT lpErrno
+)
+{
+ int    ret;
+ int    maxfd = 1;
+ fd_set rreadfds, rwritefds, rexceptfds;
+ 
+ UNREFERENCED_PARAMETER(nfds);
+
+ FD_ZERO(&rreadfds);
+ FD_ZERO(&rwritefds);
+ FD_ZERO(&rexceptfds);
+ 
+ if (SOCKET_ERROR == rsSelectImport(readfds,   &rreadfds,   &maxfd, lpErrno) ||
+     SOCKET_ERROR == rsSelectImport(writefds,  &rwritefds,  &maxfd, lpErrno) ||
+     SOCKET_ERROR == rsSelectImport(exceptfds, &rexceptfds, &maxfd, lpErrno))
+  return SOCKET_ERROR;
+ 
+ ret = rselect(
+   maxfd + 1, // Max. valid rsocket descriptor + 1
+   readfds   ? &rreadfds   : NULL,
+   writefds  ? &rwritefds  : NULL,
+   exceptfds ? &rexceptfds : NULL,
+   (struct timeval*)timeout
+  );
+ if (SOCKET_ERROR == ret) {
+  *lpErrno = WSAGetLastError();
+  return ret;
+ }
+ 
+ /* Timeout: nothing is ready, so the caller gets empty sets */
+ if (0 == ret) {
+  if (readfds)
+   readfds->fd_count = 0;
+  if (writefds)
+   writefds->fd_count = 0;
+  if (exceptfds)
+   exceptfds->fd_count = 0;
+  return 0;
+ }
+ 
+ ret  = rsSelectExport(readfds,   &rreadfds);
+ ret += rsSelectExport(writefds,  &rwritefds);
+ ret += rsSelectExport(exceptfds, &rexceptfds);
+ 
+ return ret;
+}
+
+/*
+ * Function: WSPIoctl
+ *
+ * Description:
+ *    Handles the rsocket specific control codes SIO_RS_GET_STATUS and
+ *    SIO_RS_GET_TRACE, whose result is copied to lpvOutBuffer, and passes
+ *    FIONBIO, FIONREAD and SIOCATMARK on to rioctlsocket() using
+ *    lpvInBuffer as argument. Any other control code fails with WSAEINVAL.
+ *    lpOverlapped, lpCompletionRoutine and lpThreadId are not used.
+ *
+ * Returns:
+ *    0 on success, SOCKET_ERROR with the error code in *lpErrno otherwise.
+ *    If the output buffer is too small for the status/trace data, as much
+ *    as fits is copied and the call fails with WSA_IO_INCOMPLETE.
+ */
+int WSPAPI
+WSPIoctl(
+ __in  SOCKET s,
+ __in  DWORD dwIoControlCode,
+ __in  LPVOID lpvInBuffer,
+ __in  DWORD cbInBuffer,
+ __out LPVOID lpvOutBuffer,
+ __in  DWORD cbOutBuffer,
+ __out LPDWORD lpcbBytesReturned,
+ __in  LPWSAOVERLAPPED lpOverlapped,
+ __in  LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+ __in  LPWSATHREADID lpThreadId,
+ __out LPINT lpErrno
+)
+{
+ int      ret = 0;
+ DWORD_PTR rs = INVALID_SOCKET;
+ DWORD     dwCount;
+ LPVOID    lpResultBuffer = NULL;
+ 
+ *lpcbBytesReturned = 0;
+ 
+ ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ switch (dwIoControlCode) {
+ case SIO_RS_GET_STATUS:
+  if (lpvOutBuffer) {
+   dwCount = rsGetStatus((LPRS_STATUS *)&lpResultBuffer);
+   if (lpResultBuffer) {
+    if (cbOutBuffer >= dwCount * sizeof(RS_STATUS)) {
+     *lpcbBytesReturned = (DWORD)(dwCount * sizeof(RS_STATUS));
+    } else {
+      ret    = SOCKET_ERROR;
+     *lpErrno   = WSA_IO_INCOMPLETE;
+     *lpcbBytesReturned = cbOutBuffer; // Copy as much as possible anyway
+    }
+   }
+  }
+  break;
+ case SIO_RS_GET_TRACE:
+  if (lpvOutBuffer) {
+   dwCount = rsGetTrace((LPRS_TRACE_OUT *)&lpResultBuffer);
+   if (lpResultBuffer) {
+    if (cbOutBuffer >= dwCount * sizeof(RS_TRACE_OUT)) {
+     *lpcbBytesReturned = (DWORD)(dwCount * sizeof(RS_TRACE_OUT));
+    } else {
+      ret    = SOCKET_ERROR;
+     *lpErrno   = WSA_IO_INCOMPLETE;
+     *lpcbBytesReturned = cbOutBuffer; // Copy as much as possible anyway
+    }
+   }
+  }
+  break;
+ case FIONBIO:
+ case FIONREAD:
+ case SIOCATMARK:
+  if (lpvInBuffer && cbInBuffer >= sizeof(u_long)) {
+   ret = rioctlsocket(
+     (int)rs,
+     dwIoControlCode,
+     (u_long *)lpvInBuffer
+    );
+   if (SOCKET_ERROR == ret)
+    *lpErrno = WSAGetLastError();
+   break;
+  }
+  /* Argument buffer missing or too small: fall through to WSAEINVAL */
+ default:
+  ret = SOCKET_ERROR;
+  *lpErrno = WSAEINVAL;
+ }
+
+ /* Hand the status/trace data over to the caller and release our copy */
+ if (lpResultBuffer) {
+  memcpy(lpvOutBuffer, lpResultBuffer, *lpcbBytesReturned);
+  free(lpResultBuffer);
+ }
+  
+ return ret;
+}
+
+/*
+ * Function: WSPAddressToString
+ *
+ * Description:
+ *    Converts a socket address to its string form by means of
+ *    WSAAddressToStringW(). If lpProtocolInfo is given, it has to carry
+ *    the provider id of this provider, otherwise the call fails with
+ *    WSAEINVALIDPROVIDER.
+ */
+int WSPAPI
+WSPAddressToString(
+  __in     LPSOCKADDR lpsaAddress,
+  __in     DWORD dwAddressLength,
+  __in     LPWSAPROTOCOL_INFOW lpProtocolInfo,
+  __out    LPWSTR lpszAddressString,
+  __inout  LPDWORD lpdwAddressStringLength,
+  __out    LPINT lpErrno
+)
+{
+ int rc;
+ 
+ if (lpProtocolInfo &&
+     0 != memcmp(&lpProtocolInfo->ProviderId, &rsProviderGuid, sizeof(rsProviderGuid))) {
+  *lpErrno = WSAEINVALIDPROVIDER;
+  return SOCKET_ERROR;
+ }
+
+ rc = WSAAddressToStringW(
+   lpsaAddress,
+   dwAddressLength,
+   NULL,
+   lpszAddressString,
+   lpdwAddressStringLength
+  );
+ if (SOCKET_ERROR == rc)
+  *lpErrno = WSAGetLastError();
+
+ return rc;
+}
+
+/*
+ * Function: WSPAsyncSelect
+ *
+ * Description:
+ *    Not supported by this provider; always fails with WSAEOPNOTSUPP.
+ */
+int WSPAPI
+WSPAsyncSelect(
+  __in   SOCKET s,
+  __in   HWND hWnd,
+  __in   unsigned int wMsg,
+  __in   long lEvent,
+  __out  LPINT lpErrno
+)
+{
+ UNREFERENCED_PARAMETER(s);
+ UNREFERENCED_PARAMETER(hWnd);
+ UNREFERENCED_PARAMETER(wMsg);
+ UNREFERENCED_PARAMETER(lEvent);
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return SOCKET_ERROR;
+}
+
+/*
+ * Function: WSPCancelBlockingCall
+ *
+ * Description:
+ *    Not supported by this provider; always fails with WSAEOPNOTSUPP.
+ */
+int WSPAPI
+WSPCancelBlockingCall(
+  __out  LPINT lpErrno
+)
+{
+ int rc = SOCKET_ERROR;
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return rc;
+}
+
+/*
+ * Function: WSPDuplicateSocket
+ *
+ * Description:
+ *    Not supported by this provider; always fails with WSAEOPNOTSUPP.
+ */
+int WSPAPI
+WSPDuplicateSocket(
+    IN SOCKET  s,
+    IN DWORD  dwProcessId,
+    OUT LPWSAPROTOCOL_INFOW  lpProtocolInfo,
+    OUT LPINT  lpErrno
+    )
+{
+ UNREFERENCED_PARAMETER(s);
+ UNREFERENCED_PARAMETER(dwProcessId);
+ UNREFERENCED_PARAMETER(lpProtocolInfo);
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return SOCKET_ERROR;
+}
+
+/*
+ * Function: WSPEnumNetworkEvents
+ *
+ * Description:
+ *    Not supported by this provider; always fails with WSAEOPNOTSUPP.
+ */
+int WSPAPI
+WSPEnumNetworkEvents(
+    IN SOCKET  s,
+    IN WSAEVENT  hEventObject,
+    OUT LPWSANETWORKEVENTS  lpNetworkEvents,
+    OUT LPINT  lpErrno
+    )
+{
+ UNREFERENCED_PARAMETER(s);
+ UNREFERENCED_PARAMETER(hEventObject);
+ UNREFERENCED_PARAMETER(lpNetworkEvents);
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return SOCKET_ERROR;
+}
+
+/*
+ * Function: WSPEventSelect
+ *
+ * Description:
+ *    Not supported by this provider; always fails with WSAEOPNOTSUPP.
+ */
+int WSPAPI
+WSPEventSelect(
+  __in   SOCKET s,
+  __in   WSAEVENT hEventObject,
+  __in   long lNetworkEvents,
+  __out  LPINT lpErrno
+)
+{
+ UNREFERENCED_PARAMETER(s);
+ UNREFERENCED_PARAMETER(hEventObject);
+ UNREFERENCED_PARAMETER(lNetworkEvents);
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return SOCKET_ERROR;
+}
+
+/*
+ * Function: WSPGetOverlappedResult
+ *
+ * Description:
+ *    Not supported by this provider; always returns FALSE with
+ *    *lpErrno set to WSAEOPNOTSUPP.
+ */
+BOOL WSPAPI
+WSPGetOverlappedResult(
+    IN SOCKET  s,
+    IN LPWSAOVERLAPPED  lpOverlapped,
+    OUT LPDWORD  lpcbTransfer,
+    IN BOOL  fWait,
+    OUT LPDWORD  lpdwFlags,
+    OUT LPINT  lpErrno
+    )
+{
+ UNREFERENCED_PARAMETER(s);
+ UNREFERENCED_PARAMETER(lpOverlapped);
+ UNREFERENCED_PARAMETER(lpcbTransfer);
+ UNREFERENCED_PARAMETER(fWait);
+ UNREFERENCED_PARAMETER(lpdwFlags);
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return FALSE;
+}
+
+/*
+ * Function: WSPGetPeerName
+ *
+ * Description:
+ *    Looks up the rsocket stored as context of socket handle s and
+ *    forwards the request to rgetpeername(). On failure the error code is
+ *    stored in *lpErrno.
+ */
+int WSPAPI
+WSPGetPeerName(
+  __in     SOCKET s,
+  __out    struct sockaddr* name,
+  __inout  LPINT namelen,
+  __out    LPINT lpErrno
+)
+{
+ DWORD_PTR rsock = INVALID_SOCKET;
+ int       rc;
+
+ rc = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rsock, lpErrno);
+ if (SOCKET_ERROR != rc) {
+  rc = rgetpeername((int)rsock, name, namelen);
+  if (SOCKET_ERROR == rc)
+   *lpErrno = WSAGetLastError();
+ }
+
+ return rc;
+}
+
+/*
+ * Function: WSPGetSockName
+ *
+ * Description:
+ *    Looks up the rsocket stored as context of socket handle s and
+ *    forwards the request to rgetsockname(). On failure the error code is
+ *    stored in *lpErrno.
+ */
+int WSPAPI
+WSPGetSockName(
+  __in     SOCKET s,
+  __out    struct sockaddr* name,
+  __inout  LPINT namelen,
+  __out    LPINT lpErrno
+)
+{
+ DWORD_PTR rsock = INVALID_SOCKET;
+ int       rc;
+
+ rc = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rsock, lpErrno);
+ if (SOCKET_ERROR != rc) {
+  rc = rgetsockname((int)rsock, name, namelen);
+  if (SOCKET_ERROR == rc)
+   *lpErrno = WSAGetLastError();
+ }
+
+ return rc;
+}
+
+/*
+ * Function: WSPGetQOSByName
+ *
+ * Description:
+ *    Not supported by this provider; always returns FALSE with
+ *    *lpErrno set to WSAEOPNOTSUPP.
+ */
+BOOL WSPAPI
+WSPGetQOSByName(
+  __in     SOCKET s,
+  __inout  LPWSABUF lpQOSName,
+  __out    LPQOS lpQOS,
+  __out    LPINT lpErrno
+)
+{
+ UNREFERENCED_PARAMETER(s);
+ UNREFERENCED_PARAMETER(lpQOSName);
+ UNREFERENCED_PARAMETER(lpQOS);
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return FALSE;
+}
+
+/*
+ * Function: WSPJoinLeaf
+ *
+ * Description:
+ *    Not supported by this provider; always returns INVALID_SOCKET with
+ *    *lpErrno set to WSAEOPNOTSUPP.
+ */
+SOCKET WSPAPI
+WSPJoinLeaf(
+  __in   SOCKET s,
+  __in   const struct sockaddr* name,
+  __in   int namelen,
+  __in   LPWSABUF lpCallerData,
+  __out  LPWSABUF lpCalleeData,
+  __in   LPQOS lpSQOS,
+  __in   LPQOS lpGQOS,
+  __in   DWORD dwFlags,
+  __out  LPINT lpErrno
+)
+{
+ UNREFERENCED_PARAMETER(s);
+ UNREFERENCED_PARAMETER(name);
+ UNREFERENCED_PARAMETER(namelen);
+ UNREFERENCED_PARAMETER(lpCallerData);
+ UNREFERENCED_PARAMETER(lpCalleeData);
+ UNREFERENCED_PARAMETER(lpSQOS);
+ UNREFERENCED_PARAMETER(lpGQOS);
+ UNREFERENCED_PARAMETER(dwFlags);
+
+ *lpErrno = WSAEOPNOTSUPP;
+
+ return INVALID_SOCKET;
+}
+
+/*
+ * Function: WSPRecvDisconnect
+ *
+ * Description:
+ *    Shuts down the receiving side of socket s by calling WSPShutdown()
+ *    with SD_RECEIVE. No disconnect data is returned.
+ */
+int WSPAPI
+WSPRecvDisconnect(
+  __in   SOCKET s,
+  __out  LPWSABUF lpInboundDisconnectData,
+  __out  LPINT lpErrno
+)
+{
+ /* The inbound disconnect data buffer is ignored */
+ UNREFERENCED_PARAMETER(lpInboundDisconnectData);
+
+ return WSPShutdown(s, SD_RECEIVE, lpErrno);
+}
+
+/*
+ * Function: WSPRecvFrom
+ *
+ * Description:
+ *    Receives data on the rsocket stored as context of socket handle s by
+ *    calling rrecvfrom() once per buffer. Filling stops at the first
+ *    buffer that is not filled completely, so that the received bytes are
+ *    always contiguous across lpBuffers. lpOverlapped,
+ *    lpCompletionRoutine and lpThreadId are not used.
+ *
+ * Returns:
+ *    0 on success with the byte count in *lpNumberOfBytesRecvd.
+ *    SOCKET_ERROR with the error code in *lpErrno if nothing could be
+ *    received; an error after some data has been received is not reported,
+ *    the data received so far is returned instead.
+ */
+int WSPAPI
+WSPRecvFrom(
+  __in     SOCKET s,
+  __inout  LPWSABUF lpBuffers,
+  __in     DWORD dwBufferCount,
+  __out    LPDWORD lpNumberOfBytesRecvd,
+  __inout  LPDWORD lpFlags,
+  __out    struct sockaddr* lpFrom,
+  __inout  LPINT lpFromlen,
+  __in     LPWSAOVERLAPPED lpOverlapped,
+  __in     LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+  __in     LPWSATHREADID lpThreadId,
+  __inout  LPINT lpErrno
+)
+{
+ DWORD_PTR rs  = INVALID_SOCKET;
+ DWORD     i;
+ DWORD     dwNumberOfBytesRecvd = 0;
+ int       len = 0;
+ int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ for (i = 0; i < dwBufferCount; i++) {
+  len = (int)rrecvfrom(
+     (int)rs,
+     lpBuffers[i].buf,
+     lpBuffers[i].len,
+     *lpFlags,
+     lpFrom,
+     lpFromlen
+    );
+  if (-1 == len) {
+   /* Fail only if there is no data to hand back to the caller */
+   if (0 == dwNumberOfBytesRecvd) {
+    *lpErrno = WSAGetLastError();
+    ret      = SOCKET_ERROR;
+   }
+   break;
+  }
+  if (0 == len)
+   break;
+
+  dwNumberOfBytesRecvd += len;
+
+  /* Short read: using the next buffer would leave a gap in the data */
+  if ((DWORD)len < lpBuffers[i].len)
+   break;
+ }
+
+ *lpNumberOfBytesRecvd = dwNumberOfBytesRecvd;
+ 
+ return ret;
+}
+
+/*
+ * Function: WSPSendDisconnect
+ *
+ * Description:
+ *    Shuts down the sending side of socket s by calling WSPShutdown()
+ *    with SD_SEND. No disconnect data is sent.
+ */
+int WSPAPI
+WSPSendDisconnect(
+  __in   SOCKET s,
+  __in   LPWSABUF lpOutboundDisconnectData,
+  __out  LPINT lpErrno
+)
+{
+ /* The outbound disconnect data buffer is ignored */
+ UNREFERENCED_PARAMETER(lpOutboundDisconnectData);
+
+ return WSPShutdown(s, SD_SEND, lpErrno);
+}
+
+/*
+ * Function: WSPSendTo
+ *
+ * Description:
+ *    Sends data on the rsocket stored as context of socket handle s by
+ *    calling rsendto() once per buffer. Sending stops at the first buffer
+ *    that is not sent completely, so that the bytes reported as sent are
+ *    always a contiguous prefix of lpBuffers. lpOverlapped,
+ *    lpCompletionRoutine and lpThreadId are not used.
+ *
+ * Returns:
+ *    0 on success with the byte count in *lpNumberOfBytesSent.
+ *    SOCKET_ERROR with the error code in *lpErrno if nothing could be
+ *    sent; an error after some data has been sent is not reported, the
+ *    number of bytes sent so far is returned instead.
+ */
+int WSPAPI
+WSPSendTo(
+  __in   SOCKET s,
+  __in   LPWSABUF lpBuffers,
+  __in   DWORD dwBufferCount,
+  __out  LPDWORD lpNumberOfBytesSent,
+  __in   DWORD dwFlags,
+  __in   const struct sockaddr* lpTo,
+  __in   int iTolen,
+  __in   LPWSAOVERLAPPED lpOverlapped,
+  __in   LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+  __in   LPWSATHREADID lpThreadId,
+  __out  LPINT lpErrno
+)
+{
+ DWORD_PTR rs  = INVALID_SOCKET;
+ DWORD     i;
+ DWORD     dwNumberOfBytesSent = 0;
+ int       len;
+ int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+ if (SOCKET_ERROR == ret)
+  return ret;
+
+ for (i = 0; i < dwBufferCount; i++) {
+  len = (int)rsendto(
+     (int)rs,
+     lpBuffers[i].buf,
+     lpBuffers[i].len,
+     dwFlags,
+     lpTo,
+     iTolen
+    );
+  if (-1 == len) {
+   /* Fail only if the caller's data has not been touched at all */
+   if (0 == dwNumberOfBytesSent) {
+    *lpErrno = WSAGetLastError();
+    ret      = SOCKET_ERROR;
+   }
+   break;
+  }
+
+  dwNumberOfBytesSent += len;
+
+  /* Short send: continuing would skip the rest of this buffer */
+  if ((DWORD)len < lpBuffers[i].len)
+   break;
+ }
+ *lpNumberOfBytesSent = dwNumberOfBytesSent;
+ 
+ return ret;
+}
+
+/*
+ * Function: WSPStringToAddress
+ *
+ * Description:
+ *    Converts an address string to a socket address by means of
+ *    WSAStringToAddressW(). Only AF_INET and AF_INET6 are accepted
+ *    (WSAEAFNOSUPPORT otherwise). If lpProtocolInfo is given, it has to
+ *    carry the provider id of this provider (WSAEINVALIDPROVIDER
+ *    otherwise).
+ */
+int WSPAPI
+WSPStringToAddress(
+  __in     LPWSTR AddressString,
+  __in     INT AddressFamily,
+  __in     LPWSAPROTOCOL_INFO lpProtocolInfo,
+  __out    LPSOCKADDR lpAddress,
+  __inout  LPINT lpAddressLength,
+  __out    LPINT lpErrno
+)
+{
+ int rc;
+ 
+ switch (AddressFamily) {
+ case AF_INET:
+ case AF_INET6:
+  break;
+ default:
+  *lpErrno = WSAEAFNOSUPPORT;
+  return SOCKET_ERROR;
+ }
+ 
+ if (lpProtocolInfo &&
+     0 != memcmp(&lpProtocolInfo->ProviderId, &rsProviderGuid, sizeof(rsProviderGuid))) {
+  *lpErrno = WSAEINVALIDPROVIDER;
+  return SOCKET_ERROR;
+ }
+ 
+ rc = WSAStringToAddressW(
+   AddressString,
+   AddressFamily,
+   NULL,
+   lpAddress,
+   lpAddressLength
+  );
+ if (SOCKET_ERROR == rc)
+  *lpErrno = WSAGetLastError();
+
+ return rc;
+}
+
+/*
+ * Function: WSPStartup
+ *
+ * Description:
+ *    This function initializes the base provider. On the first call the
+ *    provider data, the table of entry points and the protocol info are
+ *    set up; every call increments the startup count and hands the
+ *    provider data and the entry point table back to the caller. The
+ *    upcall table passed in is stored for use by the entry points.
+ *
+ * Returns:
+ *    NO_ERROR on success, WSAVERNOTSUPPORTED if a version below 2.2 is
+ *    requested, WSAEFAULT if one of the pointer arguments is NULL.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__declspec(dllexport)
+__checkReturn
+int
+WSPAPI
+WSPStartup(
+    __in WORD wVersion,
+    __in LPWSPDATA lpWSPData,
+    __in LPWSAPROTOCOL_INFOW lpProtocolInfo,
+    __in WSPUPCALLTABLE UpCallTable,
+    __out LPWSPPROC_TABLE lpProcTable
+    )
+{
+ static WSPDATA   gWSPData;
+ static WSPPROC_TABLE gProcTable;
+
+TRACE("Requested version = %d.%d", LOBYTE(wVersion), HIBYTE(wVersion));
+
+ /* Make sure that the version requested is >= 2.2. The low byte is the 
+    major version and the high byte is the minor version. */
+ if( (LOBYTE(wVersion) < 2) || ((LOBYTE(wVersion) == 2) && (HIBYTE(wVersion) < 2)) )
+  return WSAVERNOTSUPPORTED;
+
+ /* All of these are dereferenced below */
+ if( NULL == lpWSPData || NULL == lpProtocolInfo || NULL == lpProcTable )
+  return WSAEFAULT;
+
+    EnterCriticalSection( &gCriticalSection );
+
+    // The first time the startup is called, set up the provider data,
+    //    the entry point table and the protocol info
+    if ( 0 == gStartupCount )
+    {
+TRACE("Called 1st time => Initializing ProtocolInfo...");
+  /* Save the global WSPData */
+  gWSPData.wVersion = MAKEWORD(2, 2);
+  gWSPData.wHighVersion = MAKEWORD(2, 2);
+  /* wcscpy_s() expects the destination size in characters, not in bytes */
+  wcscpy_s(
+   gWSPData.szDescription,
+   sizeof(gWSPData.szDescription) / sizeof(gWSPData.szDescription[0]),
+   Description
+  );
+
+  /* provide Service provider's entry points in proc table */
+  ZeroMemory( &gProcTable, sizeof(gProcTable) );
+  gProcTable.lpWSPSocket    = WSPSocket;
+  gProcTable.lpWSPBind    = WSPBind;
+  gProcTable.lpWSPListen    = WSPListen;
+  gProcTable.lpWSPAccept    = WSPAccept;
+  gProcTable.lpWSPConnect    = WSPConnect;
+  gProcTable.lpWSPShutdown   = WSPShutdown;
+  gProcTable.lpWSPCloseSocket   = WSPCloseSocket;
+  gProcTable.lpWSPRecv    = WSPRecv;
+  gProcTable.lpWSPSend    = WSPSend;
+  gProcTable.lpWSPGetSockOpt   = WSPGetSockOpt;
+  gProcTable.lpWSPSetSockOpt   = WSPSetSockOpt;
+  gProcTable.lpWSPSelect    = WSPSelect;
+  gProcTable.lpWSPIoctl    = WSPIoctl;
+  gProcTable.lpWSPCleanup    = WSPCleanup;
+// Additional functions required for (base provider's) WSPStartup:
+  gProcTable.lpWSPAddressToString  = WSPAddressToString;
+  gProcTable.lpWSPAsyncSelect   = WSPAsyncSelect;
+  gProcTable.lpWSPCancelBlockingCall = WSPCancelBlockingCall;
+  gProcTable.lpWSPDuplicateSocket  = WSPDuplicateSocket;
+  gProcTable.lpWSPEnumNetworkEvents = WSPEnumNetworkEvents;
+  gProcTable.lpWSPEventSelect   = WSPEventSelect;
+  gProcTable.lpWSPGetOverlappedResult = WSPGetOverlappedResult;
+  gProcTable.lpWSPGetPeerName   = WSPGetPeerName;
+  gProcTable.lpWSPGetSockName   = WSPGetSockName;
+  gProcTable.lpWSPGetQOSByName  = WSPGetQOSByName;
+  gProcTable.lpWSPJoinLeaf   = WSPJoinLeaf;
+  gProcTable.lpWSPRecvDisconnect  = WSPRecvDisconnect;
+  gProcTable.lpWSPRecvFrom   = WSPRecvFrom;
+  gProcTable.lpWSPSendDisconnect  = WSPSendDisconnect;
+  gProcTable.lpWSPSendTo    = WSPSendTo;
+  gProcTable.lpWSPStringToAddress  = (LPWSPSTRINGTOADDRESS)WSPStringToAddress;
+ 
+  gProtocolInfo = *lpProtocolInfo;
+ }
+    gStartupCount++;
+TRACE("StartupCount incremented to %d", gStartupCount);
+
+ /* Set the return parameters while the tables are still protected
+    against a concurrent first-time initialization */
+ *lpWSPData   = gWSPData;
+ *lpProcTable = gProcTable;
+ 
+ /* store the upcall function table */
+    gMainUpCallTable = UpCallTable;
+
+    LeaveCriticalSection( &gCriticalSection );
+
+    return NO_ERROR;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
 BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved)
 {
  UNREFERENCED_PARAMETER(hInstance);
@@ -44,10 +1195,28 @@
   if (heap == NULL) {
    return FALSE;
   }
-  InitializeCriticalSection(&lock);
+  fastlock_init(&lock);
+  fastlock_init(&mut);
+  fastlock_init(&TraceLock);
+
+  //
+  // Initialize some critical section objects 
+  //
+  __try
+  {
+   InitializeCriticalSection( &gCriticalSection );
+  }
+  __except( EXCEPTION_EXECUTE_HANDLER )
+  {
+   goto cleanup;
+  }
   break;
  case DLL_PROCESS_DETACH:
-  DeleteCriticalSection(&lock);
+  gDetached = TRUE;
+  DeleteCriticalSection( &gCriticalSection );
+  fastlock_destroy(&mut);
+  fastlock_destroy(&lock);
+  fastlock_destroy(&TraceLock);
   HeapDestroy(heap);
   break;
  default:
@@ -55,4 +1224,8 @@
  }
 
  return TRUE;
+
+cleanup:
+
+    return FALSE;
 }
Index: ulp/librdmacm/src/indexer.cpp
===================================================================
--- ulp/librdmacm/src/indexer.cpp (revision 0)
+++ ulp/librdmacm/src/indexer.cpp (working copy)
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2011 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <windows.h>
+
+#include <sys/types.h>
+#include <stdlib.h>
+
+#include "indexer.h"
+#include "cma.h"
+
+/*
+ * Indexer - to find a structure given an index
+ *
+ * We store pointers using a double lookup and return an index to the
+ * user which is then used to retrieve the pointer.  The upper bits of
+ * the index are itself an index into an array of memory allocations.
+ * The lower bits specify the offset into the allocated memory where
+ * the pointer is stored.
+ *
+ * This allows us to adjust the number of pointers stored by the index
+ * list without taking a lock during data lookups.
+ */
+
+/*
+ * Adds one more block of IDX_ENTRY_SIZE entries to the indexer and chains
+ * all of them into the free list. Returns the first free index of the new
+ * block, or -1 with errno set to ENOMEM if the indexer is full or the
+ * allocation fails.
+ */
+static int idx_grow(struct indexer *idx)
+{
+ union idx_entry *block;
+ int slot, base, first;
+
+ if (idx->size >= IDX_ARRAY_SIZE) {
+  errno = ENOMEM;
+  return -1;
+ }
+
+ block = (union idx_entry *)calloc(IDX_ENTRY_SIZE, sizeof(union idx_entry));
+ idx->array[idx->size] = block;
+ if (block == NULL) {
+  errno = ENOMEM;
+  return -1;
+ }
+
+ /* Each entry points to its successor, the last one to the old list */
+ base = idx->size << IDX_ENTRY_BITS;
+ for (slot = 0; slot < IDX_ENTRY_SIZE - 1; slot++)
+  block[slot].next = base + slot + 1;
+ block[IDX_ENTRY_SIZE - 1].next = idx->free_list;
+
+ /* Index 0 is reserved */
+ first = (base == 0) ? 1 : base;
+
+ idx->free_list = first;
+ idx->size++;
+ return first;
+}
+
+/*
+ * Stores item in the first free entry of the indexer, growing the indexer
+ * if the free list is empty. Returns the index of the entry, or the
+ * (non-positive) result of idx_grow() if growing fails.
+ */
+int idx_insert(struct indexer *idx, void *item)
+{
+ union idx_entry *slot;
+ int index = idx->free_list;
+
+ if (index == 0) {
+  index = idx_grow(idx);
+  if (index <= 0)
+   return index;
+ }
+
+ slot = idx->array[idx_array_index(index)] + idx_entry_index(index);
+ idx->free_list = slot->next;
+ slot->item = item;
+ return index;
+}
+
+/*
+ * Returns the item stored at index and puts the entry at the head of the
+ * free list.
+ */
+void *idx_remove(struct indexer *idx, int index)
+{
+ union idx_entry *slot = idx->array[idx_array_index(index)] + idx_entry_index(index);
+ void *removed = slot->item;
+
+ slot->next = idx->free_list;
+ idx->free_list = index;
+ return removed;
+}
+
+/* Overwrites the item stored at index with item. */
+void idx_replace(struct indexer *idx, int index, void *item)
+{
+ idx->array[idx_array_index(index)][idx_entry_index(index)].item = item;
+}
+
+
+/*
+ * Allocates the zeroed block of entries that holds index. Returns index,
+ * or -1 with errno set to ENOMEM if the allocation fails.
+ */
+static int idm_grow(struct index_map *idm, int index)
+{
+ void **block = (void **)calloc(IDX_ENTRY_SIZE, sizeof(void *));
+
+ idm->array[idx_array_index(index)] = block;
+ if (block == NULL) {
+  errno = ENOMEM;
+  return -1;
+ }
+
+ return index;
+}
+
+/*
+ * Associates item with index, allocating the block that holds index on
+ * first use. Returns index, or -1 with errno set to ENOMEM if index is
+ * outside 0..IDX_MAX_INDEX or the allocation fails.
+ */
+int idm_set(struct index_map *idm, int index, void *item)
+{
+ void **entry;
+
+ /* A negative index would address memory in front of idm->array */
+ if (index < 0 || index > IDX_MAX_INDEX) {
+  errno = ENOMEM;
+  return -1;
+ }
+
+ if (!idm->array[idx_array_index(index)]) {
+  if (idm_grow(idm, index) < 0)
+   return -1;
+ }
+
+ entry = idm->array[idx_array_index(index)];
+ entry[idx_entry_index(index)] = item;
+ return index;
+}
+
+/*
+ * Resets the entry at index to NULL and returns the item that was stored
+ * there. The block holding index is dereferenced without a check.
+ */
+void *idm_clear(struct index_map *idm, int index)
+{
+ void **slot = idm->array[idx_array_index(index)] + idx_entry_index(index);
+ void *previous = *slot;
+
+ *slot = NULL;
+ return previous;
+}
Index: ulp/librdmacm/src/indexer.h
===================================================================
--- ulp/librdmacm/src/indexer.h (revision 0)
+++ ulp/librdmacm/src/indexer.h (working copy)
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2011 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/types.h>
+
+/*
+ * Indexer - to find a structure given an index.  Synchronization
+ * must be provided by the caller.  Caller must initialize the
+ * indexer by setting free_list and size to 0.
+ */
+
+/*
+ * One slot of the indexer: holds the stored item while the slot is in use
+ * and the index of the next free slot while it is on the free list.
+ */
+union idx_entry {
+ void *item;
+ int   next;
+};
+
+/*
+ * Layout of an index: IDX_INDEX_BITS bits in total, of which the lower
+ * IDX_ENTRY_BITS select the entry within a block and the remaining upper
+ * bits select the block.
+ */
+#define IDX_INDEX_BITS 16
+#define IDX_ENTRY_BITS 10
+/* Number of entries per block */
+#define IDX_ENTRY_SIZE (1 << IDX_ENTRY_BITS)
+/* Maximum number of blocks */
+#define IDX_ARRAY_SIZE (1 << (IDX_INDEX_BITS - IDX_ENTRY_BITS))
+/* Largest index that can be represented */
+#define IDX_MAX_INDEX  ((1 << IDX_INDEX_BITS) - 1)
+
+struct indexer
+{
+ union idx_entry *array[IDX_ARRAY_SIZE]; /* blocks of IDX_ENTRY_SIZE entries each */
+ int   free_list;                        /* index of the first free entry, 0 if there is none */
+ int   size;                             /* number of blocks allocated so far */
+};
+
+/*
+ * Split an index into the number of its block and the position within
+ * that block. The argument is parenthesized so that expressions can be
+ * passed safely.
+ */
+#define idx_array_index(index) ((index) >> IDX_ENTRY_BITS)
+#define idx_entry_index(index) ((index) & (IDX_ENTRY_SIZE - 1))
+
+/* Stores item in a free entry and returns its index, or -1 if the indexer cannot grow. */
+int idx_insert(struct indexer *idx, void *item);
+/* Returns the item stored at index and puts the entry back on the free list. */
+void *idx_remove(struct indexer *idx, int index);
+/* Overwrites the item stored at index. */
+void idx_replace(struct indexer *idx, int index, void *item);
+
+/* Returns the item stored at index; no range or allocation check is made. */
+static inline void *idx_at(struct indexer *idx, int index)
+{
+ union idx_entry *block = idx->array[idx_array_index(index)];
+
+ return block[idx_entry_index(index)].item;
+}
+
+/*
+ * Index map - associates a structure with an index.  Synchronization
+ * must be provided by the caller.  Caller must initialize the
+ * index map by setting it to 0.
+ */
+
+struct index_map
+{
+ void **array[IDX_ARRAY_SIZE]; /* blocks of IDX_ENTRY_SIZE item pointers, NULL until first used */
+};
+
+/* Associates item with index; returns index, or -1 with errno set to ENOMEM. */
+int idm_set(struct index_map *idm, int index, void *item);
+/* Resets the entry at index to NULL and returns the item stored there before. */
+void *idm_clear(struct index_map *idm, int index);
+
+/*
+ * Returns the item associated with index. The block holding index is
+ * dereferenced without a check; see idm_lookup() for a checked variant.
+ */
+static inline void *idm_at(struct index_map *idm, int index)
+{
+ return idm->array[idx_array_index(index)][idx_entry_index(index)];
+}
+
+/*
+ * Returns the item associated with index, or NULL if index is outside
+ * 0..IDX_MAX_INDEX or the block holding it has not been allocated.
+ */
+static inline void *idm_lookup(struct index_map *idm, int index)
+{
+ /* A negative index would address memory in front of idm->array */
+ if (index < 0 || index > IDX_MAX_INDEX)
+  return NULL;
+
+ return idm->array[idx_array_index(index)] ? idm_at(idm, index) : NULL;
+}
Index: ulp/librdmacm/src/openib_osd.h
===================================================================
--- ulp/librdmacm/src/openib_osd.h (revision 0)
+++ ulp/librdmacm/src/openib_osd.h (working copy)
@@ -0,0 +1,47 @@
+#ifndef OPENIB_OSD_H
+#define OPENIB_OSD_H
+
+/* Force FD_SETSIZE to 1024, dropping any different value defined earlier */
+#if defined(FD_SETSIZE) && FD_SETSIZE != 1024
+#undef FD_SETSIZE
+#endif
+#define FD_SETSIZE 1024 /* Set before including winsock2 - see select help */
+
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <io.h>
+#include <fcntl.h>
+
+/* Names used by the sources, mapped to their Windows counterparts */
+#define container_of CONTAINING_RECORD
+#define offsetof  FIELD_OFFSET
+#define ssize_t   SSIZE_T
+
+/* NOTE(review): __thread expands to nothing here, so variables declared
+   with it are ordinary variables - confirm that this is intended */
+#define __thread  /*???*/
+
+/* Both directions swap the byte order of a 64 bit value unconditionally */
+#define ntohll   _byteswap_uint64
+#define htonll   _byteswap_uint64
+
+/* shutdown() modes, mapped to the Winsock constants */
+#define SHUT_RD    SD_RECEIVE
+#define SHUT_WR    SD_SEND
+#define SHUT_RDWR   SD_BOTH
+
+#define O_NONBLOCK  0x4000
+
+/* allow casting to WSABUF: the length comes first, followed by the buffer pointer */
+struct iovec
+{
+       u_long iov_len;    /* length of the buffer */
+       char FAR* iov_base; /* start of the buffer */
+};
+
+/* Message header used together with struct iovec for scatter/gather I/O */
+struct msghdr
+{
+ void         *msg_name;       // optional address
+ socklen_t     msg_namelen;    // size of address
+ struct iovec *msg_iov;        // scatter/gather array
+ int           msg_iovlen;     // members in msg_iov
+ void         *msg_control;    // ancillary data
+ socklen_t     msg_controllen; // ancillary data buffer len
+ int           msg_flags;      // flags on received message
+};
+
+#endif // OPENIB_OSD_H
Index: ulp/librdmacm/src/rsocket.cpp
===================================================================
--- ulp/librdmacm/src/rsocket.cpp (revision 0)
+++ ulp/librdmacm/src/rsocket.cpp (working copy)
@@ -0,0 +1,2334 @@
+/*
+ * Copyright (c) 2008-2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH.  All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ *      - Neither the name Oce Printing Systems GmbH nor the names
+ *        of the authors may be used to endorse or promote products
+ *        derived from this software without specific prior written
+ *        permission.
+ *
+ * THIS SOFTWARE IS PROVIDED  "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE. 
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/types.h>
+
+#include <windows.h>
+#include "openib_osd.h"
+#include <sys/time.h>
+#include <stdarg.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <_fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <_errno.h>
+#include <complib/cl_byteswap.h>
+#include <rdma/rdma_cma.h>
+#include <rdma/rdma_verbs.h>
+#include <rdma/rwinsock.h>
+#include <rdma/rsocket.h>
+#include "cma.h"
+#include "indexer.h"
+#include "../../../etc/user/gtod.c" // getimeofday()
+
+#define RS_OLAP_START_SIZE 2048 /* size of the first chunk posted by rsend(); doubled per iteration */
+#define RS_MAX_TRANSFER 65536 /* chunk size stops doubling once it reaches this value */
+#define RS_MAX_BACKLOG  256 /* upper bound applied to the rlisten() backlog */
+#define RS_QP_MAX_SIZE 0xFFFE /* cap on send/receive queue depth */
+#define RS_QP_CTRL_SIZE 4 /* default number of send slots held back for control messages */
+#define RS_CONN_RETRIES 6 /* retry limit for address/route resolution timeouts */
+#define RS_SGL_SIZE 2 /* entries in the target SGL that the peer updates */
+
+static struct index_map idm; /* socket index -> struct rsocket, guarded by 'mut' in rs_insert/rs_remove */
+static uint16_t def_inline = 64; /* defaults below may be overridden once by rs_configure() */
+static uint16_t def_sqsize = 384;
+static uint16_t def_rqsize = 384;
+static uint32_t def_mem = (1 << 17); /* default receive buffer size in bytes */
+static uint32_t def_wmem = (1 << 17); /* default send buffer size in bytes */
+static uint32_t polling_time = 10; /* busy-poll budget in microseconds, see rs_get_comp() */
+
+extern fastlock_t  mut;
+extern BOOL    gDetached;   // Indicates if process is detaching from DLL
+extern CRITICAL_SECTION gCriticalSection;   // Critical section for initialization and 
+extern WSPUPCALLTABLE   gMainUpCallTable;   // Upcall functions given to us by Winsock
+extern WSAPROTOCOL_INFOW gProtocolInfo;
+
+/*
+ * Immediate data format is determined by the upper bits
+ * bit 31: message type, 0 - data, 1 - control
+ * bit 30: buffers updated, 0 - target, 1 - direct-receive
+ * bit 29: more data, 0 - end of transfer, 1 - more data available
+ *
+ * for data transfers:
+ * bits [28:0]: bytes transferred, 0 = 1 GB
+ * for control messages:
+ * bits [28:0]: receive credits granted
+ */
+
+enum { /* 3-bit opcode stored in bits 31:29 of the immediate data */
+ RS_OP_DATA,
+ RS_OP_RSVD_DATA_MORE,
+ RS_OP_RSVD_DRA,
+ RS_OP_RSVD_DRA_MORE,
+ RS_OP_SGL,
+ RS_OP_RSVD,
+ RS_OP_RSVD_DRA_SGL,
+ RS_OP_CTRL
+};
+#define rs_msg_set(op, data)  (((uint32_t) (op) << 29) | (uint32_t) (data)) /* shift as unsigned: an int shift overflows for op >= 4 */
+#define rs_msg_op(imm_data)   ((imm_data) >> 29) /* arguments parenthesized so compound expressions expand safely */
+#define rs_msg_data(imm_data) ((imm_data) & 0x1FFFFFFF)
+
+enum { /* payload values of an RS_OP_CTRL message, decoded in rs_poll_cq() */
+ RS_CTRL_DISCONNECT, /* socket moves to rs_disconnected */
+ RS_CTRL_SHUTDOWN /* on receipt, rs_poll_cq() clears rs_connect_rd from the state */
+};
+
+struct rs_msg { /* one queued receive notification in the rmsg ring */
+ uint32_t op; /* opcode taken from the immediate data */
+ uint32_t data; /* remaining byte count of the message; reduced by partial reads in rrecv() */
+};
+
+struct rs_sge { /* description of a remotely writable region, exchanged with the peer */
+ uint64_t addr; /* address of the region */
+ uint32_t key; /* rkey used for RDMA writes into it */
+ uint32_t length; /* bytes for data buffers; entry count when describing the target SGL */
+};
+
+#define RS_MIN_INLINE    (sizeof(struct rs_sge)) /* lower bound enforced on def_inline by rs_configure() */
+#define rs_host_is_net() (1 == htonl(1)) /* true when host byte order equals network byte order */
+#define RS_CONN_FLAG_NET 1 /* set in rs_conn_data.flags when rs_host_is_net() holds for the sender */
+
+struct rs_conn_data { /* private data carried in the connect request and reply */
+ uint8_t    version; /* this code sends 1 and accepts only 1 */
+ uint8_t    flags; /* RS_CONN_FLAG_NET or 0 */
+ uint16_t   credits; /* sender's receive queue size, network byte order */
+ uint32_t   reserved2; /* sent as 0 */
+ struct rs_sge   target_sgl; /* where the peer should write buffer updates */
+ struct rs_sge   data_buf; /* initial receive region (first half of rbuf) */
+};
+
+#define RS_RECV_WR_ID (~((uint64_t) 0)) /* wr_id of every posted receive; distinguishes receive from send completions */
+
+/*
+ * rsocket states are ordered as passive, connecting, connected, disconnected.
+ * Code relies on both the ordering (state < rs_connected) and the bit layout.
+ */
+enum rs_state {
+ rs_init,
+ rs_bound    =      0x0001,
+ rs_listening    =      0x0002,
+ rs_opening    =      0x0004, /* common bit of every in-progress state below */
+ rs_resolving_addr  = rs_opening |   0x0010,
+ rs_resolving_route = rs_opening |   0x0020,
+ rs_connecting      = rs_opening |   0x0040,
+ rs_accepting       = rs_opening |   0x0080,
+ rs_connected    =      0x0100,
+ rs_connect_wr     =      0x0200, /* sending still allowed */
+ rs_connect_rd    =      0x0400, /* receiving still allowed */
+ rs_connect_rdwr    = rs_connected | rs_connect_rd | rs_connect_wr,
+ rs_connect_error   =      0x0800,
+ rs_disconnected    =      0x1000,
+ rs_error    =      0x2000,
+
+#define RS_OPT_SWAP_SGL 1 /* peer has the other byte order: swap SGL entries written to it */
+
+struct rsocket {
+ struct rdma_cm_id *cm_id;
+ fastlock_t   slock; /* serializes senders (held across the rsend loop) */
+ fastlock_t   rlock; /* serializes receivers (held across the rrecv loop) */
+ fastlock_t   cq_lock; /* guards CQ polling and completion processing */
+ fastlock_t   cq_wait_lock; /* serializes blocking waits on the CQ channel */
+
+ int    opts; /* RS_OPT_* bits */
+ long    fd_flags; /* tested against O_NONBLOCK */
+ uint64_t   so_opts;
+ uint64_t   tcp_opts;
+ uint64_t   ipv6_opts;
+ int    state; /* enum rs_state value / bit mask */
+ int    cq_armed; /* 1 after ibv_req_notify_cq until the event is consumed */
+ int    retries; /* resolution attempts so far; also scales the timeout */
+ int    err; /* error code saved when the socket enters an error state */
+ int    index; /* slot in idm, -1 while not inserted */
+ int    ctrl_avail; /* send slots left for control/credit messages */
+ int    sqe_avail; /* send slots left for data */
+ int    sbuf_bytes_avail; /* send-buffer bytes not tied up in posted writes */
+ uint16_t   sseq_no; /* data writes posted so far */
+ uint16_t   sseq_comp; /* sequence at which sending must pause until the peer grants more credits */
+ uint16_t   sq_size;
+ uint16_t   sq_inline; /* largest transfer sent inline from the caller's buffer */
+
+ uint16_t   rq_size;
+ uint16_t   rseq_no; /* messages fully consumed by rrecv */
+ uint16_t   rseq_comp; /* rseq_no value at which new credits are sent */
+ int    rbuf_bytes_avail; /* consumed receive-buffer bytes not yet re-advertised */
+ int    rbuf_free_offset; /* start of the next half of rbuf to advertise */
+ int    rbuf_offset; /* read position inside rbuf */
+ int    rmsg_head; /* ring indexes into rmsg; equal means empty */
+ int    rmsg_tail;
+ struct rs_msg   *rmsg; /* ring of rq_size + 1 entries */
+
+ int    remote_sge; /* next entry of the peer's target SGL to write */
+ struct rs_sge   remote_sgl; /* location of the peer's target SGL */
+
+ struct ibv_mr  *target_mr; /* registration covering target_sgl */
+ int    target_sge; /* target_sgl entry currently being filled */
+ volatile struct rs_sge   target_sgl[RS_SGL_SIZE]; /* updated by remote RDMA writes */
+
+ uint32_t   rbuf_size;
+ struct ibv_mr  *rmr;
+ uint8_t    *rbuf;
+
+ uint32_t   sbuf_size;
+ struct ibv_mr  *smr;
+ struct ibv_sge   ssgl[2]; /* ssgl[0].addr is the write cursor in sbuf; ssgl[1] covers wrap-around */
+ uint8_t    *sbuf;
+};
+
+static void rs_configure(void) /* one-time load of tunables from files under RS_CONF_DIR; a file that cannot be opened leaves its default in place */
+{
+ FILE *f;
+ static int init; /* non-zero once the files have been processed */
+
+ if (init) /* quick exit without taking the lock */
+  return;
+
+ fastlock_acquire(&mut);
+ if (init) /* checked again under the lock in case another thread got here first */
+  goto out;
+
+ if ((f = fopen(RS_CONF_DIR "/polling_time", "r"))) {
+  fscanf(f, "%u", &polling_time); /* fscanf results are not checked anywhere in this function */
+  fclose(f);
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/inline_default", "r"))) {
+  fscanf(f, "%hu", &def_inline);
+  fclose(f);
+
+  if (def_inline < RS_MIN_INLINE) /* inline size must be able to hold one rs_sge */
+   def_inline = RS_MIN_INLINE;
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/sqsize_default", "r"))) {
+  fscanf(f, "%hu", &def_sqsize);
+  fclose(f);
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/rqsize_default", "r"))) {
+  fscanf(f, "%hu", &def_rqsize);
+  fclose(f);
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/mem_default", "r"))) {
+  fscanf(f, "%u", &def_mem);
+  fclose(f);
+
+  if (def_mem < 1) /* never allow a zero-sized receive buffer */
+   def_mem = 1;
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/wmem_default", "r"))) {
+  fscanf(f, "%u", &def_wmem);
+  fclose(f);
+
+  if (def_wmem < 1) /* never allow a zero-sized send buffer */
+   def_wmem = 1;
+ }
+ init = 1;
+out:
+ fastlock_release(&mut);
+}
+
+static int rs_insert(struct rsocket *rs) /* registers rs in idm; returns the stored index (negative is treated as failure by rsocket()) */
+{
+ fastlock_acquire(&mut);
+ rs->index = idm_set(&idm, ((int)rs->cm_id->channel->channel.Event >> 2)/*fd*/, rs); /* key is the CM channel event handle value shifted right by 2; NOTE(review): the int cast would truncate a 64-bit handle -- confirm */
+ fastlock_release(&mut);
+ return rs->index;
+}
+
+static void rs_remove(struct rsocket *rs) /* drops rs from idm under 'mut'; rs->index itself is left unchanged */
+{
+ fastlock_acquire(&mut);
+ idm_clear(&idm, rs->index);
+ fastlock_release(&mut);
+}
+
+static struct rsocket *rs_alloc(struct rsocket *inherited_rs) /* returns a zeroed rsocket with sizes copied from inherited_rs, or from the defaults when it is NULL; NULL on allocation failure */
+{
+ struct rsocket *rs;
+
+ rs = (struct rsocket *)calloc(1, sizeof *rs);
+ if (!rs)
+  return NULL;
+
+ rs->index = -1; /* not yet registered in idm */
+ if (inherited_rs) {
+  rs->sbuf_size = inherited_rs->sbuf_size;
+  rs->rbuf_size = inherited_rs->rbuf_size;
+  rs->sq_inline = inherited_rs->sq_inline;
+  rs->sq_size = inherited_rs->sq_size;
+  rs->rq_size = inherited_rs->rq_size;
+  rs->ctrl_avail = inherited_rs->ctrl_avail;
+ } else {
+  rs->sbuf_size = def_wmem;
+  rs->rbuf_size = def_mem;
+  rs->sq_inline = def_inline;
+  rs->sq_size = def_sqsize;
+  rs->rq_size = def_rqsize;
+  rs->ctrl_avail = RS_QP_CTRL_SIZE;
+ }
+ fastlock_init(&rs->slock); /* destroyed again in rs_free() */
+ fastlock_init(&rs->rlock);
+ fastlock_init(&rs->cq_lock);
+ fastlock_init(&rs->cq_wait_lock);
+ return rs;
+}
+
+static int rs_set_nonblocking(struct rsocket *rs, long arg) /* arg == O_NONBLOCK selects a zero wait, any other value INFINITE; always returns 0 */
+{
+ if (rs->cm_id->recv_cq_channel) /* the completion channel exists only after rs_create_cq() */
+  rs->cm_id->recv_cq_channel->comp_channel.Milliseconds = (arg == O_NONBLOCK ? 0 : INFINITE);
+
+ if (rs->state < rs_connected) /* CM channel wait is only adjusted before the connection is up */
+  rs->cm_id->channel->channel.Milliseconds = (arg == O_NONBLOCK ? 0 : INFINITE);
+
+ return 0;
+}
+
+static void rs_set_qp_size(struct rsocket *rs) /* bounds sq_size and rq_size by the device limit and a floor of 2 */
+{
+ uint16_t max_size;
+
+ max_size = min(ucma_max_qpsize(rs->cm_id), RS_QP_MAX_SIZE);
+
+ if (rs->sq_size > max_size)
+  rs->sq_size = max_size;
+ else if (rs->sq_size < 2)
+  rs->sq_size = 2;
+ if (rs->sq_size <= (RS_QP_CTRL_SIZE << 2)) /* small send queue: keep only one slot for control messages */
+  rs->ctrl_avail = 1;
+
+ if (rs->rq_size > max_size)
+  rs->rq_size = max_size;
+ else if (rs->rq_size < 2)
+  rs->rq_size = 2;
+}
+
+static int rs_init_bufs(struct rsocket *rs) /* allocates and registers message ring, send and receive buffers; 0 on success, -1 on failure (partial state is released by rs_free()) */
+{
+ rs->rmsg = (struct rs_msg *)calloc(rs->rq_size + 1, sizeof(*rs->rmsg)); /* one spare slot so head == tail means empty */
+ if (!rs->rmsg)
+  return -1;
+
+ rs->sbuf = (uint8_t *)calloc(rs->sbuf_size, sizeof(*rs->sbuf));
+ if (!rs->sbuf)
+  return -1;
+
+ rs->smr = rdma_reg_msgs(rs->cm_id, rs->sbuf, rs->sbuf_size);
+ if (!rs->smr)
+  return -1;
+
+ rs->target_mr = rdma_reg_write(rs->cm_id, (void *) rs->target_sgl, /* peer writes buffer updates into target_sgl */
+           sizeof(rs->target_sgl));
+ if (!rs->target_mr)
+  return -1;
+
+ rs->rbuf = (uint8_t *)calloc(rs->rbuf_size, sizeof(*rs->rbuf));
+ if (!rs->rbuf)
+  return -1;
+
+ rs->rmr = rdma_reg_write(rs->cm_id, rs->rbuf, rs->rbuf_size);
+ if (!rs->rmr)
+  return -1;
+
+ rs->ssgl[0].addr = rs->ssgl[1].addr = (uintptr_t) rs->sbuf; /* both send SGEs start at the beginning of sbuf */
+ rs->sbuf_bytes_avail = rs->sbuf_size;
+ rs->ssgl[0].lkey = rs->ssgl[1].lkey = rs->smr->lkey;
+
+ rs->rbuf_free_offset = rs->rbuf_size >> 1; /* first half goes out in the connection data, second half is the next to advertise */
+ rs->rbuf_bytes_avail = rs->rbuf_size >> 1;
+ rs->sqe_avail = rs->sq_size - rs->ctrl_avail; /* data slots = queue depth minus reserved control slots */
+ rs->rseq_comp = rs->rq_size >> 1; /* first credit update once half the receive queue is consumed */
+ return 0;
+}
+
+static int rs_create_cq(struct rsocket *rs) /* creates one completion channel and one CQ shared by send and receive; 0 or -1 */
+{
+ rs->cm_id->recv_cq_channel = ibv_create_comp_channel(rs->cm_id->verbs);
+ if (!rs->cm_id->recv_cq_channel)
+  return -1;
+
+ rs->cm_id->recv_cq = ibv_create_cq(rs->cm_id->verbs, rs->sq_size + rs->rq_size, /* sized for both queues */
+        rs->cm_id, rs->cm_id->recv_cq_channel, 0);
+ if (!rs->cm_id->recv_cq)
+  goto err1;
+
+ if (rs->fd_flags & O_NONBLOCK) { /* apply a non-blocking setting made before the channel existed */
+  if (rs_set_nonblocking(rs, O_NONBLOCK))
+   goto err2;
+ }
+
+ rs->cm_id->send_cq_channel = rs->cm_id->recv_cq_channel; /* send side aliases the receive CQ and channel */
+ rs->cm_id->send_cq = rs->cm_id->recv_cq;
+ return 0;
+
+err2:
+ ibv_destroy_cq(rs->cm_id->recv_cq);
+ rs->cm_id->recv_cq = NULL;
+err1:
+ ibv_destroy_comp_channel(rs->cm_id->recv_cq_channel);
+ rs->cm_id->recv_cq_channel = NULL;
+ return -1;
+}
+
+static __inline int
+rs_post_recv(struct rsocket *rs) /* posts one receive with no buffers; result passed through rdma_seterrno() */
+{
+ struct ibv_recv_wr wr, *bad;
+
+ wr.wr_id = RS_RECV_WR_ID; /* lets rs_poll_cq() recognise receive completions */
+ wr.next = NULL;
+ wr.sg_list = NULL; /* zero SGEs: only the immediate value of the completion is consumed */
+ wr.num_sge = 0;
+
+ return rdma_seterrno(ibv_post_recv(rs->cm_id->qp, &wr, &bad));
+}
+
+static int rs_create_ep(struct rsocket *rs) /* builds CQ, RC queue pair and buffers, then fills the receive queue; 0 or the first failing step's result */
+{
+ struct ibv_qp_init_attr qp_attr;
+ int i, ret;
+
+ rs_set_qp_size(rs);
+
+ ret = rs_create_cq(rs);
+ if (ret)
+  return ret;
+
+ memset(&qp_attr, 0, sizeof qp_attr);
+ qp_attr.qp_context = rs;
+ qp_attr.send_cq = rs->cm_id->send_cq;
+ qp_attr.recv_cq = rs->cm_id->recv_cq;
+ qp_attr.qp_type = IBV_QPT_RC;
+ qp_attr.sq_sig_all = 1; /* every send produces a completion; rs_poll_cq() counts on that to return slots */
+ qp_attr.cap.max_send_wr = rs->sq_size;
+ qp_attr.cap.max_recv_wr = rs->rq_size;
+ qp_attr.cap.max_send_sge = 2; /* two SGEs cover a send that wraps around sbuf */
+ qp_attr.cap.max_recv_sge = 1;
+ qp_attr.cap.max_inline_data = rs->sq_inline;
+
+ ret = rdma_create_qp(rs->cm_id, NULL, &qp_attr);
+ if (ret)
+  return ret;
+
+ ret = rs_init_bufs(rs); /* no cleanup on failure here; rs_free() handles partially built state */
+ if (ret)
+  return ret;
+
+ for (i = 0; i < rs->rq_size; i++) { /* pre-post the whole receive queue */
+  ret = rs_post_recv(rs);
+  if (ret)
+   return ret;
+ }
+ return 0;
+}
+
+static void rs_free(struct rsocket *rs) /* releases everything rs owns and rs itself; tolerates a partially constructed socket */
+{
+ if (rs->index >= 0) /* only registered sockets are in idm */
+  rs_remove(rs);
+
+ if (rs->rmsg)
+  free(rs->rmsg);
+
+ if (rs->sbuf) {
+  if (rs->smr) /* deregister before freeing the memory it covers */
+   rdma_dereg_mr(rs->smr);
+  free(rs->sbuf);
+ }
+
+ if (rs->rbuf) {
+  if (rs->rmr)
+   rdma_dereg_mr(rs->rmr);
+  free(rs->rbuf);
+ }
+
+ if (rs->target_mr) /* target_sgl lives inside rs, so only the registration is released */
+  rdma_dereg_mr(rs->target_mr);
+
+ if (rs->cm_id) {
+  if (rs->cm_id->qp)
+   rdma_destroy_qp(rs->cm_id);
+  rdma_destroy_id(rs->cm_id);
+ }
+
+ fastlock_destroy(&rs->cq_wait_lock);
+ fastlock_destroy(&rs->cq_lock);
+ fastlock_destroy(&rs->rlock);
+ fastlock_destroy(&rs->slock);
+ free(rs);
+}
+
+static void rs_set_conn_data(struct rsocket *rs, struct rdma_conn_param *param, /* fills *conn in network byte order and attaches it to param as private data */
+        struct rs_conn_data *conn)
+{
+ conn->version = 1;
+ conn->flags = rs_host_is_net() ? RS_CONN_FLAG_NET : 0; /* tells the peer our byte order */
+ conn->credits = htons(rs->rq_size);
+ conn->reserved2 = 0;
+
+ conn->target_sgl.addr = htonll((uintptr_t) rs->target_sgl);
+ conn->target_sgl.length = htonl(RS_SGL_SIZE); /* length here is an entry count */
+ conn->target_sgl.key = htonl(rs->target_mr->rkey);
+
+ conn->data_buf.addr = htonll((uintptr_t) rs->rbuf);
+ conn->data_buf.length = htonl(rs->rbuf_size >> 1); /* only the first half of rbuf is offered initially */
+ conn->data_buf.key = htonl(rs->rmr->rkey);
+
+ param->private_data = conn; /* conn must stay valid until param has been used */
+ param->private_data_len = sizeof *conn;
+}
+
+static void rs_save_conn_data(struct rsocket *rs, struct rs_conn_data *conn) /* records the peer's connection data in host byte order */
+{
+ rs->remote_sgl.addr = ntohll(conn->target_sgl.addr);
+ rs->remote_sgl.length = ntohl(conn->target_sgl.length);
+ rs->remote_sgl.key = ntohl(conn->target_sgl.key);
+ rs->remote_sge = 1; /* first update goes to entry 1 of the peer's SGL */
+ if ((rs_host_is_net() && !(conn->flags & RS_CONN_FLAG_NET)) || /* byte orders differ between the two hosts */
+     (!rs_host_is_net() && (conn->flags & RS_CONN_FLAG_NET)))
+  rs->opts = RS_OPT_SWAP_SGL;
+
+ rs->target_sgl[0].addr = ntohll(conn->data_buf.addr); /* entry 0 starts as the peer's initial receive region */
+ rs->target_sgl[0].length = ntohl(conn->data_buf.length);
+ rs->target_sgl[0].key = ntohl(conn->data_buf.key);
+
+ rs->sseq_comp = ntohs(conn->credits); /* initial send limit = peer's advertised credits */
+}
+
+__declspec(dllexport)
+int rsocket(int domain, int type, int protocol) /* creates a stream rsocket; returns its index, or an error result */
+{
+ struct rsocket *rs;
+ int ret;
+
+ wsa_setlasterror(0);
+ if ((domain != PF_INET && domain != PF_INET6) || /* only IPv4/IPv6 stream sockets with protocol 0 or TCP are accepted */
+     (type != SOCK_STREAM) || (protocol && protocol != IPPROTO_TCP))
+  return ERR(ENOTSUP);
+
+ rs_configure();
+ rs = rs_alloc(NULL);
+ if (!rs)
+  return ERR(ENOMEM);
+
+ ret = rdma_create_id(NULL, &rs->cm_id, rs, RDMA_PS_TCP);
+ if (ret)
+  goto err;
+
+ ret = rs_insert(rs);
+ if (ret < 0)
+  goto err;
+
+ rs->cm_id->route.addr.src_addr.sa_family = (ADDRESS_FAMILY)domain;
+ return rs->index;
+
+err:
+ rs_free(rs); /* copes with cm_id still NULL and index still -1 */
+ return ret;
+}
+
+/*
+ * Bind the rsocket registered under index 'socket' to local address 'addr'
+ * with rdma_bind_addr(); 'addrlen' is not consulted.  Returns that call's
+ * result; on failure errno is handed to wsa_setlasterror().
+ */
+__declspec(dllexport)
+int rbind(int socket, const struct sockaddr *addr, socklen_t addrlen)
+{
+ struct rsocket *sock;
+ int status;
+
+ wsa_setlasterror(0);
+ sock = (struct rsocket *)idm_at(&idm, socket);
+ status = rdma_bind_addr(sock->cm_id, (struct sockaddr *) addr);
+ if (status)
+  wsa_setlasterror(errno);
+ else
+  sock->state = rs_bound;
+ return status;
+}
+
+/*
+ * Put the rsocket registered under index 'socket' into the listening state.
+ * A backlog above RS_MAX_BACKLOG, or equal to SOMAXCONN, is replaced by
+ * RS_MAX_BACKLOG before rdma_listen() is called.  Returns rdma_listen()'s
+ * result; on failure errno is handed to wsa_setlasterror().
+ */
+__declspec(dllexport)
+int rlisten(int socket, int backlog)
+{
+ struct rsocket *sock;
+ int status;
+
+ wsa_setlasterror(0);
+ sock = (struct rsocket *)idm_at(&idm, socket);
+ if (backlog == SOMAXCONN || backlog > RS_MAX_BACKLOG)
+  backlog = RS_MAX_BACKLOG;
+
+ status = rdma_listen(sock->cm_id, backlog);
+ if (status)
+  wsa_setlasterror(errno);
+ else
+  sock->state = rs_listening;
+ return status;
+}
+
+static int rs_do_connect(struct rsocket *rs) /* advances the connect state machine from rs->state; 0 when a step finished, non-zero with errno set otherwise */
+{
+ struct rdma_conn_param param;
+ struct rs_conn_data creq, *cresp;
+ struct sockaddr_storage dst_addr;
+ int to, ret;
+
+ switch (rs->state) {
+ case rs_init:
+ case rs_bound:
+resolve_addr:
+  to = 1000 << rs->retries++; /* timeout doubles with every attempt */
+  RtlCopyMemory( /* work on a local copy of the destination stored by rconnect() */
+   &dst_addr,
+   &rs->cm_id->route.addr.dst_addr,
+   rs->cm_id->route.addr.dst_addr.sa_family == AF_INET
+   ? sizeof(struct sockaddr_in)
+   : sizeof(struct sockaddr_in6)
+  );
+  ret = rdma_resolve_addr(rs->cm_id, NULL,
+     (struct sockaddr *)&dst_addr, to);
+  if (!ret)
+   goto resolve_route; /* completed synchronously: continue with the next phase */
+  if (errno == EAGAIN || errno == EWOULDBLOCK)
+   rs->state = rs_resolving_addr; /* still pending: resume here on the next call */
+  break;
+ case rs_resolving_addr:
+  ret = ucma_complete(rs->cm_id);
+  if (ret) {
+   if (errno == ETIMEDOUT && rs->retries <= RS_CONN_RETRIES)
+    goto resolve_addr; /* retry with the longer timeout */
+   break;
+  }
+
+  rs->retries = 0; /* fresh retry budget for route resolution */
+resolve_route:
+  to = 1000 << rs->retries++;
+  ret = rdma_resolve_route(rs->cm_id, to);
+  if (!ret)
+   goto do_connect;
+  if (errno == EAGAIN || errno == EWOULDBLOCK)
+   rs->state = rs_resolving_route;
+  break;
+ case rs_resolving_route:
+  ret = ucma_complete(rs->cm_id);
+  if (ret) {
+   if (errno == ETIMEDOUT && rs->retries <= RS_CONN_RETRIES)
+    goto resolve_route;
+   break;
+  }
+do_connect:
+  ret = rs_create_ep(rs); /* QP and buffers must exist before the request is sent */
+  if (ret)
+   break;
+
+  memset(&param, 0, sizeof param);
+  rs_set_conn_data(rs, &param, &creq);
+  param.flow_control = 1;
+  param.retry_count = 7;
+  param.rnr_retry_count = 7;
+  rs->retries = 0;
+
+  ret = rdma_connect(rs->cm_id, &param);
+  if (!ret)
+   goto connected;
+  if (errno == EAGAIN || errno == EWOULDBLOCK)
+   rs->state = rs_connecting;
+  break;
+ case rs_connecting:
+  ret = ucma_complete(rs->cm_id);
+  if (ret)
+   break;
+connected:
+  cresp = (struct rs_conn_data *) rs->cm_id->event->param.conn.private_data; /* peer's reply; NOTE(review): used without a NULL or length check -- confirm the CM always supplies it */
+  if (cresp->version != 1) {
+   ret = ERR(ENOTSUP);
+   break;
+  }
+
+  rs_save_conn_data(rs, cresp);
+  rs->state = rs_connect_rdwr;
+  break;
+ case rs_accepting:
+  if (!(rs->fd_flags & O_NONBLOCK))
+   rs_set_nonblocking(rs, 0); /* blocking socket: switch the channels to an INFINITE wait */
+
+  ret = ucma_complete(rs->cm_id);
+  if (ret)
+   break;
+
+  rs->state = rs_connect_rdwr;
+  break;
+ default:
+  ret = ERR(EINVAL); /* no connect step exists for any other state */
+  break;
+ }
+
+ if (ret) {
+  if (errno == EAGAIN || errno == EWOULDBLOCK) {
+   errno = EINPROGRESS; /* pending work is reported the way connect() does */
+  } else {
+   rs->state = rs_connect_error;
+   rs->err = errno;
+  }
+  wsa_setlasterror(errno);
+ }
+ return ret;
+}
+
+__declspec(dllexport)
+int rconnect(int socket, const struct sockaddr *addr, socklen_t addrlen) /* stores the destination and starts (or continues) rs_do_connect() */
+{
+ struct rsocket *rs;
+ 
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ memcpy(&rs->cm_id->route.addr.dst_addr, addr, addrlen); /* NOTE(review): addrlen is not bounded against the destination's size -- confirm callers validate it */
+ return rs_do_connect(rs);
+}
+
+static int rs_post_write(struct rsocket *rs, /* posts one RDMA write with immediate data; result passed through rdma_seterrno() */
+    struct ibv_sge *sgl, int nsge,
+    uint32_t imm_data, int flags,
+    uint64_t addr, uint32_t rkey)
+{
+ struct ibv_send_wr wr, *bad;
+
+ wr.wr_id = (uint64_t) imm_data; /* the send completion is decoded from wr_id in rs_poll_cq() */
+ wr.next = NULL;
+ wr.sg_list = sgl;
+ wr.num_sge = nsge;
+ wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
+ wr.send_flags = flags;
+ wr.imm_data = htonl(imm_data); /* receiver applies ntohl() before decoding */
+ wr.wr.rdma.remote_addr = addr;
+ wr.wr.rdma.rkey = rkey;
+
+ return rdma_seterrno(ibv_post_send(rs->cm_id->qp, &wr, &bad));
+}
+
+/*
+ * Update target SGE before sending data.  Otherwise the remote side may
+ * update the entry before we do.
+ */
+static int rs_write_data(struct rsocket *rs,
+    struct ibv_sge *sgl, int nsge,
+    uint32_t length, int flags)
+{
+ uint64_t addr;
+ uint32_t rkey;
+
+ rs->sseq_no++; /* charge one send sequence, one queue slot and 'length' buffer bytes */
+ rs->sqe_avail--;
+ rs->sbuf_bytes_avail -= length;
+
+ addr = rs->target_sgl[rs->target_sge].addr; /* capture the destination before the entry is advanced */
+ rkey = rs->target_sgl[rs->target_sge].key;
+
+ rs->target_sgl[rs->target_sge].addr += length;
+ rs->target_sgl[rs->target_sge].length -= length;
+
+ if (!rs->target_sgl[rs->target_sge].length) { /* entry used up: move to the next one, wrapping around */
+  if (++rs->target_sge == RS_SGL_SIZE)
+   rs->target_sge = 0;
+ }
+
+ return rs_post_write(rs, sgl, nsge, rs_msg_set(RS_OP_DATA, length),
+        flags, addr, rkey);
+}
+
+static uint32_t rs_sbuf_left(struct rsocket *rs) /* bytes between the send cursor (ssgl[0].addr) and the end of sbuf */
+{
+ return (uint32_t) (((uint64_t) (uintptr_t) &rs->sbuf[rs->sbuf_size]) -
+      rs->ssgl[0].addr);
+}
+
+static void rs_send_credits(struct rsocket *rs) /* sends an RS_OP_SGL message granting receive credits and, when half of rbuf is free, a new buffer region */
+{
+ struct ibv_sge ibsge;
+ struct rs_sge sge;
+
+ rs->ctrl_avail--; /* uses one control slot; returned when the send completes */
+ rs->rseq_comp = rs->rseq_no + (rs->rq_size >> 1); /* next update after another half queue is consumed */
+ if ((uint32_t)rs->rbuf_bytes_avail >= (rs->rbuf_size >> 1)) {
+  if (!(rs->opts & RS_OPT_SWAP_SGL)) {
+   sge.addr = (uintptr_t) &rs->rbuf[rs->rbuf_free_offset];
+   sge.key = rs->rmr->rkey;
+   sge.length = rs->rbuf_size >> 1;
+  } else { /* peer has the other byte order: write the entry byte-swapped */
+   sge.addr = cl_ntoh64((uintptr_t) &rs->rbuf[rs->rbuf_free_offset]);
+   sge.key = cl_ntoh32(rs->rmr->rkey);
+   sge.length = cl_ntoh32(rs->rbuf_size >> 1);
+  }
+
+  ibsge.addr = (uintptr_t) &sge; /* stack object, posted with IBV_SEND_INLINE below */
+  ibsge.lkey = 0;
+  ibsge.length = sizeof(sge);
+
+  rs_post_write(rs, &ibsge, 1,
+         rs_msg_set(RS_OP_SGL, rs->rseq_no + rs->rq_size), /* immediate data carries the new receive sequence limit */
+         IBV_SEND_INLINE,
+         rs->remote_sgl.addr +
+         rs->remote_sge * sizeof(struct rs_sge),
+         rs->remote_sgl.key);
+
+  rs->rbuf_bytes_avail -= rs->rbuf_size >> 1; /* that half now belongs to the peer */
+  rs->rbuf_free_offset += rs->rbuf_size >> 1;
+  if ((uint32_t)rs->rbuf_free_offset >= rs->rbuf_size)
+   rs->rbuf_free_offset = 0;
+  if (++rs->remote_sge == rs->remote_sgl.length)
+   rs->remote_sge = 0;
+ } else { /* no buffer space to hand out: zero-length write carrying credits only */
+  rs_post_write(rs, NULL, 0,
+         rs_msg_set(RS_OP_SGL, rs->rseq_no + rs->rq_size), 0, 0, 0);
+ }
+}
+
+static int rs_give_credits(struct rsocket *rs) /* non-zero when a credit update is both due and possible */
+{
+ return (((uint32_t)rs->rbuf_bytes_avail >= (rs->rbuf_size >> 1)) || /* half of rbuf is free ... */
+         ((short) ((short) rs->rseq_no - (short) rs->rseq_comp) >= 0)) && /* ... or rseq_no reached rseq_comp (16-bit signed difference) */
+        rs->ctrl_avail && (rs->state & rs_connected); /* and a control slot is free on a connected socket */
+}
+
+static void rs_update_credits(struct rsocket *rs)
+{
+ if (!rs_give_credits(rs)) return; /* no update due, or it cannot be sent right now */
+ rs_send_credits(rs);
+}
+
+static int rs_poll_cq(struct rsocket *rs) /* drains the CQ, updates socket accounting, reposts consumed receives; 0 or a negative/error result */
+{
+ struct ibv_wc wc;
+ uint32_t imm_data;
+ int ret, rcnt = 0; /* rcnt = successful receive completions that need reposting */
+
+ while ((ret = ibv_poll_cq(rs->cm_id->recv_cq, 1, &wc)) > 0) {
+  if (wc.wr_id == RS_RECV_WR_ID) { /* receive side: message announced by immediate data */
+   if (wc.status != IBV_WC_SUCCESS)
+    continue; /* failed receives are dropped and not reposted */
+   rcnt++;
+
+   imm_data = ntohl(wc.imm_data);
+   switch (rs_msg_op(imm_data)) {
+   case RS_OP_SGL:
+    rs->sseq_comp = (uint16_t) rs_msg_data(imm_data); /* peer granted credits: new send limit */
+    break;
+   case RS_OP_CTRL:
+    if (rs_msg_data(imm_data) == RS_CTRL_DISCONNECT) {
+     rs->state = rs_disconnected;
+     return 0; /* returns at once, without reposting receives */
+    } else if (rs_msg_data(imm_data) == RS_CTRL_SHUTDOWN) {
+     rs->state &= ~rs_connect_rd; /* peer shut down its side: nothing more to read */
+    }
+    break;
+   default: /* data: queue opcode and length in the rmsg ring */
+    rs->rmsg[rs->rmsg_tail].op = rs_msg_op(imm_data);
+    rs->rmsg[rs->rmsg_tail].data = rs_msg_data(imm_data);
+    if (++rs->rmsg_tail == rs->rq_size + 1)
+     rs->rmsg_tail = 0;
+    break;
+   }
+  } else { /* send side: wr_id holds the immediate value that was posted */
+   switch  (rs_msg_op((uint32_t) wc.wr_id)) {
+   case RS_OP_SGL:
+    rs->ctrl_avail++; /* control slot is free again */
+    break;
+   case RS_OP_CTRL:
+    rs->ctrl_avail++;
+    if (rs_msg_data((uint32_t) wc.wr_id) == RS_CTRL_DISCONNECT)
+     rs->state = rs_disconnected; /* our own disconnect message has gone out */
+    break;
+   default:
+    rs->sqe_avail++; /* data send finished: return slot and buffer bytes */
+    rs->sbuf_bytes_avail += rs_msg_data((uint32_t) wc.wr_id);
+    break;
+   }
+
+   if (wc.status != IBV_WC_SUCCESS && (rs->state & rs_connected)) {
+    rs->state = rs_error;
+    rs->err = EIO;
+   }
+  }
+ }
+
+ if (rs->state & rs_connected) {
+  while (!ret && rcnt--) /* skipped entirely when ibv_poll_cq itself failed */
+  {
+   ret = rs_post_recv(rs);
+  }
+  
+  if (ret) { /* poll or repost failure */
+   rs->state = rs_error;
+   rs->err = errno;
+  }
+ }
+ return ret;
+}
+
+/* If the CQ is armed, take and acknowledge one channel event and disarm.
+ * Failures other than EAGAIN put the socket into rs_error. */
+static int rs_get_cq_event(struct rsocket *rs)
+{
+ struct ibv_cq *ev_cq;
+ void *ev_ctx;
+ int rc;
+ if (!rs->cq_armed)
+  return 0;
+ rc = ibv_get_cq_event(rs->cm_id->recv_cq_channel, &ev_cq, &ev_ctx);
+ if (rc) {
+  if (errno != EAGAIN)
+   rs->state = rs_error;
+  return rc;
+ }
+ ibv_ack_cq_events(rs->cm_id->recv_cq, 1);
+ rs->cq_armed = 0;
+ return rc;
+}
+
+/*
+ * Although we serialize rsend and rrecv calls with respect to themselves,
+ * both calls may run simultaneously and need to poll the CQ for completions.
+ * We need to serialize access to the CQ, but rsend and rrecv need to
+ * allow each other to make forward progress.
+ *
+ * For example, rsend may need to wait for credits from the remote side,
+ * which could be stalled until the remote process calls rrecv.  This should
+ * not block rrecv from receiving data from the remote side however.
+ *
+ * We handle this by using two locks.  The cq_lock protects against polling
+ * the CQ and processing completions.  The cq_wait_lock serializes access to
+ * waiting on the CQ.
+ */
+static int rs_process_cq(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs)) /* polls until test(rs) holds; 0 on success, otherwise an error result */
+{
+ int ret;
+
+ fastlock_acquire(&rs->cq_lock);
+ do {
+  rs_update_credits(rs);
+  ret = rs_poll_cq(rs);
+  if (test(rs)) { /* condition met: success even if the poll reported an error */
+   ret = 0;
+   break;
+  } else if (ret) {
+   break;
+  } else if (nonblock) {
+   ret = ERR(EWOULDBLOCK); /* ends the loop through the while condition */
+  } else if (!rs->cq_armed) {
+   ibv_req_notify_cq(rs->cm_id->recv_cq, 0); /* arm, then poll once more before sleeping */
+   rs->cq_armed = 1;
+  } else {
+   rs_update_credits(rs);
+   fastlock_acquire(&rs->cq_wait_lock); /* take the wait lock before dropping cq_lock so another thread can poll meanwhile */
+   fastlock_release(&rs->cq_lock);
+   ret = rs_get_cq_event(rs);
+   fastlock_release(&rs->cq_wait_lock);
+   fastlock_acquire(&rs->cq_lock);
+  }
+ } while (!ret);
+
+ rs_update_credits(rs);
+ fastlock_release(&rs->cq_lock);
+ return ret;
+}
+
+static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs)) /* busy-polls for up to polling_time microseconds, then falls back to a blocking wait */
+{
+ struct timeval s, e;
+ uint32_t poll_time = 0; /* elapsed microseconds + 1; 0 means the start time is not taken yet */
+ int ret;
+ do {
+  ret = rs_process_cq(rs, 1, test); /* always a non-blocking attempt first */
+  if (!ret || nonblock || errno != EWOULDBLOCK) {
+   return ret;
+  }
+
+  if (!poll_time) /* first pass only: record the start time */
+   gettimeofday(&s, NULL);
+
+  gettimeofday(&e, NULL);
+  poll_time = (e.tv_sec - s.tv_sec) * 1000000 +
+       (e.tv_usec - s.tv_usec) + 1;
+ } while (poll_time <= polling_time);
+ ret = rs_process_cq(rs, 0, test); /* polling budget used up: wait on the CQ channel */
+ return ret;
+}
+
+static int rs_nonblocking(struct rsocket *rs) /* non-zero when O_NONBLOCK is set in fd_flags */
+{
+ return (rs->fd_flags & O_NONBLOCK);
+}
+
+static int rs_is_cq_armed(struct rsocket *rs) /* returns the cq_armed flag; signature matches the 'test' callbacks */
+{
+ return rs->cq_armed;
+}
+
+static int rs_poll_all(struct rsocket *rs) /* always true; as a 'test' callback it makes rs_process_cq() return after one poll */
+{
+ return 1;
+}
+
+/*
+ * We use hardware flow control to prevent over running the remote
+ * receive queue.  However, data transfers still require space in
+ * the remote rmsg queue, or we risk losing notification that data
+ * has been transferred.
+ *
+ * Be careful with race conditions in the check below.  The target SGL
+ * may be updated by a remote RDMA write.
+ */
+static int rs_can_send(struct rsocket *rs)
+{
+ return rs->sqe_avail && rs->sbuf_bytes_avail && /* local send slot and send-buffer space */
+        (rs->sseq_no != rs->sseq_comp) && /* peer credits not used up */
+        (rs->target_sgl[rs->target_sge].length != 0); /* peer buffer has room */
+}
+
+static int rs_conn_can_send(struct rsocket *rs) /* wait condition for senders: sending is possible, or writing is no longer allowed */
+{
+ return rs_can_send(rs) || !(rs->state & rs_connect_wr);
+}
+
+static int rs_conn_can_send_ctrl(struct rsocket *rs) /* wait condition: a control slot is free, or the socket is not connected */
+{
+ return rs->ctrl_avail || !(rs->state & rs_connected);
+}
+
+static int rs_have_rdata(struct rsocket *rs) /* non-zero when the rmsg ring is not empty */
+{
+ return (rs->rmsg_head != rs->rmsg_tail);
+}
+
+static int rs_conn_have_rdata(struct rsocket *rs) /* wait condition for receivers: data is queued, or reading is no longer allowed */
+{
+ return rs_have_rdata(rs) || !(rs->state & rs_connect_rd);
+}
+
+static int rs_conn_all_sends_done(struct rsocket *rs) /* wait condition: every send slot is back, or the socket is not connected */
+{
+ return ((rs->sqe_avail + rs->ctrl_avail) == rs->sq_size) ||
+        !(rs->state & rs_connected);
+}
+
+static ssize_t rs_peek(struct rsocket *rs, void *buf, size_t len) /* copies up to len queued bytes into buf without consuming them; returns the number copied */
+{
+ size_t left = len;
+ uint32_t end_size, rsize;
+ int rmsg_head, rbuf_offset; /* local cursors: the socket's own positions stay untouched */
+ uint8_t *bufb = (uint8_t *)buf;
+
+ rmsg_head = rs->rmsg_head;
+ rbuf_offset = rs->rbuf_offset;
+
+ for (; left && (rmsg_head != rs->rmsg_tail); left -= rsize) {
+  if (left < rs->rmsg[rmsg_head].data) {
+   rsize = left; /* request ends inside this message */
+  } else {
+   rsize = rs->rmsg[rmsg_head].data;
+   if (++rmsg_head == rs->rq_size + 1)
+    rmsg_head = 0;
+  }
+
+  end_size = rs->rbuf_size - rbuf_offset;
+  if (rsize > end_size) { /* data wraps past the end of rbuf: copy the tail piece first */
+   memcpy(bufb, &rs->rbuf[rbuf_offset], end_size);
+   rbuf_offset = 0;
+   bufb += end_size;
+   rsize -= end_size;
+   left -= end_size;
+  }
+  memcpy(bufb, &rs->rbuf[rbuf_offset], rsize);
+  rbuf_offset += rsize;
+  bufb += rsize;
+ }
+
+ return len - left;
+}
+
+/*
+ * Continue to receive any queued data even if the remote side has disconnected.
+ */
+ __declspec(dllexport)
+ssize_t rrecv(int socket, void *buf, size_t len, int flags) /* returns bytes received, or an error result with errno handed to wsa_setlasterror() */
+{
+ struct rsocket *rs;
+ size_t left = len;
+ uint32_t end_size, rsize;
+ int ret;
+ uint8_t *bufb = (uint8_t *)buf;
+ 
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ if (rs->state & rs_opening) { /* connect still in progress: try to advance it first */
+  ret = rs_do_connect(rs);
+  if (ret) {
+   if (errno == EINPROGRESS)
+    errno = EAGAIN; /* report "try again" instead of "in progress" to the reader */
+   goto out;
+  }
+ }
+ fastlock_acquire(&rs->rlock);
+ do {
+  if (!rs_have_rdata(rs)) {
+   ret = rs_get_comp(rs, rs_nonblocking(rs),
+        rs_conn_have_rdata);
+   if (ret)
+    break;
+  }
+
+  ret = 0;
+  if (flags & MSG_PEEK) { /* peek copies without consuming and never loops */
+   left = len - rs_peek(rs, buf, left);
+   break;
+  }
+
+  for (; left && rs_have_rdata(rs); left -= rsize) {
+   if (left < rs->rmsg[rs->rmsg_head].data) {
+    rsize = left;
+    rs->rmsg[rs->rmsg_head].data -= left; /* partial read: message stays at the head with less data */
+   } else {
+    rs->rseq_no++; /* message fully consumed */
+    rsize = rs->rmsg[rs->rmsg_head].data;
+    if (++rs->rmsg_head == rs->rq_size + 1)
+     rs->rmsg_head = 0;
+   }
+
+   end_size = rs->rbuf_size - rs->rbuf_offset;
+   if (rsize > end_size) { /* data wraps past the end of rbuf */
+    memcpy(bufb, &rs->rbuf[rs->rbuf_offset], end_size);
+    rs->rbuf_offset = 0;
+    bufb += end_size;
+    rsize -= end_size;
+    left -= end_size;
+    rs->rbuf_bytes_avail += end_size;
+   }
+   memcpy(bufb, &rs->rbuf[rs->rbuf_offset], rsize);
+   rs->rbuf_offset += rsize;
+   bufb += rsize;
+   rs->rbuf_bytes_avail += rsize; /* freed bytes become available for a later credit update */
+  }
+ } while (left && (flags & MSG_WAITALL) && (rs->state & rs_connect_rd)); /* loop again only for MSG_WAITALL while reading is still allowed */
+ 
+ fastlock_release(&rs->rlock);
+
+out:
+ if (ret)
+ {
+  wsa_setlasterror(errno);
+  return ret;
+ }
+ else
+ {
+  return len - left;
+ }
+}
+
+/* Receive like rrecv(); when data arrived and src_addr is non-NULL, also
+ * report the peer address through rgetpeername(). */
+ssize_t rrecvfrom(int socket, void *buf, size_t len, int flags,
+    struct sockaddr *src_addr, socklen_t *addrlen)
+{
+ ssize_t nread = rrecv(socket, buf, len, flags);
+
+ if (src_addr && nread > 0)
+  rgetpeername(socket, src_addr, addrlen);
+ return nread;
+}
+
+/*
+ * Simple, straightforward implementation for now that only tries to fill
+ * in the first vector.
+ */
+static ssize_t rrecvv(int socket, const struct iovec *iov, int iovcnt, int flags) /* iovcnt is ignored: iov[0] is read unconditionally */
+{
+ return rrecv(socket, iov[0].iov_base, iov[0].iov_len, flags);
+}
+
+ssize_t rrecvmsg(int socket, struct msghdr *msg, int flags) /* recvmsg() analogue: receives into msg_iov[0] only; control data is rejected with ENOTSUP */
+{
+ if (msg->msg_control && msg->msg_controllen)
+  return ERR(ENOTSUP);
+
+ return rrecvv(socket, msg->msg_iov, (int) msg->msg_iovlen, flags); /* honour the caller's flags; msg_flags is an output field and may be uninitialised */
+}
+
+ssize_t rread(int socket, void *buf, size_t count) /* same as rrecv() with flags 0 */
+{
+ return rrecv(socket, buf, count, 0);
+}
+
+ssize_t rreadv(int socket, const struct iovec *iov, int iovcnt) /* same as rrecvv() with flags 0, so only iov[0] is filled */
+{
+ return rrecvv(socket, iov, iovcnt, 0);
+}
+
+/*
+ * We overlap sending the data, by posting a small work request immediately,
+ * then increasing the size of the send on each iteration.
+ */
+ssize_t rsend(int socket, const void *buf, size_t len, int flags) /* returns bytes queued; an error result only when nothing was sent ('flags' is not used) */
+{
+ struct rsocket *rs;
+ struct ibv_sge sge;
+ size_t left = len;
+ uint32_t xfer_size, olen = RS_OLAP_START_SIZE; /* olen = current chunk size, doubled up to RS_MAX_TRANSFER */
+ int ret = 0;
+ uint8_t *bufb = (uint8_t *)buf;
+ 
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ if (rs->state & rs_opening) { /* connect still in progress: try to advance it first */
+  ret = rs_do_connect(rs);
+  if (ret) {
+   if (errno == EINPROGRESS)
+    errno = EAGAIN;
+   goto out;
+  }
+ }
+
+ fastlock_acquire(&rs->slock);
+ for (; left; left -= xfer_size, bufb += xfer_size) {
+  if (!rs_can_send(rs)) {
+   ret = rs_get_comp(rs, rs_nonblocking(rs),
+       rs_conn_can_send);
+   if (ret)
+    break;
+   if (!(rs->state & rs_connect_wr)) { /* wait ended because writing is no longer allowed */
+    ret = ERR(ECONNRESET);
+    break;
+   }
+  }
+
+  if (olen < left) {
+   xfer_size = olen;
+   if (olen < RS_MAX_TRANSFER)
+    olen <<= 1;
+  } else {
+   xfer_size = left;
+  }
+
+  if (xfer_size > (uint32_t)rs->sbuf_bytes_avail) /* limit by free send-buffer space ... */
+   xfer_size = rs->sbuf_bytes_avail;
+  if (xfer_size > rs->target_sgl[rs->target_sge].length) /* ... and by room in the peer's current buffer */
+   xfer_size = rs->target_sgl[rs->target_sge].length;
+
+  if (xfer_size <= rs->sq_inline) { /* small: send inline straight from the caller's buffer */
+   sge.addr = (uintptr_t)bufb;
+   sge.length = xfer_size;
+   sge.lkey = 0;
+   ret = rs_write_data(rs, &sge, 1, xfer_size, IBV_SEND_INLINE);
+  } else if (xfer_size <= rs_sbuf_left(rs)) { /* fits before the end of sbuf: one copy, one SGE */
+   memcpy((void *) (uintptr_t) rs->ssgl[0].addr, bufb, xfer_size);
+   rs->ssgl[0].length = xfer_size;
+   ret = rs_write_data(rs, rs->ssgl, 1, xfer_size, 0);
+   if (xfer_size < rs_sbuf_left(rs))
+    rs->ssgl[0].addr += xfer_size;
+   else
+    rs->ssgl[0].addr = (uintptr_t) rs->sbuf; /* ended exactly at the end: cursor wraps to the start */
+  } else { /* wraps: tail of sbuf in ssgl[0], remainder from the start of sbuf in ssgl[1] */
+   rs->ssgl[0].length = rs_sbuf_left(rs);
+   memcpy((void *) (uintptr_t) rs->ssgl[0].addr, bufb,
+    rs->ssgl[0].length);
+   rs->ssgl[1].length = xfer_size - rs->ssgl[0].length;
+   memcpy(rs->sbuf, bufb + rs->ssgl[0].length, rs->ssgl[1].length);
+   ret = rs_write_data(rs, rs->ssgl, 2, xfer_size, 0);
+   rs->ssgl[0].addr = (uintptr_t) rs->sbuf + rs->ssgl[1].length;
+  }
+  if (ret)
+   break;
+ }
+ fastlock_release(&rs->slock);
+
+out:
+ if (ret && left == len) /* failure before any byte was queued */
+ {
+  wsa_setlasterror(errno);
+  return ret;
+ }
+ else
+ {
+  return len - left; /* partial progress is reported as a short count */
+ }
+}
+
+/*
+ * sendto() counterpart for rsockets.
+ *
+ * On Windows, a connection-oriented socket simply ignores the dest_addr
+ * and addrlen parameters, so this call is equivalent to rsend().
+ */
+ssize_t rsendto(int socket, const void *buf, size_t len, int flags,
+  const struct sockaddr *dest_addr, socklen_t addrlen)
+{
+ ssize_t sent;
+
+ sent = rsend(socket, buf, len, flags);
+ return sent;
+}
+
+/*
+ * Gather len bytes from an iovec array into the contiguous buffer dst.
+ *
+ * *iov and *offset together form a read cursor (current element and byte
+ * offset inside it).  Both are advanced past the copied bytes so that a
+ * following call continues where this one stopped.
+ */
+static void rs_copy_iov(void *dst, const struct iovec **iov, size_t *offset, size_t len)
+{
+ uint8_t *out = (uint8_t *)dst;
+ size_t avail;
+
+ /* Each completed pass consumes the rest of the current element. */
+ for (; len; len -= avail, out += avail, (*iov)++, *offset = 0) {
+  avail = (*iov)->iov_len - *offset;
+  if (avail > len) {
+   /* Request ends inside this element: stay on it. */
+   memcpy(out, (*iov)->iov_base + *offset, len);
+   *offset += len;
+   return;
+  }
+
+  memcpy(out, (*iov)->iov_base + *offset, avail);
+ }
+}
+
+/*
+ * rsendv - scatter/gather variant of rsend().
+ *
+ * Sends the concatenation of the iovcnt buffers described by iov.  All
+ * data is first copied into the rsocket send buffer via rs_copy_iov();
+ * chunk sizing follows the same doubling scheme as rsend().
+ *
+ * Returns the number of bytes sent, or the error return value if the
+ * failure occurred before any byte was sent.  The flags argument is not
+ * referenced.
+ *
+ * NOTE(review): iov[0] is read unconditionally, so iovcnt presumably must
+ * be at least 1 - confirm against callers.
+ * NOTE(review): unlike rsend(), no WSA last error is set on failure here.
+ */
+static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags)
+{
+ struct rsocket *rs;
+ const struct iovec *cur_iov;
+ size_t left, len, offset = 0;
+ uint32_t xfer_size, olen = RS_OLAP_START_SIZE;
+ int i, ret = 0;
+
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ /* Connection still being set up: drive it forward before sending. */
+ if (rs->state & rs_opening) {
+  ret = rs_do_connect(rs);
+  if (ret) {
+   if (errno == EINPROGRESS)
+    errno = EAGAIN;
+   return ret;
+  }
+ }
+
+ /* Total payload length is the sum of all iovec element lengths. */
+ cur_iov = iov;
+ len = iov[0].iov_len;
+ for (i = 1; i < iovcnt; i++)
+  len += iov[i].iov_len;
+
+ fastlock_acquire(&rs->slock);
+ for (left = len; left; left -= xfer_size) {
+  /* Wait (or fail, if nonblocking) until a send can be posted. */
+  if (!rs_can_send(rs)) {
+   ret = rs_get_comp(rs, rs_nonblocking(rs),
+       rs_conn_can_send);
+   if (ret)
+    break;
+   /* The write direction was closed while waiting. */
+   if (!(rs->state & rs_connect_wr)) {
+    ret = ERR(ECONNRESET);
+    break;
+   }
+  }
+
+  /* Overlap sizing: grow the chunk size on each iteration. */
+  if (olen < left) {
+   xfer_size = olen;
+   if (olen < RS_MAX_TRANSFER)
+    olen <<= 1;
+  } else {
+   xfer_size = left;
+  }
+
+  /* Never exceed local send-buffer space or the remote target SGE. */
+  if (xfer_size > (uint32_t)rs->sbuf_bytes_avail)
+   xfer_size = rs->sbuf_bytes_avail;
+  if (xfer_size > rs->target_sgl[rs->target_sge].length)
+   xfer_size = rs->target_sgl[rs->target_sge].length;
+
+  if (xfer_size <= rs_sbuf_left(rs)) {
+   /* Fits before the end of the send buffer: one SGE. */
+   rs_copy_iov((void *) (uintptr_t) rs->ssgl[0].addr,
+        &cur_iov, &offset, xfer_size);
+   rs->ssgl[0].length = xfer_size;
+   ret = rs_write_data(rs, rs->ssgl, 1, xfer_size,
+         xfer_size <= rs->sq_inline ? IBV_SEND_INLINE : 0);
+   /* Advance the write position, wrapping to the start when full. */
+   if (xfer_size < rs_sbuf_left(rs))
+    rs->ssgl[0].addr += xfer_size;
+   else
+    rs->ssgl[0].addr = (uintptr_t) rs->sbuf;
+  } else {
+   /* Wraps around the end of the send buffer: split into two SGEs. */
+   rs->ssgl[0].length = rs_sbuf_left(rs);
+   rs_copy_iov((void *) (uintptr_t) rs->ssgl[0].addr, &cur_iov,
+        &offset, rs->ssgl[0].length);
+   rs->ssgl[1].length = xfer_size - rs->ssgl[0].length;
+   rs_copy_iov(rs->sbuf, &cur_iov, &offset, rs->ssgl[1].length);
+   ret = rs_write_data(rs, rs->ssgl, 2, xfer_size,
+         xfer_size <= rs->sq_inline ? IBV_SEND_INLINE : 0);
+   rs->ssgl[0].addr = (uintptr_t) rs->sbuf + rs->ssgl[1].length;
+  }
+  if (ret)
+   break;
+ }
+ fastlock_release(&rs->slock);
+
+ /* Only report the error when nothing at all was sent. */
+ return (ret && left == len) ? ret : len - left;
+}
+
+/*
+ * sendmsg() counterpart for rsockets.  Ancillary (control) data is not
+ * supported; the payload described by msg->msg_iov is sent via rsendv()
+ * using msg->msg_flags.
+ */
+__declspec(dllexport)
+ssize_t rsendmsg(int socket, const struct msghdr *msg, int flags)
+{
+ int has_control;
+
+ wsa_setlasterror(0);
+
+ has_control = msg->msg_control && msg->msg_controllen;
+ if (!has_control)
+  return rsendv(socket, msg->msg_iov, (int) msg->msg_iovlen, msg->msg_flags);
+
+ return ERR(ENOTSUP);
+}
+
+/* write() counterpart for rsockets: an rsend() without any flags. */
+__declspec(dllexport)
+ssize_t rwrite(int socket, const void *buf, size_t count)
+{
+ const int no_flags = 0;
+
+ return rsend(socket, buf, count, no_flags);
+}
+
+/* writev() counterpart for rsockets: an rsendv() without any flags. */
+__declspec(dllexport)
+ssize_t rwritev(int socket, const struct iovec *iov, int iovcnt)
+{
+ const int no_flags = 0;
+
+ return rsendv(socket, iov, iovcnt, no_flags);
+}
+
+/*
+ * Return a per-thread scratch array with room for at least nfds pollfd
+ * entries.  The array is cached between calls and only reallocated when a
+ * larger one is requested; NULL is returned if that allocation fails.
+ */
+static struct pollfd *rs_fds_alloc(nfds_t nfds)
+{
+ static __thread struct pollfd *cache;
+ static __thread nfds_t cache_len;
+
+ /* Cached array is already big enough. */
+ if (nfds <= cache_len)
+  return cache;
+
+ free(cache);
+ cache = (struct pollfd *)malloc(sizeof *cache * nfds);
+ cache_len = cache ? nfds : 0;
+
+ return cache;
+}
+
+/*
+ * Compute the poll result bits for a single rsocket.
+ *
+ * rs       - rsocket to examine
+ * events   - requested events (POLLIN / POLLOUT are evaluated)
+ * nonblock - passed through to rs_process_cq()
+ * test     - completion predicate passed through to rs_process_cq()
+ *
+ * Returns a mask of POLLIN/POLLOUT/POLLHUP/POLLERR for connected,
+ * disconnected or failed sockets, the result of CompSetPoll() for a
+ * listening socket, and 0 if nothing is ready.
+ */
+static int rs_poll_rs(struct rsocket *rs, int events,
+        int nonblock, int (*test)(struct rsocket *rs))
+{
+ COMP_SET fds;
+ short revents;
+ int ret;
+
+check_cq:
+ if ((rs->state & rs_connected) || (rs->state == rs_disconnected) ||
+     (rs->state & rs_error)) {
+  /* Reap completions first so the readiness checks see fresh state. */
+  rs_process_cq(rs, nonblock, test);
+
+  revents = 0;
+  if ((events & POLLIN) && rs_conn_have_rdata(rs))
+   revents |= POLLIN;
+  if ((events & POLLOUT) && rs_can_send(rs))
+   revents |= POLLOUT;
+  /* Not connected any more: hang-up for a clean disconnect, else error. */
+  if (!(rs->state & rs_connected)) {
+   if (rs->state == rs_disconnected)
+    revents |= POLLHUP;
+   else
+    revents |= POLLERR;
+  }
+
+  return revents;
+ }
+
+ if (rs->state == rs_listening) {
+  /* Poll the CM event channel with a zero timeout. */
+  /* NOTE(review): no cleanup call for 'fds' is visible here - confirm
+   * whether CompSetInit() acquires a resource that must be released. */
+  if (CompSetInit(&fds))
+   return 0;
+
+  CompSetZero(&fds);
+  CompSetAdd(&rs->cm_id->channel->channel, &fds);
+  return CompSetPoll(&fds, 0);
+ }
+
+ if (rs->state & rs_opening) {
+  /* Drive the connection setup forward. */
+  ret = rs_do_connect(rs);
+  if (ret) {
+   if (errno == EINPROGRESS) {
+    /* Still connecting: nothing to report yet. */
+    errno = 0;
+    return 0;
+   } else {
+    /* Connect failed: report POLLOUT. */
+    return POLLOUT;
+   }
+  }
+  /* Connect step succeeded: re-evaluate with the new state. */
+  goto check_cq;
+ }
+
+ if (rs->state == rs_connect_error)
+  return (rs->err && events & POLLOUT) ? POLLOUT : 0;
+
+ return 0;
+}
+
+/*
+ * Perform one non-blocking readiness pass over all descriptors.
+ *
+ * For every entry that maps to an rsocket, revents is computed with
+ * rs_poll_rs().  Entries that are not rsockets cannot be polled here (the
+ * WSAPoll() call is disabled), so their revents is cleared; otherwise a
+ * stale or uninitialised value supplied by the caller would be counted as
+ * a ready descriptor.
+ *
+ * Returns the number of entries with a non-zero revents.
+ */
+static int rs_poll_check(struct pollfd *fds, nfds_t nfds)
+{
+ struct rsocket *rs;
+ int i, cnt = 0;
+
+ for (i = 0; i < (int)nfds; i++) {
+  rs = (struct rsocket *)idm_lookup(&idm, (int)fds[i].fd);
+  if (rs)
+   fds[i].revents = (SHORT)rs_poll_rs(rs, fds[i].events, 1, rs_poll_all);
+  else
+   fds[i].revents = 0; /* was: WSAPoll(&fds[i], 1, 0); */
+
+  if (fds[i].revents)
+   cnt++;
+ }
+ return cnt;
+}
+
+/*
+ * Prepare the shadow array rfds for a blocking wait.
+ *
+ * Each rsocket entry is checked once more with rs_poll_rs() (using the
+ * rs_is_cq_armed predicate); if it already has events, 1 is returned
+ * immediately.  Otherwise the shadow entry is filled with a descriptor
+ * derived from the CQ channel event (connected or later states) or the
+ * CM channel event, waiting for POLLIN.  Non-rsocket entries are copied
+ * through unchanged.  Returns 0 when every entry has been prepared.
+ */
+static int rs_poll_arm(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
+{
+ struct rsocket *rs;
+ int i;
+
+ for (i = 0; i < (int)nfds; i++) {
+  rs = (struct rsocket *)idm_lookup(&idm, (int)fds[i].fd);
+  if (!rs) {
+   /* Not an rsocket: pass the request through as is. */
+   rfds[i].fd = fds[i].fd;
+   rfds[i].events = fds[i].events;
+   rfds[i].revents = 0;
+   continue;
+  }
+
+  fds[i].revents = (SHORT)rs_poll_rs(rs, fds[i].events, 0, rs_is_cq_armed);
+  if (fds[i].revents)
+   return 1;
+
+  rfds[i].fd = (rs->state >= rs_connected) ?
+   ((short)rs->cm_id->recv_cq_channel->comp_channel.Event >> 2) :
+   ((short)rs->cm_id->channel->channel.Event >> 2);
+  rfds[i].events = POLLIN;
+  rfds[i].revents = 0;
+ }
+ return 0;
+}
+
+/*
+ * Translate the results found in the shadow array rfds back into fds.
+ *
+ * Only entries whose shadow revents is non-zero are looked at.  For an
+ * rsocket the CQ event is consumed and the user-visible events are
+ * recomputed with rs_poll_rs(); for any other descriptor the shadow
+ * revents is copied.  Returns the number of entries that ended up with a
+ * non-zero revents.
+ */
+static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
+{
+ struct rsocket *rs;
+ int i, cnt = 0;
+
+ for (i = 0; i < (int)nfds; i++) {
+  if (rfds[i].revents) {
+   rs = (struct rsocket *)idm_lookup(&idm, (int)fds[i].fd);
+   if (!rs) {
+    fds[i].revents = rfds[i].revents;
+   } else {
+    rs_get_cq_event(rs);
+    fds[i].revents = (SHORT)rs_poll_rs(rs, fds[i].events, 1, rs_poll_all);
+   }
+   cnt += fds[i].revents ? 1 : 0;
+  }
+ }
+ return cnt;
+}
+
+/*
+ * We need to poll *all* fd's that the user specifies at least once.
+ * Note that we may receive events on an rsocket that may not be reported
+ * to the user (e.g. connection events or credit updates).  Process those
+ * events, then return to polling until we find ones of interest.
+ */
+/*
+ * poll() counterpart for rsockets.
+ *
+ * fds/nfds - descriptors to examine; revents is filled in on return
+ * timeout  - 0: check once and return; > 0: give up after that many
+ *            milliseconds; < 0: wait until an event is found
+ *
+ * Phase 1 busy-polls with rs_poll_check() for up to polling_time
+ * microseconds.  Phase 2 re-arms and re-checks in a loop.  Because the
+ * blocking WSAPoll() call is compiled out, phase 2 enforces a positive
+ * timeout itself by measuring the time elapsed since phase 1 started;
+ * without that check the call would never return when no event arrives.
+ *
+ * Returns the value of rs_poll_check()/rs_poll_events() (count of ready
+ * entries), 1 if rs_poll_arm() found a ready rsocket, 0 on timeout, or
+ * the ERR(ENOMEM) result if the shadow array cannot be allocated.
+ */
+__declspec(dllexport)
+int rpoll(struct pollfd *fds, nfds_t nfds, int timeout)
+{
+ struct timeval s, e;
+ struct pollfd *rfds;
+ uint32_t poll_time = 0;
+ long long elapsed_ms;
+ int ret;
+
+ wsa_setlasterror(0);
+ do {
+  ret = rs_poll_check(fds, nfds);
+  if (ret || !timeout)
+   return ret;
+
+  /* First pass only: remember when polling started. */
+  if (!poll_time)
+   gettimeofday(&s, NULL);
+
+  gettimeofday(&e, NULL);
+  poll_time = (e.tv_sec - s.tv_sec) * 1000000 +
+       (e.tv_usec - s.tv_usec) + 1;
+ } while (poll_time <= polling_time);
+
+ rfds = rs_fds_alloc(nfds);
+ if (!rfds)
+  return ERR(ENOMEM);
+
+ do {
+  ret = rs_poll_arm(rfds, fds, nfds);
+  if (ret)
+   break;
+#if 0
+  ret = WSAPoll(rfds, nfds, timeout);
+  if (ret <= 0)
+   break;
+#endif
+  ret = rs_poll_events(rfds, fds, nfds);
+
+  /* Honour a finite timeout; 's' was set on the first phase-1 pass. */
+  if (!ret && timeout > 0) {
+   gettimeofday(&e, NULL);
+   elapsed_ms = (long long)(e.tv_sec - s.tv_sec) * 1000 +
+         (e.tv_usec - s.tv_usec) / 1000;
+   if (elapsed_ms >= timeout)
+    break; /* ret == 0: timed out */
+  }
+ } while (!ret);
+
+ return ret;
+}
+
+/*
+ * Convert select()-style descriptor sets into a pollfd array.
+ *
+ * On entry *nfds is the exclusive upper bound of descriptors to test; on
+ * return it holds the number of entries stored in the returned array.
+ * A descriptor is included if it is a member of any of the three sets;
+ * POLLIN is requested for readfds members and POLLOUT for writefds
+ * members.  Membership is tracked with an explicit flag rather than by
+ * testing the stored fd value, so descriptor 0 is handled like any other
+ * and event bits can never leak from a skipped descriptor into the next.
+ *
+ * Returns a calloc()ed array the caller must free(), or NULL if the
+ * allocation fails.
+ */
+static struct pollfd *
+rs_select_to_poll(int *nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds)
+{
+ struct pollfd *fds;
+ int fd, i = 0;
+
+ fds = (struct pollfd *)calloc(*nfds, sizeof *fds);
+ if (!fds)
+  return NULL;
+
+ for (fd = 0; fd < *nfds; fd++) {
+  short events = 0;
+  int selected = 0;
+
+  if (readfds && FD_ISSET(fd, readfds)) {
+   events |= POLLIN;
+   selected = 1;
+  }
+
+  if (writefds && FD_ISSET(fd, writefds)) {
+   events |= POLLOUT;
+   selected = 1;
+  }
+
+  if (exceptfds && FD_ISSET(fd, exceptfds))
+   selected = 1;
+
+  if (!selected)
+   continue;
+
+  fds[i].fd = fd;
+  fds[i].events = events;
+  i++;
+ }
+
+ *nfds = i;
+ return fds;
+}
+
+/*
+ * Convert poll results back into select()-style descriptor sets.
+ *
+ * A descriptor is added to readfds on POLLIN or POLLHUP, to writefds on
+ * POLLOUT, and to exceptfds when any bit other than POLLIN/POLLOUT is
+ * set.  Returns the total number of set insertions performed (one
+ * descriptor may be counted in several sets).
+ */
+static int
+rs_poll_to_select(int nfds, struct pollfd *fds, fd_set *readfds,
+    fd_set *writefds, fd_set *exceptfds)
+{
+ struct pollfd *cur;
+ int idx, total = 0;
+
+ for (idx = 0; idx < nfds; idx++) {
+  cur = &fds[idx];
+
+  if (readfds && (cur->revents & (POLLIN | POLLHUP))) {
+   FD_SET(cur->fd, readfds);
+   total++;
+  }
+
+  if (writefds && (cur->revents & POLLOUT)) {
+   FD_SET(cur->fd, writefds);
+   total++;
+  }
+
+  if (exceptfds && (cur->revents & ~(POLLIN | POLLOUT))) {
+   FD_SET(cur->fd, exceptfds);
+   total++;
+  }
+ }
+ return total;
+}
+
+/*
+ * Convert a select()-style timeout into poll()-style milliseconds.
+ * A NULL timeout (wait without limit) becomes -1.
+ */
+static int rs_convert_timeout(struct timeval *timeout)
+{
+ if (!timeout)
+  return -1;
+
+ return timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
+}
+
+/*
+ * select() counterpart for rsockets, implemented on top of rpoll().
+ *
+ * The descriptor sets are converted to a pollfd array, polled, cleared,
+ * and then refilled from the poll results.  Returns the value of rpoll()
+ * when it is not positive, otherwise the count produced by
+ * rs_poll_to_select(); the ERR(ENOMEM) result if the array cannot be
+ * allocated.
+ */
+__declspec(dllexport)
+int rselect(int nfds, fd_set *readfds, fd_set *writefds,
+     fd_set *exceptfds, struct timeval *timeout)
+{
+ struct pollfd *pfds;
+ int ready;
+
+ wsa_setlasterror(0);
+
+ pfds = rs_select_to_poll(&nfds, readfds, writefds, exceptfds);
+ if (pfds == NULL)
+  return ERR(ENOMEM);
+
+ ready = rpoll(pfds, nfds, rs_convert_timeout(timeout));
+
+ /* The sets are outputs from here on: start from empty. */
+ if (readfds)
+  FD_ZERO(readfds);
+ if (writefds)
+  FD_ZERO(writefds);
+ if (exceptfds)
+  FD_ZERO(exceptfds);
+
+ if (ready > 0)
+  ready = rs_poll_to_select(nfds, pfds, readfds, writefds, exceptfds);
+
+ free(pfds);
+ return ready;
+}
+
+/*
+ * For graceful disconnect, notify the remote side that we're
+ * disconnecting and wait until all outstanding sends complete.
+ */
+/*
+ * shutdown() counterpart for rsockets.
+ *
+ * how == SHUT_RD only clears the local read-direction state bit.  For the
+ * other modes a control message (RS_CTRL_SHUTDOWN or RS_CTRL_DISCONNECT)
+ * is posted to the peer if the socket is connected and a control credit
+ * is available, and the call then waits for outstanding sends.
+ *
+ * Returns 0, except when waiting for a control credit fails: then the WSA
+ * last error is set from errno and that error value is returned.  The
+ * result of rs_post_write() is stored in ret but not returned.
+ */
+__declspec(dllexport)
+int rshutdown(int socket, int how)
+{
+ struct rsocket *rs;
+ int ctrl, ret = 0;
+
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ if (how == SHUT_RD) {
+  rs->state &= ~rs_connect_rd;
+  return 0;
+ }
+
+ /* Switch to blocking mode for the duration of the shutdown. */
+ if (rs->fd_flags & O_NONBLOCK)
+  rs_set_nonblocking(rs, 0);
+
+ if (rs->state & rs_connected) {
+  if (how == SHUT_RDWR) {
+   ctrl = RS_CTRL_DISCONNECT;
+   rs->state &= ~(rs_connect_rd | rs_connect_wr);
+  } else {
+   /* Write side only; a full disconnect if reads are already closed. */
+   rs->state &= ~rs_connect_wr;
+   ctrl = (rs->state & rs_connect_rd) ?
+    RS_CTRL_SHUTDOWN : RS_CTRL_DISCONNECT;
+  }
+  /* No control credit left: wait until one becomes available. */
+  if (!rs->ctrl_avail) {
+   ret = rs_process_cq(rs, 0, rs_conn_can_send_ctrl);
+   if (ret)
+   {
+    wsa_setlasterror(errno);
+    return ret;
+   }
+  }
+  /* Notify the peer, consuming one control credit. */
+  if ((rs->state & rs_connected) && rs->ctrl_avail) {
+   rs->ctrl_avail--;
+   ret = rs_post_write(rs, NULL, 0,
+         rs_msg_set(RS_OP_CTRL, ctrl), 0, 0, 0);
+  }
+ }
+
+ /* Wait until all outstanding sends have completed. */
+ if (rs->state & rs_connected)
+  rs_process_cq(rs, 0, rs_conn_all_sends_done);
+
+ /* Restore nonblocking mode if the socket is still connected. */
+ if ((rs->fd_flags & O_NONBLOCK) && (rs->state & rs_connected))
+  rs_set_nonblocking(rs, 1);
+
+ return 0;
+}
+
+/*
+ * close() counterpart for rsockets: a still-connected socket is shut
+ * down in both directions first, then the rsocket is released.
+ * Always returns 0.
+ */
+__declspec(dllexport)
+int rclose(int socket)
+{
+ struct rsocket *sock;
+
+ wsa_setlasterror(0);
+
+ sock = (struct rsocket *)idm_at(&idm, socket);
+ if (sock->state & rs_connected) {
+  rshutdown(socket, SHUT_RDWR);
+ }
+ rs_free(sock);
+
+ return 0;
+}
+
+/*
+ * Copy a socket address into a caller-supplied buffer.
+ *
+ * At most *len bytes are copied.  On return *len holds the full size of
+ * the address structure (sockaddr_in for AF_INET, sockaddr_in6 for any
+ * other family), which may exceed the number of bytes actually copied.
+ */
+static void rs_copy_addr(struct sockaddr *dst, struct sockaddr *src, socklen_t *len)
+{
+ socklen_t size;
+ size_t full;
+
+ if (src->sa_family == AF_INET)
+  full = sizeof(struct sockaddr_in);
+ else
+  full = sizeof(struct sockaddr_in6);
+
+ size = min(*len, full);
+ *len = full;
+ memcpy(dst, src, size);
+}
+
+/*
+ * getpeername() counterpart for rsockets: copies the peer address of the
+ * connection into addr (see rs_copy_addr() for the addrlen handling).
+ * Always returns 0.
+ */
+__declspec(dllexport)
+int rgetpeername(int socket, struct sockaddr *addr, socklen_t *addrlen)
+{
+ struct rsocket *sock;
+ struct sockaddr *peer;
+
+ wsa_setlasterror(0);
+
+ sock = (struct rsocket *)idm_at(&idm, socket);
+ peer = rdma_get_peer_addr(sock->cm_id);
+ rs_copy_addr(addr, peer, addrlen);
+
+ return 0;
+}
+
+/*
+ * getsockname() counterpart for rsockets: copies the local address of
+ * the rsocket into addr (see rs_copy_addr() for the addrlen handling).
+ * Always returns 0.
+ */
+__declspec(dllexport)
+int rgetsockname(int socket, struct sockaddr *addr, socklen_t *addrlen)
+{
+ struct rsocket *sock;
+ struct sockaddr *local;
+
+ wsa_setlasterror(0);
+
+ sock = (struct rsocket *)idm_at(&idm, socket);
+ local = rdma_get_local_addr(sock->cm_id);
+ rs_copy_addr(addr, local, addrlen);
+
+ return 0;
+}
+
+/*
+ * Nonblocking is usually not inherited between sockets, but we need to
+ * inherit it here to establish the connection only.  This is needed to
+ * prevent rdma_accept from blocking until the remote side finishes
+ * establishing the connection.  If we were to allow rdma_accept to block,
+ * then a single thread cannot establish a connection with itself, or
+ * two threads which try to connect to each other can deadlock trying to
+ * form a connection.
+ *
+ * Data transfers on the new socket remain blocking unless the user
+ * specifies otherwise through rfcntl.
+ */
+/*
+ * accept() counterpart for rsockets.
+ *
+ * Takes the next connection request from the listening rsocket 'socket',
+ * creates a new rsocket for it and accepts the connection.  If addr and
+ * addrlen are both non-NULL the peer address is stored there.
+ *
+ * Returns the index of the new rsocket on success.  On failure the new
+ * rsocket is freed and the failing call's error value is returned
+ * (ERR(ENOMEM) if the rsocket cannot be allocated, ERR(ENOTSUP) if the
+ * peer's protocol version is not 1).
+ */
+__declspec(dllexport)
+int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
+{
+ struct rsocket *rs, *new_rs;
+ struct rdma_conn_param param;
+ struct rs_conn_data *creq, cresp;
+ int ret;
+ 
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ new_rs = rs_alloc(rs);
+ if (!new_rs)
+  return ERR(ENOMEM);
+
+ /* Fetch the pending connection request into the new rsocket's cm_id. */
+ ret = rdma_get_request(rs->cm_id, &new_rs->cm_id);
+ if (ret)
+  goto err;
+
+ ret = rs_insert(new_rs);
+ if (ret < 0)
+  goto err;
+
+ /* The peer's connection data travels as CM private data. */
+ creq = (struct rs_conn_data *) new_rs->cm_id->event->param.conn.private_data;
+ if (creq->version != 1) {
+  ret = ERR(ENOTSUP);
+  goto err;
+ }
+
+ /* Apply the listener's nonblocking mode to the new socket. */
+ if (rs->fd_flags & O_NONBLOCK)
+  rs_set_nonblocking(new_rs, O_NONBLOCK);
+
+ ret = rs_create_ep(new_rs);
+ if (ret)
+  goto err;
+
+ rs_save_conn_data(new_rs, creq);
+ param = new_rs->cm_id->event->param.conn;
+ rs_set_conn_data(new_rs, &param, &cresp);
+ ret = rdma_accept(new_rs->cm_id, &param);
+ if (!ret)
+  new_rs->state = rs_connect_rdwr;
+ else if (errno == EAGAIN || errno == EWOULDBLOCK)
+  /* Accept has not finished yet: remember that it is in progress. */
+  new_rs->state = rs_accepting;
+ else
+  goto err;
+
+ if (addr && addrlen)
+  rgetpeername(new_rs->index, addr, addrlen);
+ return new_rs->index;
+
+err:
+ rs_free(new_rs);
+ return ret;
+}
+
+/*
+ * Socket provider's variant:
+ */
+/*
+ * Winsock service-provider entry point for accept().
+ *
+ * Resolves the provider context of 'socket' to an rsocket index, then
+ * follows the same steps as raccept(), with two additions: the optional
+ * condition callback lpfnCondition may reject or defer the connection,
+ * and a Winsock socket handle is created for the new rsocket before the
+ * connection is accepted.
+ *
+ * Returns the new socket handle, or INVALID_SOCKET on failure.
+ *
+ * NOTE(review): several failure paths (rdma_get_request, rs_insert,
+ * rs_create_ep, rdma_accept, version mismatch) do not store a value in
+ * *lpErrno - confirm whether callers rely on it.
+ */
+SOCKET WSPAPI WSPAccept(
+  SOCKET    socket,
+  struct sockaddr *addr,
+  LPINT    addrlen,
+  LPCONDITIONPROC  lpfnCondition,
+        DWORD_PTR   dwCallbackData,
+        LPINT    lpErrno
+ )
+{
+ struct rsocket *rs, *new_rs;
+ struct rdma_conn_param param;
+ struct rs_conn_data *creq, cresp;
+ SOCKET new_socket = INVALID_SOCKET;
+ DWORD_PTR rsock = INVALID_SOCKET;
+ /* The socket handle's context value is used as the rsocket index. */
+ int   ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(
+      socket,
+      (PDWORD_PTR)&rsock, // __out PDWORD_PTR lpContext
+      lpErrno
+     );
+ if (SOCKET_ERROR == ret)
+  return INVALID_SOCKET;
+ 
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, (int)rsock);
+ new_rs = rs_alloc(rs);
+ /* NOTE(review): ERR() is returned here from a SOCKET-returning function. */
+ if (!new_rs)
+  return ERR(ENOMEM);
+
+ /* Fetch the pending connection request into the new rsocket's cm_id. */
+ ret = rdma_get_request(rs->cm_id, &new_rs->cm_id);
+ if (ret)
+  goto err;
+
+ ret = rs_insert(new_rs);
+ if (ret < 0)
+  goto err;
+
+ /* The peer's connection data travels as CM private data. */
+ creq = (struct rs_conn_data *) new_rs->cm_id->event->param.conn.private_data;
+ if (creq->version != 1) {
+  ret = ERR(ENOTSUP);
+  goto err;
+ }
+
+ /* Apply the listener's nonblocking mode to the new socket. */
+ if (rs->fd_flags & O_NONBLOCK)
+  rs_set_nonblocking(new_rs, O_NONBLOCK);
+
+ ret = rs_create_ep(new_rs);
+ if (ret)
+  goto err;
+
+ rs_save_conn_data(new_rs, creq);
+ param = new_rs->cm_id->event->param.conn;
+ rs_set_conn_data(new_rs, &param, &cresp);
+
+ if (addr && addrlen)
+  rgetpeername(new_rs->index, addr, addrlen);
+
+ if (lpfnCondition) {
+  struct sockaddr local_addr;
+  socklen_t       local_addrlen = sizeof(local_addr);
+  WSABUF caller_id;
+  WSABUF callee_id;
+  WSABUF callee_data;
+
+  /* Set the caller and callee data buffer */
+  /* NOTE(review): addr is used here even if the caller passed NULL. */
+  caller_id.buf = (char *)addr;
+  caller_id.len = sizeof(*addr);
+
+  rs_copy_addr(&local_addr, rdma_get_local_addr(new_rs->cm_id), &local_addrlen);
+
+  callee_id.buf = (char *)&local_addr;
+  callee_id.len = local_addrlen;
+  
+  callee_data.buf = NULL;
+  callee_data.len = 0;
+
+  /* Let the application decide whether to take the connection. */
+  switch(lpfnCondition(&caller_id, NULL, NULL, NULL, &callee_id, &callee_data, NULL, dwCallbackData)) {
+  default:
+   /* Should never happen */
+   /* Fall through. */
+  case CF_REJECT:
+   *lpErrno = WSAECONNREFUSED;
+   ret = (int)INVALID_SOCKET;
+   goto err;
+  case CF_DEFER:
+   *lpErrno = WSATRY_AGAIN;
+   ret = (int)INVALID_SOCKET;
+   goto err;
+  case CF_ACCEPT:
+   break;
+  }
+ }
+ /* Create the Winsock handle; its context is the new rsocket index. */
+ new_socket = gMainUpCallTable.lpWPUCreateSocketHandle(
+   gProtocolInfo.dwCatalogEntryId,
+   new_rs->index, // __in  DWORD_PTR dwContext
+   lpErrno
+ );
+ if (INVALID_SOCKET == new_socket) {
+  //??? *lpErrno = WSATRY_AGAIN;
+  goto err;
+ }
+ ret = rdma_accept(new_rs->cm_id, &param);
+ if (!ret)
+  new_rs->state = rs_connect_rdwr;
+ else if (errno == EAGAIN || errno == EWOULDBLOCK)
+  /* Accept has not finished yet: remember that it is in progress. */
+  new_rs->state = rs_accepting;
+ else
+  goto err;
+
+ return new_socket;
+
+err:
+ /* Release the rsocket and, if already created, its socket handle. */
+ rs_free(new_rs);
+ if (new_socket != INVALID_SOCKET)
+  gMainUpCallTable.lpWPUCloseSocketHandle(new_socket, lpErrno);
+
+ return INVALID_SOCKET;
+}
+
+/*
+ * setsockopt() counterpart for rsockets.
+ *
+ * Supported levels are SOL_SOCKET, IPPROTO_TCP, IPPROTO_IPV6 and
+ * SOL_RDMA.  For boolean options the on/off state is additionally
+ * recorded as a bit in the per-level option mask of the rsocket.
+ * SOL_RDMA options may only be changed before the socket starts opening.
+ *
+ * Returns 0 on success, otherwise the error value of the failing call
+ * (ERR(ENOTSUP) for unknown levels/options, ERR(EINVAL) for SOL_RDMA
+ * options set too late).
+ *
+ * NOTE(review): the option bit is computed as 1 << optname, which is
+ * only well-defined for optname values below the width of int - confirm
+ * for the platform's option constants.
+ */
+__declspec(dllexport)
+int rsetsockopt(int socket, int level, int optname,
+  const void *optval, socklen_t optlen)
+{
+ struct rsocket *rs;
+ int ret, opt_on = 0;
+ uint64_t *opts = NULL;
+
+ ret = ERR(ENOTSUP);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ switch (level) {
+ case SOL_SOCKET:
+  opts = &rs->so_opts;
+  switch (optname) {
+  case SO_REUSEADDR:
+   ret = rdma_set_option(rs->cm_id, RDMA_OPTION_ID,
+           RDMA_OPTION_ID_REUSEADDR,
+           (void *) optval, optlen);
+   /* Tolerate a failure if unsupported, or on an already set up IB id. */
+   if (ret && ((errno == ENOSYS) || ((rs->state != rs_init) &&
+       rs->cm_id->context &&
+       (rs->cm_id->verbs->device->transport_type == IBV_TRANSPORT_IB))))
+    ret = 0;
+   opt_on = *(int *) optval;
+   break;
+  case SO_RCVBUF:
+   /* Only takes effect before the receive buffer exists. */
+   if (!rs->rbuf)
+    rs->rbuf_size = (*(uint32_t *) optval) << 1;
+   ret = 0;
+   break;
+  case SO_SNDBUF:
+   /* Only takes effect before the send buffer exists. */
+   if (!rs->sbuf)
+    rs->sbuf_size = (*(uint32_t *) optval) << 1;
+   ret = 0;
+   break;
+  case SO_LINGER:
+   /* Invert value so default so_opt = 0 is on */
+   opt_on =  !((struct linger *) optval)->l_onoff;
+   ret = 0;
+   break;
+  case SO_KEEPALIVE:
+   opt_on = *(int *) optval;
+   ret = 0;
+   break;
+  case SO_OOBINLINE:
+   opt_on = *(int *) optval;
+   ret = 0;
+   break;
+  default:
+   break;
+  }
+  break;
+ case IPPROTO_TCP:
+  opts = &rs->tcp_opts;
+  switch (optname) {
+  case TCP_NODELAY:
+   opt_on = *(int *) optval;
+   ret = 0;
+   break;
+  case TCP_MAXSEG:
+   ret = 0;
+   break;
+  default:
+   break;
+  }
+  break;
+ case IPPROTO_IPV6:
+  opts = &rs->ipv6_opts;
+  switch (optname) {
+  case IPV6_V6ONLY:
+   ret = rdma_set_option(rs->cm_id, RDMA_OPTION_ID,
+           RDMA_OPTION_ID_AFONLY,
+           (void *) optval, optlen);
+   opt_on = *(int *) optval;
+   break;
+  default:
+   break;
+  }
+  /*
+   * Must not fall through into SOL_RDMA: that would replace the
+   * result above with EINVAL once the socket has started opening.
+   */
+  break;
+ case SOL_RDMA:
+  if (rs->state >= rs_opening) {
+   ret = ERR(EINVAL);
+   break;
+  }
+
+  switch (optname) {
+  case RDMA_SQSIZE:
+   rs->sq_size = min((*(uint32_t *) optval), RS_QP_MAX_SIZE);
+   ret = 0;
+   break;
+  case RDMA_RQSIZE:
+   rs->rq_size = min((*(uint32_t *) optval), RS_QP_MAX_SIZE);
+   ret = 0;
+   break;
+  case RDMA_INLINE:
+   rs->sq_inline = min(*(uint32_t *) optval, RS_QP_MAX_SIZE);
+   if (rs->sq_inline < RS_MIN_INLINE)
+    rs->sq_inline = RS_MIN_INLINE;
+   ret = 0;
+   break;
+  default:
+   break;
+  }
+  break;
+ default:
+  break;
+ }
+
+ /* Record the boolean state of the option in the level's bit mask. */
+ if (!ret && opts) {
+  if (opt_on)
+   *opts |= (uint64_t)(1 << optname);
+  else
+   *opts &= ~(1 << optname);
+ }
+
+ return ret;
+}
+
+/*
+ * getsockopt() counterpart for rsockets.
+ *
+ * Stores the current value of the requested option in optval and its
+ * size in *optlen.  Reading SO_ERROR also clears the pending error.
+ *
+ * Returns 0 on success.  For an unsupported level or option the result
+ * of ERR(ENOTSUP) is returned.  ERR() is invoked on the failure path
+ * only, so a successful query cannot be reported as an error.
+ */
+__declspec(dllexport)
+int rgetsockopt(int socket, int level, int optname,
+  void *optval, socklen_t *optlen)
+{
+ struct rsocket *rs;
+ int ret = 0;
+
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ switch (level) {
+ case SOL_SOCKET:
+  switch (optname) {
+  case SO_REUSEADDR:
+  case SO_KEEPALIVE:
+  case SO_OOBINLINE:
+   *((int *) optval) = !!(rs->so_opts & (uint64_t)(1 << optname));
+   *optlen = sizeof(int);
+   break;
+  case SO_RCVBUF:
+   *((int *) optval) = rs->rbuf_size;
+   *optlen = sizeof(int);
+   break;
+  case SO_SNDBUF:
+   *((int *) optval) = rs->sbuf_size;
+   *optlen = sizeof(int);
+   break;
+  case SO_LINGER:
+   /* Value is inverted so default so_opt = 0 is on */
+   ((struct linger *) optval)->l_onoff =
+     !(rs->so_opts & (uint64_t)(1 << optname));
+   ((struct linger *) optval)->l_linger = 0;
+   *optlen = sizeof(struct linger);
+   break;
+  case SO_ERROR:
+   /* Reading the pending error resets it. */
+   *((int *) optval) = rs->err;
+   *optlen = sizeof(int);
+   rs->err = 0;
+   break;
+  default:
+   ret = ENOTSUP;
+   break;
+  }
+  break;
+ case IPPROTO_TCP:
+  switch (optname) {
+  case TCP_NODELAY:
+   *((int *) optval) = !!(rs->tcp_opts & (uint64_t)(1 << optname));
+   *optlen = sizeof(int);
+   break;
+  case TCP_MAXSEG:
+   /* Derived from the path MTU code when a route exists, else 2048. */
+   *((int *) optval) = (rs->cm_id && rs->cm_id->route.num_paths) ?
+         1 << (7 + rs->cm_id->route.path_rec->mtu) :
+         2048;
+   *optlen = sizeof(int);
+   break;
+  default:
+   ret = ENOTSUP;
+   break;
+  }
+  break;
+ case IPPROTO_IPV6:
+  switch (optname) {
+  case IPV6_V6ONLY:
+   *((int *) optval) = !!(rs->ipv6_opts & (uint64_t)(1 << optname));
+   *optlen = sizeof(int);
+   break;
+  default:
+   ret = ENOTSUP;
+   break;
+  }
+  break;
+ case SOL_RDMA:
+  switch (optname) {
+  case RDMA_SQSIZE:
+   *((int *) optval) = rs->sq_size;
+   *optlen = sizeof(int);
+   break;
+  case RDMA_RQSIZE:
+   *((int *) optval) = rs->rq_size;
+   *optlen = sizeof(int);
+   break;
+  case RDMA_INLINE:
+   *((int *) optval) = rs->sq_inline;
+   *optlen = sizeof(int);
+   break;
+  default:
+   ret = ENOTSUP;
+   break;
+  }
+  break;
+ default:
+  ret = ENOTSUP;
+  break;
+ }
+
+ /* Only go through ERR() when there is an error to report. */
+ return ret ? ERR(ret) : 0;
+}
+
+/*
+ * fcntl() counterpart for rsockets.
+ *
+ * F_GETFL returns the stored descriptor flags.  F_SETFL takes a long
+ * argument; if it contains O_NONBLOCK the rsocket is switched to
+ * nonblocking mode, and on success the argument is OR-ed into the stored
+ * flags.  Any other command yields ERR(ENOTSUP).
+ *
+ * Every path leaves through the single va_end() at the bottom, so the
+ * va_start() is always matched before the function returns.
+ */
+__declspec(dllexport)
+int rfcntl(int socket, int cmd, ... /* arg */ )
+{
+ struct rsocket *rs;
+ va_list args;
+ long param;
+ int ret = 0;
+
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ va_start(args, cmd);
+ switch (cmd) {
+ case F_GETFL:
+  ret = (int) rs->fd_flags;
+  break;
+ case F_SETFL:
+  param = va_arg(args, long);
+  if (param & O_NONBLOCK)
+   ret = rs_set_nonblocking(rs, O_NONBLOCK);
+
+  if (!ret)
+   rs->fd_flags |= param;
+  break;
+ default:
+  ret = ERR(ENOTSUP);
+  break;
+ }
+ va_end(args);
+ return ret;
+}
+
+/*
+ * ioctlsocket() counterpart for rsockets.
+ *
+ * Only FIONBIO is supported: a non-zero *argp enables nonblocking mode,
+ * zero disables it.  The O_NONBLOCK bit in the stored descriptor flags
+ * is set or cleared accordingly, so that code testing fd_flags sees the
+ * mode that was requested last.  Returns the result of
+ * rs_set_nonblocking(); every other command yields ERR(ENOTSUP).
+ */
+__declspec(dllexport)
+int rioctlsocket(int socket, long cmd, u_long* argp)
+{
+ struct rsocket *rs;
+
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ switch (cmd) {
+  case FIONBIO:
+   if (*argp)
+    rs->fd_flags |= O_NONBLOCK;
+   else
+    rs->fd_flags &= ~O_NONBLOCK; /* switching back to blocking */
+   return rs_set_nonblocking(rs, *argp ? O_NONBLOCK : 0);
+  case FIONREAD:
+  case SIOCATMARK:
+   return ERR(ENOTSUP);
+  default:
+   return ERR(ENOTSUP);
+ }
+}
+
+/**
+ * \brief     Get current RSockets status information for the calling process
+ *       (by calling librdmacm.dll directly).
+ *
+ * \param lppStatusBuffer Pointer to a buffer with an array of RS_STATUS information entries
+ *       to be allocated and returned. The caller is responsible for
+ *       deallocating that buffer via free() when it is no longer needed.
+ *       Set to NULL when there are no rsockets or the allocation fails.
+ *
+ * \return     The number of RS_STATUS entries contained in the status buffer
+ *       returned by *lppStatusBuffer (0 if no buffer was allocated).
+ */
+int rsGetStatus ( __out LPRS_STATUS *lppStatusBuffer )
+{
+ DWORD   i, e;
+ DWORD   dwNumEntries = 0;
+ struct rsocket *rs;
+ LPRS_STATUS  pStatus;
+ const char  *pst = "";
+
+ if (NULL == lppStatusBuffer)
+  return 0;
+
+ /* Never leave the output pointer undefined on the early-return paths. */
+ *lppStatusBuffer = NULL;
+
+ /* First pass: count the rsockets that currently exist. */
+ for (i = 0; i < IDX_MAX_INDEX; i++)
+ {
+  if (idm_lookup(&idm, i))
+   dwNumEntries++;
+ }
+
+ if (0 == dwNumEntries)
+  return 0;
+
+ pStatus = (LPRS_STATUS)malloc(dwNumEntries * sizeof(*pStatus));
+ if (NULL == pStatus)
+  return 0;
+
+ /*
+  * Second pass: fill one entry per rsocket.  The entries live in the
+  * allocated array, so they are addressed as pStatus[e]; the bound on e
+  * guards against rsockets created since the first pass.
+  */
+ for (
+   i = 0, e = 0;
+   i < IDX_MAX_INDEX && e < dwNumEntries;
+   i++
+  )
+ {
+  rs = (struct rsocket *)idm_lookup(&idm, i);
+  if (!rs)
+   continue;
+
+  pStatus[e].src_addr = rs->cm_id->route.addr.src_addr;
+  pStatus[e].dst_addr = rs->cm_id->route.addr.dst_addr;
+  switch (rs->state)
+  {
+  case rs_init:   pst = "INIT";   break;
+  case rs_bound:   pst = "BOUND";   break;
+  case rs_listening:  pst = "LISTENING";  break;
+  case rs_opening:  pst = "OPENING";  break;
+  case rs_resolving_addr: pst = "RESOLVING_ADDR"; break;
+  case rs_resolving_route:pst = "RESOLVING_ROUTE";break;
+  case rs_connecting:  pst = "CONNECTING";  break;
+  case rs_accepting:  pst = "ACCEPTING";  break;
+  case rs_connected:  pst = "CONNECTED";  break;
+  case rs_connect_wr:  pst = "CONNECT_WR";  break;
+  case rs_connect_rd:  pst = "CONNECT_RD";  break;
+  case rs_connect_rdwr: pst = "CONNECT_RDWR"; break;
+  case rs_connect_error: pst = "CONNECT_ERROR"; break;
+  case rs_disconnected: pst = "DISCONNECTED"; break;
+  case rs_error:   pst = "ERROR";   break;
+  default:    pst = "UNKNOWN";
+  }
+  /* Bounded copy; the terminator goes into the LAST valid element. */
+  strncpy(pStatus[e].state, pst, sizeof(pStatus[e].state) - 1);
+  pStatus[e].state[sizeof(pStatus[e].state) - 1] = '\0';
+  e++;
+ }
+
+ *lppStatusBuffer = pStatus;
+ return e;
+}
Index: ulp/librdmacm/src/Sources
===================================================================
--- ulp/librdmacm/src/Sources (revision 3419)
+++ ulp/librdmacm/src/Sources (working copy)
@@ -12,10 +12,12 @@
  cma.rc   \
  cma_main.cpp \
  cma.cpp   \
- addrinfo.cpp
+ addrinfo.cpp \
+ rsocket.cpp  \
+ indexer.cpp
 
 INCLUDES = ..\include;..\..\..\inc;..\..\..\inc\user;..\..\libibverbs\include;\
-     ..\..\..\inc\user\linux;
+     ..\..\..\inc\user\linux
 
 USER_C_FLAGS = $(USER_C_FLAGS) -DEXPORT_CMA_SYMBOLS
 
@@ -26,4 +28,5 @@
  $(SDK_LIB_PATH)\iphlpapi.lib \
  $(TARGETPATH)\*\ibat.lib  \
  $(TARGETPATH)\*\libibverbs.lib \
- $(TARGETPATH)\*\winverbs.lib
+ $(TARGETPATH)\*\winverbs.lib    \
+ $(TARGETPATH)\*\complib.lib



This message and attachment(s) are intended solely for use by the addressee and may contain information that is privileged, confidential or otherwise exempt from disclosure under applicable law.

If you are not the intended recipient or agent thereof responsible for delivering this message to the intended recipient, you are hereby notified that any dissemination, distribution or copying of this communication is strictly prohibited.

If you have received this communication in error, please notify the sender immediately by telephone and with a 'reply' message.

Thank you for your co-operation.

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20130110/885b63a8/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: RSocket.zip
Type: application/x-zip-compressed
Size: 42645 bytes
Desc: RSocket.zip
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20130110/885b63a8/attachment.bin>


More information about the ofw mailing list