[ofw] Patch for librdmacm incl. RSockets functionality
Schmitt, Hubert
Hubert.Schmitt at oce.com
Thu Jan 10 01:51:30 PST 2013
Hi everyone,
Attached please find a patch for review and discussion (based on rev. 3419 of https://beany.openfabrics.org/svnrepo/ofw/gen1/trunk/) which contains a porting of Sean Hefty's RSockets protocol to Windows. Many thanks to Sean at this point for his cooperation and support.
What I've done is merge the librdmacm sources from the current Linux OFED with the respective OFW sources, resulting in a librdmacm.dll that acts as a Winsock base transport provider (similar to the ulp/wsd component). In contrast to WSD or ND, this RSockets provider allows socket-based RDMA communication not only between Windows hosts, but also between Windows and Linux.
For further comments and documentation please look at ulp\librdmacm\RSocket.txt.
The contribution of the patch is in the name and on behalf of Océ Printing Systems GmbH. The use of the patch is based in principle upon the terms of the BSD as noted in ulp\librdmacm\RSocket.txt.
Thanks and Regards,
Hubert
===================================================================
Index: etc/user/gtod.c
===================================================================
--- etc/user/gtod.c (revision 3419)
+++ etc/user/gtod.c (working copy)
@@ -57,11 +57,12 @@
ptv->tv_usec = (long) (ll - ((LONGLONG)(ptv->tv_sec) * 10000000)) / 10;
}
-
-// static __inline
-int gettimeofday(struct timeval *ptv, void *ignored)
+static /*__inline*/ int gettimeofday(struct timeval *ptv, void *ignored)
{
static int QueryCounter = 2;
+ static LARGE_INTEGER Frequency = {10000000,0}; /* prevent division by 0 */
+ static LARGE_INTEGER Offset; /* counter offset for right time*/
+ static LARGE_INTEGER LastCounter;
FILETIME CurrentTime;
UNREFERENCED_PARAMETER(ignored);
@@ -69,9 +70,6 @@
if(QueryCounter)
{
- static LARGE_INTEGER Frequency;
- static LARGE_INTEGER Offset; /* counter offset for right time*/
- static LARGE_INTEGER LastCounter;
LARGE_INTEGER Time;
LARGE_INTEGER Counter;
@@ -80,13 +78,15 @@
if(QueryCounter == 2)
{
- QueryCounter = 1;
- if(!QueryPerformanceFrequency(&Frequency))
+ if(QueryPerformanceFrequency(&Frequency))
{
+ QueryCounter = 1; // But now the Frequency is valid!
+ }
+ else
+ {
QueryCounter = 0;
Frequency.QuadPart = 10000000; /* prevent division by 0 */
}
-
/* get time as a large integer */
Counter.HighPart &= 0x7FL; /* Clear high bits to prevent overflow */
Offset.LowPart = CurrentTime.dwLowDateTime;
Index: inc/user/linux/_fcntl.h
===================================================================
--- inc/user/linux/_fcntl.h (revision 0)
+++ inc/user/linux/_fcntl.h (working copy)
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#ifndef __FCNTL_H_
+#define __FCNTL_H_
+
+#include <fcntl.h>
+
+#define F_GETFL 3 /* get file->f_flags */
+#define F_SETFL 4 /* set file->f_flags */
+
+#endif /* __FCNTL_H_ */
Index: tools/dirs
===================================================================
--- tools/dirs (revision 3419)
+++ tools/dirs (working copy)
@@ -6,4 +6,5 @@
part_man \
infiniband-diags \
qlgcvnic_config \
- ndinstall
+ ndinstall \
+ rsinstall
Index: tools/rsinstall/dirs
===================================================================
--- tools/rsinstall/dirs (revision 0)
+++ tools/rsinstall/dirs (working copy)
@@ -0,0 +1,2 @@
+DIRS=\
+ user
Index: tools/rsinstall/user/makefile
===================================================================
--- tools/rsinstall/user/makefile (revision 0)
+++ tools/rsinstall/user/makefile (working copy)
@@ -0,0 +1,8 @@
+#
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source
+# file to this component. This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+MINIMUM_NT_TARGET_VERSION=0x502
+
+!INCLUDE ..\..\..\inc\openib.def
Index: tools/rsinstall/user/rsinstall.c
===================================================================
--- tools/rsinstall/user/rsinstall.c (revision 0)
+++ tools/rsinstall/user/rsinstall.c (working copy)
@@ -0,0 +1,724 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+/*
+ * Module Name: rsinstall.c
+ * Description: This module installs/removes a Winsock service provider for InfiniBand.
+ * execute:
+ * To install the service provider
+ * rsinstall -i
+ * To remove the service provider
+ * rsinstall -r
+ */
+
+#include "rdma/rwinsock.h"
+#include <ws2spi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Initialize the LSP's provider path for InfiniBand Service Provider dll */
+static const WCHAR provider_path[] = L"%SYSTEMROOT%\\system32\\librdmacm.dll";
+static const WCHAR provider_prefix[] =L" RSockets for InfiniBand"; //includes one whitespace
+static const char provider_name[] = VER_PROVIDER ;//L"%VER_PROVIDER% RSockets for InfiniBand"; //(VER_PROVIDER ## WINDIR);
+static const char openib_key_name[] = IB_COMPANYNAME;
+
+#ifdef PERFMON_ENABLED
+#include "LoadPerf.h"
+#include "rdma/rs_regpath.h"
+
+typedef struct _pm_symbol_def /* one perfmon symbol written to the generated .h/.ini files */
+{
+ DWORD name_def; /* numeric value emitted by the #define in the symbol header */
+ CHAR name_str[40]; /* symbol identifier, also the key prefix in the .ini file */
+ CHAR name_desc[40]; /* text written after <name_str>_<language>_NAME= */
+ CHAR help_desc[256]; /* text written after <name_str>_<language>_HELP= */
+
+} pm_symbol_def_t;
+
+static pm_symbol_def_t _pm_symbols[]= /* entry 0 is the perfmon object, the rest are its counters */
+{
+ { RS_PM_OBJ,
+ "RS_PM_OBJ",
+ "IB RSockets",
+ "InfiniBand RSockets Provider."
+ },
+ { RS_PM_COUNTER(BYTES_SEND),
+ "RS_PM_BYTES_TX_SEC",
+ "Send bytes/sec",
+ "Send bytes/second, excluding RDMA Write."
+ },
+ { RS_PM_COUNTER(BYTES_RECV),
+ "RS_PM_BYTES_RX_SEC",
+ "Recv bytes/sec",
+ "Receive bytes/second, excluding RDMA Read."
+ },
+ { RS_PM_COUNTER(BYTES_WRITE),
+ "RS_PM_RDMA_WR_SEC",
+ "RDMA Write bytes/sec",
+ "RDMA Write bytes/second."
+ },
+ { RS_PM_COUNTER(BYTES_READ),
+ "RS_PM_RDMA_RD_SEC",
+ "RDMA Read bytes/sec",
+ "RDMA Read bytes/second."
+ },
+ { RS_PM_COUNTER(BYTES_TOTAL),
+ "RS_PM_BYTES_SEC",
+ "Total bytes/sec",
+ "Total bytes transmitted per second, including send, "
+ "receive, RDMA Write, and RDMA Read."
+ },
+ { RS_PM_COUNTER(COMP_SEND),
+ "RS_PM_SEND_COMPLETIONS_SEC",
+ "Send Completions/sec",
+ "Send and RDMA Write Completions/sec."
+ },
+ { RS_PM_COUNTER(COMP_RECV),
+ "RS_PM_RECV_COMPLETIONS_SEC",
+ "Recv Completions/sec",
+ "Recv and RDMA Read Completions/sec."
+ },
+ { RS_PM_COUNTER(COMP_TOTAL),
+ "RS_PM_COMPLETIONS_SEC",
+ "Total Completions/sec",
+ "Total Completions processed per second."
+ },
+ { RS_PM_COUNTER(INTR_TOTAL),
+ "RS_PM_COMPLETIONS_INTR",
+ "Total Interrupts/sec",
+ "Completion Queue events per second."
+ }
+};
+
+#define RS_PM_NUM_SYMBOLS (sizeof(_pm_symbols)/sizeof(pm_symbol_def_t)) /* number of table entries */
+#define RS_PM_LANGUAGE "009" /* good for English */
+
+/* Build "<header><temp dir><file>" in a zero-filled process-heap buffer;
+ * header may be NULL. Returns NULL on failure, otherwise a string that the
+ * caller must release with HeapFree(). */
+static CHAR *
+_RSGenerateFileName(char *header, char *file )
+{
+	DWORD size1, size;
+	CHAR *full_file_name;
+	int header_len = header == NULL ? 0 : strlen(header);
+	size = GetTempPath(0, NULL);	/* ask for the buffer size the temp path needs */
+	if (size == 0)
+	{
+		fprintf( stderr, "GetTempPath failed\n" );
+		return NULL;
+	}
+	size1 = size + strlen(file) + header_len;
+	full_file_name = HeapAlloc (GetProcessHeap (), HEAP_ZERO_MEMORY, size1);
+	if ( full_file_name == NULL )
+	{
+		fprintf( stderr, "HeapAlloc failed\n" );	/* name the call that actually failed */
+		return NULL;
+	}
+	size1 = GetTempPath(size, full_file_name + header_len);
+	if (size != size1 + 1)	/* expect the same path again, now counted without its NUL */
+	{
+		fprintf( stderr, "Very strange, GetTempPath returned something different\n" );
+		HeapFree (GetProcessHeap (), 0, full_file_name);
+		return NULL;
+	}
+	if (header_len != 0)	/* the temp path was written just past the header slot */
+		memcpy(full_file_name, header, header_len);
+	strcat(full_file_name, file);
+	return full_file_name;
+}
+
+
+/* Write the perfmon symbol header and .ini file into the temp directory.
+ * Returns ERROR_SUCCESS, ERROR_NOT_ENOUGH_MEMORY when a file name cannot be
+ * built, or ERROR_FILE_INVALID when a file cannot be created. */
+static DWORD
+_RSPerfmonIniFilesGenerate( void )
+{
+	FILE *f_handle;
+	DWORD num;
+	DWORD ret = ERROR_SUCCESS;
+	char *ibsp_pm_sym_file = NULL;
+	char *ibsp_pm_ini_file = NULL;
+
+	/* create ".h" file first */
+	ibsp_pm_sym_file = _RSGenerateFileName(NULL, RS_PM_SYM_H_FILE);
+	if( !ibsp_pm_sym_file )
+	{
+		fprintf( stderr, "_RSGenerateFileName failed\n" );
+		ret = ERROR_NOT_ENOUGH_MEMORY;
+		goto Cleanup;
+	}
+
+	f_handle = fopen( ibsp_pm_sym_file, "w+" );
+	if( !f_handle )
+	{
+		fprintf( stderr, "Create Header file %s failed\n", ibsp_pm_sym_file );
+		ret = ERROR_FILE_INVALID;
+		goto Cleanup;
+	}
+
+	fprintf(
+		f_handle, "/* %s Generated by program */ \r\n", ibsp_pm_sym_file );
+	for( num = 0; num < RS_PM_NUM_SYMBOLS; num++ )
+	{
+		fprintf( f_handle, "#define\t%s\t%d\r\n",
+			_pm_symbols[num].name_str, _pm_symbols[num].name_def );
+	}
+
+	fflush( f_handle );
+	fclose( f_handle );
+
+	/* create 'ini' file next */
+	ibsp_pm_ini_file = _RSGenerateFileName(NULL, RS_PM_INI_FILE);
+	if( !ibsp_pm_ini_file )	/* test the name just built, not the header's */
+	{
+		fprintf( stderr, "_RSGenerateFileName failed\n" );
+		ret = ERROR_NOT_ENOUGH_MEMORY;
+		goto Cleanup;
+	}
+	f_handle = fopen( ibsp_pm_ini_file, "w+" );
+
+	if( !f_handle )
+	{
+		fprintf( stderr, "Create INI file %s failed\n", ibsp_pm_ini_file );
+		ret = ERROR_FILE_INVALID;
+		goto Cleanup;
+	}
+
+	fprintf( f_handle, "[info]\r\ndrivername=" RS_PM_SUBKEY_NAME
+		"\r\nsymbolfile=%s\r\n\r\n", ibsp_pm_sym_file );
+	fprintf( f_handle,"[languages]\r\n" RS_PM_LANGUAGE
+		"=language" RS_PM_LANGUAGE "\r\n\r\n" );
+
+	fprintf( f_handle,
+		"[objects]\r\n%s_" RS_PM_LANGUAGE "_NAME=%s\r\n\r\n[text]\r\n",
+		_pm_symbols[0].name_str, _pm_symbols[0].name_desc );
+
+	for( num = 0; num < RS_PM_NUM_SYMBOLS; num++ )
+	{
+		fprintf( f_handle,"%s_" RS_PM_LANGUAGE "_NAME=%s\r\n",
+			_pm_symbols[num].name_str, _pm_symbols[num].name_desc );
+		fprintf( f_handle,"%s_" RS_PM_LANGUAGE "_HELP=%s\r\n",
+			_pm_symbols[num].name_str, _pm_symbols[num].help_desc );
+	}
+
+	fflush( f_handle );
+	fclose( f_handle );
+
+Cleanup:
+	if ( ibsp_pm_sym_file )
+	{
+		HeapFree (GetProcessHeap (), 0, ibsp_pm_sym_file);
+	}
+	if ( ibsp_pm_ini_file )
+	{
+		HeapFree (GetProcessHeap (), 0, ibsp_pm_ini_file);
+	}
+	return ret;
+}
+
+
+/* Delete the generated perfmon symbol header and .ini file from the temp
+ * directory; failures are reported on stderr only. */
+static void
+_RSPerfmonIniFilesRemove( void )
+{
+	char *ibsp_pm_sym_file = NULL;
+	char *ibsp_pm_ini_file = NULL;
+
+	ibsp_pm_sym_file = _RSGenerateFileName(NULL, RS_PM_SYM_H_FILE);
+	if( !ibsp_pm_sym_file )
+	{
+		fprintf( stderr, "_RSGenerateFileName failed\n" );
+		goto Cleanup;
+	}
+
+	ibsp_pm_ini_file = _RSGenerateFileName(NULL, RS_PM_INI_FILE);
+	if( !ibsp_pm_ini_file )	/* test the name just built, not the header's */
+	{
+		fprintf( stderr, "_RSGenerateFileName failed\n" );
+		goto Cleanup;
+	}
+
+	if( !DeleteFile( ibsp_pm_ini_file ) )
+	{
+		fprintf( stderr, "Delete file %s failed status %d\n",
+			ibsp_pm_ini_file, GetLastError() );
+	}
+	if( !DeleteFile( ibsp_pm_sym_file ) )
+	{
+		fprintf( stderr,"Delete file %s failed status %d\n",
+			ibsp_pm_sym_file, GetLastError() );
+	}
+Cleanup:
+	if ( ibsp_pm_sym_file )
+	{
+		HeapFree (GetProcessHeap (), 0, ibsp_pm_sym_file);
+	}
+	if ( ibsp_pm_ini_file )
+	{
+		HeapFree (GetProcessHeap (), 0, ibsp_pm_ini_file);
+	}
+}
+
+
+/* Create the Performance and EventLog registry keys under HKLM and fill in their values; returns the status of the last RegCreateKeyEx() call made. */
+static LONG
+_RSPerfmonRegisterKeys( void )
+{
+ LONG reg_status;
+ HKEY pm_hkey;
+ DWORD typesSupp = 7; /* NOTE(review): presumably the error|warning|information event-type mask -- confirm */
+
+ reg_status = RegCreateKeyEx( HKEY_LOCAL_MACHINE,
+ RS_PM_REGISTRY_PATH RS_PM_SUBKEY_PERF, 0, NULL,
+ REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, &pm_hkey, NULL );
+
+ if( reg_status != ERROR_SUCCESS )
+ {
+ fprintf( stderr,
+ "_RSPerfmonRegisterKeys Create Key %s failed with %d\n",
+ RS_PM_REGISTRY_PATH RS_PM_SUBKEY_PERF, reg_status );
+ return reg_status;
+ }
+
+ /* store the provider DLL path and the Open/Collect/Close entry point names; the RegSetValueEx results are not checked */
+ RegSetValueExW( pm_hkey, L"Library", 0, REG_EXPAND_SZ,
+ (LPBYTE)provider_path, sizeof(provider_path) );
+
+ RegSetValueEx( pm_hkey, TEXT("Open"), 0, REG_SZ,
+ (LPBYTE)TEXT("RSPmOpen"), sizeof(TEXT("RSPmOpen")) );
+
+ RegSetValueEx( pm_hkey, TEXT("Collect"), 0, REG_SZ,
+ (LPBYTE)TEXT("RSPmCollectData"), sizeof(TEXT("RSPmCollectData")) );
+
+ RegSetValueEx( pm_hkey, TEXT("Close"), 0, REG_SZ,
+ (LPBYTE)TEXT("RSPmClose"), sizeof(TEXT("RSPmClose")) );
+
+ RegFlushKey( pm_hkey );
+ RegCloseKey( pm_hkey );
+
+ reg_status = RegCreateKeyEx( HKEY_LOCAL_MACHINE,
+ RS_PM_EVENTLOG_PATH, 0, NULL,
+ REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, &pm_hkey, NULL );
+
+ if( reg_status != ERROR_SUCCESS )
+ {
+ fprintf(stderr, "Create EventLog Key failed with %d\n", reg_status );
+ return reg_status;
+ }
+
+ /* the same provider DLL path is stored as the event message file */
+ RegSetValueExW( pm_hkey, L"EventMessageFile", 0, REG_EXPAND_SZ,\
+ (LPBYTE)provider_path, sizeof(provider_path) );
+
+ RegSetValueEx( pm_hkey, TEXT("TypesSupported"), 0, REG_DWORD,
+ (LPBYTE)&typesSupp, sizeof(typesSupp) );
+
+ RegFlushKey( pm_hkey );
+ RegCloseKey( pm_hkey );
+
+ return reg_status;
+}
+
+
+/* Try to destroy Performance Register Keys: the Performance subkey, then
+ * RS_PM_REGISTRY_PATH itself, then the EventLog key. Failures are logged with
+ * the status the registry call returned; only the last status is returned. */
+static LONG
+_RSPerfmonDeregisterKeys( void )
+{
+	LONG reg_status;
+
+	/* NOTE(review): samDesired combines both WOW64 view flags -- confirm */
+	reg_status = RegDeleteKeyEx( HKEY_LOCAL_MACHINE,
+		RS_PM_REGISTRY_PATH RS_PM_SUBKEY_PERF,
+		(KEY_WOW64_32KEY | KEY_WOW64_64KEY), 0 );
+	if( reg_status != ERROR_SUCCESS )
+	{
+		fprintf( stderr,
+			"_RSPerfmonDeregisterKeys Remove SubKey failed with %d\n",
+			reg_status );
+	}
+
+	reg_status = RegDeleteKeyEx( HKEY_LOCAL_MACHINE,
+		RS_PM_REGISTRY_PATH, (KEY_WOW64_32KEY | KEY_WOW64_64KEY), 0 );
+	if( reg_status != ERROR_SUCCESS )
+	{
+		fprintf( stderr,
+			"_RSPerfmonDeregisterKeys Remove SubKey failed with %d\n",
+			reg_status );
+	}
+
+	reg_status = RegDeleteKeyEx( HKEY_LOCAL_MACHINE,
+		RS_PM_EVENTLOG_PATH, (KEY_WOW64_32KEY | KEY_WOW64_64KEY), 0 );
+	if( reg_status != ERROR_SUCCESS )
+	{
+		fprintf( stderr,
+			"_RSPerfmonDeregisterKeys Remove SubKey failed with %d\n",
+			reg_status );
+	}
+
+	return reg_status;
+}
+
+
+/*
+ * Register the counter names and help text of the generated .ini file with
+ * LoadPerfCounterTextStrings() (the API used by the lodctr.exe utility).
+ * Returns ERROR_SUCCESS, ERROR_NOT_ENOUGH_MEMORY, or the API's error code.
+ */
+static DWORD
+_RSPerfmonRegisterCounters( void )
+{
+	DWORD status;
+	char *ibsp_pm_ini_file = NULL;
+
+	/*
+	 * format commandline string, as per SDK :
+	 * Pointer to a null-terminated string that consists of one or more
+	 * arbitrary letters, a space, and then the name of the initialization
+	 * file.
+	 */
+	ibsp_pm_ini_file = _RSGenerateFileName("unused ", RS_PM_INI_FILE);
+	if( !ibsp_pm_ini_file )
+	{
+		fprintf( stderr, "_RSGenerateFileName failed\n" );
+		status = ERROR_NOT_ENOUGH_MEMORY;
+		goto Cleanup;
+	}
+
+	status = LoadPerfCounterTextStrings( ibsp_pm_ini_file, TRUE );
+	if( status != ERROR_SUCCESS )
+	{
+		/* the call reports failure through its return value; keep that code */
+		fprintf( stderr,
+			"IBSPPerfmonRegisterCounters install failed status %d\n", status );
+	}
+Cleanup:
+	if ( ibsp_pm_ini_file )
+	{
+		HeapFree (GetProcessHeap (), 0, ibsp_pm_ini_file);
+	}
+
+	return status;
+}
+
+
+/*
+ * Unregister the RSockets performance counter definitions through
+ * UnloadPerfCounterTextStrings() (the API used by the unlodctr.exe utility).
+ * A failure is reported on stderr; the call's status is returned either way.
+ */
+static DWORD
+_RSPerfmonDeregisterCounters( void )
+{
+	/*
+	 * Command line format, as per SDK: one or more arbitrary letters, a
+	 * space, and then the name to unload; "unused " supplies the letters.
+	 */
+	const DWORD unload_rc = UnloadPerfCounterTextStrings(
+		TEXT("unused ") TEXT(RS_PM_SUBKEY_NAME), TRUE );
+
+	if( unload_rc == ERROR_SUCCESS )
+	{
+		return unload_rc;
+	}
+
+	/* only the failure path logs */
+	fprintf( stderr,
+		"IBSPPerfmonDeregisterCounters remove failed status %d\n",
+		unload_rc );
+	return unload_rc;
+}
+
+#endif /* PERFMON_ENABLED */
+
+
+/*
+ * Function: usage
+ * Description: Prints usage information for the program named progname.
+ */
+static void
+usage (char *progname)
+{
+	printf ("usage: %s -i [path] | -r [name] | -l\n", progname);
+	printf (" -i [path] Install the service provider,\n"
+		" optionally specify full pathname of DLL\n"
+		" -r Remove the %s service provider\n"
+		" -r <name> Remove the specified service provider\n"
+		" -l List service providers\n",VER_PROVIDER);
+}
+
+
+/*
+ * Function: print_providers
+ * Description:
+ *    Lists the Winsock catalog on stdout, one "<catalog entry id> - <protocol
+ *    name>" line per entry. Enumeration or allocation errors are printed
+ *    instead and end the listing.
+ */
+static void print_providers(void)
+{
+	WSAPROTOCOL_INFOW *catalog;
+	WSAPROTOCOL_INFOW *entry;
+	unsigned int remaining;
+	DWORD catalog_bytes = 0;
+	INT wsa_err;
+	int rc;
+
+	/* Sizing pass: no buffer is supplied, so WSAENOBUFS is tolerated here */
+	rc = WSCEnumProtocols (NULL, NULL, &catalog_bytes, &wsa_err);
+	if (rc == SOCKET_ERROR && wsa_err != WSAENOBUFS) {
+		printf("WSCEnumProtocols() returned error (%d)\n", wsa_err);
+		return;
+	}
+
+	/* Allocate the buffer */
+	catalog = HeapAlloc (GetProcessHeap (), HEAP_ZERO_MEMORY, catalog_bytes);
+	if (catalog == NULL) {
+		printf("HeapAlloc() failed\n");
+		return;
+	}
+
+	/* Second pass fills the buffer; its result is used as the entry count */
+	rc = WSCEnumProtocols (NULL, catalog, &catalog_bytes, &wsa_err);
+	if (rc == SOCKET_ERROR) {
+		printf("WSCEnumProtocols returned error for real enumeration (%d)\n",
+			wsa_err);
+	}
+	else {
+		entry = catalog;
+		for (remaining = (unsigned int)rc; remaining != 0; remaining--) {
+			printf ("%010d - %S\n", entry->dwCatalogEntryId,
+				entry->szProtocol);
+			entry++;
+		}
+	}
+
+	/* the buffer is released on both the error and the success path */
+	HeapFree (GetProcessHeap (), 0, catalog);
+}
+
+/*
+ * Function: install_provider
+ * Description: registers the DLL at szProviderPath as a Winsock base provider
+ *              for TCP over AF_INET; results are only reported on stdout.
+ */
+static void install_provider(LPWSTR szProviderPath)
+{
+	int rc;
+	INT err_no;
+	WSAPROTOCOL_INFOW provider;
+	size_t converted;
+	size_t st_len;
+	const size_t name_cap = sizeof(provider.szProtocol) / sizeof(provider.szProtocol[0]);
+
+	ZeroMemory(&provider, sizeof(provider));
+
+	/* Setup the values in PROTOCOL_INFO */
+	provider.dwServiceFlags1 =
+		XP1_GUARANTEED_DELIVERY | XP1_GUARANTEED_ORDER |
+		XP1_MESSAGE_ORIENTED | XP1_GRACEFUL_CLOSE;
+	provider.dwServiceFlags2 = 0;	/* Reserved */
+	provider.dwServiceFlags3 = 0;	/* Reserved */
+	provider.dwServiceFlags4 = 0;	/* Reserved */
+// SAN provider only: provider.dwProviderFlags = PFL_HIDDEN;
+	provider.ProviderId = rsProviderGuid;	/* Service Provider ID provided by vendor. */
+	provider.dwCatalogEntryId = 0;
+	provider.ProtocolChain.ChainLen = 1;	/* Base Protocol Service Provider */
+	provider.iVersion = 2;	/* don't know what it is */
+	provider.iAddressFamily = AF_INET;
+	provider.iMaxSockAddr = 16;
+	provider.iMinSockAddr = 16;
+	provider.iSocketType = SOCK_STREAM;
+	provider.iProtocol = IPPROTO_TCP;
+	provider.iProtocolMaxOffset = 0;
+	provider.iNetworkByteOrder = BIGENDIAN;
+	provider.iSecurityScheme = SECURITY_PROTOCOL_NONE;
+	provider.dwMessageSize = 0xFFFFFFFF;	/* IB supports 32-bit lengths for data transfers on RC */
+	provider.dwProviderReserved = 0;
+
+	st_len = strlen(provider_name);	/* szProtocol = provider_name followed by provider_prefix */
+	if (st_len + wcslen(provider_prefix) >= name_cap) {
+		printf("<install_provider> Provider name %s is too long\n", provider_name);
+		return;
+	}
+	converted = mbstowcs(provider.szProtocol, provider_name, st_len); //do not count \0
+	if (converted != st_len) {
+		printf("<install_provider> Can't convert string %s to WCHAR\n",provider_name);
+		printf("Converted %d from %d\n", (int)converted, (int)st_len);
+	}
+	wcscpy( provider.szProtocol + st_len, provider_prefix);
+	wprintf(L"Provider protocol = %s\n", provider.szProtocol);
+	wprintf(L"Provider path = %s\n", szProviderPath);
+
+	rc = WSCInstallProvider(
+		(LPGUID)&rsProviderGuid, szProviderPath, &provider, 1, &err_no );
+	if( rc == SOCKET_ERROR )
+	{
+		if( err_no == WSANO_RECOVERY )
+			printf("The provider is already installed\n");
+		else
+			printf("install_provider: WSCInstallProvider failed: %d\n", err_no);
+	}
+}
+
+/*
+ * Function: remove_provider
+ * Description: removes our provider (rsProviderGuid) from the Winsock catalog.
+ * provider_name is accepted from the command line but is not used.
+ */
+static void remove_provider( const char* const provider_name )
+{
+	int rc;
+	int err_no;
+
+	(void)provider_name;	/* removal is keyed on the provider GUID only */
+	/* Remove from the catalog */
+	rc = WSCDeinstallProvider((LPGUID)&rsProviderGuid, &err_no);
+	if (rc == SOCKET_ERROR) {
+		printf ("WSCDeinstallProvider failed: %d\n", err_no);
+	}
+
+#if 0 //def _WIN64
+	/* Remove from the 32-bit catalog too! */
+	rc = WSCDeinstallProvider32((LPGUID)&rsProviderGuid, &err_no);
+	if (rc == SOCKET_ERROR) {
+		printf ("WSCDeinstallProvider32 failed: %d\n", err_no);
+	}
+#endif	/* _WIN64 */
+}
+
+/* Function: main
+ *
+ * Description:
+ *    Parse the command line arguments and call either the install, remove
+ *    or list routine. Returns 0 when an option was dispatched, -1 when
+ *    Winsock 2.2 is unavailable or the option is missing or has no -, / prefix.
+ */
+int __cdecl main (int argc, char *argv[])
+{
+	WSADATA wsd;
+	size_t rc, st_len;
+	int wsa_rc;
+	WCHAR arg_provider_path[MAX_PATH];
+	const size_t path_cap = sizeof(arg_provider_path) / sizeof(arg_provider_path[0]);
+
+	/* Load Winsock; WSAStartup returns its error code directly */
+	wsa_rc = WSAStartup (MAKEWORD (2, 2), &wsd);
+	if (wsa_rc != 0) {
+		printf ("InstallSP: Unable to load Winsock: %d\n", wsa_rc);
+		return -1;
+	}
+
+	/* Confirm that the WinSock DLL supports 2.2.  Note that if the
+	 * DLL supports versions greater than 2.2 in addition to 2.2, it
+	 * will still return 2.2 in wVersion since that is the version we
+	 * requested. */
+	if (LOBYTE (wsd.wVersion) != 2 || HIBYTE (wsd.wVersion) != 2) {
+		/* Tell the user that we could not find a usable WinSock DLL. */
+		WSACleanup ();
+		printf
+			("InstallSP: Unable to find a usable version of Winsock DLL\n");
+		return -1;
+	}
+	/* Require an option with a '-' or '/' prefix before dispatching on its letter */
+	if (argc < 2 || (argv[1][0] != '-' && argv[1][0] != '/')) {
+		usage (argv[0]);
+		WSACleanup ();	/* balance the successful WSAStartup */
+		return -1;
+	}
+	switch (tolower ((unsigned char)argv[1][1])) {
+
+	case 'i':
+		/* Install the Infiniband Service Provider */
+		if( argc == 2 ) {
+			wcscpy_s(arg_provider_path, path_cap, provider_path);
+		}
+		else {
+			st_len = strlen( argv[2] );
+			if (st_len >= path_cap)
+				st_len = path_cap - 1;
+			rc = mbstowcs(arg_provider_path, argv[2], st_len); //do not count \0
+			arg_provider_path[st_len] = '\0';
+			// We can't use there mbstowcs_s
+			if (rc != st_len) {
+				printf("Can't convert string \"%s\" to WCHAR\n", argv[2]);
+				printf("Converted %d from %d\n", (int)rc, (int)st_len);
+				wcscpy_s(arg_provider_path, path_cap, provider_path);
+			}
+		}
+		install_provider( arg_provider_path );
+
+#ifdef PERFMON_ENABLED
+		_RSPerfmonIniFilesGenerate();
+		if ( _RSPerfmonRegisterKeys() == ERROR_SUCCESS )
+			_RSPerfmonRegisterCounters();
+#endif
+		break;
+
+	case 'r':
+		/* Remove the service provider */
+		if( argc == 2 )
+			remove_provider( openib_key_name );
+		else
+			remove_provider( argv[2] );
+#ifdef PERFMON_ENABLED
+		_RSPerfmonIniFilesRemove();
+		if ( _RSPerfmonDeregisterCounters() == ERROR_SUCCESS )
+			_RSPerfmonDeregisterKeys();
+#endif
+		break;
+
+	case 'l':
+		/* List existing providers */
+		print_providers();
+		break;
+
+	default:
+		usage (argv[0]);
+		break;
+	}
+
+	WSACleanup ();
+
+	return 0;
+}
Index: tools/rsinstall/user/rsinstall.exe.manifest
===================================================================
--- tools/rsinstall/user/rsinstall.exe.manifest (revision 0)
+++ tools/rsinstall/user/rsinstall.exe.manifest (working copy)
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+ <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+ <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+ <security>
+ <requestedPrivileges>
+ <requestedExecutionLevel level="requireAdministrator" uiAccess="false"/>
+ </requestedPrivileges>
+ </security>
+ </trustInfo>
+</assembly>
\ No newline at end of file
Index: tools/rsinstall/user/rsinstall.rc
===================================================================
--- tools/rsinstall/user/rsinstall.rc (revision 0)
+++ tools/rsinstall/user/rsinstall.rc (working copy)
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <oib_ver.h>
+
+#define VER_FILETYPE VFT_APP
+#define VER_FILESUBTYPE VFT2_UNKNOWN
+
+#ifdef DBG
+#define VER_FILEDESCRIPTION_STR "RSockets for InfiniBand installer (Debug)"
+#else
+#define VER_FILEDESCRIPTION_STR "RSockets for InfiniBand installer"
+#endif
+
+#define VER_INTERNALNAME_STR "rsinstall.exe"
+#define VER_ORIGINALFILENAME_STR "rsinstall.exe"
+
+#include <common.ver>
Index: tools/rsinstall/user/RsInstall.sln
===================================================================
--- tools/rsinstall/user/RsInstall.sln (revision 0)
+++ tools/rsinstall/user/RsInstall.sln (working copy)
@@ -0,0 +1,21 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "InstallSP", "InstallSP.vcproj", "{B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}"
+ ProjectSection(ProjectDependencies) = postProject
+ EndProjectSection
+EndProject
+Global
+ GlobalSection(SolutionConfiguration) = preSolution
+ Debug = Debug
+ Release = Release
+ EndGlobalSection
+ GlobalSection(ProjectConfiguration) = postSolution
+ {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Debug.ActiveCfg = Debug|Win32
+ {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Debug.Build.0 = Debug|Win32
+ {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Release.ActiveCfg = Release|Win32
+ {B3A2B7A0-1906-413E-A457-8AD2FC5E88BB}.Release.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ EndGlobalSection
+ GlobalSection(ExtensibilityAddIns) = postSolution
+ EndGlobalSection
+EndGlobal
Index: tools/rsinstall/user/SOURCES
===================================================================
--- tools/rsinstall/user/SOURCES (revision 0)
+++ tools/rsinstall/user/SOURCES (working copy)
@@ -0,0 +1,30 @@
+TARGETNAME=rsinstall
+TARGETPATH=..\..\..\bin\user\obj$(BUILD_ALT_DIR)
+TARGETTYPE=PROGRAM
+UMTYPE=console
+USE_MSVCRT=1
+
+INCLUDES=..\..\..\inc;\
+ ..\..\..\inc\user;\
+ ..\..\..\ulp\librdmacm\include;\
+ $(PLATFORM_SDK_PATH)\include;
+
+SOURCES= \
+ rsinstall.rc \
+ rsinstall.c
+
+USER_C_FLAGS=$(USER_C_FLAGS)
+# -DPERFMON_ENABLED
+
+TARGETLIBS=\
+ $(SDK_LIB_PATH)\ws2_32.lib \
+ $(SDK_LIB_PATH)\LoadPerf.lib
+
+MSC_WARNING_LEVEL= /W3
+
+LINKER_FLAGS=$(LINKER_FLAGS)
+
+SXS_APPLICATION_MANIFEST=rsinstall.exe.manifest
+SXS_ASSEMBLY_VERSION=1.0
+SXS_ASSEMBLY_NAME=rsinstall.exe
+SXS_ASSEMBLY_LANGUAGE=0000
Index: ulp/librdmacm/AUTHORS
===================================================================
--- ulp/librdmacm/AUTHORS (revision 3419)
+++ ulp/librdmacm/AUTHORS (working copy)
@@ -1 +1,2 @@
Sean Hefty <sean.hefty at intel.com>
+Hubert Schmitt <hubert.schmitt at oce.com>
Index: ulp/librdmacm/COPYING
===================================================================
--- ulp/librdmacm/COPYING (revision 3419)
+++ ulp/librdmacm/COPYING (working copy)
@@ -1,11 +1,11 @@
Copyright (c) 2008 Intel Corporation. All rights reserved.
+Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
-This software is available to you under the OpenFabrics.org BSD license
-below:
+This software is available to you under the BSD license below:
- Redistribution and use in source and binary forms, with or
- without modification, are permitted provided that the following
- conditions are met:
+ Redistribution and use in source and binary forms, with or
+ without modification, are permitted provided that the following
+ conditions are met:
- Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
@@ -16,11 +16,21 @@
disclaimer in the documentation and/or other materials
provided with the distribution.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+ - Neither the name Oce Printing Systems GmbH nor the names
+ of the authors may be used to endorse or promote products
+ derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ OF SUCH DAMAGE.
Index: ulp/librdmacm/examples/rstream/makefile
===================================================================
--- ulp/librdmacm/examples/rstream/makefile (revision 0)
+++ ulp/librdmacm/examples/rstream/makefile (working copy)
@@ -0,0 +1,7 @@
+#
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source
+# file to this component. This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+
+!INCLUDE ..\..\..\..\inc\openib.def
Index: ulp/librdmacm/examples/rstream/makefile.inc
===================================================================
--- ulp/librdmacm/examples/rstream/makefile.inc (revision 0)
+++ ulp/librdmacm/examples/rstream/makefile.inc (working copy)
@@ -0,0 +1,8 @@
+Custom_target:
+!if "$(BUILD_PASS)" == "PASS2" || "$(BUILD_PASS)" == "ALL"
+
+!endif
+
+
+
+!INCLUDE ..\..\..\..\inc\mod_ver.def
Index: ulp/librdmacm/examples/rstream/rstream.c
===================================================================
--- ulp/librdmacm/examples/rstream/rstream.c (revision 0)
+++ ulp/librdmacm/examples/rstream/rstream.c (working copy)
@@ -0,0 +1,749 @@
+/*
+ * Copyright (c) 2011-2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <_errno.h>
+#include "../../../../etc/user/gtod.c" // gettimeofday()
+#include "getopt.c"
+#include <sys/types.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <_fcntl.h>
+#include <unistd.h>
+
+#include "..\src\openib_osd.h"
+#include <rdma/rdma_cma.h>
+#include <rdma/rwinsock.h>
+
+#define MSG_DONTWAIT 0x80
+
+struct test_size_param {
+ int size;
+ int option;
+};
+
+static struct test_size_param test_size[] = {
+ { 1 << 6, 0 },
+ { 1 << 7, 1 }, { (1 << 7) + (1 << 6), 1},
+ { 1 << 8, 1 }, { (1 << 8) + (1 << 7), 1},
+ { 1 << 9, 1 }, { (1 << 9) + (1 << 8), 1},
+ { 1 << 10, 1 }, { (1 << 10) + (1 << 9), 1},
+ { 1 << 11, 1 }, { (1 << 11) + (1 << 10), 1},
+ { 1 << 12, 0 }, { (1 << 12) + (1 << 11), 1},
+ { 1 << 13, 1 }, { (1 << 13) + (1 << 12), 1},
+ { 1 << 14, 1 }, { (1 << 14) + (1 << 13), 1},
+ { 1 << 15, 1 }, { (1 << 15) + (1 << 14), 1},
+ { 1 << 16, 0 }, { (1 << 16) + (1 << 15), 1},
+ { 1 << 17, 1 }, { (1 << 17) + (1 << 16), 1},
+ { 1 << 18, 1 }, { (1 << 18) + (1 << 17), 1},
+ { 1 << 19, 1 }, { (1 << 19) + (1 << 18), 1},
+ { 1 << 20, 0 }, { (1 << 20) + (1 << 19), 1},
+ { 1 << 21, 1 }, { (1 << 21) + (1 << 20), 1},
+ { 1 << 22, 1 }, { (1 << 22) + (1 << 21), 1},
+};
+#define TEST_CNT (sizeof test_size / sizeof test_size[0])
+
+enum rs_optimization {
+ opt_mixed,
+ opt_latency,
+ opt_bandwidth
+};
+
+static int rs, lrs;
+static int use_rs = 1;
+static int use_async = 0;
+static int verify = 0;
+static int flags = 0; //MSG_DONTWAIT;
+static int poll_timeout = 0;
+static int custom;
+static enum rs_optimization optimization;
+static int size_option;
+static int iterations = 1;
+static int transfer_size = 1000;
+static int transfer_count = 1000;
+static int buffer_size;
+static char test_name[10] = "custom";
+static char *port = "7471";
+static char *dst_addr;
+static char *src_addr;
+static struct timeval start, end;
+static void *buf;
+/* Create an RSockets or plain socket (per use_rs); parenthesized so the ?: survives any surrounding expression. */
+#define rs_socket(f,t,p) (use_rs ? WSASocket(f,t,p,rsGetProtocolInfo(NULL),0,0) : socket(f,t,p))
+#define rs_bind(s,a,l) bind(s,a,l)
+#define rs_listen(s,b) listen(s,b)
+#define rs_connect(s,a,l) connect(s,a,l)
+#define rs_accept(s,a,l) accept(s,a,l)
+#define rs_shutdown(s,h) shutdown(s,h)
+#define rs_close(s) closesocket(s)
+#define rs_recv(s,b,l,f) recv(s,b,l,f)
+#define rs_send(s,b,l,f) send(s,b,l,f)
+#define rs_recvfrom(s,b,l,f,a,al) recvfrom(s,b,l,f,a,al)
+#define rs_sendto(s,b,l,f,a,al) sendto(s,b,l,f,a,al)
+#define rs_select(n,rf,wf,ef,t) select(n,rf,wf,ef,t)
+#define rs_ioctlsocket(s,c,p) ioctlsocket(s,c,p)
+#define rs_setsockopt(s,l,n,v,ol) setsockopt(s,l,n,v,ol)
+#define rs_getsockopt(s,l,n,v,ol) getsockopt(s,l,n,v,ol)
+/* Render size into str (at most ssize bytes) in 1024-based units k/m/g; one decimal digit is shown when the scaled value is below 10. */
+static void size_str(char *str, size_t ssize, long long size)
+{
+ long long base, fraction = 0;
+ char mag;
+
+ if (size >= (1 << 30)) {
+ base = 1 << 30;
+ mag = 'g';
+ } else if (size >= (1 << 20)) {
+ base = 1 << 20;
+ mag = 'm';
+ } else if (size >= (1 << 10)) {
+ base = 1 << 10;
+ mag = 'k';
+ } else {
+ base = 1;
+ mag = '\0'; /* plain bytes: the %c below then emits a NUL, ending the string */
+ }
+
+ if (size / base < 10)
+ fraction = (size % base) * 10 / base; /* tenths, truncated */
+ if (fraction) {
+ _snprintf(str, ssize, "%lld.%lld%c", size / base, fraction, mag);
+ } else {
+ _snprintf(str, ssize, "%lld%c", size / base, mag);
+ }
+}
+
+static void cnt_str(char *str, size_t ssize, long long cnt)
+{
+ if (cnt >= 1000000000)
+ _snprintf(str, ssize, "%lldb", cnt / 1000000000);
+ else if (cnt >= 1000000)
+ _snprintf(str, ssize, "%lldm", cnt / 1000000);
+ else if (cnt >= 1000)
+ _snprintf(str, ssize, "%lldk", cnt / 1000);
+ else
+ _snprintf(str, ssize, "%lld", cnt);
+}
+
+/* Print one result row for the interval between the globals start and end. */
+static void show_perf(void)
+{
+	char str[32];
+	float usec;
+	long long bytes;
+
+	/* Widen before scaling: with a 32-bit long tv_sec the product overflows past ~2147 s. */
+	usec = (float)((double)(end.tv_sec - start.tv_sec) * 1000000. + (end.tv_usec - start.tv_usec));
+	bytes = (long long) iterations * transfer_count * transfer_size * 2;
+	/* name size transfers iterations bytes seconds Gb/sec usec/xfer */
+	printf("%-10s", test_name);
+	size_str(str, sizeof str, transfer_size);
+	printf("%-8s", str);
+	cnt_str(str, sizeof str, transfer_count);
+	printf("%-8s", str);
+	cnt_str(str, sizeof str, iterations);
+	printf("%-8s", str);
+	size_str(str, sizeof str, bytes);
+	printf("%-8s", str);
+	printf("%8.2fs%10.2f%11.2f\n", usec / 1000000., (bytes * 8) / (1000. * usec),
+		(usec / iterations) / (transfer_count * 2));
+}
+/* Map a transfer size to a repeat count: larger sizes get fewer repetitions (1000000 down to 100). */
+static int size_to_count(int size)
+{
+ if (size >= 1000000)
+ return 100;
+ else if (size >= 100000)
+ return 1000;
+ else if (size >= 10000)
+ return 10000;
+ else if (size >= 1000)
+ return 100000;
+ else
+ return 1000000;
+}
+
+static void init_latency_test(int size)
+{
+ char sstr[5];
+
+ size_str(sstr, sizeof sstr, size);
+ _snprintf(test_name, sizeof test_name, "%s_lat", sstr);
+ transfer_count = 1;
+ transfer_size = size;
+ iterations = size_to_count(transfer_size);
+}
+
+static void init_bandwidth_test(int size)
+{
+ char sstr[5];
+
+ size_str(sstr, sizeof sstr, size);
+ _snprintf(test_name, sizeof test_name, "%s_bw", sstr);
+ iterations = 1;
+ transfer_size = size;
+ transfer_count = size_to_count(transfer_size);
+}
+/* Fill buf with a running byte counter; the counter is static, so the pattern continues across calls. */
+static void format_buf(void *buf, int size)
+{
+ uint8_t *array = buf;
+ static uint8_t data;
+ int i;
+
+ for (i = 0; i < size; i++)
+ array[i] = data++;
+}
+/* Check buf against a running byte counter (static, continues across calls); returns 0 on match, -1 at the first mismatch. */
+static int verify_buf(void *buf, int size)
+{
+ static long long total_bytes;
+ uint8_t *array = buf;
+ static uint8_t data;
+ int i;
+
+ for (i = 0; i < size; i++, total_bytes++) {
+ if (array[i] != data++) {
+ printf("data verification failed byte %lld\n", total_bytes);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Minimal poll() stand-in for one socket, built on select(). Timeouts are
+ * retried; returns 0 when select() reports 1, otherwise select()'s result.
+ */
+static int do_poll(struct pollfd *fds)
+{
+	fd_set readfds, writefds, exceptfds;
+	struct timeval timeout;
+	int want_rd = (fds->events & (POLLIN | POLLHUP)) != 0;
+	int want_wr = (fds->events & POLLOUT) != 0;
+	int want_ex = (fds->events & ~(POLLIN | POLLOUT)) != 0;
+	int nfds = want_rd + want_wr + want_ex;
+	int ret;
+
+	/* Keep the sub-second part of poll_timeout (ms) for values >= 1000. */
+	timeout.tv_sec = poll_timeout / 1000;
+	timeout.tv_usec = (poll_timeout % 1000) * 1000;
+
+	do {
+		/* select() overwrites the sets with the ready subset (empty
+		 * after a timeout), so rebuild them before every call. */
+		FD_ZERO(&readfds);
+		FD_ZERO(&writefds);
+		FD_ZERO(&exceptfds);
+		if (want_rd)
+			FD_SET(fds->fd, &readfds);
+		if (want_wr)
+			FD_SET(fds->fd, &writefds);
+		if (want_ex)
+			FD_SET(fds->fd, &exceptfds);
+
+		ret = rs_select(nfds,
+				want_rd ? &readfds : NULL,
+				want_wr ? &writefds : NULL,
+				want_ex ? &exceptfds : NULL,
+				poll_timeout < 0 ? NULL : &timeout);
+	} while (!ret);
+
+	/* NOTE(review): a count other than 1 is passed through unchanged. */
+	return ret == 1 ? 0 : ret;
+}
+/* Send size bytes from buf over rs, looping over partial sends; returns 0 when all bytes were sent, else the failing call's result. */
+static int send_xfer(int size)
+{
+ struct pollfd fds;
+ int offset, ret;
+
+ if (verify)
+ format_buf(buf, size); /* stamp the pattern the receiver checks with verify_buf() */
+
+ if (use_async) {
+ fds.fd = rs;
+ fds.events = POLLOUT;
+ }
+
+ for (offset = 0; offset < size; ) {
+ if (use_async) {
+ ret = do_poll(&fds);
+ if (ret)
+ return ret;
+ }
+
+ ret = (int)rs_send(rs, (char *)buf + offset, size - offset, flags);
+ if (ret > 0) {
+ offset += ret;
+ } else if (errno != EWOULDBLOCK && errno != EAGAIN) {
+ perror("rsend");
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/* Receive exactly size bytes into buf (verified if verify is set); 0 on success, non-zero on failure. */
+static int recv_xfer(int size)
+{
+	struct pollfd fds;
+	int offset, ret;
+
+	if (use_async) {
+		fds.fd = rs;
+		fds.events = POLLIN;
+	}
+
+	for (offset = 0; offset < size; ) {
+		if (use_async && (ret = do_poll(&fds)) != 0)
+			return ret;
+
+		ret = (int)rs_recv(rs, (char *)buf + offset, size - offset, flags);
+		if (ret > 0) {
+			offset += ret;
+		} else if (ret == 0) {
+			/* Peer closed the connection: errno is stale here, and
+			 * returning ret (0) would report a short transfer as success. */
+			fprintf(stderr, "rrecv: connection closed by peer\n");
+			return -1;
+		} else if (errno != EWOULDBLOCK && errno != EAGAIN) {
+			perror("rrecv");
+			return ret;
+		}
+	}
+
+	if (verify)
+		return verify_buf(buf, size);
+	return 0;
+}
+/* 16-byte handshake: the side with dst_addr set sends then receives, the other side receives then sends. */
+static int sync_test(void)
+{
+ int ret;
+
+ ret = dst_addr ? send_xfer(16) : recv_xfer(16);
+ if (ret)
+ return ret;
+
+ return dst_addr ? recv_xfer(16) : send_xfer(16);
+}
+/* After sync_test(), time iterations rounds of transfer_count transfers each way and print a result row; returns 0 or the first error. */
+static int run_test(void)
+{
+ int ret, i, t;
+
+ ret = sync_test();
+ if (ret)
+ goto out;
+
+ gettimeofday(&start, NULL);
+ for (i = 0; i < iterations; i++) {
+ for (t = 0; t < transfer_count; t++) {
+ ret = dst_addr ? send_xfer(transfer_size) :
+ recv_xfer(transfer_size);
+ if (ret)
+ goto out;
+ }
+ for (t = 0; t < transfer_count; t++) {
+ ret = dst_addr ? recv_xfer(transfer_size) :
+ send_xfer(transfer_size);
+ if (ret)
+ goto out;
+ }
+ }
+ gettimeofday(&end, NULL);
+ show_perf(); /* only reached when every transfer succeeded */
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void set_options(int rs)
+{
+ int val;
+
+ if (buffer_size) {
+ rs_setsockopt(rs, SOL_SOCKET, SO_SNDBUF, (void *) &buffer_size,
+ sizeof buffer_size);
+ rs_setsockopt(rs, SOL_SOCKET, SO_RCVBUF, (void *) &buffer_size,
+ sizeof buffer_size);
+ } else {
+ val = 1 << 19;
+ rs_setsockopt(rs, SOL_SOCKET, SO_SNDBUF, (void *) &val, sizeof val);
+ rs_setsockopt(rs, SOL_SOCKET, SO_RCVBUF, (void *) &val, sizeof val);
+ }
+
+ val = 1;
+ rs_setsockopt(rs, IPPROTO_TCP, TCP_NODELAY, (void *) &val, sizeof(val));
+
+ val = 1;
+ if (flags & MSG_DONTWAIT)
+ rs_ioctlsocket(rs, FIONBIO, (u_long *)&val);
+
+ if (use_rs) {
+ /* Inline size based on experimental data */
+ if (optimization == opt_latency) {
+ val = 384;
+ rs_setsockopt(rs, SOL_RDMA, RDMA_INLINE, (char *)&val, sizeof val);
+ } else if (optimization == opt_bandwidth) {
+ val = 0;
+ rs_setsockopt(rs, SOL_RDMA, RDMA_INLINE, (char *)&val, sizeof val);
+ }
+ }
+}
+
+static int server_listen(void)
+{
+ struct addrinfo hints, *res;
+ int val, ret;
+
+ memset(&hints, 0, sizeof hints);
+ hints.ai_flags = RAI_PASSIVE;
+ hints.ai_family = AF_INET;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+
+ ret = getaddrinfo(src_addr, port, &hints, &res);
+ if (ret) {
+ perror("getaddrinfo");
+ return ret;
+ }
+
+ lrs = (int)(rs_socket(res->ai_family, res->ai_socktype, res->ai_protocol));
+ if (lrs < 0) {
+ perror("rsocket");
+ ret = lrs;
+ goto free;
+ }
+
+ val = 1;
+ ret = rs_setsockopt(lrs, SOL_SOCKET, SO_REUSEADDR, (char *)&val, sizeof val);
+ if (ret) {
+ perror("rsetsockopt SO_REUSEADDR");
+ goto close;
+ }
+
+ ret = rs_bind(lrs, res->ai_addr, res->ai_addrlen);
+ if (ret) {
+ perror("rbind");
+ goto close;
+ }
+
+ ret = rs_listen(lrs, 1);
+ if (ret)
+ perror("rlisten");
+
+close:
+ if (ret)
+ rs_close(lrs);
+free:
+ freeaddrinfo(res);
+ return ret;
+}
+
+static int server_connect(void)
+{
+ struct pollfd fds;
+ int ret = 0;
+
+ set_options(lrs);
+ do {
+ if (use_async) {
+ fds.fd = lrs;
+ fds.events = POLLIN;
+
+ ret = do_poll(&fds);
+ if (ret) {
+ perror("rpoll");
+ return ret;
+ }
+ }
+
+ rs = (int)(rs_accept(lrs, NULL, 0));
+ } while (rs < 0 && (errno == EAGAIN || errno == EWOULDBLOCK));
+
+ if (rs < 0) {
+ ret = rs;
+ perror("raccept");
+ return ret;
+ }
+ set_options(rs);
+
+ return ret;
+}
+
+static int client_connect(void)
+{
+ struct addrinfo hints, *res;
+ struct pollfd fds;
+ int ret, err;
+ socklen_t len;
+
+ memset(&hints, 0, sizeof hints);
+ hints.ai_flags = RAI_PASSIVE;
+ hints.ai_family = AF_INET;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_protocol = IPPROTO_TCP;
+
+ ret = getaddrinfo(dst_addr, port, &hints, &res);
+ if (ret) {
+ perror("getaddrinfo");
+ return ret;
+ }
+
+ rs = (int)(rs_socket(res->ai_family, res->ai_socktype, res->ai_protocol));
+ if (rs < 0) {
+ perror("rsocket");
+ ret = rs;
+ goto free;
+ }
+
+ set_options(rs);
+ /* TODO: bind client to src_addr */
+
+ ret = rs_connect(rs, res->ai_addr, res->ai_addrlen);
+ if (ret && (errno != EINPROGRESS)) {
+ perror("rconnect");
+ goto close;
+ }
+
+ if (ret && (errno == EINPROGRESS)) {
+ fds.fd = rs;
+ fds.events = POLLOUT;
+ ret = do_poll(&fds);
+ if (ret)
+ goto close;
+
+ len = sizeof err;
+ ret = rs_getsockopt(rs, SOL_SOCKET, SO_ERROR, (char *)&err, &len);
+ if (ret)
+ goto close;
+ if (err) {
+ ret = -1;
+ errno = err;
+ perror("async rconnect");
+ }
+ }
+
+close:
+ if (ret)
+ rs_close(rs);
+free:
+ freeaddrinfo(res);
+ return ret;
+}
+
+static int run(void)
+{
+ int i, ret = 0;
+ DWORD dwBytesReturned = 0;
+
+ buf = malloc(!custom ? test_size[TEST_CNT - 1].size : transfer_size);
+ if (!buf) {
+ perror("malloc");
+ return -1;
+ }
+ if (!dst_addr) {
+ ret = server_listen();
+ if (ret)
+ goto free;
+ }
+ printf("%-10s%-8s%-8s%-8s%-8s%8s %10s%13s\n",
+ "name", "bytes", "xfers", "iters", "total", "time", "Gb/sec", "usec/xfer");
+ if (!custom) {
+ optimization = opt_latency;
+ ret = dst_addr ? client_connect() : server_connect();
+ if (ret)
+ goto free;
+
+ for (i = 0; i < TEST_CNT; i++) {
+ if (test_size[i].option > size_option)
+ continue;
+ init_latency_test(test_size[i].size);
+ run_test();
+ }
+ rs_shutdown(rs, SHUT_RDWR);
+ rs_close(rs);
+
+ optimization = opt_bandwidth;
+ ret = dst_addr ? client_connect() : server_connect();
+ if (ret)
+ goto free;
+
+ for (i = 0; i < TEST_CNT; i++) {
+ if (test_size[i].option > size_option)
+ continue;
+ init_bandwidth_test(test_size[i].size);
+ run_test();
+ }
+ } else {
+ ret = dst_addr ? client_connect() : server_connect();
+ if (ret)
+ goto free;
+
+ ret = run_test();
+ }
+
+ rs_shutdown(rs, SHUT_RDWR);
+ rs_close(rs);
+free:
+ free(buf);
+ return ret;
+}
+
+static int set_test_opt(char *optarg)
+{
+ if (strlen(optarg) == 1) {
+ switch (optarg[0]) {
+ case 's':
+ use_rs = 0;
+ break;
+/* async not yet supported
+ * case 'a':
+ * use_async = 1;
+ * break;
+ */
+ case 'b':
+ flags &= ~MSG_DONTWAIT;
+ break;
+ case 'n':
+ flags |= MSG_DONTWAIT;
+ break;
+ case 'v':
+ verify = 1;
+ break; default:
+ return -1;
+ }
+ } else {
+ if (!_strnicmp("socket", optarg, 6)) {
+ use_rs = 0;
+ } /* async not yet supported
+ * else if (!_strnicmp("async", optarg, 5)) {
+ * use_async = 1;
+ * }
+ */
+ else if (!_strnicmp("block", optarg, 5)) {
+ flags &= ~MSG_DONTWAIT;
+ } else if (!_strnicmp("nonblock", optarg, 8)) {
+ flags |= MSG_DONTWAIT;
+ } else if (!_strnicmp("verify", optarg, 6)) {
+ verify = 1;
+ } else {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int op, ret;
+ WSADATA wsaData;
+
+ if (0 != (ret = WSAStartup(0x202,&wsaData)) ) {
+ fprintf(stderr, "WSAStartup failed with error %d\n",ret);
+ ret = -1;
+ goto out;
+ }
+ while ((op = getopt(argc, argv, "s:b:B:I:C:S:p:T:")) != -1) {
+ switch (op) {
+ case 's':
+ dst_addr = optarg;
+ break;
+ case 'b':
+ src_addr = optarg;
+ break;
+ case 'B':
+ buffer_size = atoi(optarg);
+ break;
+ case 'I':
+ custom = 1;
+ iterations = atoi(optarg);
+ break;
+ case 'C':
+ custom = 1;
+ transfer_count = atoi(optarg);
+ break;
+ case 'S':
+ if (!_strnicmp("all", optarg, 3)) {
+ size_option = 1;
+ } else {
+ custom = 1;
+ transfer_size = atoi(optarg);
+ }
+ break;
+ case 'p':
+ port = optarg;
+ break;
+ case 'T':
+ if (!set_test_opt(optarg))
+ break;
+ /* invalid option - fall through */
+ default:
+ printf("usage: %s\n", argv[0]);
+ printf("\t[-s server_address]\n");
+ printf("\t[-b bind_address]\n");
+ printf("\t[-B buffer_size]\n");
+ printf("\t[-I iterations]\n");
+ printf("\t[-C transfer_count]\n");
+ printf("\t[-S transfer_size or all]\n");
+ printf("\t[-p port_number]\n");
+ printf("\t[-T test_option]\n");
+ printf("\t s|sockets - use standard tcp/ip sockets\n");
+// printf("\t a|async - asynchronous operation (use poll)\n");
+ printf("\t b|blocking - use blocking calls\n");
+ printf("\t n|nonblocking - use nonblocking calls\n");
+ printf("\t v|verify - verify data\n");
+ exit(1);
+ }
+ }
+ if (!(flags & MSG_DONTWAIT))
+ poll_timeout = -1;
+
+ ret = run();
+
+out:
+ WSACleanup();
+
+ return ret;
+}
Index: ulp/librdmacm/examples/rstream/rstream.rc
===================================================================
--- ulp/librdmacm/examples/rstream/rstream.rc (revision 0)
+++ ulp/librdmacm/examples/rstream/rstream.rc (working copy)
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <oib_ver.h>
+
+#define VER_FILETYPE VFT_APP
+#define VER_FILESUBTYPE VFT2_UNKNOWN
+
+#ifdef DBG
+#define VER_FILEDESCRIPTION_STR "(R)Socket Test (Debug)"
+#else
+#define VER_FILEDESCRIPTION_STR "(R)Socket Test "
+#endif
+
+#define VER_INTERNALNAME_STR "rstream.exe"
+#define VER_ORIGINALFILENAME_STR "rstream.exe"
+
+#include <common.ver>
Index: ulp/librdmacm/examples/rstream/SOURCES
===================================================================
--- ulp/librdmacm/examples/rstream/SOURCES (revision 0)
+++ ulp/librdmacm/examples/rstream/SOURCES (working copy)
@@ -0,0 +1,32 @@
+TARGETNAME=rstream
+TARGETPATH=..\..\..\..\bin\user\obj$(BUILD_ALT_DIR)
+TARGETTYPE=PROGRAM
+UMTYPE=console
+USE_MSVCRT=1
+NTTARGETFILES=Custom_target
+
+C_DEFINES=$(C_DEFINES) /D__WIN__
+
+SOURCES=rstream.rc \
+ rstream.c
+
+INCLUDES= ..; \
+ ..\..\..\..\inc; \
+ ..\..\..\..\inc\user; \
+ ..\..\..\..\inc\user\linux; \
+ ..\..\include; \
+ ..\..\..\libibverbs\include; \
+ ..\..\..\..\hw\mlx4\user\hca; \
+ ..\..\..\..\inc\complib; \
+ ..\..\..\..\core\complib\user\$(O); \
+ ..\..\..\..\core\al\user\$(O); \
+ ..\..\..\..\etc\user;
+
+RCOPTIONS=/I..\..\win\include
+
+TARGETLIBS= \
+ $(DDK_LIB_PATH)\Ws2_32.lib \
+ $(TARGETPATH)\*\complib.lib \
+ $(TARGETPATH)\*\ibal.lib
+
+MSC_WARNING_LEVEL= /W3
Index: ulp/librdmacm/include/rdma/rdma_cma.h
===================================================================
--- ulp/librdmacm/include/rdma/rdma_cma.h (revision 3419)
+++ ulp/librdmacm/include/rdma/rdma_cma.h (working copy)
@@ -1,8 +1,8 @@
/*
* Copyright (c) 2005-2009 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
*
- * This software is available to you under the OpenFabrics.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
@@ -17,14 +17,24 @@
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
*/
#pragma once
@@ -678,7 +688,10 @@
/* Option details */
enum
{
- RDMA_OPTION_ID_TOS = 0 /* uint8_t: RFC 2474 */
+ RDMA_OPTION_ID_TOS = 0, /* uint8_t: RFC 2474 */
+ RDMA_OPTION_ID_REUSEADDR = 1, /* int: ~SO_REUSEADDR */
+ RDMA_OPTION_ID_AFONLY = 2, /* int: ~IPV6_V6ONLY */
+ RDMA_OPTION_IB_PATH = 1 /* struct ibv_path_data[] */
};
/**
Index: ulp/librdmacm/include/rdma/rdma_verbs.h
===================================================================
--- ulp/librdmacm/include/rdma/rdma_verbs.h (revision 3419)
+++ ulp/librdmacm/include/rdma/rdma_verbs.h (working copy)
@@ -1,297 +1,297 @@
-/*
- * Copyright (c) 2010 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#if !defined(RDMA_VERBS_H)
-#define RDMA_VERBS_H
-
-#include <assert.h>
-#include <infiniband/verbs.h>
-#include <rdma/rdma_cma.h>
-#include <errno.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static __inline int rdma_seterrno(int ret)
-{
- if (ret) {
- errno = ret;
- ret = -1;
- }
- return ret;
-}
-
-/*
- * Memory registration helpers.
- */
-static __inline struct ibv_mr *
-rdma_reg_msgs(struct rdma_cm_id *id, void *addr, size_t length)
-{
- return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE);
-}
-
-static __inline struct ibv_mr *
-rdma_reg_read(struct rdma_cm_id *id, void *addr, size_t length)
-{
- return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_READ);
-}
-
-static __inline struct ibv_mr *
-rdma_reg_write(struct rdma_cm_id *id, void *addr, size_t length)
-{
- return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE |
- IBV_ACCESS_REMOTE_WRITE);
-}
-
-static __inline int
-rdma_dereg_mr(struct ibv_mr *mr)
-{
- return rdma_seterrno(ibv_dereg_mr(mr));
-}
-
-
-/*
- * Vectored send, receive, and RDMA operations.
- * Support multiple scatter-gather entries.
- */
-static __inline int
-rdma_post_recvv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
- int nsge)
-{
- struct ibv_recv_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
-
- return rdma_seterrno(ibv_post_recv(id->qp, &wr, &bad));
-}
-
-static __inline int
-rdma_post_sendv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
- int nsge, int flags)
-{
- struct ibv_send_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
- wr.opcode = IBV_WR_SEND;
- wr.send_flags = flags;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-static __inline int
-rdma_post_readv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
- int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_send_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
- wr.opcode = IBV_WR_RDMA_READ;
- wr.send_flags = flags;
- wr.wr.rdma.remote_addr = remote_addr;
- wr.wr.rdma.rkey = rkey;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-static __inline int
-rdma_post_writev(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
- int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_send_wr wr, *bad;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = sgl;
- wr.num_sge = nsge;
- wr.opcode = IBV_WR_RDMA_WRITE;
- wr.send_flags = flags;
- wr.wr.rdma.remote_addr = remote_addr;
- wr.wr.rdma.rkey = rkey;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-/*
- * Simple send, receive, and RDMA calls.
- */
-static __inline int
-rdma_post_recv(struct rdma_cm_id *id, void *context, void *addr,
- size_t length, struct ibv_mr *mr)
-{
- struct ibv_sge sge;
-
- assert((addr >= mr->addr) &&
- (((uint8_t) addr + length) <= ((uint8_t) mr->addr + mr->length)));
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr->lkey;
-
- return rdma_post_recvv(id, context, &sge, 1);
-}
-
-static __inline int
-rdma_post_send(struct rdma_cm_id *id, void *context, void *addr,
- size_t length, struct ibv_mr *mr, int flags)
-{
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr ? mr->lkey : 0;
-
- return rdma_post_sendv(id, context, &sge, 1, flags);
-}
-
-static __inline int
-rdma_post_read(struct rdma_cm_id *id, void *context, void *addr,
- size_t length, struct ibv_mr *mr, int flags,
- uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr->lkey;
-
- return rdma_post_readv(id, context, &sge, 1, flags, remote_addr, rkey);
-}
-
-static __inline int
-rdma_post_write(struct rdma_cm_id *id, void *context, void *addr,
- size_t length, struct ibv_mr *mr, int flags,
- uint64_t remote_addr, uint32_t rkey)
-{
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr ? mr->lkey : 0;
-
- return rdma_post_writev(id, context, &sge, 1, flags, remote_addr, rkey);
-}
-
-static __inline int
-rdma_post_ud_send(struct rdma_cm_id *id, void *context, void *addr,
- size_t length, struct ibv_mr *mr, int flags,
- struct ibv_ah *ah, uint32_t remote_qpn)
-{
- struct ibv_send_wr wr, *bad;
- struct ibv_sge sge;
-
- sge.addr = (uint64_t) (uintptr_t) addr;
- sge.length = (uint32_t) length;
- sge.lkey = mr ? mr->lkey : 0;
-
- wr.wr_id = (uintptr_t) context;
- wr.next = NULL;
- wr.sg_list = &sge;
- wr.num_sge = 1;
- wr.opcode = IBV_WR_SEND;
- wr.send_flags = flags;
- wr.wr.ud.ah = ah;
- wr.wr.ud.remote_qpn = remote_qpn;
- wr.wr.ud.remote_qkey = RDMA_UDP_QKEY;
-
- return rdma_seterrno(ibv_post_send(id->qp, &wr, &bad));
-}
-
-// Comment out until patch to automatically create CQs
-static __inline int
-rdma_get_send_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
-{
- struct ibv_cq *cq;
- void *context;
- int ret;
-
- ret = ibv_poll_cq(id->send_cq, 1, wc);
- if (ret)
- goto out;
-
- ret = ibv_req_notify_cq(id->send_cq, 0);
- if (ret)
- return rdma_seterrno(ret);
-
- while (!(ret = ibv_poll_cq(id->send_cq, 1, wc))) {
- ret = ibv_get_cq_event(id->send_cq_channel, &cq, &context);
- if (ret)
- return rdma_seterrno(ret);
-
- assert(cq == id->send_cq && context == id);
- ibv_ack_cq_events(id->send_cq, 1);
- }
-out:
- return (ret < 0) ? rdma_seterrno(ret) : ret;
-}
-
-static __inline int
-rdma_get_recv_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
-{
- struct ibv_cq *cq;
- void *context;
- int ret;
-
- ret = ibv_poll_cq(id->recv_cq, 1, wc);
- if (ret)
- goto out;
-
- ret = ibv_req_notify_cq(id->recv_cq, 0);
- if (ret)
- return rdma_seterrno(ret);
-
- while (!(ret = ibv_poll_cq(id->recv_cq, 1, wc))) {
- ret = ibv_get_cq_event(id->recv_cq_channel, &cq, &context);
- if (ret)
- return rdma_seterrno(ret);
-
- assert(cq == id->recv_cq && context == id);
- ibv_ack_cq_events(id->recv_cq, 1);
- }
-out:
- return (ret < 0) ? rdma_seterrno(ret) : ret;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* RDMA_CMA_H */
+/*
+ * Copyright (c) 2010 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(RDMA_VERBS_H)
+#define RDMA_VERBS_H
+
+#include <assert.h>
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <errno.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Convert a verbs-style return code to the sockets convention:
+ * zero is passed through unchanged; any other value is stored in errno
+ * and reported to the caller as -1.
+ */
+static __inline int rdma_seterrno(int ret)
+{
+    if (ret == 0)
+        return 0;
+
+    errno = ret;
+    return -1;
+}
+
+/*
+ * Memory registration helpers.
+ */
+
+/*
+ * Register [addr, addr + length) for local write access only.
+ * The protection domain is taken from id->qp, which is dereferenced here.
+ * Returns the ibv_reg_mr() result unchanged.
+ */
+static __inline struct ibv_mr *
+rdma_reg_msgs(struct rdma_cm_id *id, void *addr, size_t length)
+{
+    struct ibv_pd *domain = id->qp->pd;
+
+    return ibv_reg_mr(domain, addr, length, IBV_ACCESS_LOCAL_WRITE);
+}
+
+/*
+ * Register [addr, addr + length) with local write and remote read access.
+ * The protection domain is taken from id->qp, which is dereferenced here.
+ * Returns the ibv_reg_mr() result unchanged.
+ */
+static __inline struct ibv_mr *
+rdma_reg_read(struct rdma_cm_id *id, void *addr, size_t length)
+{
+    struct ibv_pd *domain = id->qp->pd;
+
+    return ibv_reg_mr(domain, addr, length,
+                      IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
+}
+
+/*
+ * Register [addr, addr + length) with local write and remote write access.
+ * The protection domain is taken from id->qp, which is dereferenced here.
+ * Returns the ibv_reg_mr() result unchanged.
+ */
+static __inline struct ibv_mr *
+rdma_reg_write(struct rdma_cm_id *id, void *addr, size_t length)
+{
+    struct ibv_pd *domain = id->qp->pd;
+
+    return ibv_reg_mr(domain, addr, length,
+                      IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+}
+
+/*
+ * Deregister a memory region.
+ * Returns 0 when ibv_dereg_mr() returns 0; otherwise -1 with errno set to
+ * the value ibv_dereg_mr() returned.
+ */
+static __inline int
+rdma_dereg_mr(struct ibv_mr *mr)
+{
+    int rc = ibv_dereg_mr(mr);
+
+    return rdma_seterrno(rc);
+}
+
+
+/*
+ * Vectored send, receive, and RDMA operations.
+ * Support multiple scatter-gather entries.
+ */
+
+/*
+ * Post a single receive work request on id->qp that scatters into the
+ * nsge entries of sgl. The context pointer is stored in the request's wr_id.
+ * Returns 0, or -1 with errno set to the non-zero ibv_post_recv() result.
+ */
+static __inline int
+rdma_post_recvv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+                int nsge)
+{
+    struct ibv_recv_wr request, *failed;
+    int rc;
+
+    request.next = NULL;
+    request.wr_id = (uintptr_t) context;
+    request.num_sge = nsge;
+    request.sg_list = sgl;
+
+    rc = ibv_post_recv(id->qp, &request, &failed);
+    return rdma_seterrno(rc);
+}
+
+/*
+ * Post a single IBV_WR_SEND work request on id->qp that gathers from the
+ * nsge entries of sgl. flags is copied to send_flags and context to wr_id.
+ * Returns 0, or -1 with errno set to the non-zero ibv_post_send() result.
+ */
+static __inline int
+rdma_post_sendv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+                int nsge, int flags)
+{
+    struct ibv_send_wr request, *failed;
+    int rc;
+
+    request.next = NULL;
+    request.wr_id = (uintptr_t) context;
+    request.opcode = IBV_WR_SEND;
+    request.send_flags = flags;
+    request.num_sge = nsge;
+    request.sg_list = sgl;
+
+    rc = ibv_post_send(id->qp, &request, &failed);
+    return rdma_seterrno(rc);
+}
+
+/*
+ * Post a single IBV_WR_RDMA_READ work request on id->qp. The local side is
+ * described by the nsge entries of sgl, the remote side by remote_addr and
+ * rkey. flags is copied to send_flags and context to wr_id.
+ * Returns 0, or -1 with errno set to the non-zero ibv_post_send() result.
+ */
+static __inline int
+rdma_post_readv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+                int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
+{
+    struct ibv_send_wr request, *failed;
+    int rc;
+
+    request.next = NULL;
+    request.wr_id = (uintptr_t) context;
+    request.opcode = IBV_WR_RDMA_READ;
+    request.send_flags = flags;
+    request.num_sge = nsge;
+    request.sg_list = sgl;
+    request.wr.rdma.rkey = rkey;
+    request.wr.rdma.remote_addr = remote_addr;
+
+    rc = ibv_post_send(id->qp, &request, &failed);
+    return rdma_seterrno(rc);
+}
+
+/*
+ * Post a single IBV_WR_RDMA_WRITE work request on id->qp. The local side is
+ * described by the nsge entries of sgl, the remote side by remote_addr and
+ * rkey. flags is copied to send_flags and context to wr_id.
+ * Returns 0, or -1 with errno set to the non-zero ibv_post_send() result.
+ */
+static __inline int
+rdma_post_writev(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl,
+                 int nsge, int flags, uint64_t remote_addr, uint32_t rkey)
+{
+    struct ibv_send_wr request, *failed;
+    int rc;
+
+    request.next = NULL;
+    request.wr_id = (uintptr_t) context;
+    request.opcode = IBV_WR_RDMA_WRITE;
+    request.send_flags = flags;
+    request.num_sge = nsge;
+    request.sg_list = sgl;
+    request.wr.rdma.rkey = rkey;
+    request.wr.rdma.remote_addr = remote_addr;
+
+    rc = ibv_post_send(id->qp, &request, &failed);
+    return rdma_seterrno(rc);
+}
+
+/*
+ * Simple send, receive, and RDMA calls.
+ */
+
+/*
+ * Post a receive for one contiguous buffer.
+ *
+ * addr/length describe the buffer and mr the memory region that covers it;
+ * mr is dereferenced unconditionally, so it must not be NULL. context is
+ * handed on to rdma_post_recvv(). length is narrowed to 32 bits for the
+ * scatter-gather entry.
+ * Returns the rdma_post_recvv() result.
+ */
+static __inline int
+rdma_post_recv(struct rdma_cm_id *id, void *context, void *addr,
+               size_t length, struct ibv_mr *mr)
+{
+    struct ibv_sge sge;
+
+    /*
+     * Range-check the buffer against the region using byte pointers.
+     * Casting the pointers to a plain uint8_t value would keep only their
+     * low 8 bits and make the comparison meaningless.
+     */
+    assert((addr >= mr->addr) &&
+           (((uint8_t *) addr + length) <= ((uint8_t *) mr->addr + mr->length)));
+    sge.addr = (uint64_t) (uintptr_t) addr;
+    sge.length = (uint32_t) length;
+    sge.lkey = mr->lkey;
+
+    return rdma_post_recvv(id, context, &sge, 1);
+}
+
+/*
+ * Post a send of one contiguous buffer.
+ * The lkey is taken from mr when mr is non-NULL and is 0 otherwise.
+ * length is narrowed to 32 bits for the scatter-gather entry.
+ * Returns the rdma_post_sendv() result.
+ */
+static __inline int
+rdma_post_send(struct rdma_cm_id *id, void *context, void *addr,
+               size_t length, struct ibv_mr *mr, int flags)
+{
+    struct ibv_sge entry;
+
+    if (mr)
+        entry.lkey = mr->lkey;
+    else
+        entry.lkey = 0;
+    entry.length = (uint32_t) length;
+    entry.addr = (uint64_t) (uintptr_t) addr;
+
+    return rdma_post_sendv(id, context, &entry, 1, flags);
+}
+
+/*
+ * Post an RDMA read into one contiguous local buffer.
+ * mr is dereferenced unconditionally for its lkey, so it must not be NULL.
+ * length is narrowed to 32 bits for the scatter-gather entry.
+ * Returns the rdma_post_readv() result.
+ */
+static __inline int
+rdma_post_read(struct rdma_cm_id *id, void *context, void *addr,
+               size_t length, struct ibv_mr *mr, int flags,
+               uint64_t remote_addr, uint32_t rkey)
+{
+    struct ibv_sge entry;
+
+    entry.lkey = mr->lkey;
+    entry.length = (uint32_t) length;
+    entry.addr = (uint64_t) (uintptr_t) addr;
+
+    return rdma_post_readv(id, context, &entry, 1, flags, remote_addr, rkey);
+}
+
+/*
+ * Post an RDMA write from one contiguous local buffer.
+ * The lkey is taken from mr when mr is non-NULL and is 0 otherwise.
+ * length is narrowed to 32 bits for the scatter-gather entry.
+ * Returns the rdma_post_writev() result.
+ */
+static __inline int
+rdma_post_write(struct rdma_cm_id *id, void *context, void *addr,
+                size_t length, struct ibv_mr *mr, int flags,
+                uint64_t remote_addr, uint32_t rkey)
+{
+    struct ibv_sge entry;
+
+    if (mr)
+        entry.lkey = mr->lkey;
+    else
+        entry.lkey = 0;
+    entry.length = (uint32_t) length;
+    entry.addr = (uint64_t) (uintptr_t) addr;
+
+    return rdma_post_writev(id, context, &entry, 1, flags, remote_addr, rkey);
+}
+
+/*
+ * Post a datagram send of one contiguous buffer on id->qp.
+ * The destination is given by the address handle ah and remote_qpn; the
+ * remote qkey is always RDMA_UDP_QKEY. The lkey is taken from mr when mr is
+ * non-NULL and is 0 otherwise. length is narrowed to 32 bits.
+ * Returns 0, or -1 with errno set to the non-zero ibv_post_send() result.
+ */
+static __inline int
+rdma_post_ud_send(struct rdma_cm_id *id, void *context, void *addr,
+                  size_t length, struct ibv_mr *mr, int flags,
+                  struct ibv_ah *ah, uint32_t remote_qpn)
+{
+    struct ibv_send_wr request, *failed;
+    struct ibv_sge entry;
+    int rc;
+
+    if (mr)
+        entry.lkey = mr->lkey;
+    else
+        entry.lkey = 0;
+    entry.length = (uint32_t) length;
+    entry.addr = (uint64_t) (uintptr_t) addr;
+
+    request.next = NULL;
+    request.wr_id = (uintptr_t) context;
+    request.opcode = IBV_WR_SEND;
+    request.send_flags = flags;
+    request.num_sge = 1;
+    request.sg_list = &entry;
+    request.wr.ud.remote_qkey = RDMA_UDP_QKEY;
+    request.wr.ud.remote_qpn = remote_qpn;
+    request.wr.ud.ah = ah;
+
+    rc = ibv_post_send(id->qp, &request, &failed);
+    return rdma_seterrno(rc);
+}
+
+/*
+ * Retrieve one send completion, waiting for it if necessary.
+ *
+ * id->send_cq is polled for a single work completion. While the CQ is
+ * empty, the completion notification is armed, the CQ is polled once more
+ * (a completion may have arrived before arming), and only then does the
+ * function wait in ibv_get_cq_event() on id->send_cq_channel.
+ * The notification is re-armed on every pass: one ibv_req_notify_cq() call
+ * produces at most one event, so waiting a second time without re-arming
+ * could block forever.
+ *
+ * Returns the ibv_poll_cq() result when it is positive (wc has been filled
+ * in), or -1 with errno set from the failing call's return value.
+ */
+static __inline int
+rdma_get_send_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
+{
+    struct ibv_cq *cq;
+    void *context;
+    int ret;
+
+    for (;;) {
+        ret = ibv_poll_cq(id->send_cq, 1, wc);
+        if (ret)
+            break;
+
+        ret = ibv_req_notify_cq(id->send_cq, 0);
+        if (ret)
+            return rdma_seterrno(ret);
+
+        /* Close the window between the poll above and arming the CQ. */
+        ret = ibv_poll_cq(id->send_cq, 1, wc);
+        if (ret)
+            break;
+
+        ret = ibv_get_cq_event(id->send_cq_channel, &cq, &context);
+        if (ret)
+            return rdma_seterrno(ret);
+
+        assert(cq == id->send_cq && context == id);
+        ibv_ack_cq_events(id->send_cq, 1);
+    }
+
+    return (ret < 0) ? rdma_seterrno(ret) : ret;
+}
+
+/*
+ * Retrieve one receive completion, waiting for it if necessary.
+ *
+ * id->recv_cq is polled for a single work completion. While the CQ is
+ * empty, the completion notification is armed, the CQ is polled once more
+ * (a completion may have arrived before arming), and only then does the
+ * function wait in ibv_get_cq_event() on id->recv_cq_channel.
+ * The notification is re-armed on every pass: one ibv_req_notify_cq() call
+ * produces at most one event, so waiting a second time without re-arming
+ * could block forever.
+ *
+ * Returns the ibv_poll_cq() result when it is positive (wc has been filled
+ * in), or -1 with errno set from the failing call's return value.
+ */
+static __inline int
+rdma_get_recv_comp(struct rdma_cm_id *id, struct ibv_wc *wc)
+{
+    struct ibv_cq *cq;
+    void *context;
+    int ret;
+
+    for (;;) {
+        ret = ibv_poll_cq(id->recv_cq, 1, wc);
+        if (ret)
+            break;
+
+        ret = ibv_req_notify_cq(id->recv_cq, 0);
+        if (ret)
+            return rdma_seterrno(ret);
+
+        /* Close the window between the poll above and arming the CQ. */
+        ret = ibv_poll_cq(id->recv_cq, 1, wc);
+        if (ret)
+            break;
+
+        ret = ibv_get_cq_event(id->recv_cq_channel, &cq, &context);
+        if (ret)
+            return rdma_seterrno(ret);
+
+        assert(cq == id->recv_cq && context == id);
+        ibv_ack_cq_events(id->recv_cq, 1);
+    }
+
+    return (ret < 0) ? rdma_seterrno(ret) : ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RDMA_VERBS_H */
Index: ulp/librdmacm/include/rdma/rs_regpath.h
===================================================================
--- ulp/librdmacm/include/rdma/rs_regpath.h (revision 0)
+++ ulp/librdmacm/include/rdma/rs_regpath.h (working copy)
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#ifndef _RS_REGPATH_H_
+#define _RS_REGPATH_H_
+
+/* these definitions are common for installSP and WSD projects */
+#define RS_PM_REGISTRY_PATH \
+ TEXT("SYSTEM\\CurrentControlSet\\Services\\RSockets\\")
+#define RS_PM_EVENTLOG_PATH \
+ TEXT("SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\RSockets")
+#define RS_PM_SUBKEY_NAME TEXT("RSockets")
+#define RS_PM_SUBKEY_PERF TEXT("Performance")
+#define RS_PM_INI_FILE "rs_perfcounters.ini"
+#define RS_PM_SYM_H_FILE "rs_perfini.h"
+
+enum RS_PM_COUNTERS /* counter indices; presumably the X passed to RS_PM_COUNTER( X ) — TODO confirm */
+{
+ BYTES_SEND = 0, /* numbering starts at 0; the following entries increase by one */
+ BYTES_RECV,
+ BYTES_WRITE,
+ BYTES_READ,
+ BYTES_TOTAL,
+ COMP_SEND,
+ COMP_RECV,
+ COMP_TOTAL,
+ INTR_TOTAL,
+ RS_PM_NUM_COUNTERS /* not a counter: equals the number of entries listed above (9) */
+};
+
+/* counter symbol names */
+#define RS_PM_OBJ 0
+/*
+ * Symbol value for counter index X: 2, 4, 6, ... for X = 0, 1, 2, ...
+ * X is parenthesized so that an argument containing operators of lower
+ * precedence than '+' (e.g. a shift or a bitwise-or) still expands correctly.
+ */
+#define RS_PM_COUNTER( X ) (((X) + 1) * 2)
+
+#endif /* _RS_REGPATH_H_ */
Index: ulp/librdmacm/include/rdma/rsocket.h
===================================================================
--- ulp/librdmacm/include/rdma/rsocket.h (revision 0)
+++ ulp/librdmacm/include/rdma/rsocket.h (working copy)
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2011 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#if !defined(RSOCKET_H)
+#define RSOCKET_H
+
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <ws2spi.h>
+
+typedef unsigned int nfds_t; // Under Linux from poll.h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+__declspec(dllexport)
+int rsocket(int domain, int type, int protocol);
+__declspec(dllexport)
+int rbind(int socket, const struct sockaddr *addr, socklen_t addrlen);
+__declspec(dllexport)
+int rlisten(int socket, int backlog);
+__declspec(dllexport)
+int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen);
+SOCKET WSPAPI WSPAccept(
+ SOCKET socket,
+ struct sockaddr *addr,
+ LPINT addrlen,
+ LPCONDITIONPROC lpfnCondition,
+ DWORD_PTR dwCallbackData,
+ LPINT lpErrno
+);
+__declspec(dllexport)
+int rconnect(int socket, const struct sockaddr *addr, socklen_t addrlen);
+__declspec(dllexport)
+int rshutdown(int socket, int how);
+__declspec(dllexport)
+int rclose(int socket);
+__declspec(dllexport)
+ssize_t rrecv(int socket, void *buf, size_t len, int flags);
+__declspec(dllexport)
+ssize_t rrecvfrom(int socket, void *buf, size_t len, int flags,
+ struct sockaddr *src_addr, socklen_t *addrlen);
+__declspec(dllexport)
+ssize_t rrecvmsg(int socket, struct msghdr *msg, int flags);
+__declspec(dllexport)
+ssize_t rsend(int socket, const void *buf, size_t len, int flags);
+__declspec(dllexport)
+ssize_t rsendto(int socket, const void *buf, size_t len, int flags,
+ const struct sockaddr *dest_addr, socklen_t addrlen);
+__declspec(dllexport)
+ssize_t rsendmsg(int socket, const struct msghdr *msg, int flags);
+__declspec(dllexport)
+ssize_t rread(int socket, void *buf, size_t count);
+__declspec(dllexport)
+ssize_t rreadv(int socket, const struct iovec *iov, int iovcnt);
+__declspec(dllexport)
+ssize_t rwrite(int socket, const void *buf, size_t count);
+__declspec(dllexport)
+ssize_t rwritev(int socket, const struct iovec *iov, int iovcnt);
+__declspec(dllexport)
+int rpoll(struct pollfd *fds, nfds_t nfds, int timeout);
+__declspec(dllexport)
+int rselect(int nfds, fd_set *readfds, fd_set *writefds,
+ fd_set *exceptfds, struct timeval *timeout);
+__declspec(dllexport)
+int rgetpeername(int socket, struct sockaddr *addr, socklen_t *addrlen);
+__declspec(dllexport)
+int rgetsockname(int socket, struct sockaddr *addr, socklen_t *addrlen);
+
+#ifndef SOL_RDMA
+#define SOL_RDMA 0x10000
+enum {
+ RDMA_SQSIZE,
+ RDMA_RQSIZE,
+ RDMA_INLINE
+};
+#endif
+
+__declspec(dllexport)
+int rsetsockopt(int socket, int level, int optname,
+ const void *optval, socklen_t optlen);
+__declspec(dllexport)
+int rgetsockopt(int socket, int level, int optname,
+ void *optval, socklen_t *optlen);
+__declspec(dllexport)
+int rfcntl(int socket, int cmd, ... /* arg */ );
+__declspec(dllexport)
+int rioctlsocket(int socket, long cmd, u_long* argp);
+
+int rsGetStatus ( __out LPRS_STATUS *lppStatusBuffer ); /* NOTE(review): LPRS_STATUS is declared in rwinsock.h, which is not among the includes visible in this header — confirm it is in scope for users that include only rsocket.h */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RSOCKET_H */
Index: ulp/librdmacm/include/rdma/rwinsock.h
===================================================================
--- ulp/librdmacm/include/rdma/rwinsock.h (revision 0)
+++ ulp/librdmacm/include/rdma/rwinsock.h (working copy)
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#if !defined(RWINSOCK_H)
+#define RWINSOCK_H
+
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const GUID rsProviderGuid = { //D478E78B-A803-4a25-A5E4-83BFB7EAF4A7
+ 0xd478e78b,
+ 0xa803,
+ 0x4a25,
+ { 0xa5, 0xe4, 0x83, 0xbf, 0xb7, 0xea, 0xf4, 0xa7 }
+};
+
+static WSAPROTOCOL_INFO rsProtocolInfo = {0};
+
+/**
+ * \brief Get RSockets Winsock provider's WSAPROTOCOL_INFO structure.
+ *
+ * Enumerates the installed Winsock protocols and looks for the entry whose
+ * ProviderId equals rsProviderGuid. The matching entry is copied into the
+ * static rsProtocolInfo variable, so the returned pointer stays valid after
+ * the call but its contents are overwritten by the next successful call.
+ *
+ * \param lpStatus Pointer to status variable to be returned. Can be NULL if not required.
+ *                 Receives ERROR_NOT_ENOUGH_MEMORY if the enumeration buffer
+ *                 cannot be allocated, the WSAGetLastError() value if the
+ *                 enumeration fails, and ERROR_SUCCESS otherwise (also when
+ *                 the enumeration succeeds but the provider is not listed).
+ *
+ * \return Pointer to the RSockets Winsock provider's WSAPROTOCOL_INFO structure
+ *         (NULL if the RSockets provider is not found or another error occurred).
+ */
+static LPWSAPROTOCOL_INFO rsGetProtocolInfo (LPINT lpStatus)
+{
+    int Status = ERROR_SUCCESS;
+    LPWSAPROTOCOL_INFO lpProtocolBuffer = NULL;
+    LPWSAPROTOCOL_INFO lpReturn = NULL;
+    DWORD BufferLength = 0;
+    int ProtocolCount;
+    int i;
+
+    /* Size query only: the call is expected to fail and report the required BufferLength. */
+    WSAEnumProtocols (NULL, NULL, &BufferLength);
+
+    if (NULL == (lpProtocolBuffer = (LPWSAPROTOCOL_INFO)malloc (BufferLength)))
+    {
+        Status = ERROR_NOT_ENOUGH_MEMORY;
+        goto cleanup;
+    }
+
+    /*
+     * On success the return value is the number of entries written. Use it as
+     * the loop bound instead of deriving a count from the buffer size, so that
+     * only entries that were actually filled in are examined.
+     */
+    ProtocolCount = WSAEnumProtocols (NULL, lpProtocolBuffer, &BufferLength);
+    if (SOCKET_ERROR == ProtocolCount)
+    {
+        Status = WSAGetLastError();
+        goto cleanup;
+    }
+
+    for (i = 0; i < ProtocolCount; i++)
+    {
+        if (0 == memcmp (&lpProtocolBuffer[i].ProviderId, &rsProviderGuid, sizeof(rsProviderGuid)))
+        {
+            rsProtocolInfo = lpProtocolBuffer[i];
+            lpReturn = &rsProtocolInfo;
+            break;
+        }
+    }
+
+ cleanup:
+    free (lpProtocolBuffer); /* free(NULL) is a no-op */
+
+    if (lpStatus)
+        *lpStatus = Status;
+
+    return lpReturn;
+}
+
+#ifndef SOL_RDMA
+#define SOL_RDMA 0x10000 // for getsockopt + setsockopt
+enum {
+ RDMA_SQSIZE,
+ RDMA_RQSIZE,
+ RDMA_INLINE
+};
+#endif /* SOL_RDMA */
+
+typedef struct { /* status record; the SIO_RS_GET_STATUS comment in this header says it is retrieved via WSAIoctl() */
+ struct sockaddr src_addr; /* presumably the local endpoint — TODO confirm; NOTE(review): struct sockaddr may be too small for an IPv6 address — confirm how IPv6 endpoints are reported */
+ struct sockaddr dst_addr; /* presumably the remote endpoint — TODO confirm */
+ char state[16]; /* presumably a textual connection state — TODO confirm whether it is always NUL-terminated */
+} RS_STATUS, *LPRS_STATUS;
+
+typedef struct {
+ char szLine[128];
+} RS_TRACE_OUT, *LPRS_TRACE_OUT;
+
+/*
+ * IOCTL code definition to get RS_STATUS information via WSAIoctl():
+ */
+#define IOC_VENDOR_OFA 0x0FA0000
+#define SIO_RS_GET_STATUS (IOC_OUT | IOC_VENDOR | IOC_VENDOR_OFA | 1)
+#define SIO_RS_GET_TRACE (IOC_OUT | IOC_VENDOR | IOC_VENDOR_OFA | 2)
+
+#endif /* RWINSOCK_H */
Index: ulp/librdmacm/RSocket.txt
===================================================================
--- ulp/librdmacm/RSocket.txt (revision 0)
+++ ulp/librdmacm/RSocket.txt (working copy)
@@ -0,0 +1,105 @@
+
+ Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+
+ This software is available to you under the BSD license below:
+
+ Redistribution and use in source and binary forms, with or
+ without modification, are permitted provided that the following
+ conditions are met:
+
+ - Redistributions of source code must retain the above
+ copyright notice, this list of conditions and the following
+ disclaimer.
+
+ - Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials
+ provided with the distribution.
+
+ - Neither the name Oce Printing Systems GmbH nor the names
+ of the authors may be used to endorse or promote products
+ derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ OF SUCH DAMAGE.
+
+#############################################################################
+
+
+GENERAL
+=======
+The RSockets protocol provides socket-based RDMA communication between
+Windows nodes (like WSD or ND) as well as between Windows and Linux nodes.
+
+The RSockets functionality is contained within the librdmacm.dll which now
+is capable of acting as a Winsock base transport provider.
+
+For now the librdmacm.dll still exports the direct rsocket calls
+(rsocket, rbind, rrecv etc.) as well. So application developers can
+alternatively circumvent Winsock and call those functions directly
+(by including rsocket.h instead of rwinsock.h).
+Aside from a slight performance gain, this might be useful in case of
+quickly porting a Linux app to Windows(?).
+But beware of using both access methods concurrently in the same application!
+
+
+INSTALLATION
+============
+Installation of that Winsock provider (i.e. registration of the
+librdmacm.dll in the Windows Registry) can be done with the rsinstall
+tool (see tools/rsinstall) which works similarly to wsdinstall for the
+WSD Winsock provider.
+For a list of available options, just call rsinstall.exe without
+parameters. (Note that rsinstall.exe requires administrative privileges
+to run properly!)
+
+
+USAGE
+=====
+Usage of the RSocket provider at application level is quite simple,
+as demonstrated by the rstream tool (see ulp/librdmacm/examples/rstream)
+which is also a port from Linux OFED. In contrast to a 'normal' Winsock
+application there are just two essential differences:
+
+- The ulp\librdmacm\include\rdma\rwinsock.h header has to be included
+ instead of winsock2.h. (Nonetheless it's still necessary to call
+ WSAStartup() and WSACleanup() during initialization and shutdown of
+ your application, respectively).
+
+- Instead of calling socket() for socket creation, a WSASocket() has to
+ be performed with a WSAPROTOCOL_INFO structure selecting the appropriate
+ Winsock provider. For convenience there is a little helper function
+ rsGetProtocolInfo() implemented in rwinsock.h which provides this structure
+ based on the provider's GUID (static variable 'rsProviderGuid' which is
+ also contained in rwinsock.h).
+
+
+RESTRICTIONS
+============
+Generally there are the same restrictions for socket applications as
+described in the Linux RSockets man page (e.g. no UDP / SOCK_DGRAM).
+Moreover the following restrictions apply:
+
+- The MSG_DONTWAIT flag is not supported when calling WSASocket().
+ Instead, to configure a socket for non-blocking operation,
+ ioctlsocket(FIONBIO) can be used.
+
+- Overlapped operation is currently not supported, i.e. a WSASocket() with
+ the WSA_FLAG_OVERLAPPED flag set will be rejected with a WSAEINVAL error.
+
+- The WSAPoll() function (in Windows Vista and later) is not supported,
+ hence the select() function has to be used instead.
+
+- IPv6 should work, but has not been tested yet.
Index: ulp/librdmacm/src/addrinfo.cpp
===================================================================
--- ulp/librdmacm/src/addrinfo.cpp (revision 3419)
+++ ulp/librdmacm/src/addrinfo.cpp (working copy)
@@ -1,11 +1,8 @@
/*
* Copyright (c) 2010 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
*
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
+ * This software is available to you under the BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
@@ -20,14 +17,24 @@
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
*/
#if HAVE_CONFIG_H
@@ -47,7 +54,7 @@
{
WSADATA wsadata;
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
if (addr_ref++) {
goto out;
}
@@ -57,16 +64,16 @@
}
out:
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
}
static void ucma_shutdown(void)
{
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
if (--addr_ref == 0) {
WSACleanup();
}
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
}
static void ucma_convert_to_ai(struct addrinfo *ai, struct rdma_addrinfo *rai)
Index: ulp/librdmacm/src/cma.cpp
===================================================================
--- ulp/librdmacm/src/cma.cpp (revision 3419)
+++ ulp/librdmacm/src/cma.cpp (working copy)
@@ -1,8 +1,8 @@
/*
* Copyright (c) 2005-2009 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
*
- * This software is available to you under the OpenIB.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
@@ -17,21 +17,31 @@
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
*/
#include <windows.h>
#include <winsock2.h>
+#include "openib_osd.h"
#include <stdio.h>
#include <iphlpapi.h>
-
#include <rdma/rdma_cma.h>
#include <rdma/rdma_verbs.h>
#include <infiniband/verbs.h>
@@ -88,6 +98,7 @@
int port_cnt;
uint8_t max_initiator_depth;
uint8_t max_responder_resources;
+ int max_qpsize;
};
struct cma_event {
@@ -100,6 +111,59 @@
static int cma_dev_cnt;
static DWORD ref;
+/*
+ * Translate a CRT errno value into a Winsock error code and store it with
+ * WSASetLastError().
+ *
+ * err == 0 clears the last error. Values with a direct Winsock counterpart
+ * are mapped one-to-one. A non-zero value without a counterpart is reported
+ * as WSASYSCALLFAILURE rather than 0, so that a failure can never be read
+ * back as "no error" through WSAGetLastError().
+ */
+void wsa_setlasterror(int err)
+{
+    int wsa_err;
+
+    switch (err) {
+    case 0:               wsa_err = 0;                      break;
+    case EACCES:          wsa_err = WSAEACCES;              break;
+    case EADDRINUSE:      wsa_err = WSAEADDRINUSE;          break;
+    case EADDRNOTAVAIL:   wsa_err = WSAEADDRNOTAVAIL;       break;
+    case EAFNOSUPPORT:    wsa_err = WSAEAFNOSUPPORT;        break;
+    case EALREADY:        wsa_err = WSAEALREADY;            break;
+    case EBADF:           wsa_err = WSAEBADF;               break;
+    case ECANCELED:       wsa_err = WSAECANCELLED;          break;
+    case ECONNABORTED:    wsa_err = WSAECONNABORTED;        break;
+    case ECONNREFUSED:    wsa_err = WSAECONNREFUSED;        break;
+    case ECONNRESET:      wsa_err = WSAECONNRESET;          break;
+    case EDESTADDRREQ:    wsa_err = WSAEDESTADDRREQ;        break;
+    case EFAULT:          wsa_err = WSAEFAULT;              break;
+    case EHOSTUNREACH:    wsa_err = WSAEHOSTUNREACH;        break;
+    case EINPROGRESS:     wsa_err = WSAEINPROGRESS;         break;
+    case EINTR:           wsa_err = WSAEINTR;               break;
+    case EINVAL:          wsa_err = WSAEINVAL;              break;
+    case EISCONN:         wsa_err = WSAEISCONN;             break;
+    case ELOOP:           wsa_err = WSAELOOP;               break;
+    case EMFILE:          wsa_err = WSAEMFILE;              break;
+    case EMSGSIZE:        wsa_err = WSAEMSGSIZE;            break;
+    case ENETDOWN:        wsa_err = WSAENETDOWN;            break;
+    case ENETRESET:       wsa_err = WSAENETRESET;           break;
+    case ENETUNREACH:     wsa_err = WSAENETUNREACH;         break;
+    case ENOBUFS:         wsa_err = WSAENOBUFS;             break;
+    case ENOMEM:          wsa_err = WSA_NOT_ENOUGH_MEMORY;  break;
+    case ENOPROTOOPT:     wsa_err = WSAENOPROTOOPT;         break;
+    case ENOTCONN:        wsa_err = WSAENOTCONN;            break;
+    case ENOTRECOVERABLE: wsa_err = WSANO_RECOVERY;         break;
+    case ENOTSOCK:        wsa_err = WSAENOTSOCK;            break;
+    case ENOTSUP:         wsa_err = WSAEINVAL;              break; /* approximate mapping */
+    case EOPNOTSUPP:      wsa_err = WSAEOPNOTSUPP;          break;
+    case EPROTO:          wsa_err = WSAENOPROTOOPT;         break; /* approximate mapping */
+    case EPROTONOSUPPORT: wsa_err = WSAEPROTONOSUPPORT;     break;
+    case EPROTOTYPE:      wsa_err = WSAEPROTOTYPE;          break;
+    case ETIMEDOUT:       wsa_err = WSAETIMEDOUT;           break;
+    case EAGAIN:
+    case EWOULDBLOCK:     wsa_err = WSAEWOULDBLOCK;         break;
+    default:
+        /*
+         * No Winsock counterpart (e.g. EBADMSG, EIDRM, ENODATA, ENOLINK,
+         * ENOMSG, ENOSR, ENOSTR, EOTHER, EOVERFLOW, EOWNERDEAD, ETIME,
+         * ETXTBSY): report a generic failure instead of success.
+         */
+        wsa_err = WSASYSCALLFAILURE;
+        break;
+    }
+    WSASetLastError(wsa_err);
+}
+
static int ucma_acquire(void)
{
struct ibv_device **dev_list = NULL;
@@ -107,7 +171,7 @@
struct ibv_device_attr attr;
int i, ret, dev_cnt;
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
if (ref++) {
goto out;
}
@@ -153,13 +217,14 @@
}
cma_dev->port_cnt = attr.phys_port_cnt;
+ cma_dev->max_qpsize = attr.max_qp_wr;
cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
}
ibv_free_device_list(dev_list);
cma_dev_cnt = dev_cnt;
out:
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
return 0;
err4:
@@ -174,7 +239,7 @@
ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
err1:
ref--;
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
return ret;
}
@@ -182,7 +247,7 @@
{
int i;
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
if (--ref == 0) {
for (i = 0; i < cma_dev_cnt; i++) {
ibv_dealloc_pd(cma_dev_array[i].pd);
@@ -192,7 +257,7 @@
cma_dev_cnt = 0;
ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
}
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
}
__declspec(dllexport)
@@ -329,9 +394,9 @@
id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
id_priv->state = cma_destroying;
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
if (id->ps == RDMA_PS_TCP) {
id->ep.connect->CancelOverlappedRequests();
@@ -502,13 +567,20 @@
return ret;
}
-static int ucma_complete(struct cma_id_private *id_priv)
+static int ucma_complete_priv(struct cma_id_private *id_priv)
{
+ return ucma_complete(&id_priv->id);
+}
+
+int ucma_complete(struct rdma_cm_id *id)
+{
+ struct cma_id_private *id_priv;
int ret;
- if (!id_priv->sync) {
+ id_priv = container_of(id, struct cma_id_private, id);
+
+ if (!id_priv->sync)
return 0;
- }
if (id_priv->id.event) {
rdma_ack_cm_event(id_priv->id.event);
@@ -516,11 +588,18 @@
}
ret = rdma_get_cm_event(id_priv->id.channel, &id_priv->id.event);
- if (ret) {
+ if (ret)
return ret;
+
+ if (id_priv->id.event->status) {
+ if (id_priv->id.event->event == RDMA_CM_EVENT_REJECTED)
+ ret = ERR(ECONNREFUSED);
+ else if (id_priv->id.event->status < 0)
+ ret = ERR(-id_priv->id.event->status);
+ else
+ ret = ERR(-id_priv->id.event->status);
}
-
- return id_priv->id.event->status;
+ return ret;
}
__declspec(dllexport)
@@ -561,12 +640,17 @@
}
}
- RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr));
+ if (((struct sockaddr_in *)&id->route.addr.dst_addr)->sin_port == 0)
+ {
+ // port = 0 => Assume that entire dst_addr hasn't been set yet
+ RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr));
+ }
+
id_priv->state = cma_addr_resolve;
id_priv->refcnt++;
CompEntryPost(&id->comp_entry);
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
}
__declspec(dllexport)
@@ -594,7 +678,7 @@
id_priv->refcnt++;
CompEntryPost(&id->comp_entry);
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
}
static int ucma_modify_qp_init(struct cma_id_private *id_priv, struct ibv_qp *qp)
@@ -648,10 +732,10 @@
if (id->recv_cq_channel)
ibv_destroy_comp_channel(id->recv_cq_channel);
- if (id->send_cq)
+ if (id->send_cq && id->send_cq != id->recv_cq)
ibv_destroy_cq(id->send_cq);
- if (id->send_cq_channel)
+ if (id->send_cq_channel && id->send_cq_channel != id->recv_cq_channel)
ibv_destroy_comp_channel(id->send_cq_channel);
}
@@ -832,7 +916,7 @@
return ibvw_wv_errno(hr);
}
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
}
static int ucma_get_request(struct cma_id_private *listen, int index)
@@ -841,7 +925,7 @@
HRESULT hr;
int ret;
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
if (listen->state != cma_listening) {
ret = ibvw_wv_errno(WV_INVALID_PARAMETER);
goto err1;
@@ -873,14 +957,14 @@
id_priv->refcnt--;
goto err2;
}
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
return 0;
err2:
InterlockedDecrement(&listen->refcnt);
err1:
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
if (id_priv != NULL) {
rdma_destroy_id(&id_priv->id);
}
@@ -1011,7 +1095,7 @@
return ibvw_wv_errno(hr);
}
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
}
__declspec(dllexport)
@@ -1053,7 +1137,7 @@
return ibvw_wv_errno(hr);
}
- return ucma_complete(id_priv);
+ return ucma_complete_priv(id_priv);
}
__declspec(dllexport)
@@ -1179,7 +1263,7 @@
id_priv = event->id_priv;
- EnterCriticalSection(&lock);
+ fastlock_acquire(&lock);
switch (id_priv->state) {
case cma_get_request:
listen = (struct cma_id_private *) id_priv->id.context;
@@ -1190,7 +1274,7 @@
}
listen->req_list[id_priv->index] = NULL;
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
return ucma_process_conn_req(event);
case cma_addr_resolve:
event->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1216,7 +1300,7 @@
InterlockedDecrement(&id_priv->refcnt);
ret = ECANCELED;
}
- LeaveCriticalSection(&lock);
+ fastlock_release(&lock);
return ret;
}
@@ -1240,6 +1324,8 @@
ret = CompChannelPoll(&channel->channel, &entry);
if (ret) {
+ if (ret == WAIT_TIMEOUT)
+ ret = ERR(EWOULDBLOCK);
delete evt;
return ret;
}
@@ -1473,3 +1559,11 @@
}
return -1;
}
+
+/*
+ * Return the max_qpsize value recorded for the device this id is attached to.
+ * NOTE(review): cma_dev is dereferenced unconditionally; presumably callers
+ * only pass ids that are already bound to a device - confirm.
+ */
+int ucma_max_qpsize(struct rdma_cm_id *id)
+{
+    /* Recover the private wrapper structure that embeds the public id. */
+    struct cma_id_private *priv = container_of(id, struct cma_id_private, id);
+
+    return priv->cma_dev->max_qpsize;
+}
Index: ulp/librdmacm/src/cma.h
===================================================================
--- ulp/librdmacm/src/cma.h (revision 3419)
+++ ulp/librdmacm/src/cma.h (working copy)
@@ -1,9 +1,9 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2008-2009 Intel Corp. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
*
- * This software is available to you under the OpenIB.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
@@ -18,24 +18,60 @@
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
*/
#ifndef CMA_H
#define CMA_H
-extern CRITICAL_SECTION lock;
-extern HANDLE heap;
+#include <complib/cl_spinlock.h>
+#include <rdma/rdma_verbs.h>
+/*
+ * Fast synchronization for low contention locking.
+ *
+ * Each fastlock_* name expands directly to the complib cl_spinlock_*
+ * call of the same suffix, so a fastlock_t is a cl_spinlock_t and
+ * must be set up with fastlock_init() before its first acquire.
+ */
+#define fastlock_t cl_spinlock_t
+#define fastlock_init(lock) cl_spinlock_init(lock)
+#define fastlock_destroy(lock) cl_spinlock_destroy(lock)
+#define fastlock_acquire(lock) cl_spinlock_acquire(lock)
+#define fastlock_release(lock) cl_spinlock_release(lock)
+
+extern fastlock_t lock;
+extern HANDLE heap;
+
+/*
+ * TRACE(fmt, ...) forwards to Trace() with the calling function's name
+ * prepended and a newline appended to the format string. Because the
+ * prefix comes from __FUNCTION__, fmt must be a string literal.
+ * NOTE(review): the expansion always emits a comma before __VA_ARGS__,
+ * so at least one variadic argument is expected - confirm for the
+ * compilers in use.
+ */
+#define TRACE(fmt, ...) Trace(__FUNCTION__": " fmt "\n", __VA_ARGS__)
+
+void Trace(const char* fmt, ...);
void ucma_cleanup();
+int ucma_max_qpsize(struct rdma_cm_id *id);
+int ucma_complete(struct rdma_cm_id *id);
+void wsa_setlasterror(int err);
+/*
+ * Report an error code through rdma_seterrno() and, when that call
+ * returns a nonzero value, mirror the same code into the Winsock
+ * last-error slot via wsa_setlasterror().
+ *
+ * Returns whatever rdma_seterrno() returned.
+ */
+static __inline int ERR(int err)
+{
+    const int rc = rdma_seterrno(err);
+
+    if (rc != 0) {
+        wsa_setlasterror(err);
+    }
+
+    return rc;
+}
+
__inline void* __cdecl operator new(size_t size)
{
return HeapAlloc(heap, 0, size);
@@ -46,4 +82,13 @@
HeapFree(heap, 0, pObj);
}
+/*
+ * Configuration directory names, built by string-literal concatenation.
+ * SYSCONFDIR and RDMADIR may be predefined by the build; with the
+ * defaults below RDMA_CONF_DIR is "/etc/rdma" and RS_CONF_DIR is
+ * "/etc/rdma/rsocket".
+ */
+#ifndef SYSCONFDIR
+#define SYSCONFDIR "/etc"
+#endif
+#ifndef RDMADIR
+#define RDMADIR "rdma"
+#endif
+#define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR
+#define RS_CONF_DIR RDMA_CONF_DIR "/rsocket"
+
#endif /* CMA_H */
Index: ulp/librdmacm/src/cma_exports.src
===================================================================
--- ulp/librdmacm/src/cma_exports.src (revision 3419)
+++ ulp/librdmacm/src/cma_exports.src (working copy)
@@ -30,4 +30,30 @@
rdma_freeaddrinfo
rdma_get_request
rdmaw_wsa_errno
+rsocket
+rbind
+rlisten
+raccept
+rconnect
+rshutdown
+rclose
+rrecv
+rrecvfrom
+rrecvmsg
+rsend
+rsendto
+rsendmsg
+rread
+rreadv
+rwrite
+rwritev
+rgetpeername
+rgetsockname
+rsetsockopt
+rgetsockopt
+rfcntl
+rioctlsocket
+rselect
+rpoll
+WSPStartup
#endif
Index: ulp/librdmacm/src/cma_main.cpp
===================================================================
--- ulp/librdmacm/src/cma_main.cpp (revision 3419)
+++ ulp/librdmacm/src/cma_main.cpp (working copy)
@@ -1,8 +1,8 @@
/*
- * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
*
- * This software is available to you under the OpenIB.org BSD license
- * below:
+ * This software is available to you under the BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
@@ -17,22 +17,1173 @@
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
*/
-
-#include <windows.h>
+
+#include "openib_osd.h"
#include "cma.h"
+#include "rdma/rwinsock.h"
+#include "rdma/rsocket.h"
+#include <complib/cl_debug.h>
+#include "../../../etc/user/gtod.c" // getimeofday()
-CRITICAL_SECTION lock;
-HANDLE heap;
+#include <strsafe.h>
+#include <stdio.h>
+#include <stdlib.h>
+// Number of slots in the in-memory trace ring. The ring index is wrapped
+// with "& (MAX_TRACE_INDEX-1)", which only works for powers of two.
+#define MAX_TRACE_INDEX 4096 // Has to be a power of 2
+
+// One slot of the trace ring as recorded by Trace().
+typedef struct {
+ FILETIME fTime; // Time stamp taken when the entry was recorded
+ char* pszFormat; // Format string pointer (stored, not copied); NULL = slot unused
+ ULONG64 qwArgs[4]; // First four variadic arguments, captured as 64-bit values
+} RS_TRACE, *LPRS_TRACE;
+
+/*
+ * Globals used across files
+ */
+CRITICAL_SECTION gCriticalSection; // Critical section to protect startup/cleanup
+WSPUPCALLTABLE gMainUpCallTable; // Winsock upcall table
+WSAPROTOCOL_INFOW gProtocolInfo; // Supplies dwCatalogEntryId when socket handles are created
+BOOL gDetached = FALSE; // Indicates if process is detaching from DLL
+fastlock_t lock; // Declared extern in cma.h
+fastlock_t mut; // NOTE(review): no use visible in this file - confirm purpose
+HANDLE heap; // Heap used by the operator new/delete overloads in cma.h
+
+/*
+ * Globals local to this file
+ */
+static const WCHAR *Description = L"OpenFabrics RSockets for InfiniBand";
+static int gStartupCount = 0; // Global startup count (for every WSPStartup call)
+static uint32_t iTrace = 0; // Next trace ring slot to be written by Trace()
+static RS_TRACE rsTrace[MAX_TRACE_INDEX] = {0}; // Trace ring; zero-initialized so unused slots have a NULL format
+static fastlock_t TraceLock; // Serializes access to rsTrace[] and iTrace
+
+/*
+ * Record one trace entry in the in-memory ring buffer.
+ *
+ * fmt  Format string. Only the pointer is stored, so the string must
+ *      outlive the ring (the TRACE macro always passes a literal).
+ * ...  Arguments for fmt. Exactly as many 64-bit values as qwArgs[]
+ *      holds are fetched, whatever the caller actually passed.
+ *      NOTE(review): fetching more (or differently sized) arguments
+ *      than were supplied is not portable - confirm this is acceptable
+ *      on all supported targets.
+ *
+ * The oldest entry is overwritten once the ring is full.
+ */
+void Trace (const char* fmt, ...)
+{
+    va_list argptr;
+    size_t  i;
+
+    fastlock_acquire(&TraceLock);
+
+    GetSystemTimeAsFileTime(&rsTrace[iTrace].fTime);
+    rsTrace[iTrace].pszFormat = (char *)fmt;
+
+    va_start(argptr, fmt);
+    for (i = 0; i < sizeof(rsTrace[0].qwArgs) / sizeof(rsTrace[0].qwArgs[0]); i++)
+        rsTrace[iTrace].qwArgs[i] = va_arg(argptr, uint64_t);
+    va_end(argptr);
+
+    /* Advance and wrap; relies on MAX_TRACE_INDEX being a power of two. */
+    iTrace = (iTrace + 1) & (MAX_TRACE_INDEX-1);
+
+    fastlock_release(&TraceLock);
+}
+
+/**
+ * \brief Get current RSockets trace information for the calling process
+ *        (by calling librdmacm.dll directly).
+ *
+ * \param lppTraceBuffer Pointer to a buffer with an array of RS_TRACE_OUT entries
+ *                       to be allocated and returned. The caller is responsible for
+ *                       deallocating that buffer via free() when it is no longer needed.
+ *                       The buffer always has room for MAX_TRACE_INDEX entries; the
+ *                       used entries are packed at the start, oldest first.
+ *
+ * \return The number of entries filled in at the start of *lppTraceBuffer,
+ *         or 0 if lppTraceBuffer is NULL or the allocation failed.
+ */
+static uint32_t rsGetTrace ( __out LPRS_TRACE_OUT *lppTraceBuffer )
+{
+    uint32_t i, e, count = 0;
+    struct timeval tvTime;
+
+    if (NULL == lppTraceBuffer)
+        return 0;
+
+    *lppTraceBuffer = (LPRS_TRACE_OUT)malloc(MAX_TRACE_INDEX * sizeof(**lppTraceBuffer));
+    if (NULL == *lppTraceBuffer)
+        return 0;
+
+    memset( *lppTraceBuffer, 0, MAX_TRACE_INDEX * sizeof(**lppTraceBuffer) );
+
+    fastlock_acquire(&TraceLock);
+    /* Walk the whole ring once, starting at the oldest slot (the next write position). */
+    for (
+        i = 0, e = iTrace;
+        i < MAX_TRACE_INDEX;
+        i++, e = (e+1) & (MAX_TRACE_INDEX - 1)
+        )
+    {
+        if (!rsTrace[e].pszFormat)
+            continue;   /* slot never written */
+
+        FileTimeToTimeval(&rsTrace[e].fTime, &tvTime);
+        /*
+         * Store at index 'count', not at the scan index: callers copy only
+         * the first 'count' entries, so the output has to be packed.
+         *
+         * NOTE(review): the format below prints pszFormat verbatim through
+         * "%s"; the qwArgs values are passed but never consumed, so the
+         * recorded arguments do not appear in the output. Confirm whether
+         * that is intended.
+         */
+        sprintf_s(
+            (*lppTraceBuffer)[count].szLine,
+            sizeof((*lppTraceBuffer)[0].szLine),
+            "%010d:%010d %s",
+            tvTime.tv_sec, tvTime.tv_usec,
+            rsTrace[e].pszFormat,
+            rsTrace[e].qwArgs[0], rsTrace[e].qwArgs[1], rsTrace[e].qwArgs[2], rsTrace[e].qwArgs[3]
+            );
+        cl_dbg_out("%s", (*lppTraceBuffer)[count].szLine);
+        count++;
+    }
+    fastlock_release(&TraceLock);
+
+    return count;
+}
+
+/*
+ * SPI Function Implementation
+ */
+
+/*
+ * Function: WSPCleanup
+ *
+ * Description:
+ *    Decrement the startup count taken by WSPStartup.
+ *
+ * Parameters:
+ *    lpErrno  Receives WSANOTINITIALISED when no WSPStartup call is
+ *             outstanding.
+ *
+ * Returns NO_ERROR on success (or when the DLL is already detaching),
+ * SOCKET_ERROR otherwise.
+ */
+int WSPAPI
+WSPCleanup(
+    LPINT lpErrno
+    )
+{
+    int rc = SOCKET_ERROR;
+
+    //
+    // While detaching there is nothing to do. Return directly: the
+    // critical section has not been entered on this path, so it must
+    // not be left either.
+    //
+    if ( gDetached )
+        return NO_ERROR;
+
+    //
+    // Grab the DLL global critical section
+    //
+    EnterCriticalSection( &gCriticalSection );
+
+    // Verify WSPStartup has been called
+    if ( 0 == gStartupCount ) {
+        *lpErrno = WSANOTINITIALISED;
+        goto cleanup;
+    }
+
+    // Decrement the global entry count
+    gStartupCount--;
+
+    TRACE("StartupCount decremented to %d", gStartupCount);
+
+    if ( 0 == gStartupCount ) {
+        // Last reference dropped; no per-provider resources are
+        // released here at present.
+    }
+    rc = NO_ERROR;
+
+cleanup:
+    LeaveCriticalSection( &gCriticalSection );
+
+    return rc;
+}
+
+/*
+ * Function: WSPSocket
+ *
+ * Description:
+ *    Create an rsocket and wrap it in a Winsock socket handle. The rsocket
+ *    descriptor is stored as the handle's context value, which the other
+ *    WSP* entry points read back via WPUQuerySocketHandleContext.
+ *
+ *    Only AF_INET / SOCK_STREAM / IPPROTO_TCP with dwFlags == 0 is accepted.
+ *    lpProtocolInfo and g are not used.
+ *
+ * Returns the new socket handle, or INVALID_SOCKET with *lpErrno set.
+ */
+SOCKET WSPAPI
+WSPSocket(
+    int                 af,
+    int                 type,
+    int                 protocol,
+    LPWSAPROTOCOL_INFOW lpProtocolInfo,
+    GROUP               g,
+    DWORD               dwFlags,
+    LPINT               lpErrno
+    )
+{
+    int    rs;
+    SOCKET winSocket;
+
+    TRACE("af=%d type=%d protocol=%d flags=0x%X", af, type, protocol, dwFlags);
+
+    if (af != AF_INET) {
+        *lpErrno = WSAEAFNOSUPPORT;
+        return INVALID_SOCKET;
+    }
+
+    if (type != SOCK_STREAM) {
+        *lpErrno = WSAEPROTOTYPE;
+        return INVALID_SOCKET;
+    }
+
+    if (protocol != IPPROTO_TCP) {
+        *lpErrno = WSAEPROTONOSUPPORT;
+        return INVALID_SOCKET;
+    }
+
+    if (dwFlags != 0) {
+        *lpErrno = WSAEINVAL;
+        return INVALID_SOCKET;
+    }
+
+    //
+    // Create the underlying rsocket
+    //
+    rs = rsocket( af, type, protocol);
+    if ( (int)INVALID_SOCKET == rs ) {
+        *lpErrno = WSAGetLastError();
+        return INVALID_SOCKET;
+    }
+
+    winSocket = gMainUpCallTable.lpWPUCreateSocketHandle(
+                    gProtocolInfo.dwCatalogEntryId,
+                    rs,     // __in DWORD_PTR dwContext
+                    lpErrno
+                    );
+    if (INVALID_SOCKET == winSocket) {
+        // The upcall has already stored its error code in *lpErrno. Keep
+        // it: closing the rsocket below may change the thread's last
+        // error, which would hide the real cause.
+        rclose(rs);
+        return INVALID_SOCKET;
+    }
+
+    TRACE(" New socket handle = %d", winSocket);
+
+    return winSocket;
+}
+
+/*
+ * WSPBind: look up the rsocket stored as context of handle s and bind it
+ * with rbind(). The address is traced after the call; any family other
+ * than AF_INET is traced as if it were an IPv6 address.
+ *
+ * Returns the rbind() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPBind(
+    __in  SOCKET                 s,
+    __in  const struct sockaddr* name,
+    __in  int                    namelen,
+    __out LPINT                  lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR == status)
+        return status;
+
+    status = rbind((int)ctx, name, namelen);
+    if (SOCKET_ERROR == status)
+        *lpErrno = WSAGetLastError();
+
+    // The TRACE calls stay inline: the macro prefixes each line with the
+    // name of the enclosing function.
+    TRACE("Socket = %d:", s);
+    if (AF_INET == name->sa_family) {
+        const struct sockaddr_in *v4 = (struct sockaddr_in *)name;
+
+        TRACE(" Addr = %d.%d.%d.%d",
+            v4->sin_addr.S_un.S_un_b.s_b1,
+            v4->sin_addr.S_un.S_un_b.s_b2,
+            v4->sin_addr.S_un.S_un_b.s_b3,
+            v4->sin_addr.S_un.S_un_b.s_b4
+        );
+        TRACE(" Port = %d, Returning %d", v4->sin_port, status);
+    } else {
+        const struct sockaddr_in6 *v6 = (struct sockaddr_in6*)name;
+
+        TRACE(" Addr = %d:%d:%d:%d:%d:%d:%d:%d",
+            v6->sin6_addr.u.Word[0],
+            v6->sin6_addr.u.Word[1],
+            v6->sin6_addr.u.Word[2],
+            v6->sin6_addr.u.Word[3],
+            v6->sin6_addr.u.Word[4],
+            v6->sin6_addr.u.Word[5],
+            v6->sin6_addr.u.Word[6],
+            v6->sin6_addr.u.Word[7]
+        );
+        TRACE(" Port = %d , Returning %d", v6->sin6_port, status);
+    }
+
+    return status;
+}
+
+/*
+ * WSPListen: look up the rsocket stored as context of handle s and put
+ * it into listening state with rlisten().
+ *
+ * Returns the rlisten() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPListen(
+    __in  SOCKET s,
+    __in  int    backlog,
+    __out LPINT  lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR != status) {
+        status = rlisten((int)ctx, backlog);
+        if (SOCKET_ERROR == status) {
+            *lpErrno = WSAGetLastError();
+        }
+
+        TRACE("Socket = %d: Backlog=%d, Returning %d", s, backlog, status);
+    }
+
+    return status;
+}
+
+/*
+ * WSPConnect: look up the rsocket stored as context of handle s and
+ * connect it with rconnect(). Caller data and QoS arguments are not
+ * used; a non-NULL lpCalleeData gets its length set to 0. The target
+ * address is traced after the call; any family other than AF_INET is
+ * traced as if it were an IPv6 address.
+ *
+ * Returns the rconnect() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPConnect(
+    __in  SOCKET                 s,
+    __in  const struct sockaddr* name,
+    __in  int                    namelen,
+    __in  LPWSABUF               lpCallerData,
+    __out LPWSABUF               lpCalleeData,
+    __in  LPQOS                  lpSQOS,
+    __in  LPQOS                  lpGQOS,
+    __out LPINT                  lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR == status)
+        return status;
+
+    status = rconnect((int)ctx, name, namelen);
+    if (SOCKET_ERROR == status)
+        *lpErrno = WSAGetLastError();
+
+    // No connect data is ever returned to the caller.
+    if (NULL != lpCalleeData)
+        lpCalleeData->len = 0;
+
+    // The TRACE calls stay inline: the macro prefixes each line with the
+    // name of the enclosing function.
+    TRACE("Socket = %d:", s);
+    if (AF_INET == name->sa_family) {
+        const struct sockaddr_in *v4 = (struct sockaddr_in *)name;
+
+        TRACE(" Addr = %d.%d.%d.%d",
+            v4->sin_addr.S_un.S_un_b.s_b1,
+            v4->sin_addr.S_un.S_un_b.s_b2,
+            v4->sin_addr.S_un.S_un_b.s_b3,
+            v4->sin_addr.S_un.S_un_b.s_b4
+        );
+        TRACE(" Port = %d, Returning %d", v4->sin_port, status);
+    } else {
+        const struct sockaddr_in6 *v6 = (struct sockaddr_in6*)name;
+
+        TRACE(" Addr = %d:%d:%d:%d:%d:%d:%d:%d",
+            v6->sin6_addr.u.Word[0],
+            v6->sin6_addr.u.Word[1],
+            v6->sin6_addr.u.Word[2],
+            v6->sin6_addr.u.Word[3],
+            v6->sin6_addr.u.Word[4],
+            v6->sin6_addr.u.Word[5],
+            v6->sin6_addr.u.Word[6],
+            v6->sin6_addr.u.Word[7]
+        );
+        TRACE(" Port = %d , Returning %d", v6->sin6_port, status);
+    }
+
+    return status;
+}
+
+/*
+ * WSPShutdown: look up the rsocket stored as context of handle s and
+ * pass the request on to rshutdown().
+ *
+ * Returns the rshutdown() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPShutdown(
+    __in  SOCKET s,
+    __in  int    how,
+    __out LPINT  lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR != status) {
+        status = rshutdown((int)ctx, how);
+        if (SOCKET_ERROR == status) {
+            *lpErrno = WSAGetLastError();
+        }
+
+        TRACE("Socket = %d: how=%d, Returning %d", s, how, status);
+    }
+
+    return status;
+}
+
+/*
+ * WSPCloseSocket: close the rsocket stored as context of handle s with
+ * rclose(), then release the Winsock handle itself.
+ *
+ * Returns the result of WPUCloseSocketHandle, or SOCKET_ERROR if the
+ * handle context could not be queried.
+ * NOTE(review): a failing rclose() stores its error in *lpErrno, but the
+ * return value is then replaced by the handle-close result - confirm
+ * that this is intended.
+ */
+int WSPAPI
+WSPCloseSocket(
+    __in  SOCKET s,
+    __out LPINT  lpErrno
+)
+{
+    DWORD_PTR rs = INVALID_SOCKET;
+    int ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+    if (SOCKET_ERROR == ret)
+        return ret;
+
+    ret = rclose((int)rs);
+    if (SOCKET_ERROR == ret)
+        *lpErrno = WSAGetLastError();
+
+    ret = gMainUpCallTable.lpWPUCloseSocketHandle(s, lpErrno);
+
+    // s is an integer handle, so it must be traced with %d (as the other
+    // entry points do), not %s.
+    TRACE("Socket handle %d closed", s);
+
+    return ret;
+}
+
+/*
+ * WSPRecv: scatter-receive into lpBuffers by calling rrecv() once per
+ * buffer, in order, on the rsocket stored as context of handle s.
+ *
+ * The overlapped arguments (lpOverlapped, lpCompletionRoutine,
+ * lpThreadId) are not used; the call always completes synchronously.
+ *
+ * Receiving stops at the first buffer that is not filled completely
+ * (short read or end of stream), so the call never waits for more data
+ * after some has been delivered. An rrecv() failure is reported as
+ * SOCKET_ERROR only when nothing has been received yet; otherwise the
+ * bytes already taken from the stream are returned as a successful
+ * partial receive instead of being lost.
+ *
+ * Returns 0 with *lpNumberOfBytesRecvd set, or SOCKET_ERROR with
+ * *lpErrno set.
+ */
+int WSPAPI
+WSPRecv(
+    __in    SOCKET                             s,
+    __inout LPWSABUF                           lpBuffers,
+    __in    DWORD                              dwBufferCount,
+    __out   LPDWORD                            lpNumberOfBytesRecvd,
+    __inout LPDWORD                            lpFlags,
+    __in    LPWSAOVERLAPPED                    lpOverlapped,
+    __in    LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+    __in    LPWSATHREADID                      lpThreadId,
+    __out   LPINT                              lpErrno
+)
+{
+    DWORD_PTR rs = INVALID_SOCKET;
+    DWORD     i;
+    DWORD     dwNumberOfBytesRecvd = 0;
+    int       len = 0;
+    int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+    if (SOCKET_ERROR == ret)
+        return ret;
+
+    for (i = 0; i < dwBufferCount; i++) {
+        len = (int)rrecv((int)rs, lpBuffers[i].buf, lpBuffers[i].len, *lpFlags);
+        if (-1 == len) {
+            // Fail only if no data was consumed; otherwise hand back
+            // what has been received so far.
+            if (0 == dwNumberOfBytesRecvd) {
+                *lpErrno = WSAGetLastError();
+                ret = SOCKET_ERROR;
+            }
+            break;
+        }
+
+        dwNumberOfBytesRecvd += len;
+
+        // End of stream or short read: no more data is pending right now.
+        if (0 == len || (u_long)len < lpBuffers[i].len)
+            break;
+    }
+
+    *lpNumberOfBytesRecvd = dwNumberOfBytesRecvd;
+
+    return ret;
+}
+
+/*
+ * WSPSend: gather-send lpBuffers by calling rsend() once per buffer, in
+ * order, on the rsocket stored as context of handle s.
+ *
+ * The overlapped arguments (lpOverlapped, lpCompletionRoutine,
+ * lpThreadId) are not used; the call always completes synchronously.
+ *
+ * Sending stops at the first buffer that is only partly accepted, so
+ * that later buffers are never queued ahead of unsent bytes. An rsend()
+ * failure is reported as SOCKET_ERROR only when nothing has been sent
+ * yet; otherwise the byte count sent so far is returned as a successful
+ * partial send so the caller does not retransmit it.
+ *
+ * Returns 0 with *lpNumberOfBytesSent set, or SOCKET_ERROR with
+ * *lpErrno set.
+ */
+int WSPAPI
+WSPSend(
+    __in  SOCKET                             s,
+    __in  LPWSABUF                           lpBuffers,
+    __in  DWORD                              dwBufferCount,
+    __out LPDWORD                            lpNumberOfBytesSent,
+    __in  DWORD                              dwFlags,
+    __in  LPWSAOVERLAPPED                    lpOverlapped,
+    __in  LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+    __in  LPWSATHREADID                      lpThreadId,
+    __out LPINT                              lpErrno
+)
+{
+    DWORD_PTR rs = INVALID_SOCKET;
+    DWORD     i;
+    DWORD     dwNumberOfBytesSent = 0;
+    int       len;
+    int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+    if (SOCKET_ERROR == ret)
+        return ret;
+
+    for (i = 0; i < dwBufferCount; i++) {
+        len = (int)rsend((int)rs, lpBuffers[i].buf, lpBuffers[i].len, dwFlags);
+        if (-1 == len) {
+            // Fail only if nothing was sent; otherwise report the
+            // partial progress.
+            if (0 == dwNumberOfBytesSent) {
+                *lpErrno = WSAGetLastError();
+                ret = SOCKET_ERROR;
+            }
+            break;
+        }
+
+        dwNumberOfBytesSent += len;
+
+        // Short send: the rest of this buffer has to go out before any
+        // later buffer, so stop here.
+        if ((u_long)len < lpBuffers[i].len)
+            break;
+    }
+    *lpNumberOfBytesSent = dwNumberOfBytesSent;
+
+    return ret;
+}
+
+/*
+ * WSPGetSockOpt: look up the rsocket stored as context of handle s and
+ * read the option with rgetsockopt().
+ *
+ * Returns the rgetsockopt() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPGetSockOpt(
+    __in    SOCKET s,
+    __in    int    level,
+    __in    int    optname,
+    __out   char*  optval,
+    __inout LPINT  optlen,
+    __out   LPINT  lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR != status) {
+        status = rgetsockopt((int)ctx, level, optname, optval, optlen);
+        if (SOCKET_ERROR == status) {
+            *lpErrno = WSAGetLastError();
+        }
+    }
+
+    return status;
+}
+
+/*
+ * WSPSetSockOpt: look up the rsocket stored as context of handle s and
+ * set the option with rsetsockopt().
+ *
+ * Returns the rsetsockopt() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPSetSockOpt(
+    __in  SOCKET      s,
+    __in  int         level,
+    __in  int         optname,
+    __in  const char* optval,
+    __in  int         optlen,
+    __out LPINT       lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR != status) {
+        status = rsetsockopt((int)ctx, level, optname, optval, optlen);
+        if (SOCKET_ERROR == status) {
+            *lpErrno = WSAGetLastError();
+        }
+    }
+
+    return status;
+}
+
+/*
+ * WSPSelect: translate the Winsock handles in the three fd_sets into
+ * their rsocket context values, call rselect() on the translated sets
+ * and then prune the caller's sets according to the result.
+ *
+ * The nfds argument supplied by the caller is ignored; it is recomputed
+ * from the largest translated value.
+ *
+ * Returns the value computed as (rselect result - remaining count), or
+ * SOCKET_ERROR with *lpErrno set.
+ *
+ * NOTE(review): the write-back loops assume rselect() marks non-ready
+ * entries by zeroing them in place in the translated fd_array - confirm
+ * against the rselect() implementation.
+ * NOTE(review): each write-back loop stops as soon as 'ret' reaches 0,
+ * so entries after the last ready one are left untouched in the
+ * caller's set, and zeroed entries are not compacted - verify that this
+ * yields valid Winsock fd_sets.
+ */
+int WSPAPI
+WSPSelect(
+ __in int nfds,
+ __inout fd_set* readfds,
+ __inout fd_set* writefds,
+ __inout fd_set* exceptfds,
+ __in const struct timeval* timeout,
+ __out LPINT lpErrno
+)
+{
+ u_int i;
+ int ret;
+ fd_set rreadfds, rwritefds, rexceptfds;
+
+ FD_ZERO(&rreadfds);
+ FD_ZERO(&rwritefds);
+ FD_ZERO(&rexceptfds);
+
+ // Caller's nfds is discarded; track the largest translated value instead
+ nfds = 1;
+
+ // Translate read set: the context value is stored at the same index i
+ for (i = 0; readfds && i < readfds->fd_count; i++) {
+ if (readfds->fd_array[i]) {
+ ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(
+ readfds->fd_array[i],
+ &rreadfds.fd_array[i],
+ lpErrno
+ );
+ if (SOCKET_ERROR == ret)
+ return ret;
+
+ if (rreadfds.fd_array[i] > nfds)
+ nfds = (int)rreadfds.fd_array[i];
+
+ rreadfds.fd_count++;
+ }
+ }
+ // Translate write set
+ for (i = 0; writefds && i < writefds->fd_count; i++) {
+ if (writefds->fd_array[i]) {
+ ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(
+ writefds->fd_array[i],
+ &rwritefds.fd_array[i],
+ lpErrno
+ );
+ if (SOCKET_ERROR == ret)
+ return ret;
+
+ if (rwritefds.fd_array[i] > nfds)
+ nfds = (int)rwritefds.fd_array[i];
+
+ rwritefds.fd_count++;
+ }
+ }
+ // Translate exception set
+ for (i = 0; exceptfds && i < exceptfds->fd_count; i++) {
+ if (exceptfds->fd_array[i]) {
+ ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(
+ exceptfds->fd_array[i],
+ &rexceptfds.fd_array[i],
+ lpErrno
+ );
+ if (SOCKET_ERROR == ret)
+ return ret;
+
+ if (rexceptfds.fd_array[i] > nfds)
+ nfds = (int)rexceptfds.fd_array[i];
+
+ rexceptfds.fd_count++;
+ }
+ }
+
+ ret = rselect(
+ nfds + 1, // Max. valid rsocket descriptor + 1
+ readfds ? &rreadfds : NULL,
+ writefds ? &rwritefds : NULL,
+ exceptfds ? &rexceptfds : NULL,
+ (struct timeval*)timeout
+ );
+ if (SOCKET_ERROR == ret) {
+ *lpErrno = WSAGetLastError();
+ return ret;
+ }
+ // From here on nfds holds the rselect() result and ret counts down
+ nfds = ret;
+
+ // Write back: keep entries still nonzero in both sets, zero the others
+ for (i = 0; ret && readfds && i < rreadfds.fd_count; i++) {
+ if (rreadfds.fd_array[i] && readfds->fd_array[i]) {
+ ret--;
+ } else {
+ readfds->fd_array[i] = 0;
+ readfds->fd_count--;
+ }
+ }
+ for (i = 0; ret && writefds && i < rwritefds.fd_count; i++) {
+ if (rwritefds.fd_array[i] && writefds->fd_array[i]) {
+ ret--;
+ } else {
+ writefds->fd_array[i] = 0;
+ writefds->fd_count--;
+ }
+ }
+ for (i = 0; ret && exceptfds && i < rexceptfds.fd_count; i++) {
+ if (rexceptfds.fd_array[i] && exceptfds->fd_array[i]) {
+ ret--;
+ } else {
+ exceptfds->fd_array[i] = 0;
+ exceptfds->fd_count--;
+ }
+ }
+
+ // Number of entries that were matched in the loops above
+ return (nfds - ret);
+}
+
+/*
+ * WSPIoctl: handle socket I/O control requests.
+ *
+ *   SIO_RS_GET_STATUS  Copy the array returned by rsGetStatus() into
+ *                      lpvOutBuffer.
+ *   SIO_RS_GET_TRACE   Copy the array returned by rsGetTrace() into
+ *                      lpvOutBuffer.
+ *   FIONBIO, FIONREAD, SIOCATMARK
+ *                      Forwarded to rioctlsocket() with lpvInBuffer as
+ *                      the u_long argument.
+ *
+ * For the two SIO_RS_* codes a too small output buffer yields
+ * SOCKET_ERROR / WSA_IO_INCOMPLETE, but as much data as fits is still
+ * copied and reported in *lpcbBytesReturned. Any other control code, or
+ * a missing / undersized argument buffer for the forwarded codes, fails
+ * with WSAEINVAL. The overlapped arguments are not used.
+ *
+ * Returns 0 on success, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPIoctl(
+    __in  SOCKET                             s,
+    __in  DWORD                              dwIoControlCode,
+    __in  LPVOID                             lpvInBuffer,
+    __in  DWORD                              cbInBuffer,
+    __out LPVOID                             lpvOutBuffer,
+    __in  DWORD                              cbOutBuffer,
+    __out LPDWORD                            lpcbBytesReturned,
+    __in  LPWSAOVERLAPPED                    lpOverlapped,
+    __in  LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+    __in  LPWSATHREADID                      lpThreadId,
+    __out LPINT                              lpErrno
+)
+{
+    int       ret = 0;
+    DWORD_PTR rs = INVALID_SOCKET;
+    DWORD     dwCount;
+    LPVOID    lpResultBuffer = NULL;   // malloc'ed by rsGetStatus/rsGetTrace, freed below
+
+    *lpcbBytesReturned = 0;
+
+    ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+    if (SOCKET_ERROR == ret)
+        return ret;
+
+    switch (dwIoControlCode) {
+    case SIO_RS_GET_STATUS:
+        if (lpvOutBuffer) {
+            dwCount = rsGetStatus((LPRS_STATUS *)&lpResultBuffer);
+            if (lpResultBuffer) {
+                if (cbOutBuffer >= dwCount * sizeof(RS_STATUS)) {
+                    *lpcbBytesReturned = dwCount * sizeof(RS_STATUS);
+                } else {
+                    ret = SOCKET_ERROR;
+                    *lpErrno = WSA_IO_INCOMPLETE;
+                    *lpcbBytesReturned = cbOutBuffer; // Copy as much as possible anyway
+                }
+            }
+        }
+        break;
+    case SIO_RS_GET_TRACE:
+        if (lpvOutBuffer) {
+            dwCount = rsGetTrace((LPRS_TRACE_OUT *)&lpResultBuffer);
+            if (lpResultBuffer) {
+                if (cbOutBuffer >= dwCount * sizeof(RS_TRACE_OUT)) {
+                    *lpcbBytesReturned = dwCount * sizeof(RS_TRACE_OUT);
+                } else {
+                    ret = SOCKET_ERROR;
+                    *lpErrno = WSA_IO_INCOMPLETE;
+                    *lpcbBytesReturned = cbOutBuffer; // Copy as much as possible anyway
+                }
+            }
+        }
+        break;
+    case FIONBIO:
+    case FIONREAD:
+    case SIOCATMARK:
+        if (lpvInBuffer && cbInBuffer >= sizeof(u_long)) {
+            ret = rioctlsocket(
+                    (int)rs,
+                    dwIoControlCode,
+                    (u_long *)lpvInBuffer
+                    );
+            // Report the failure reason like the other r*() wrappers do.
+            if (SOCKET_ERROR == ret)
+                *lpErrno = WSAGetLastError();
+            break;
+        }
+        /* fall through: argument buffer missing or too small */
+    default:
+        ret = SOCKET_ERROR;
+        *lpErrno = WSAEINVAL;
+    }
+
+    if (lpResultBuffer) {
+        memcpy(lpvOutBuffer, lpResultBuffer, *lpcbBytesReturned);
+        free(lpResultBuffer);
+    }
+
+    return ret;
+}
+
+/*
+ * WSPAddressToString: convert a socket address into its string form by
+ * delegating to WSAAddressToStringW(). When lpProtocolInfo is given, its
+ * ProviderId must equal rsProviderGuid, otherwise the call fails with
+ * WSAEINVALIDPROVIDER.
+ *
+ * Returns the WSAAddressToStringW() result, or SOCKET_ERROR with
+ * *lpErrno set.
+ */
+int WSPAPI
+WSPAddressToString(
+    __in    LPSOCKADDR          lpsaAddress,
+    __in    DWORD               dwAddressLength,
+    __in    LPWSAPROTOCOL_INFOW lpProtocolInfo,
+    __out   LPWSTR              lpszAddressString,
+    __inout LPDWORD             lpdwAddressStringLength,
+    __out   LPINT               lpErrno
+)
+{
+    int status;
+
+    if (lpProtocolInfo &&
+        0 != memcmp(&lpProtocolInfo->ProviderId, &rsProviderGuid, sizeof(rsProviderGuid))) {
+        *lpErrno = WSAEINVALIDPROVIDER;
+        return SOCKET_ERROR;
+    }
+
+    status = WSAAddressToStringW(
+                lpsaAddress,
+                dwAddressLength,
+                NULL,
+                lpszAddressString,
+                lpdwAddressStringLength
+                );
+    if (SOCKET_ERROR == status) {
+        *lpErrno = WSAGetLastError();
+    }
+
+    return status;
+}
+
+/*
+ * WSPAsyncSelect: not implemented by this provider.
+ * Always fails with WSAEOPNOTSUPP; all other arguments are ignored.
+ */
+int WSPAPI
+WSPAsyncSelect(
+    __in  SOCKET       s,
+    __in  HWND         hWnd,
+    __in  unsigned int wMsg,
+    __in  long         lEvent,
+    __out LPINT        lpErrno
+)
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return SOCKET_ERROR;
+}
+
+/*
+ * WSPCancelBlockingCall: not implemented by this provider.
+ * Always fails with WSAEOPNOTSUPP.
+ */
+int WSPAPI
+WSPCancelBlockingCall(
+    __out LPINT lpErrno
+)
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return SOCKET_ERROR;
+}
+
+/*
+ * WSPDuplicateSocket: not implemented by this provider.
+ * Always fails with WSAEOPNOTSUPP; all other arguments are ignored.
+ */
+int WSPAPI
+WSPDuplicateSocket(
+    IN  SOCKET              s,
+    IN  DWORD               dwProcessId,
+    OUT LPWSAPROTOCOL_INFOW lpProtocolInfo,
+    OUT LPINT               lpErrno
+    )
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return SOCKET_ERROR;
+}
+
+/*
+ * WSPEnumNetworkEvents: not implemented by this provider.
+ * Always fails with WSAEOPNOTSUPP; all other arguments are ignored.
+ */
+int WSPAPI
+WSPEnumNetworkEvents(
+    IN  SOCKET             s,
+    IN  WSAEVENT           hEventObject,
+    OUT LPWSANETWORKEVENTS lpNetworkEvents,
+    OUT LPINT              lpErrno
+    )
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return SOCKET_ERROR;
+}
+
+/*
+ * WSPEventSelect: not implemented by this provider.
+ * Always fails with WSAEOPNOTSUPP; all other arguments are ignored.
+ */
+int WSPAPI
+WSPEventSelect(
+    __in  SOCKET   s,
+    __in  WSAEVENT hEventObject,
+    __in  long     lNetworkEvents,
+    __out LPINT    lpErrno
+)
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return SOCKET_ERROR;
+}
+
+/*
+ * WSPGetOverlappedResult: not implemented by this provider.
+ * Always returns FALSE with *lpErrno set to WSAEOPNOTSUPP; all other
+ * arguments are ignored.
+ */
+BOOL WSPAPI
+WSPGetOverlappedResult(
+    IN  SOCKET          s,
+    IN  LPWSAOVERLAPPED lpOverlapped,
+    OUT LPDWORD         lpcbTransfer,
+    IN  BOOL            fWait,
+    OUT LPDWORD         lpdwFlags,
+    OUT LPINT           lpErrno
+    )
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return FALSE;
+}
+
+/*
+ * WSPGetPeerName: look up the rsocket stored as context of handle s and
+ * fetch the peer address with rgetpeername().
+ *
+ * Returns the rgetpeername() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPGetPeerName(
+    __in    SOCKET           s,
+    __out   struct sockaddr* name,
+    __inout LPINT            namelen,
+    __out   LPINT            lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR != status) {
+        status = rgetpeername((int)ctx, name, namelen);
+        if (SOCKET_ERROR == status) {
+            *lpErrno = WSAGetLastError();
+        }
+    }
+
+    return status;
+}
+
+/*
+ * WSPGetSockName: look up the rsocket stored as context of handle s and
+ * fetch the local address with rgetsockname().
+ *
+ * Returns the rgetsockname() result, or SOCKET_ERROR with *lpErrno set.
+ */
+int WSPAPI
+WSPGetSockName(
+    __in    SOCKET           s,
+    __out   struct sockaddr* name,
+    __inout LPINT            namelen,
+    __out   LPINT            lpErrno
+)
+{
+    DWORD_PTR ctx = INVALID_SOCKET;
+    int       status;
+
+    status = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &ctx, lpErrno);
+    if (SOCKET_ERROR != status) {
+        status = rgetsockname((int)ctx, name, namelen);
+        if (SOCKET_ERROR == status) {
+            *lpErrno = WSAGetLastError();
+        }
+    }
+
+    return status;
+}
+
+/*
+ * WSPGetQOSByName: not implemented by this provider.
+ * Always returns FALSE with *lpErrno set to WSAEOPNOTSUPP; all other
+ * arguments are ignored.
+ */
+BOOL WSPAPI
+WSPGetQOSByName(
+    __in    SOCKET   s,
+    __inout LPWSABUF lpQOSName,
+    __out   LPQOS    lpQOS,
+    __out   LPINT    lpErrno
+)
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return FALSE;
+}
+
+/*
+ * WSPJoinLeaf: not implemented by this provider.
+ * Always returns INVALID_SOCKET with *lpErrno set to WSAEOPNOTSUPP; all
+ * other arguments are ignored.
+ */
+SOCKET WSPAPI
+WSPJoinLeaf(
+    __in  SOCKET                 s,
+    __in  const struct sockaddr* name,
+    __in  int                    namelen,
+    __in  LPWSABUF               lpCallerData,
+    __out LPWSABUF               lpCalleeData,
+    __in  LPQOS                  lpSQOS,
+    __in  LPQOS                  lpGQOS,
+    __in  DWORD                  dwFlags,
+    __out LPINT                  lpErrno
+)
+{
+    *lpErrno = WSAEOPNOTSUPP;
+
+    return INVALID_SOCKET;
+}
+
+/*
+ * WSPRecvDisconnect: implemented as WSPShutdown(s, SD_RECEIVE).
+ * lpInboundDisconnectData is ignored.
+ */
+int WSPAPI
+WSPRecvDisconnect(
+    __in  SOCKET   s,
+    __out LPWSABUF lpInboundDisconnectData,
+    __out LPINT    lpErrno
+)
+{
+    const int status = WSPShutdown(s, SD_RECEIVE, lpErrno);
+
+    return status;
+}
+
+/*
+ * WSPRecvFrom: scatter-receive into lpBuffers by calling rrecvfrom()
+ * once per buffer, in order, on the rsocket stored as context of
+ * handle s. lpFrom / lpFromlen are passed to every rrecvfrom() call.
+ *
+ * The overlapped arguments (lpOverlapped, lpCompletionRoutine,
+ * lpThreadId) are not used; the call always completes synchronously.
+ *
+ * Receiving stops at the first buffer that is not filled completely
+ * (short read or end of stream), so the call never waits for more data
+ * after some has been delivered. An rrecvfrom() failure is reported as
+ * SOCKET_ERROR only when nothing has been received yet; otherwise the
+ * bytes already taken from the stream are returned as a successful
+ * partial receive instead of being lost.
+ *
+ * Returns 0 with *lpNumberOfBytesRecvd set, or SOCKET_ERROR with
+ * *lpErrno set.
+ */
+int WSPAPI
+WSPRecvFrom(
+    __in    SOCKET                             s,
+    __inout LPWSABUF                           lpBuffers,
+    __in    DWORD                              dwBufferCount,
+    __out   LPDWORD                            lpNumberOfBytesRecvd,
+    __inout LPDWORD                            lpFlags,
+    __out   struct sockaddr*                   lpFrom,
+    __inout LPINT                              lpFromlen,
+    __in    LPWSAOVERLAPPED                    lpOverlapped,
+    __in    LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+    __in    LPWSATHREADID                      lpThreadId,
+    __inout LPINT                              lpErrno
+)
+{
+    DWORD_PTR rs = INVALID_SOCKET;
+    DWORD     i;
+    DWORD     dwNumberOfBytesRecvd = 0;
+    int       len = 0;
+    int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+    if (SOCKET_ERROR == ret)
+        return ret;
+
+    for (i = 0; i < dwBufferCount; i++) {
+        len = (int)rrecvfrom(
+                    (int)rs,
+                    lpBuffers[i].buf,
+                    lpBuffers[i].len,
+                    *lpFlags,
+                    lpFrom,
+                    lpFromlen
+                    );
+        if (-1 == len) {
+            // Fail only if no data was consumed; otherwise hand back
+            // what has been received so far.
+            if (0 == dwNumberOfBytesRecvd) {
+                *lpErrno = WSAGetLastError();
+                ret = SOCKET_ERROR;
+            }
+            break;
+        }
+
+        dwNumberOfBytesRecvd += len;
+
+        // End of stream or short read: no more data is pending right now.
+        if (0 == len || (u_long)len < lpBuffers[i].len)
+            break;
+    }
+
+    *lpNumberOfBytesRecvd = dwNumberOfBytesRecvd;
+
+    return ret;
+}
+
+/*
+ * WSPSendDisconnect: implemented as WSPShutdown(s, SD_SEND).
+ * lpOutboundDisconnectData is ignored.
+ */
+int WSPAPI
+WSPSendDisconnect(
+    __in  SOCKET   s,
+    __in  LPWSABUF lpOutboundDisconnectData,
+    __out LPINT    lpErrno
+)
+{
+    const int status = WSPShutdown(s, SD_SEND, lpErrno);
+
+    return status;
+}
+
+/*
+ * WSPSendTo: gather-send lpBuffers by calling rsendto() once per buffer,
+ * in order, on the rsocket stored as context of handle s. lpTo / iTolen
+ * are passed to every rsendto() call.
+ *
+ * The overlapped arguments (lpOverlapped, lpCompletionRoutine,
+ * lpThreadId) are not used; the call always completes synchronously.
+ *
+ * Sending stops at the first buffer that is only partly accepted, so
+ * that later buffers are never queued ahead of unsent bytes. An
+ * rsendto() failure is reported as SOCKET_ERROR only when nothing has
+ * been sent yet; otherwise the byte count sent so far is returned as a
+ * successful partial send so the caller does not retransmit it.
+ *
+ * Returns 0 with *lpNumberOfBytesSent set, or SOCKET_ERROR with
+ * *lpErrno set.
+ */
+int WSPAPI
+WSPSendTo(
+    __in  SOCKET                             s,
+    __in  LPWSABUF                           lpBuffers,
+    __in  DWORD                              dwBufferCount,
+    __out LPDWORD                            lpNumberOfBytesSent,
+    __in  DWORD                              dwFlags,
+    __in  const struct sockaddr*             lpTo,
+    __in  int                                iTolen,
+    __in  LPWSAOVERLAPPED                    lpOverlapped,
+    __in  LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine,
+    __in  LPWSATHREADID                      lpThreadId,
+    __out LPINT                              lpErrno
+)
+{
+    DWORD_PTR rs = INVALID_SOCKET;
+    DWORD     i;
+    DWORD     dwNumberOfBytesSent = 0;
+    int       len;
+    int       ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(s, &rs, lpErrno);
+
+    if (SOCKET_ERROR == ret)
+        return ret;
+
+    for (i = 0; i < dwBufferCount; i++) {
+        len = (int)rsendto(
+                    (int)rs,
+                    lpBuffers[i].buf,
+                    lpBuffers[i].len,
+                    dwFlags,
+                    lpTo,
+                    iTolen
+                    );
+        if (-1 == len) {
+            // Fail only if nothing was sent; otherwise report the
+            // partial progress.
+            if (0 == dwNumberOfBytesSent) {
+                *lpErrno = WSAGetLastError();
+                ret = SOCKET_ERROR;
+            }
+            break;
+        }
+
+        dwNumberOfBytesSent += len;
+
+        // Short send: the rest of this buffer has to go out before any
+        // later buffer, so stop here.
+        if ((u_long)len < lpBuffers[i].len)
+            break;
+    }
+    *lpNumberOfBytesSent = dwNumberOfBytesSent;
+
+    return ret;
+}
+
+int WSPAPI
+WSPStringToAddress( // Converts a textual IPv4/IPv6 address by delegating to WSAStringToAddressW()
+ __in LPWSTR AddressString,
+ __in INT AddressFamily,
+ __in LPWSAPROTOCOL_INFO lpProtocolInfo, // NOTE(review): TCHAR-dependent type, yet WSPStartup casts this function to LPWSPSTRINGTOADDRESS - confirm
+ __out LPSOCKADDR lpAddress,
+ __inout LPINT lpAddressLength,
+ __out LPINT lpErrno
+)
+{
+ int ret = 0;
+
+ if (AF_INET != AddressFamily && AddressFamily != AF_INET6) { // only IPv4 and IPv6 are accepted
+ *lpErrno = WSAEAFNOSUPPORT;
+ return SOCKET_ERROR;
+ }
+
+ if (lpProtocolInfo) { // optional: when given, its ProviderId must equal rsProviderGuid
+ if (0 != memcmp(&lpProtocolInfo->ProviderId, &rsProviderGuid, sizeof(rsProviderGuid))) {
+ *lpErrno = WSAEINVALIDPROVIDER;
+ return SOCKET_ERROR;
+ }
+ }
+
+ if (SOCKET_ERROR == (ret = WSAStringToAddressW(
+ AddressString,
+ AddressFamily,
+ NULL, // the caller's protocol info is not passed on
+ lpAddress,
+ lpAddressLength
+ )))
+ *lpErrno = WSAGetLastError();
+
+ return ret; // result of WSAStringToAddressW(); *lpErrno is set only when it is SOCKET_ERROR
+}
+
+/*
+ * Function: WSPStartup
+ *
+ * Description: initializes the base provider.  Versions below 2.2 are rejected with
+ * WSAVERNOTSUPPORTED; otherwise *lpWSPData and *lpProcTable are filled in and NO_ERROR is returned.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__declspec(dllexport)
+__checkReturn
+int
+WSPAPI
+WSPStartup(
+    __in WORD wVersion,
+    __in LPWSPDATA lpWSPData,
+    __in LPWSAPROTOCOL_INFOW lpProtocolInfo,
+    __in WSPUPCALLTABLE UpCallTable,
+    __out LPWSPPROC_TABLE lpProcTable
+    )
+{
+    static WSPDATA gWSPData;
+    static WSPPROC_TABLE gProcTable;
+    int Error = NO_ERROR,
+        rc;
+
+TRACE("Requested version = %d.%d", LOBYTE(wVersion), HIBYTE(wVersion));
+
+    /* Make sure that the version requested is >= 2.2. The low byte is the
+       major version and the high byte is the minor version. */
+    if( (LOBYTE(wVersion) < 2) || ((LOBYTE(wVersion) == 2) && (HIBYTE(wVersion) < 2)) )
+        return WSAVERNOTSUPPORTED;
+
+    EnterCriticalSection( &gCriticalSection );
+
+    // The first call fills in the WSPDATA description, the entry point table
+    // and the saved protocol info; later calls only bump the startup count.
+    if ( 0 == gStartupCount )
+    {
+TRACE("Called 1st time => Initializing ProtocolInfo...");
+        /* Save the global WSPData; wcscpy_s expects the destination size in characters, not bytes */
+        gWSPData.wVersion = MAKEWORD(2, 2);
+        gWSPData.wHighVersion = MAKEWORD(2, 2);
+        wcscpy_s( gWSPData.szDescription, sizeof(gWSPData.szDescription) / sizeof(gWSPData.szDescription[0]), Description );
+
+        /* provide Service provider's entry points in proc table */
+        ZeroMemory( &gProcTable, sizeof(gProcTable) );
+        gProcTable.lpWSPSocket = WSPSocket;
+        gProcTable.lpWSPBind = WSPBind;
+        gProcTable.lpWSPListen = WSPListen;
+        gProcTable.lpWSPAccept = WSPAccept;
+        gProcTable.lpWSPConnect = WSPConnect;
+        gProcTable.lpWSPShutdown = WSPShutdown;
+        gProcTable.lpWSPCloseSocket = WSPCloseSocket;
+        gProcTable.lpWSPRecv = WSPRecv;
+        gProcTable.lpWSPSend = WSPSend;
+        gProcTable.lpWSPGetSockOpt = WSPGetSockOpt;
+        gProcTable.lpWSPSetSockOpt = WSPSetSockOpt;
+        gProcTable.lpWSPSelect = WSPSelect;
+        gProcTable.lpWSPIoctl = WSPIoctl;
+        gProcTable.lpWSPCleanup = WSPCleanup;
+// Additional functions required for (base provider's) WSPStartup:
+        gProcTable.lpWSPAddressToString = WSPAddressToString;
+        gProcTable.lpWSPAsyncSelect = WSPAsyncSelect;
+        gProcTable.lpWSPCancelBlockingCall = WSPCancelBlockingCall;
+        gProcTable.lpWSPDuplicateSocket = WSPDuplicateSocket;
+        gProcTable.lpWSPEnumNetworkEvents = WSPEnumNetworkEvents;
+        gProcTable.lpWSPEventSelect = WSPEventSelect;
+        gProcTable.lpWSPGetOverlappedResult = WSPGetOverlappedResult;
+        gProcTable.lpWSPGetPeerName = WSPGetPeerName;
+        gProcTable.lpWSPGetSockName = WSPGetSockName;
+        gProcTable.lpWSPGetQOSByName = WSPGetQOSByName;
+        gProcTable.lpWSPJoinLeaf = WSPJoinLeaf;
+        gProcTable.lpWSPRecvDisconnect = WSPRecvDisconnect;
+        gProcTable.lpWSPRecvFrom = WSPRecvFrom;
+        gProcTable.lpWSPSendDisconnect = WSPSendDisconnect;
+        gProcTable.lpWSPSendTo = WSPSendTo;
+        gProcTable.lpWSPStringToAddress = (LPWSPSTRINGTOADDRESS)WSPStringToAddress;
+
+        gProtocolInfo = *lpProtocolInfo;
+    }
+    gStartupCount++;
+TRACE("StartupCount incremented to %d", gStartupCount);
+
+    LeaveCriticalSection( &gCriticalSection );
+
+    /* Set the return parameters */
+    *lpWSPData = gWSPData;
+    *lpProcTable = gProcTable;
+
+    /* store the upcall function table */
+    gMainUpCallTable = UpCallTable;
+
+    return Error;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD dwReason, LPVOID lpReserved)
{
UNREFERENCED_PARAMETER(hInstance);
@@ -44,10 +1195,28 @@
if (heap == NULL) {
return FALSE;
}
- InitializeCriticalSection(&lock);
+ fastlock_init(&lock);
+ fastlock_init(&mut);
+ fastlock_init(&TraceLock);
+
+ //
+ // Initialize some critical section objects
+ //
+ __try
+ {
+ InitializeCriticalSection( &gCriticalSection );
+ }
+ __except( EXCEPTION_EXECUTE_HANDLER )
+ {
+ goto cleanup;
+ }
break;
case DLL_PROCESS_DETACH:
- DeleteCriticalSection(&lock);
+ gDetached = TRUE;
+ DeleteCriticalSection( &gCriticalSection );
+ fastlock_destroy(&mut);
+ fastlock_destroy(&lock);
+ fastlock_destroy(&TraceLock);
HeapDestroy(heap);
break;
default:
@@ -55,4 +1224,8 @@
}
return TRUE;
+
+cleanup:
+
+ return FALSE;
}
Index: ulp/librdmacm/src/indexer.cpp
===================================================================
--- ulp/librdmacm/src/indexer.cpp (revision 0)
+++ ulp/librdmacm/src/indexer.cpp (working copy)
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2011 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <windows.h>
+
+#include <sys/types.h>
+#include <stdlib.h>
+
+#include "indexer.h"
+#include "cma.h"
+
+/*
+ * Indexer - to find a structure given an index
+ *
+ * We store pointers using a double lookup and return an index to the
+ * user which is then used to retrieve the pointer. The upper bits of
+ * the index are themselves an index into an array of memory allocations.
+ * The lower bits specify the offset into the allocated memory where
+ * the pointer is stored.
+ *
+ * This allows us to adjust the number of pointers stored by the index
+ * list without taking a lock during data lookups.
+ */
+
+static int idx_grow(struct indexer *idx) /* adds one page of IDX_ENTRY_SIZE slots; returns the first usable new index, or -1 with errno = ENOMEM */
+{
+ union idx_entry *entry;
+ int i, start_index;
+
+ if (idx->size >= IDX_ARRAY_SIZE) /* every page is already allocated */
+ goto nomem;
+
+ idx->array[idx->size] = (union idx_entry *)calloc(IDX_ENTRY_SIZE, sizeof(union idx_entry));
+ if (!idx->array[idx->size])
+ goto nomem;
+
+ entry = idx->array[idx->size];
+ start_index = idx->size << IDX_ENTRY_BITS; /* index of the new page's first slot */
+ entry[IDX_ENTRY_SIZE - 1].next = idx->free_list; /* last new slot chains to the previous free list */
+
+ for (i = IDX_ENTRY_SIZE - 2; i >= 0; i--)
+ entry[i].next = start_index + i + 1; /* every other slot chains to its successor */
+
+ /* Index 0 is reserved */
+ if (start_index == 0)
+ start_index++;
+ idx->free_list = start_index; /* the new slots become the head of the free list */
+ idx->size++;
+ return start_index;
+
+nomem:
+ errno = ENOMEM;
+ return -1;
+}
+
+int idx_insert(struct indexer *idx, void *item)
+{
+    /* Pop the free-list head (0 means the list is empty, so grow first) and store item there. */
+    union idx_entry *slot;
+    int index = idx->free_list;
+    if (index == 0) {
+        index = idx_grow(idx);
+        if (index <= 0)
+            return index;
+    }
+    slot = &idx->array[idx_array_index(index)][idx_entry_index(index)];
+    idx->free_list = slot->next;
+    slot->item = item;
+    return index;
+}
+
+void *idx_remove(struct indexer *idx, int index)
+{
+    /* Hand back the pointer stored at index and push the slot onto the free list. */
+    union idx_entry *slot = &idx->array[idx_array_index(index)][idx_entry_index(index)];
+    void *removed = slot->item;
+
+    /* item and next share storage, so the pointer had to be read first */
+    slot->next = idx->free_list;
+    idx->free_list = index;
+    return removed;
+}
+
+void idx_replace(struct indexer *idx, int index, void *item)
+{
+    /* Overwrite the pointer stored at index; the free list is not touched. */
+    union idx_entry *page = idx->array[idx_array_index(index)];
+
+    page[idx_entry_index(index)].item = item;
+}
+
+
+static int idm_grow(struct index_map *idm, int index)
+{
+    /* Allocate the zeroed page of IDX_ENTRY_SIZE pointers that covers index.
+     * Returns index, or -1 with errno = ENOMEM when calloc fails. */
+    void **page = (void **)calloc(IDX_ENTRY_SIZE, sizeof(void *));
+
+    idm->array[idx_array_index(index)] = page;
+    if (page)
+        return index;
+    errno = ENOMEM;
+    return -1;
+}
+
+int idm_set(struct index_map *idm, int index, void *item)
+{
+    /* Store item under index, allocating its page on first use.  Returns index,
+     * or -1 with errno = ENOMEM if index is out of range or allocation fails. */
+    void **entry;
+
+    if (index < 0 || index > IDX_MAX_INDEX) { /* a negative index would address memory before array[] */
+        errno = ENOMEM;
+        return -1;
+    }
+
+    if (!idm->array[idx_array_index(index)] && idm_grow(idm, index) < 0)
+        return -1;
+
+    entry = idm->array[idx_array_index(index)];
+    entry[idx_entry_index(index)] = item;
+    return index;
+}
+
+void *idm_clear(struct index_map *idm, int index)
+{
+    /* Locate the slot for this index inside its page; the page must already exist. */
+    void **slot = &idm->array[idx_array_index(index)][idx_entry_index(index)];
+    void *previous = *slot;
+
+    /* Forget the stored pointer and hand it back; the page itself stays allocated. */
+    *slot = NULL;
+    return previous;
+}
Index: ulp/librdmacm/src/indexer.h
===================================================================
--- ulp/librdmacm/src/indexer.h (revision 0)
+++ ulp/librdmacm/src/indexer.h (working copy)
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2011 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/types.h>
+
+/*
+ * Indexer - to find a structure given an index. Synchronization
+ * must be provided by the caller. Caller must initialize the
+ * indexer by setting free_list and size to 0.
+ */
+
+union idx_entry {
+ void *item; /* the stored pointer while the slot is in use */
+ int next; /* index of the next free slot while the slot is on the free list */
+};
+
+#define IDX_INDEX_BITS 16 /* total number of bits in an index */
+#define IDX_ENTRY_BITS 10 /* low bits: slot within one allocation */
+#define IDX_ENTRY_SIZE (1 << IDX_ENTRY_BITS) /* slots per allocation (1024) */
+#define IDX_ARRAY_SIZE (1 << (IDX_INDEX_BITS - IDX_ENTRY_BITS)) /* number of allocations (64) */
+#define IDX_MAX_INDEX ((1 << IDX_INDEX_BITS) - 1) /* largest valid index (65535) */
+
+struct indexer
+{
+ union idx_entry *array[IDX_ARRAY_SIZE]; /* pages of IDX_ENTRY_SIZE entries, allocated by idx_grow() */
+ int free_list; /* index of the first free slot; 0 when the list is empty */
+ int size; /* number of pages allocated so far */
+};
+
+#define idx_array_index(index) ((index) >> IDX_ENTRY_BITS) /* which allocation page; argument parenthesized so expressions expand safely */
+#define idx_entry_index(index) ((index) & (IDX_ENTRY_SIZE - 1)) /* slot within that page */
+
+int idx_insert(struct indexer *idx, void *item);
+void *idx_remove(struct indexer *idx, int index);
+void idx_replace(struct indexer *idx, int index, void *item);
+
+static inline void *idx_at(struct indexer *idx, int index)
+{
+    return idx->array[idx_array_index(index)][idx_entry_index(index)].item; /* unchecked: the page must exist */
+}
+
+/*
+ * Index map - associates a structure with an index. Synchronization
+ * must be provided by the caller. Caller must initialize the
+ * index map by setting it to 0.
+ */
+
+struct index_map
+{
+ void **array[IDX_ARRAY_SIZE]; /* pages of IDX_ENTRY_SIZE pointers; a page stays NULL until idm_set() first needs it */
+};
+
+int idm_set(struct index_map *idm, int index, void *item);
+void *idm_clear(struct index_map *idm, int index);
+
+static inline void *idm_at(struct index_map *idm, int index)
+{
+    /* Unchecked lookup: the page holding index must already be allocated;
+     * idm_lookup() in this header is the variant that validates first. */
+    return idm->array[idx_array_index(index)][idx_entry_index(index)];
+}
+
+static inline void *idm_lookup(struct index_map *idm, int index)
+{
+    /* NULL for any index outside 0..IDX_MAX_INDEX or on a page that was never allocated */
+    return (index >= 0 && index <= IDX_MAX_INDEX && idm->array[idx_array_index(index)]) ? idm_at(idm, index) : NULL;
+}
Index: ulp/librdmacm/src/openib_osd.h
===================================================================
--- ulp/librdmacm/src/openib_osd.h (revision 0)
+++ ulp/librdmacm/src/openib_osd.h (working copy)
@@ -0,0 +1,47 @@
+#ifndef OPENIB_OSD_H
+#define OPENIB_OSD_H
+
+#if defined(FD_SETSIZE) && FD_SETSIZE != 1024 /* drop any earlier, different setting */
+#undef FD_SETSIZE
+#endif
+#define FD_SETSIZE 1024 /* Set before including winsock2 - see select help */
+
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <io.h>
+#include <fcntl.h>
+
+#define container_of CONTAINING_RECORD
+#define offsetof FIELD_OFFSET
+#define ssize_t SSIZE_T
+
+#define __thread /* expands to nothing, so variables declared with it are not thread-local here - TODO confirm this is intended */
+
+#define ntohll _byteswap_uint64 /* unconditional byte swap in both directions */
+#define htonll _byteswap_uint64
+
+#define SHUT_RD SD_RECEIVE
+#define SHUT_WR SD_SEND
+#define SHUT_RDWR SD_BOTH
+
+#define O_NONBLOCK 0x4000
+
+/* allow casting to WSABUF */
+struct iovec
+{
+ u_long iov_len; /* length comes first, then the pointer, to match the WSABUF member order */
+ char FAR* iov_base;
+};
+
+struct msghdr
+{
+ void *msg_name; // optional address
+ socklen_t msg_namelen; // size of address
+ struct iovec *msg_iov; // scatter/gather array
+ int msg_iovlen; // members in msg_iov
+ void *msg_control; // ancillary data
+ socklen_t msg_controllen; // ancillary data buffer len
+ int msg_flags; // flags on received message
+};
+
+#endif // OPENIB_OSD_H
Index: ulp/librdmacm/src/rsocket.cpp
===================================================================
--- ulp/librdmacm/src/rsocket.cpp (revision 0)
+++ ulp/librdmacm/src/rsocket.cpp (working copy)
@@ -0,0 +1,2334 @@
+/*
+ * Copyright (c) 2008-2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 Oce Printing Systems GmbH. All rights reserved.
+ *
+ * This software is available to you under the BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * - Neither the name Oce Printing Systems GmbH nor the names
+ * of the authors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
+ * OR CONTRIBUTOR OR COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <sys/types.h>
+
+#include <windows.h>
+#include "openib_osd.h"
+#include <sys/time.h>
+#include <stdarg.h>
+#include <netdb.h>
+#include <unistd.h>
+#include <_fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <_errno.h>
+#include <complib/cl_byteswap.h>
+#include <rdma/rdma_cma.h>
+#include <rdma/rdma_verbs.h>
+#include <rdma/rwinsock.h>
+#include <rdma/rsocket.h>
+#include "cma.h"
+#include "indexer.h"
+#include "../../../etc/user/gtod.c" // gettimeofday()
+
+#define RS_OLAP_START_SIZE 2048
+#define RS_MAX_TRANSFER 65536
+#define RS_MAX_BACKLOG 256 /* cap applied to the rlisten() backlog */
+#define RS_QP_MAX_SIZE 0xFFFE /* upper bound for the send and receive queue sizes */
+#define RS_QP_CTRL_SIZE 4 /* initial ctrl_avail of a freshly allocated rsocket */
+#define RS_CONN_RETRIES 6 /* retry limit after a timed-out address or route resolution */
+#define RS_SGL_SIZE 2 /* number of entries in rsocket.target_sgl */
+
+static struct index_map idm; /* maps a socket index to its struct rsocket */
+static uint16_t def_inline = 64; /* the def_* values and polling_time may be overridden by rs_configure() */
+static uint16_t def_sqsize = 384;
+static uint16_t def_rqsize = 384;
+static uint32_t def_mem = (1 << 17);
+static uint32_t def_wmem = (1 << 17);
+static uint32_t polling_time = 10;
+
+extern fastlock_t mut; /* taken around rs_configure() and idm updates */
+extern BOOL gDetached; // Indicates if process is detaching from DLL
+extern CRITICAL_SECTION gCriticalSection; // Critical section guarding the provider startup state
+extern WSPUPCALLTABLE gMainUpCallTable; // Upcall functions given to us by Winsock
+extern WSAPROTOCOL_INFOW gProtocolInfo;
+
+/*
+ * Immediate data format is determined by the upper bits
+ * bit 31: message type, 0 - data, 1 - control
+ * bit 30: buffers updated, 0 - target, 1 - direct-receive
+ * bit 29: more data, 0 - end of transfer, 1 - more data available
+ *
+ * for data transfers:
+ * bits [28:0]: bytes transferred, 0 = 1 GB
+ * for control messages:
+ * bits [28:0]: receive credits granted
+ */
+
+enum { /* values 0..7: the three upper immediate-data bits that rs_msg_set() places at bits 31:29 */
+ RS_OP_DATA, /* 0 */
+ RS_OP_RSVD_DATA_MORE, /* 1 */
+ RS_OP_RSVD_DRA, /* 2 */
+ RS_OP_RSVD_DRA_MORE, /* 3 */
+ RS_OP_SGL, /* 4 */
+ RS_OP_RSVD, /* 5 */
+ RS_OP_RSVD_DRA_SGL, /* 6 */
+ RS_OP_CTRL /* 7 */
+};
+#define rs_msg_set(op, data) (((uint32_t) (op) << 29) | (uint32_t) (data)) /* shift as unsigned: ops 4..7 reach bit 31 */
+#define rs_msg_op(imm_data) ((imm_data) >> 29) /* upper three bits */
+#define rs_msg_data(imm_data) ((imm_data) & 0x1FFFFFFF) /* lower 29 bits */
+
+enum { /* control codes; presumably the data part of an RS_OP_CTRL message - TODO confirm against the senders */
+ RS_CTRL_DISCONNECT, /* 0 */
+ RS_CTRL_SHUTDOWN /* 1 */
+};
+
+struct rs_msg { /* element type of the rsocket.rmsg array */
+ uint32_t op; /* presumably an RS_OP_* value as extracted by rs_msg_op() - TODO confirm */
+ uint32_t data; /* presumably the value extracted by rs_msg_data() - TODO confirm */
+};
+
+struct rs_sge { /* describes one remote buffer: address, key and length */
+ uint64_t addr;
+ uint32_t key; /* filled from an ibv_mr rkey in rs_set_conn_data() */
+ uint32_t length;
+};
+
+#define RS_MIN_INLINE (sizeof(struct rs_sge)) /* lower bound rs_configure() enforces on def_inline */
+#define rs_host_is_net() (1 == htonl(1)) /* true when htonl() leaves the value unchanged */
+#define RS_CONN_FLAG_NET 1 /* rs_conn_data.flags bit set when rs_host_is_net() holds for the sender */
+
+struct rs_conn_data { /* private data attached to the connection parameters by rs_set_conn_data() */
+ uint8_t version; /* rs_set_conn_data() writes 1; rs_do_connect() rejects any other value */
+ uint8_t flags; /* RS_CONN_FLAG_NET or 0 */
+ uint16_t credits; /* sender's rq_size, stored via htons() */
+ uint32_t reserved2;
+ struct rs_sge target_sgl; /* describes the sender's target_sgl array */
+ struct rs_sge data_buf; /* describes the first half of the sender's rbuf */
+};
+
+#define RS_RECV_WR_ID (~((uint64_t) 0)) /* all-ones wr_id that rs_post_recv() puts on every receive request */
+
+/*
+ * rsocket states are ordered as passive, connecting, connected, disconnected.
+ */
+enum rs_state {
+ rs_init, /* 0: value of a freshly calloc'ed rsocket */
+ rs_bound = 0x0001,
+ rs_listening = 0x0002,
+ rs_opening = 0x0004, /* bit shared by the four in-progress states below */
+ rs_resolving_addr = rs_opening | 0x0010,
+ rs_resolving_route = rs_opening | 0x0020,
+ rs_connecting = rs_opening | 0x0040,
+ rs_accepting = rs_opening | 0x0080,
+ rs_connected = 0x0100,
+ rs_connect_wr = 0x0200,
+ rs_connect_rd = 0x0400,
+ rs_connect_rdwr = rs_connected | rs_connect_rd | rs_connect_wr, /* state set once a connect or accept completes */
+ rs_connect_error = 0x0800, /* set by rs_do_connect() on a hard failure */
+ rs_disconnected = 0x1000,
+ rs_error = 0x2000,
+};
+
+#define RS_OPT_SWAP_SGL 1 /* rsocket.opts value set when the peer's RS_CONN_FLAG_NET differs from ours */
+
+struct rsocket { /* per-socket state; allocated by rs_alloc() and released by rs_free() */
+ struct rdma_cm_id *cm_id;
+ fastlock_t slock;
+ fastlock_t rlock;
+ fastlock_t cq_lock;
+ fastlock_t cq_wait_lock;
+
+ int opts; /* RS_OPT_* */
+ long fd_flags; /* tested against O_NONBLOCK */
+ uint64_t so_opts;
+ uint64_t tcp_opts;
+ uint64_t ipv6_opts;
+ int state; /* enum rs_state value */
+ int cq_armed;
+ int retries; /* resolution retry counter; also scales the resolve timeout */
+ int err; /* errno saved when a connect attempt fails */
+ int index; /* slot in idm, -1 while not registered */
+ int ctrl_avail;
+ int sqe_avail; /* starts as sq_size - ctrl_avail */
+ int sbuf_bytes_avail; /* starts as sbuf_size */
+ uint16_t sseq_no;
+ uint16_t sseq_comp; /* initialised from the peer's credits */
+ uint16_t sq_size;
+ uint16_t sq_inline;
+
+ uint16_t rq_size;
+ uint16_t rseq_no;
+ uint16_t rseq_comp; /* starts as rq_size / 2 */
+ int rbuf_bytes_avail; /* starts as rbuf_size / 2 */
+ int rbuf_free_offset; /* starts as rbuf_size / 2 */
+ int rbuf_offset;
+ int rmsg_head;
+ int rmsg_tail;
+ struct rs_msg *rmsg; /* rq_size + 1 entries */
+
+ int remote_sge;
+ struct rs_sge remote_sgl; /* peer's target_sgl descriptor, taken from its connection data */
+
+ struct ibv_mr *target_mr; /* registration covering target_sgl */
+ int target_sge; /* entry of target_sgl used for the next write */
+ volatile struct rs_sge target_sgl[RS_SGL_SIZE];
+
+ uint32_t rbuf_size;
+ struct ibv_mr *rmr; /* registration covering rbuf */
+ uint8_t *rbuf;
+
+ uint32_t sbuf_size;
+ struct ibv_mr *smr; /* registration covering sbuf */
+ struct ibv_sge ssgl[2];
+ uint8_t *sbuf;
+};
+
+static void rs_configure(void) // one-time load of the tunables from files under RS_CONF_DIR; later calls return at once
+{
+ FILE *f;
+ static int init; // set once the files have been processed
+
+ if (init)
+ return;
+
+ fastlock_acquire(&mut);
+ if (init) // re-check under the lock: another thread may have finished meanwhile
+ goto out;
+
+ if ((f = fopen(RS_CONF_DIR "/polling_time", "r"))) { // a file that cannot be opened leaves the built-in default
+ fscanf(f, "%u", &polling_time); // NOTE(review): the fscanf result is not checked here or below
+ fclose(f);
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/inline_default", "r"))) {
+ fscanf(f, "%hu", &def_inline);
+ fclose(f);
+
+ if (def_inline < RS_MIN_INLINE) // raise to the minimum
+ def_inline = RS_MIN_INLINE;
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/sqsize_default", "r"))) {
+ fscanf(f, "%hu", &def_sqsize);
+ fclose(f);
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/rqsize_default", "r"))) {
+ fscanf(f, "%hu", &def_rqsize);
+ fclose(f);
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/mem_default", "r"))) {
+ fscanf(f, "%u", &def_mem);
+ fclose(f);
+
+ if (def_mem < 1) // a configured 0 becomes 1
+ def_mem = 1;
+ }
+
+ if ((f = fopen(RS_CONF_DIR "/wmem_default", "r"))) {
+ fscanf(f, "%u", &def_wmem);
+ fclose(f);
+
+ if (def_wmem < 1) // a configured 0 becomes 1
+ def_wmem = 1;
+ }
+ init = 1;
+out:
+ fastlock_release(&mut);
+}
+
+static int rs_insert(struct rsocket *rs) // registers rs in idm under an index derived from its channel Event; returns idm_set()'s result
+{
+ fastlock_acquire(&mut);
+ rs->index = idm_set(&idm, ((int)rs->cm_id->channel->channel.Event >> 2)/*fd*/, rs); // NOTE(review): Event is narrowed to int before the shift - confirm this is safe on 64-bit builds
+ fastlock_release(&mut);
+ return rs->index;
+}
+
+static void rs_remove(struct rsocket *rs) // clears the idm slot at rs->index while holding mut
+{
+ fastlock_acquire(&mut);
+ idm_clear(&idm, rs->index); // the returned pointer is not needed
+ fastlock_release(&mut);
+}
+
+static struct rsocket *rs_alloc(struct rsocket *inherited_rs)
+{
+    /* Allocate a zeroed rsocket whose sizing comes from inherited_rs when one is
+     * given, otherwise from the file-level defaults.  Returns NULL if calloc fails. */
+    struct rsocket *fresh = (struct rsocket *)calloc(1, sizeof *fresh);
+
+    if (fresh == NULL)
+        return NULL;
+    fresh->index = -1; /* not yet entered in the index map */
+    if (inherited_rs == NULL) {
+        fresh->sbuf_size = def_wmem;
+        fresh->rbuf_size = def_mem;
+        fresh->sq_inline = def_inline;
+        fresh->sq_size = def_sqsize;
+        fresh->rq_size = def_rqsize;
+        fresh->ctrl_avail = RS_QP_CTRL_SIZE;
+    } else {
+        fresh->sbuf_size = inherited_rs->sbuf_size;
+        fresh->rbuf_size = inherited_rs->rbuf_size;
+        fresh->sq_inline = inherited_rs->sq_inline;
+        fresh->sq_size = inherited_rs->sq_size;
+        fresh->rq_size = inherited_rs->rq_size;
+        fresh->ctrl_avail = inherited_rs->ctrl_avail;
+    }
+    fastlock_init(&fresh->slock);
+    fastlock_init(&fresh->rlock);
+    fastlock_init(&fresh->cq_lock);
+    fastlock_init(&fresh->cq_wait_lock);
+    return fresh;
+}
+
+static int rs_set_nonblocking(struct rsocket *rs, long arg) // arg == O_NONBLOCK selects a zero wait, anything else INFINITE; always returns 0
+{
+ if (rs->cm_id->recv_cq_channel) // the completion channel may not exist yet
+ rs->cm_id->recv_cq_channel->comp_channel.Milliseconds = (arg == O_NONBLOCK ? 0 : INFINITE);
+
+ if (rs->state < rs_connected) // the CM channel wait is only adjusted before the socket is connected
+ rs->cm_id->channel->channel.Milliseconds = (arg == O_NONBLOCK ? 0 : INFINITE);
+
+ return 0;
+}
+
+static void rs_set_qp_size(struct rsocket *rs)
+{
+    /* Cap both queue sizes at the smaller of ucma_max_qpsize() and RS_QP_MAX_SIZE; sizes below 2 become 2. */
+    uint16_t limit = min(ucma_max_qpsize(rs->cm_id), RS_QP_MAX_SIZE);
+
+    if (rs->sq_size > limit) {
+        rs->sq_size = limit;
+    } else if (rs->sq_size < 2) {
+        rs->sq_size = 2;
+    }
+    if (rs->sq_size <= (RS_QP_CTRL_SIZE << 2))
+        rs->ctrl_avail = 1; /* send queue of at most 16 entries: ctrl_avail drops to 1 */
+    if (rs->rq_size > limit) {
+        rs->rq_size = limit;
+    } else if (rs->rq_size < 2) {
+        rs->rq_size = 2;
+    }
+}
+
+static int rs_init_bufs(struct rsocket *rs) // allocates and registers the message array and the send, target-SGL and receive regions; 0 on success, -1 on failure
+{
+ rs->rmsg = (struct rs_msg *)calloc(rs->rq_size + 1, sizeof(*rs->rmsg)); // one entry more than the receive queue size
+ if (!rs->rmsg)
+ return -1; // nothing is released on these failure paths; rs_free() checks each pointer individually
+
+ rs->sbuf = (uint8_t *)calloc(rs->sbuf_size, sizeof(*rs->sbuf));
+ if (!rs->sbuf)
+ return -1;
+
+ rs->smr = rdma_reg_msgs(rs->cm_id, rs->sbuf, rs->sbuf_size);
+ if (!rs->smr)
+ return -1;
+
+ rs->target_mr = rdma_reg_write(rs->cm_id, (void *) rs->target_sgl,
+ sizeof(rs->target_sgl));
+ if (!rs->target_mr)
+ return -1;
+
+ rs->rbuf = (uint8_t *)calloc(rs->rbuf_size, sizeof(*rs->rbuf));
+ if (!rs->rbuf)
+ return -1;
+
+ rs->rmr = rdma_reg_write(rs->cm_id, rs->rbuf, rs->rbuf_size);
+ if (!rs->rmr)
+ return -1;
+
+ rs->ssgl[0].addr = rs->ssgl[1].addr = (uintptr_t) rs->sbuf; // both send SGEs start at the beginning of sbuf
+ rs->sbuf_bytes_avail = rs->sbuf_size;
+ rs->ssgl[0].lkey = rs->ssgl[1].lkey = rs->smr->lkey;
+
+ rs->rbuf_free_offset = rs->rbuf_size >> 1; // half of rbuf
+ rs->rbuf_bytes_avail = rs->rbuf_size >> 1;
+ rs->sqe_avail = rs->sq_size - rs->ctrl_avail; // ctrl_avail entries are held back from sqe_avail
+ rs->rseq_comp = rs->rq_size >> 1;
+ return 0;
+}
+
+static int rs_create_cq(struct rsocket *rs) // creates one completion channel and one CQ shared by send and receive; 0 on success, -1 on failure
+{
+ rs->cm_id->recv_cq_channel = ibv_create_comp_channel(rs->cm_id->verbs);
+ if (!rs->cm_id->recv_cq_channel)
+ return -1;
+
+ rs->cm_id->recv_cq = ibv_create_cq(rs->cm_id->verbs, rs->sq_size + rs->rq_size, // sized for both queues together
+ rs->cm_id, rs->cm_id->recv_cq_channel, 0);
+ if (!rs->cm_id->recv_cq)
+ goto err1;
+
+ if (rs->fd_flags & O_NONBLOCK) { // apply a non-blocking setting made before the channel existed
+ if (rs_set_nonblocking(rs, O_NONBLOCK))
+ goto err2;
+ }
+
+ rs->cm_id->send_cq_channel = rs->cm_id->recv_cq_channel; // the send side reuses the receive channel and CQ
+ rs->cm_id->send_cq = rs->cm_id->recv_cq;
+ return 0;
+
+err2:
+ ibv_destroy_cq(rs->cm_id->recv_cq);
+ rs->cm_id->recv_cq = NULL;
+err1:
+ ibv_destroy_comp_channel(rs->cm_id->recv_cq_channel);
+ rs->cm_id->recv_cq_channel = NULL;
+ return -1;
+}
+
+static __inline int
+rs_post_recv(struct rsocket *rs)
+{
+    /* Post one receive work request that carries no scatter/gather entries. */
+    struct ibv_recv_wr wr, *bad;
+
+    wr.num_sge = 0;
+    wr.sg_list = NULL;
+    wr.next = NULL;
+    wr.wr_id = RS_RECV_WR_ID;
+    return rdma_seterrno(ibv_post_recv(rs->cm_id->qp, &wr, &bad));
+}
+
+static int rs_create_ep(struct rsocket *rs) // builds CQ, QP and buffers, then posts rq_size receives; 0 on success, otherwise the failing step's result
+{
+ struct ibv_qp_init_attr qp_attr;
+ int i, ret;
+
+ rs_set_qp_size(rs); // limit the queue sizes before anything is created from them
+
+ ret = rs_create_cq(rs);
+ if (ret)
+ return ret;
+
+ memset(&qp_attr, 0, sizeof qp_attr);
+ qp_attr.qp_context = rs;
+ qp_attr.send_cq = rs->cm_id->send_cq;
+ qp_attr.recv_cq = rs->cm_id->recv_cq;
+ qp_attr.qp_type = IBV_QPT_RC;
+ qp_attr.sq_sig_all = 1;
+ qp_attr.cap.max_send_wr = rs->sq_size;
+ qp_attr.cap.max_recv_wr = rs->rq_size;
+ qp_attr.cap.max_send_sge = 2; // matches the two-entry rsocket.ssgl
+ qp_attr.cap.max_recv_sge = 1;
+ qp_attr.cap.max_inline_data = rs->sq_inline;
+
+ ret = rdma_create_qp(rs->cm_id, NULL, &qp_attr);
+ if (ret)
+ return ret; // earlier steps are not undone here
+
+ ret = rs_init_bufs(rs);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rs->rq_size; i++) { // one receive request per receive queue entry
+ ret = rs_post_recv(rs);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static void rs_free(struct rsocket *rs) // releases everything rs owns and then rs itself; tolerates a partially initialised rsocket
+{
+ if (rs->index >= 0) // only registered sockets have an idm slot to clear
+ rs_remove(rs);
+
+ if (rs->rmsg)
+ free(rs->rmsg);
+
+ if (rs->sbuf) {
+ if (rs->smr)
+ rdma_dereg_mr(rs->smr); // deregister before the memory is freed
+ free(rs->sbuf);
+ }
+
+ if (rs->rbuf) {
+ if (rs->rmr)
+ rdma_dereg_mr(rs->rmr); // deregister before the memory is freed
+ free(rs->rbuf);
+ }
+
+ if (rs->target_mr)
+ rdma_dereg_mr(rs->target_mr);
+
+ if (rs->cm_id) {
+ if (rs->cm_id->qp)
+ rdma_destroy_qp(rs->cm_id);
+ rdma_destroy_id(rs->cm_id);
+ }
+
+ fastlock_destroy(&rs->cq_wait_lock);
+ fastlock_destroy(&rs->cq_lock);
+ fastlock_destroy(&rs->rlock);
+ fastlock_destroy(&rs->slock);
+ free(rs);
+}
+
+static void rs_set_conn_data(struct rsocket *rs, struct rdma_conn_param *param, // fills *conn from rs and attaches it to param as private data
+ struct rs_conn_data *conn)
+{
+ conn->version = 1;
+ conn->flags = rs_host_is_net() ? RS_CONN_FLAG_NET : 0;
+ conn->credits = htons(rs->rq_size); // multi-byte fields are converted with htons/htonl/htonll
+ conn->reserved2 = 0;
+
+ conn->target_sgl.addr = htonll((uintptr_t) rs->target_sgl);
+ conn->target_sgl.length = htonl(RS_SGL_SIZE); // entry count, not a byte count
+ conn->target_sgl.key = htonl(rs->target_mr->rkey);
+
+ conn->data_buf.addr = htonll((uintptr_t) rs->rbuf);
+ conn->data_buf.length = htonl(rs->rbuf_size >> 1); // only the first half of rbuf is advertised
+ conn->data_buf.key = htonl(rs->rmr->rkey);
+
+ param->private_data = conn; // conn is referenced, not copied, here
+ param->private_data_len = sizeof *conn;
+}
+
+static void rs_save_conn_data(struct rsocket *rs, struct rs_conn_data *conn) // records the peer's connection data in rs
+{
+ rs->remote_sgl.addr = ntohll(conn->target_sgl.addr);
+ rs->remote_sgl.length = ntohl(conn->target_sgl.length);
+ rs->remote_sgl.key = ntohl(conn->target_sgl.key);
+ rs->remote_sge = 1;
+ if ((rs_host_is_net() && !(conn->flags & RS_CONN_FLAG_NET)) || // the two sides disagree on the RS_CONN_FLAG_NET property
+ (!rs_host_is_net() && (conn->flags & RS_CONN_FLAG_NET)))
+ rs->opts = RS_OPT_SWAP_SGL;
+
+ rs->target_sgl[0].addr = ntohll(conn->data_buf.addr); // first target entry is the peer's advertised data buffer
+ rs->target_sgl[0].length = ntohl(conn->data_buf.length);
+ rs->target_sgl[0].key = ntohl(conn->data_buf.key);
+
+ rs->sseq_comp = ntohs(conn->credits);
+}
+
+__declspec(dllexport)
+int rsocket(int domain, int type, int protocol) // creates an rsocket; returns its index, or the failing step's error result
+{
+ struct rsocket *rs;
+ int ret;
+
+ wsa_setlasterror(0);
+ if ((domain != PF_INET && domain != PF_INET6) || // only IPv4/IPv6 stream sockets, protocol 0 or TCP
+ (type != SOCK_STREAM) || (protocol && protocol != IPPROTO_TCP))
+ return ERR(ENOTSUP);
+
+ rs_configure();
+ rs = rs_alloc(NULL); // no parent socket: file-level defaults apply
+ if (!rs)
+ return ERR(ENOMEM);
+
+ ret = rdma_create_id(NULL, &rs->cm_id, rs, RDMA_PS_TCP);
+ if (ret)
+ goto err;
+
+ ret = rs_insert(rs);
+ if (ret < 0)
+ goto err;
+
+ rs->cm_id->route.addr.src_addr.sa_family = (ADDRESS_FAMILY)domain;
+ return rs->index;
+
+err:
+ rs_free(rs); // also destroys cm_id if it was created
+ return ret;
+}
+
+__declspec(dllexport)
+int rbind(int socket, const struct sockaddr *addr, socklen_t addrlen)
+{
+    /* Bind the rsocket behind `socket` to addr through rdma_bind_addr().  Returns
+     * that call's result; on failure errno is passed to wsa_setlasterror().
+     * addrlen is not consulted, and `socket` is looked up without validation. */
+    struct rsocket *rs;
+    int ret;
+
+    wsa_setlasterror(0);
+    rs = (struct rsocket *)idm_at(&idm, socket);
+    ret = rdma_bind_addr(rs->cm_id, (struct sockaddr *) addr);
+    if (ret) {
+        wsa_setlasterror(errno);
+        return ret;
+    }
+
+    rs->state = rs_bound;
+    return ret;
+}
+
+__declspec(dllexport)
+int rlisten(int socket, int backlog)
+{
+    /* Put the rsocket behind `socket` into the listening state via rdma_listen().
+     * A backlog above RS_MAX_BACKLOG, or equal to SOMAXCONN, is replaced by
+     * RS_MAX_BACKLOG.  Returns rdma_listen()'s result; on failure errno is passed
+     * to wsa_setlasterror(). */
+    struct rsocket *rs;
+    int ret;
+
+    wsa_setlasterror(0);
+    rs = (struct rsocket *)idm_at(&idm, socket);
+    if (backlog == SOMAXCONN || backlog > RS_MAX_BACKLOG)
+        backlog = RS_MAX_BACKLOG;
+
+    ret = rdma_listen(rs->cm_id, backlog);
+    if (ret) {
+        wsa_setlasterror(errno);
+        return ret;
+    }
+
+    rs->state = rs_listening;
+    return ret;
+}
+
+static int rs_do_connect(struct rsocket *rs)
+{
+    /* Advance the connect state machine from rs->state, continuing with the next
+     * stage whenever a step completes at once.  Returns 0 once the socket reached
+     * rs_connect_rdwr; failures are mapped at the bottom of the function. */
+    struct rdma_conn_param param;
+    struct rs_conn_data creq, *cresp;
+    struct sockaddr_storage dst_addr;
+    int to, ret;
+
+    switch (rs->state) {
+    case rs_init:
+    case rs_bound:
+resolve_addr:
+        to = 1000 << rs->retries++; /* timeout doubles with every retry */
+        RtlCopyMemory(&dst_addr, &rs->cm_id->route.addr.dst_addr,
+                      rs->cm_id->route.addr.dst_addr.sa_family == AF_INET
+                          ? sizeof(struct sockaddr_in)
+                          : sizeof(struct sockaddr_in6));
+        ret = rdma_resolve_addr(rs->cm_id, NULL,
+                                (struct sockaddr *)&dst_addr, to);
+        if (!ret)
+            goto resolve_route;
+        if (errno == EAGAIN || errno == EWOULDBLOCK)
+            rs->state = rs_resolving_addr;
+        break;
+    case rs_resolving_addr:
+        ret = ucma_complete(rs->cm_id);
+        if (ret) {
+            if (errno == ETIMEDOUT && rs->retries <= RS_CONN_RETRIES)
+                goto resolve_addr;
+            break;
+        }
+
+        rs->retries = 0;
+resolve_route:
+        to = 1000 << rs->retries++;
+        ret = rdma_resolve_route(rs->cm_id, to);
+        if (!ret)
+            goto do_connect;
+        if (errno == EAGAIN || errno == EWOULDBLOCK)
+            rs->state = rs_resolving_route;
+        break;
+    case rs_resolving_route:
+        ret = ucma_complete(rs->cm_id);
+        if (ret) {
+            if (errno == ETIMEDOUT && rs->retries <= RS_CONN_RETRIES)
+                goto resolve_route;
+            break;
+        }
+do_connect:
+        ret = rs_create_ep(rs);
+        if (ret)
+            break;
+
+        memset(&param, 0, sizeof param);
+        rs_set_conn_data(rs, &param, &creq);
+        param.flow_control = 1;
+        param.retry_count = 7;
+        param.rnr_retry_count = 7;
+        rs->retries = 0;
+
+        ret = rdma_connect(rs->cm_id, &param);
+        if (!ret)
+            goto connected;
+        if (errno == EAGAIN || errno == EWOULDBLOCK)
+            rs->state = rs_connecting;
+        break;
+    case rs_connecting:
+        ret = ucma_complete(rs->cm_id);
+        if (ret)
+            break;
+connected:
+        cresp = (struct rs_conn_data *) rs->cm_id->event->param.conn.private_data;
+        if (cresp->version != 1) { /* only protocol version 1 is understood */
+            ret = ERR(ENOTSUP);
+            break;
+        }
+
+        rs_save_conn_data(rs, cresp);
+        rs->state = rs_connect_rdwr;
+        break;
+    case rs_accepting:
+        if (!(rs->fd_flags & O_NONBLOCK))
+            rs_set_nonblocking(rs, 0);
+
+        ret = ucma_complete(rs->cm_id);
+        if (ret)
+            break;
+
+        rs->state = rs_connect_rdwr;
+        break;
+    default:
+        ret = ERR(EINVAL);
+        break;
+    }
+
+    if (ret) {
+        if (errno == EAGAIN || errno == EWOULDBLOCK) {
+            errno = EINPROGRESS; /* still pending: the caller is expected to call again */
+        } else {
+            rs->state = rs_connect_error;
+            rs->err = errno;
+        }
+        wsa_setlasterror(errno);
+    }
+    return ret;
+}
+
+/*
+ * Start (or continue) connecting an rsocket to the given destination.
+ *
+ * The destination is stored in the cm_id and the connection state machine
+ * is then driven by rs_do_connect(), whose result is returned unchanged.
+ *
+ * A NULL address or a non-positive length fails with EINVAL.  The copy is
+ * capped at sizeof(struct sockaddr_in6), the largest amount rs_do_connect()
+ * ever reads back from the destination slot, so an oversized addrlen
+ * (e.g. sizeof(struct sockaddr_storage)) can no longer write past it.
+ * NOTE(review): assumes the dst_addr slot is at least sockaddr_in6 sized -
+ * confirm against the rdma_addr declaration.
+ */
+__declspec(dllexport)
+int rconnect(int socket, const struct sockaddr *addr, socklen_t addrlen)
+{
+    struct rsocket *rs;
+    size_t copy_len;
+    int ret;
+
+    wsa_setlasterror(0);
+    if (!addr || addrlen <= 0) {
+        ret = ERR(EINVAL);
+        wsa_setlasterror(errno);
+        return ret;
+    }
+
+    copy_len = (size_t) addrlen;
+    if (copy_len > sizeof(struct sockaddr_in6))
+        copy_len = sizeof(struct sockaddr_in6);
+
+    rs = (struct rsocket *)idm_at(&idm, socket);
+    memcpy(&rs->cm_id->route.addr.dst_addr, addr, copy_len);
+    return rs_do_connect(rs);
+}
+
+/*
+ * Post a single RDMA-write-with-immediate work request on the socket's QP.
+ *
+ * imm_data is sent to the peer (in network byte order) and is also stored,
+ * unconverted, as the request's wr_id so the completion can be decoded.
+ * Returns the ibv_post_send() result passed through rdma_seterrno().
+ */
+static int rs_post_write(struct rsocket *rs,
+                         struct ibv_sge *sgl, int nsge,
+                         uint32_t imm_data, int flags,
+                         uint64_t addr, uint32_t rkey)
+{
+    struct ibv_send_wr request;
+    struct ibv_send_wr *failed;
+    int rc;
+
+    request.next = NULL;
+    request.wr_id = (uint64_t) imm_data;
+    request.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
+    request.send_flags = flags;
+    request.imm_data = htonl(imm_data);
+
+    request.sg_list = sgl;
+    request.num_sge = nsge;
+
+    request.wr.rdma.remote_addr = addr;
+    request.wr.rdma.rkey = rkey;
+
+    rc = ibv_post_send(rs->cm_id->qp, &request, &failed);
+    return rdma_seterrno(rc);
+}
+
+/*
+ * Send `length` bytes described by sgl to the current remote target SGE.
+ *
+ * The local copy of the target entry is advanced before the write is
+ * posted; otherwise the remote side may update the entry before we do.
+ * Also consumes one send sequence number, one send queue entry and
+ * `length` bytes of send-buffer credit.
+ */
+static int rs_write_data(struct rsocket *rs,
+                         struct ibv_sge *sgl, int nsge,
+                         uint32_t length, int flags)
+{
+    uint64_t remote_addr;
+    uint32_t remote_key;
+
+    rs->sseq_no++;
+    rs->sqe_avail--;
+    rs->sbuf_bytes_avail -= length;
+
+    /* Remember where this transfer lands before moving the cursor. */
+    remote_addr = rs->target_sgl[rs->target_sge].addr;
+    remote_key = rs->target_sgl[rs->target_sge].key;
+
+    rs->target_sgl[rs->target_sge].addr += length;
+    rs->target_sgl[rs->target_sge].length -= length;
+
+    /* Entry exhausted: step to the next one, wrapping at RS_SGL_SIZE. */
+    if (rs->target_sgl[rs->target_sge].length == 0) {
+        rs->target_sge++;
+        if (rs->target_sge == RS_SGL_SIZE)
+            rs->target_sge = 0;
+    }
+
+    return rs_post_write(rs, sgl, nsge,
+                         rs_msg_set(RS_OP_DATA, length),
+                         flags, remote_addr, remote_key);
+}
+
+/*
+ * Number of bytes between the current send cursor (ssgl[0].addr) and the
+ * end of the send buffer.
+ */
+static uint32_t rs_sbuf_left(struct rsocket *rs)
+{
+    uint64_t buf_end = (uint64_t) (uintptr_t) &rs->sbuf[rs->sbuf_size];
+
+    return (uint32_t) (buf_end - rs->ssgl[0].addr);
+}
+
+/*
+ * Send an RS_OP_SGL control message carrying rseq_no + rq_size to the peer.
+ *
+ * When at least half of the receive buffer is free, the message also writes
+ * a new rs_sge (describing that half) into the peer's SGL slot, byte-swapped
+ * if RS_OPT_SWAP_SGL is set; otherwise an empty write is posted.
+ * Consumes one control credit and re-arms rseq_comp half a queue ahead.
+ */
+static void rs_send_credits(struct rsocket *rs)
+{
+    struct ibv_sge ibsge;
+    struct rs_sge sge;
+
+    rs->ctrl_avail--;
+    rs->rseq_comp = rs->rseq_no + (rs->rq_size >> 1);
+
+    if ((uint32_t)rs->rbuf_bytes_avail < (rs->rbuf_size >> 1)) {
+        /* No free buffer half to advertise: sequence update only. */
+        rs_post_write(rs, NULL, 0,
+                      rs_msg_set(RS_OP_SGL, rs->rseq_no + rs->rq_size), 0, 0, 0);
+        return;
+    }
+
+    if (rs->opts & RS_OPT_SWAP_SGL) {
+        sge.addr = cl_ntoh64((uintptr_t) &rs->rbuf[rs->rbuf_free_offset]);
+        sge.key = cl_ntoh32(rs->rmr->rkey);
+        sge.length = cl_ntoh32(rs->rbuf_size >> 1);
+    } else {
+        sge.addr = (uintptr_t) &rs->rbuf[rs->rbuf_free_offset];
+        sge.key = rs->rmr->rkey;
+        sge.length = rs->rbuf_size >> 1;
+    }
+
+    /* The entry is posted inline, so a stack object with lkey 0 is used. */
+    ibsge.addr = (uintptr_t) &sge;
+    ibsge.lkey = 0;
+    ibsge.length = sizeof(sge);
+
+    rs_post_write(rs, &ibsge, 1,
+                  rs_msg_set(RS_OP_SGL, rs->rseq_no + rs->rq_size),
+                  IBV_SEND_INLINE,
+                  rs->remote_sgl.addr +
+                  rs->remote_sge * sizeof(struct rs_sge),
+                  rs->remote_sgl.key);
+
+    /* Account for the advertised half and advance both cursors. */
+    rs->rbuf_bytes_avail -= rs->rbuf_size >> 1;
+    rs->rbuf_free_offset += rs->rbuf_size >> 1;
+    if ((uint32_t)rs->rbuf_free_offset >= rs->rbuf_size)
+        rs->rbuf_free_offset = 0;
+
+    rs->remote_sge++;
+    if (rs->remote_sge == rs->remote_sgl.length)
+        rs->remote_sge = 0;
+}
+
+/*
+ * Decide whether a credit update should be sent now.
+ *
+ * True (1) when the socket is connected, a control credit is available and
+ * either half of the receive buffer is free or rseq_no has caught up with
+ * rseq_comp (compared as wrapping 16-bit values).
+ */
+static int rs_give_credits(struct rsocket *rs)
+{
+    int half_free = (uint32_t)rs->rbuf_bytes_avail >= (rs->rbuf_size >> 1);
+    int seq_due = (short) ((short) rs->rseq_no - (short) rs->rseq_comp) >= 0;
+
+    if (!half_free && !seq_due)
+        return 0;
+    if (!rs->ctrl_avail)
+        return 0;
+    return (rs->state & rs_connected) ? 1 : 0;
+}
+
+/* Send a credit update to the peer if rs_give_credits() says one is due. */
+static void rs_update_credits(struct rsocket *rs)
+{
+    if (!rs_give_credits(rs))
+        return;
+
+    rs_send_credits(rs);
+}
+/*
+ * Drain the socket's receive CQ, one completion per ibv_poll_cq() call, and
+ * fold every completion into the socket's bookkeeping.
+ *
+ * Completions whose wr_id is RS_RECV_WR_ID carry immediate data from the peer:
+ *   - RS_OP_SGL stores the message data as the new sseq_comp;
+ *   - RS_OP_CTRL with RS_CTRL_DISCONNECT marks the socket rs_disconnected and
+ *     returns 0 at once; RS_CTRL_SHUTDOWN clears the rs_connect_rd bit;
+ *   - any other op is appended to the rmsg ring, which has rq_size + 1 slots.
+ * Every other completion is decoded from its wr_id and returns the credit it
+ * consumed: a control credit for RS_OP_SGL / RS_OP_CTRL, otherwise a send
+ * queue entry plus the send-buffer bytes encoded in the wr_id.
+ *
+ * After the CQ is empty and while the socket is still connected, one receive
+ * is re-posted for every successful receive completion seen.
+ *
+ * Returns the last ibv_poll_cq() / rs_post_recv() result; a non-zero value
+ * while connected moves the socket to rs_error with rs->err = errno. */
+static int rs_poll_cq(struct rsocket *rs)
+{
+ struct ibv_wc wc;
+ uint32_t imm_data;
+ int ret, rcnt = 0;
+
+ while ((ret = ibv_poll_cq(rs->cm_id->recv_cq, 1, &wc)) > 0) {
+ if (wc.wr_id == RS_RECV_WR_ID) {
+ if (wc.status != IBV_WC_SUCCESS)
+ continue; /* failed receives are dropped and not re-posted */
+ rcnt++;
+
+ imm_data = ntohl(wc.imm_data);
+ switch (rs_msg_op(imm_data)) {
+ case RS_OP_SGL:
+ rs->sseq_comp = (uint16_t) rs_msg_data(imm_data);
+ break;
+ case RS_OP_CTRL:
+ if (rs_msg_data(imm_data) == RS_CTRL_DISCONNECT) {
+ rs->state = rs_disconnected;
+ return 0; /* leaves without re-posting the counted receives */
+ } else if (rs_msg_data(imm_data) == RS_CTRL_SHUTDOWN) {
+ rs->state &= ~rs_connect_rd;
+ }
+ break;
+ default:
+ rs->rmsg[rs->rmsg_tail].op = rs_msg_op(imm_data);
+ rs->rmsg[rs->rmsg_tail].data = rs_msg_data(imm_data);
+ if (++rs->rmsg_tail == rs->rq_size + 1)
+ rs->rmsg_tail = 0;
+ break;
+ }
+ } else {
+ switch (rs_msg_op((uint32_t) wc.wr_id)) { /* wr_id holds the message word of the send */
+ case RS_OP_SGL:
+ rs->ctrl_avail++;
+ break;
+ case RS_OP_CTRL:
+ rs->ctrl_avail++;
+ if (rs_msg_data((uint32_t) wc.wr_id) == RS_CTRL_DISCONNECT)
+ rs->state = rs_disconnected;
+ break;
+ default:
+ rs->sqe_avail++;
+ rs->sbuf_bytes_avail += rs_msg_data((uint32_t) wc.wr_id);
+ break;
+ }
+
+ if (wc.status != IBV_WC_SUCCESS && (rs->state & rs_connected)) {
+ rs->state = rs_error;
+ rs->err = EIO;
+ }
+ }
+ }
+
+ if (rs->state & rs_connected) {
+ while (!ret && rcnt--) /* replenish one receive per consumed completion */
+ {
+ ret = rs_post_recv(rs);
+ }
+
+ if (ret) {
+ rs->state = rs_error;
+ rs->err = errno;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Consume one pending CQ notification, if the CQ is currently armed.
+ *
+ * Returns 0 when nothing was armed or an event was retrieved (the event is
+ * acknowledged and cq_armed cleared).  On failure the ibv_get_cq_event()
+ * result is returned; any errno other than EAGAIN puts the socket into
+ * rs_error.
+ */
+static int rs_get_cq_event(struct rsocket *rs)
+{
+    struct ibv_cq *event_cq;
+    void *event_ctx;
+    int rc;
+
+    if (rs->cq_armed) {
+        rc = ibv_get_cq_event(rs->cm_id->recv_cq_channel, &event_cq, &event_ctx);
+        if (rc == 0) {
+            ibv_ack_cq_events(rs->cm_id->recv_cq, 1);
+            rs->cq_armed = 0;
+        } else if (errno != EAGAIN) {
+            rs->state = rs_error;
+        }
+        return rc;
+    }
+
+    return 0;
+}
+
+/*
+ * Although we serialize rsend and rrecv calls with respect to themselves,
+ * both calls may run simultaneously and need to poll the CQ for completions.
+ * We need to serialize access to the CQ, but rsend and rrecv need to
+ * allow each other to make forward progress.
+ *
+ * For example, rsend may need to wait for credits from the remote side,
+ * which could be stalled until the remote process calls rrecv. This should
+ * not block rrecv from receiving data from the remote side however.
+ *
+ * We handle this by using two locks. The cq_lock protects against polling
+ * the CQ and processing completions. The cq_wait_lock serializes access to
+ * waiting on the CQ.
+ *
+ * Each iteration sends any due credit update, polls the CQ and then calls
+ * test(rs):
+ *   - test satisfied: return 0, even if the poll reported an error;
+ *   - poll failed: return that result;
+ *   - nonblock set: fail with EWOULDBLOCK;
+ *   - CQ not armed: request a notification and poll once more;
+ *   - otherwise: wait for a CQ event.  cq_wait_lock is taken before cq_lock
+ *     is dropped, and cq_lock is re-taken only after cq_wait_lock has been
+ *     released.
+ * Credits are checked a final time before cq_lock is released.
+ */
+static int rs_process_cq(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs))
+{
+ int ret;
+
+ fastlock_acquire(&rs->cq_lock);
+ do {
+ rs_update_credits(rs);
+ ret = rs_poll_cq(rs);
+ if (test(rs)) {
+ ret = 0;
+ break;
+ } else if (ret) {
+ break;
+ } else if (nonblock) {
+ ret = ERR(EWOULDBLOCK); /* non-zero ret also ends the loop */
+ } else if (!rs->cq_armed) {
+ ibv_req_notify_cq(rs->cm_id->recv_cq, 0);
+ rs->cq_armed = 1; /* ret is still 0: loop to poll again after arming */
+ } else {
+ rs_update_credits(rs);
+ fastlock_acquire(&rs->cq_wait_lock);
+ fastlock_release(&rs->cq_lock); /* let the other direction poll while we wait */
+ ret = rs_get_cq_event(rs);
+ fastlock_release(&rs->cq_wait_lock);
+ fastlock_acquire(&rs->cq_lock);
+ }
+ } while (!ret);
+
+ rs_update_credits(rs);
+ fastlock_release(&rs->cq_lock);
+ return ret;
+}
+
+/*
+ * Wait until test(rs) is satisfied.
+ *
+ * The CQ is first processed in non-blocking mode repeatedly for up to
+ * polling_time microseconds; any result other than EWOULDBLOCK, or a
+ * non-blocking caller, returns immediately.  Once the polling budget is
+ * used up, a single blocking rs_process_cq() call provides the result.
+ */
+static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs))
+{
+    struct timeval start, now;
+    uint32_t elapsed_us = 0;
+    int rc;
+
+    for (;;) {
+        rc = rs_process_cq(rs, 1, test);
+        if (rc == 0 || nonblock || errno != EWOULDBLOCK)
+            return rc;
+
+        /* First miss: take the reference timestamp. */
+        if (elapsed_us == 0)
+            gettimeofday(&start, NULL);
+
+        gettimeofday(&now, NULL);
+        elapsed_us = (now.tv_sec - start.tv_sec) * 1000000 +
+                     (now.tv_usec - start.tv_usec) + 1;
+        if (elapsed_us > polling_time)
+            break;
+    }
+
+    return rs_process_cq(rs, 0, test);
+}
+
+/* Non-zero when the O_NONBLOCK bit is set in the socket's fd_flags. */
+static int rs_nonblocking(struct rsocket *rs)
+{
+    return O_NONBLOCK & rs->fd_flags;
+}
+
+/* Completion test for rs_process_cq(): satisfied once the CQ is armed. */
+static int rs_is_cq_armed(struct rsocket *rs)
+{
+    return rs->cq_armed;
+}
+
+/* Completion test for rs_process_cq() that is always satisfied. */
+static int rs_poll_all(struct rsocket *rs)
+{
+    (void) rs;
+    return 1;
+}
+
+/*
+ * We use hardware flow control to prevent overrunning the remote
+ * receive queue.  However, data transfers still require space in
+ * the remote rmsg queue, or we risk losing notification that data
+ * has been transferred.
+ *
+ * Be careful with race conditions in the check below.  The target SGL
+ * may be updated by a remote RDMA write.
+ *
+ * Returns 1 when a send queue entry, send-buffer space, a sequence number
+ * and a non-empty target SGE are all available, otherwise 0.
+ */
+static int rs_can_send(struct rsocket *rs)
+{
+    if (!rs->sqe_avail || !rs->sbuf_bytes_avail)
+        return 0;
+    if (rs->sseq_no == rs->sseq_comp)
+        return 0;
+
+    return rs->target_sgl[rs->target_sge].length != 0;
+}
+
+/* Wait test: data can be sent, or the write side is no longer connected. */
+static int rs_conn_can_send(struct rsocket *rs)
+{
+    if (rs_can_send(rs))
+        return 1;
+
+    return !(rs->state & rs_connect_wr);
+}
+
+/* Wait test: a control credit is available, or the socket is not connected. */
+static int rs_conn_can_send_ctrl(struct rsocket *rs)
+{
+    if (rs->ctrl_avail)
+        return 1;
+
+    return !(rs->state & rs_connected);
+}
+
+/* Non-zero when the rmsg ring holds at least one unread message. */
+static int rs_have_rdata(struct rsocket *rs)
+{
+    return rs->rmsg_tail != rs->rmsg_head;
+}
+
+/* Wait test: data is queued, or the read side is no longer connected. */
+static int rs_conn_have_rdata(struct rsocket *rs)
+{
+    if (rs_have_rdata(rs))
+        return 1;
+
+    return !(rs->state & rs_connect_rd);
+}
+
+/*
+ * Wait test: every send queue entry and control credit has been returned
+ * (their sum equals sq_size), or the socket is not connected.
+ */
+static int rs_conn_all_sends_done(struct rsocket *rs)
+{
+    if (!(rs->state & rs_connected))
+        return 1;
+
+    return (rs->sqe_avail + rs->ctrl_avail) == rs->sq_size;
+}
+
+/*
+ * Copy up to len bytes of queued receive data into buf without consuming it.
+ *
+ * Works on private copies of the rmsg head index and the receive-buffer
+ * offset, so the socket's own cursors are left untouched.  A message that
+ * wraps past the end of the receive buffer is copied in two pieces.
+ * Returns the number of bytes copied.
+ */
+static ssize_t rs_peek(struct rsocket *rs, void *buf, size_t len)
+{
+    uint8_t *dst = (uint8_t *)buf;
+    size_t remaining = len;
+    uint32_t tail_room, chunk;
+    int head = rs->rmsg_head;
+    int offset = rs->rbuf_offset;
+
+    while (remaining && (head != rs->rmsg_tail)) {
+        if (remaining < rs->rmsg[head].data) {
+            /* Caller's buffer ends inside this message. */
+            chunk = remaining;
+        } else {
+            chunk = rs->rmsg[head].data;
+            head++;
+            if (head == rs->rq_size + 1)
+                head = 0;
+        }
+
+        tail_room = rs->rbuf_size - offset;
+        if (chunk > tail_room) {
+            /* First piece: up to the end of the ring buffer. */
+            memcpy(dst, &rs->rbuf[offset], tail_room);
+            offset = 0;
+            dst += tail_room;
+            chunk -= tail_room;
+            remaining -= tail_room;
+        }
+        memcpy(dst, &rs->rbuf[offset], chunk);
+        offset += chunk;
+        dst += chunk;
+        remaining -= chunk;
+    }
+
+    return len - remaining;
+}
+
+/*
+ * Receive up to len bytes from an rsocket into buf.
+ *
+ * Continue to receive any queued data even if the remote side has disconnected.
+ *
+ * If the socket is still opening, the connection is driven forward first;
+ * a connect that is still in progress is reported as EAGAIN.  Under rlock,
+ * the call waits (subject to the socket's non-blocking flag) until data is
+ * queued or the read side is no longer connected, then copies queued
+ * messages out of the receive ring buffer, returning the consumed bytes to
+ * rbuf_bytes_avail.  With MSG_PEEK the data is copied by rs_peek() and left
+ * queued.  With MSG_WAITALL the wait/copy cycle repeats until len bytes
+ * were received or the read side is no longer connected.
+ *
+ * Returns the number of bytes received, or the failing call's result with
+ * errno forwarded to wsa_setlasterror().
+ */
+ __declspec(dllexport)
+ssize_t rrecv(int socket, void *buf, size_t len, int flags)
+{
+ struct rsocket *rs;
+ size_t left = len;
+ uint32_t end_size, rsize;
+ int ret;
+ uint8_t *bufb = (uint8_t *)buf;
+
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ if (rs->state & rs_opening) {
+ ret = rs_do_connect(rs);
+ if (ret) {
+ if (errno == EINPROGRESS)
+ errno = EAGAIN;
+ goto out; /* rlock has not been taken yet */
+ }
+ }
+ fastlock_acquire(&rs->rlock);
+ do {
+ if (!rs_have_rdata(rs)) {
+ ret = rs_get_comp(rs, rs_nonblocking(rs),
+ rs_conn_have_rdata);
+ if (ret)
+ break;
+ }
+
+ ret = 0;
+ if (flags & MSG_PEEK) {
+ left = len - rs_peek(rs, buf, left);
+ break;
+ }
+
+ for (; left && rs_have_rdata(rs); left -= rsize) {
+ if (left < rs->rmsg[rs->rmsg_head].data) {
+ rsize = left;
+ rs->rmsg[rs->rmsg_head].data -= left; /* partial read: keep the rest of the message queued */
+ } else {
+ rs->rseq_no++; /* a whole message is consumed */
+ rsize = rs->rmsg[rs->rmsg_head].data;
+ if (++rs->rmsg_head == rs->rq_size + 1)
+ rs->rmsg_head = 0;
+ }
+
+ end_size = rs->rbuf_size - rs->rbuf_offset;
+ if (rsize > end_size) { /* data wraps: copy the piece up to the end of rbuf first */
+ memcpy(bufb, &rs->rbuf[rs->rbuf_offset], end_size);
+ rs->rbuf_offset = 0;
+ bufb += end_size;
+ rsize -= end_size;
+ left -= end_size;
+ rs->rbuf_bytes_avail += end_size;
+ }
+ memcpy(bufb, &rs->rbuf[rs->rbuf_offset], rsize);
+ rs->rbuf_offset += rsize;
+ bufb += rsize;
+ rs->rbuf_bytes_avail += rsize;
+ }
+ } while (left && (flags & MSG_WAITALL) && (rs->state & rs_connect_rd));
+
+ fastlock_release(&rs->rlock);
+
+out:
+ if (ret)
+ {
+ wsa_setlasterror(errno);
+ return ret;
+ }
+ else
+ {
+ return len - left;
+ }
+}
+
+/*
+ * rrecv() variant that additionally reports the peer address.
+ *
+ * The address is filled in through rgetpeername() only when data was
+ * received and src_addr is non-NULL.  Returns the rrecv() result.
+ */
+ssize_t rrecvfrom(int socket, void *buf, size_t len, int flags,
+                  struct sockaddr *src_addr, socklen_t *addrlen)
+{
+    ssize_t received = rrecv(socket, buf, len, flags);
+
+    if (src_addr && received > 0)
+        rgetpeername(socket, src_addr, addrlen);
+
+    return received;
+}
+
+/*
+ * Vectored receive.  Simple, straightforward implementation for now that
+ * only tries to fill in the first vector; iovcnt is not consulted.
+ */
+static ssize_t rrecvv(int socket, const struct iovec *iov, int iovcnt, int flags)
+{
+    const struct iovec *first = &iov[0];
+
+    (void) iovcnt;
+    return rrecv(socket, first->iov_base, first->iov_len, flags);
+}
+
+/*
+ * recvmsg()-style receive on an rsocket.
+ *
+ * Ancillary data is not supported: a message header with a non-empty
+ * control buffer fails with ENOTSUP.  The receive options come from the
+ * flags argument; msg->msg_flags is not used as an input.  Data is
+ * received through rrecvv().
+ */
+ssize_t rrecvmsg(int socket, struct msghdr *msg, int flags)
+{
+    if (msg->msg_control && msg->msg_controllen)
+        return ERR(ENOTSUP);
+
+    return rrecvv(socket, msg->msg_iov, (int) msg->msg_iovlen, flags);
+}
+
+/* read()-style wrapper: rrecv() with no flags. */
+ssize_t rread(int socket, void *buf, size_t count)
+{
+    const int no_flags = 0;
+
+    return rrecv(socket, buf, count, no_flags);
+}
+
+/* readv()-style wrapper: rrecvv() with no flags. */
+ssize_t rreadv(int socket, const struct iovec *iov, int iovcnt)
+{
+    const int no_flags = 0;
+
+    return rrecvv(socket, iov, iovcnt, no_flags);
+}
+
+/*
+ * Send len bytes from buf over an rsocket.
+ *
+ * We overlap sending the data, by posting a small work request immediately,
+ * then increasing the size of the send on each iteration.
+ *
+ * If the socket is still opening, the connection is driven forward first;
+ * a connect that is still in progress is reported as EAGAIN.  Under slock,
+ * each iteration waits (subject to the socket's non-blocking flag) until a
+ * send is possible, picks a transfer size limited by the overlap size, the
+ * free send-buffer bytes and the current target SGE, and posts it either
+ * inline straight from the caller's buffer or via the send ring buffer
+ * (as two SGEs when the data wraps past the end of sbuf).
+ *
+ * Returns the number of bytes sent if any were sent; otherwise the failing
+ * call's result with errno forwarded to wsa_setlasterror().  The flags
+ * argument is not examined.
+ */
+ssize_t rsend(int socket, const void *buf, size_t len, int flags)
+{
+ struct rsocket *rs;
+ struct ibv_sge sge;
+ size_t left = len;
+ uint32_t xfer_size, olen = RS_OLAP_START_SIZE;
+ int ret = 0;
+ uint8_t *bufb = (uint8_t *)buf;
+
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ if (rs->state & rs_opening) {
+ ret = rs_do_connect(rs);
+ if (ret) {
+ if (errno == EINPROGRESS)
+ errno = EAGAIN;
+ goto out; /* slock has not been taken yet */
+ }
+ }
+
+ fastlock_acquire(&rs->slock);
+ for (; left; left -= xfer_size, bufb += xfer_size) {
+ if (!rs_can_send(rs)) {
+ ret = rs_get_comp(rs, rs_nonblocking(rs),
+ rs_conn_can_send);
+ if (ret)
+ break;
+ if (!(rs->state & rs_connect_wr)) { /* the wait ended because the write side went away */
+ ret = ERR(ECONNRESET);
+ break;
+ }
+ }
+
+ if (olen < left) {
+ xfer_size = olen;
+ if (olen < RS_MAX_TRANSFER)
+ olen <<= 1; /* double the overlap size for the next iteration */
+ } else {
+ xfer_size = left;
+ }
+
+ if (xfer_size > (uint32_t)rs->sbuf_bytes_avail)
+ xfer_size = rs->sbuf_bytes_avail;
+ if (xfer_size > rs->target_sgl[rs->target_sge].length)
+ xfer_size = rs->target_sgl[rs->target_sge].length;
+
+ if (xfer_size <= rs->sq_inline) { /* small enough to post inline from the caller's buffer */
+ sge.addr = (uintptr_t)bufb;
+ sge.length = xfer_size;
+ sge.lkey = 0;
+ ret = rs_write_data(rs, &sge, 1, xfer_size, IBV_SEND_INLINE);
+ } else if (xfer_size <= rs_sbuf_left(rs)) { /* fits before the end of the send buffer */
+ memcpy((void *) (uintptr_t) rs->ssgl[0].addr, bufb, xfer_size);
+ rs->ssgl[0].length = xfer_size;
+ ret = rs_write_data(rs, rs->ssgl, 1, xfer_size, 0);
+ if (xfer_size < rs_sbuf_left(rs))
+ rs->ssgl[0].addr += xfer_size;
+ else
+ rs->ssgl[0].addr = (uintptr_t) rs->sbuf;
+ } else { /* wraps: tail of sbuf through ssgl[0], remainder from the start through ssgl[1] */
+ rs->ssgl[0].length = rs_sbuf_left(rs);
+ memcpy((void *) (uintptr_t) rs->ssgl[0].addr, bufb,
+ rs->ssgl[0].length);
+ rs->ssgl[1].length = xfer_size - rs->ssgl[0].length;
+ memcpy(rs->sbuf, bufb + rs->ssgl[0].length, rs->ssgl[1].length);
+ ret = rs_write_data(rs, rs->ssgl, 2, xfer_size, 0);
+ rs->ssgl[0].addr = (uintptr_t) rs->sbuf + rs->ssgl[1].length;
+ }
+ if (ret)
+ break;
+ }
+ fastlock_release(&rs->slock);
+
+out:
+ if (ret && left == len) /* report the error only when nothing at all was sent */
+ {
+ wsa_setlasterror(errno);
+ return ret;
+ }
+ else
+ {
+ return len - left;
+ }
+}
+
+/*
+ * sendto()-style wrapper around rsend().
+ *
+ * In Windows on a connection-oriented socket, the dest_addr and addrlen
+ * parameters are just ignored, making sendto() equivalent to send().
+ */
+ssize_t rsendto(int socket, const void *buf, size_t len, int flags,
+                const struct sockaddr *dest_addr, socklen_t addrlen)
+{
+    (void) dest_addr;
+    (void) addrlen;
+
+    return rsend(socket, buf, len, flags);
+}
+
+/*
+ * Gather len bytes from an iovec array into dst.
+ *
+ * *iov and *offset form a cursor (current vector, byte offset inside it)
+ * that is advanced past the copied data so that consecutive calls continue
+ * where the previous one stopped.
+ */
+static void rs_copy_iov(void *dst, const struct iovec **iov, size_t *offset, size_t len)
+{
+    uint8_t *out = (uint8_t *)dst;
+    size_t avail;
+
+    while (len) {
+        avail = (*iov)->iov_len - *offset;
+        if (avail > len) {
+            /* Request ends inside this vector: stay on it. */
+            memcpy (out, (*iov)->iov_base + *offset, len);
+            *offset += len;
+            return;
+        }
+
+        /* Take the rest of this vector and move to the next one. */
+        memcpy(out, (*iov)->iov_base + *offset, avail);
+        out += avail;
+        len -= avail;
+        *offset = 0;
+        (*iov)++;
+    }
+}
+/*
+ * Vectored send: transmit the concatenation of iovcnt buffers.
+ *
+ * If the socket is still opening, the connection is driven forward first;
+ * a connect that is still in progress is reported as EAGAIN.  Under slock,
+ * each iteration waits (subject to the socket's non-blocking flag) until a
+ * send is possible, picks a transfer size limited by the overlap size, the
+ * free send-buffer bytes and the current target SGE, gathers that many
+ * bytes into the send ring buffer with rs_copy_iov() and posts them (as two
+ * SGEs when the data wraps past the end of sbuf).
+ *
+ * Returns the number of bytes sent if any were sent, otherwise the failing
+ * call's result.  The flags argument is not examined, and this function
+ * itself never calls wsa_setlasterror(). */
+static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags)
+{
+ struct rsocket *rs;
+ const struct iovec *cur_iov;
+ size_t left, len, offset = 0;
+ uint32_t xfer_size, olen = RS_OLAP_START_SIZE;
+ int i, ret = 0;
+
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ if (rs->state & rs_opening) {
+ ret = rs_do_connect(rs);
+ if (ret) {
+ if (errno == EINPROGRESS)
+ errno = EAGAIN;
+ return ret;
+ }
+ }
+
+ cur_iov = iov;
+ len = iov[0].iov_len; /* total length = sum of all vector lengths */
+ for (i = 1; i < iovcnt; i++)
+ len += iov[i].iov_len;
+
+ fastlock_acquire(&rs->slock);
+ for (left = len; left; left -= xfer_size) {
+ if (!rs_can_send(rs)) {
+ ret = rs_get_comp(rs, rs_nonblocking(rs),
+ rs_conn_can_send);
+ if (ret)
+ break;
+ if (!(rs->state & rs_connect_wr)) { /* the wait ended because the write side went away */
+ ret = ERR(ECONNRESET);
+ break;
+ }
+ }
+
+ if (olen < left) {
+ xfer_size = olen;
+ if (olen < RS_MAX_TRANSFER)
+ olen <<= 1; /* double the overlap size for the next iteration */
+ } else {
+ xfer_size = left;
+ }
+
+ if (xfer_size > (uint32_t)rs->sbuf_bytes_avail)
+ xfer_size = rs->sbuf_bytes_avail;
+ if (xfer_size > rs->target_sgl[rs->target_sge].length)
+ xfer_size = rs->target_sgl[rs->target_sge].length;
+
+ if (xfer_size <= rs_sbuf_left(rs)) { /* fits before the end of the send buffer */
+ rs_copy_iov((void *) (uintptr_t) rs->ssgl[0].addr,
+ &cur_iov, &offset, xfer_size);
+ rs->ssgl[0].length = xfer_size;
+ ret = rs_write_data(rs, rs->ssgl, 1, xfer_size,
+ xfer_size <= rs->sq_inline ? IBV_SEND_INLINE : 0);
+ if (xfer_size < rs_sbuf_left(rs))
+ rs->ssgl[0].addr += xfer_size;
+ else
+ rs->ssgl[0].addr = (uintptr_t) rs->sbuf;
+ } else { /* wraps: tail of sbuf through ssgl[0], remainder from the start through ssgl[1] */
+ rs->ssgl[0].length = rs_sbuf_left(rs);
+ rs_copy_iov((void *) (uintptr_t) rs->ssgl[0].addr, &cur_iov,
+ &offset, rs->ssgl[0].length);
+ rs->ssgl[1].length = xfer_size - rs->ssgl[0].length;
+ rs_copy_iov(rs->sbuf, &cur_iov, &offset, rs->ssgl[1].length);
+ ret = rs_write_data(rs, rs->ssgl, 2, xfer_size,
+ xfer_size <= rs->sq_inline ? IBV_SEND_INLINE : 0);
+ rs->ssgl[0].addr = (uintptr_t) rs->sbuf + rs->ssgl[1].length;
+ }
+ if (ret)
+ break;
+ }
+ fastlock_release(&rs->slock);
+
+ return (ret && left == len) ? ret : len - left; /* error only when nothing at all was sent */
+}
+
+/*
+ * sendmsg()-style send on an rsocket.
+ *
+ * Ancillary data is not supported: a message header with a non-empty
+ * control buffer fails with ENOTSUP.  The send options come from the flags
+ * argument; msg->msg_flags is not used as an input.  Data is sent through
+ * rsendv().
+ */
+__declspec(dllexport)
+ssize_t rsendmsg(int socket, const struct msghdr *msg, int flags)
+{
+    wsa_setlasterror(0);
+    if (msg->msg_control && msg->msg_controllen)
+        return ERR(ENOTSUP);
+
+    return rsendv(socket, msg->msg_iov, (int) msg->msg_iovlen, flags);
+}
+
+/* write()-style wrapper: rsend() with no flags. */
+__declspec(dllexport)
+ssize_t rwrite(int socket, const void *buf, size_t count)
+{
+    const int no_flags = 0;
+
+    return rsend(socket, buf, count, no_flags);
+}
+
+/* writev()-style wrapper: rsendv() with no flags. */
+__declspec(dllexport)
+ssize_t rwritev(int socket, const struct iovec *iov, int iovcnt)
+{
+    const int no_flags = 0;
+
+    return rsendv(socket, iov, iovcnt, no_flags);
+}
+
+/*
+ * Return a per-thread scratch array with room for at least nfds pollfd
+ * entries.
+ *
+ * The array is kept between calls and only replaced (old contents
+ * discarded) when a larger one is needed.  Returns NULL if that
+ * allocation fails.
+ */
+static struct pollfd *rs_fds_alloc(nfds_t nfds)
+{
+    static __thread struct pollfd *cache;
+    static __thread nfds_t cache_cap;
+
+    if (nfds <= cache_cap)
+        return cache;
+
+    if (cache)
+        free(cache);
+
+    cache = (struct pollfd *)malloc(sizeof *cache * nfds);
+    cache_cap = cache ? nfds : 0;
+    return cache;
+}
+/*
+ * Compute the poll events currently pending on one rsocket.
+ *
+ * Connected, disconnected or failed socket: the CQ is processed (with the
+ * given nonblock/test arguments; the result is ignored) and POLLIN/POLLOUT
+ * are reported if requested and possible.  A socket that is no longer
+ * connected additionally reports POLLHUP when rs_disconnected, else POLLERR.
+ * Listening socket: the CM channel is polled through a completion set with
+ * a zero timeout.
+ * Opening socket: the connection is driven forward; while it is still in
+ * progress nothing is reported (errno is cleared), a failure reports
+ * POLLOUT, and on success the socket is re-examined as connected.
+ * Socket in rs_connect_error: POLLOUT if requested and rs->err is set.
+ * Anything else reports no events. */
+static int rs_poll_rs(struct rsocket *rs, int events,
+ int nonblock, int (*test)(struct rsocket *rs))
+{
+ COMP_SET fds;
+ short revents;
+ int ret;
+
+check_cq:
+ if ((rs->state & rs_connected) || (rs->state == rs_disconnected) ||
+ (rs->state & rs_error)) {
+ rs_process_cq(rs, nonblock, test);
+
+ revents = 0;
+ if ((events & POLLIN) && rs_conn_have_rdata(rs))
+ revents |= POLLIN;
+ if ((events & POLLOUT) && rs_can_send(rs))
+ revents |= POLLOUT;
+ if (!(rs->state & rs_connected)) {
+ if (rs->state == rs_disconnected)
+ revents |= POLLHUP;
+ else
+ revents |= POLLERR;
+ }
+
+ return revents;
+ }
+
+ if (rs->state == rs_listening) {
+ if (CompSetInit(&fds)) /* NOTE(review): no matching cleanup call for the set is visible here - confirm none is needed */
+ return 0;
+
+ CompSetZero(&fds);
+ CompSetAdd(&rs->cm_id->channel->channel, &fds);
+ return CompSetPoll(&fds, 0);
+ }
+
+ if (rs->state & rs_opening) {
+ ret = rs_do_connect(rs);
+ if (ret) {
+ if (errno == EINPROGRESS) {
+ errno = 0;
+ return 0;
+ } else {
+ return POLLOUT; /* connect failed: reported as writable */
+ }
+ }
+ goto check_cq;
+ }
+
+ if (rs->state == rs_connect_error)
+ return (rs->err && events & POLLOUT) ? POLLOUT : 0;
+
+ return 0;
+}
+
+/*
+ * Make one non-blocking pass over all descriptors and fill in revents.
+ *
+ * Descriptors that map to an rsocket are checked with rs_poll_rs().  There
+ * is no poll call for descriptors that are not rsockets in this build, so
+ * they always report no events; their revents field is cleared explicitly
+ * so that stale caller data is never counted as an event.
+ *
+ * Returns the number of entries with a non-zero revents.
+ */
+static int rs_poll_check(struct pollfd *fds, nfds_t nfds)
+{
+    struct rsocket *rs;
+    int i, cnt = 0;
+
+    for (i = 0; i < (int)nfds; i++) {
+        rs = (struct rsocket *)idm_lookup(&idm, (int)fds[i].fd);
+        if (rs)
+            fds[i].revents = (SHORT)rs_poll_rs(rs, fds[i].events, 1, rs_poll_all);
+        else
+            fds[i].revents = 0;
+
+        if (fds[i].revents)
+            cnt++;
+    }
+    return cnt;
+}
+
+/*
+ * Prepare the shadow array rfds for waiting on the descriptors in fds.
+ *
+ * Each rsocket is checked once more with rs_poll_rs() using the
+ * rs_is_cq_armed test; if it already has events, 1 is returned
+ * immediately.  Otherwise its shadow entry waits for POLLIN on a value
+ * derived from the CQ channel event (state >= rs_connected) or the CM
+ * channel event.  Entries that are not rsockets are copied unchanged.
+ * Returns 0 when every entry was armed.
+ */
+static int rs_poll_arm(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
+{
+    struct rsocket *rs;
+    int i;
+
+    for (i = 0; i < (int)nfds; i++) {
+        rs = (struct rsocket *)idm_lookup(&idm, (int)fds[i].fd);
+        if (!rs) {
+            rfds[i].fd = fds[i].fd;
+            rfds[i].events = fds[i].events;
+            rfds[i].revents = 0;
+            continue;
+        }
+
+        fds[i].revents = (SHORT)rs_poll_rs(rs, fds[i].events, 0, rs_is_cq_armed);
+        if (fds[i].revents)
+            return 1;
+
+        rfds[i].fd = (rs->state >= rs_connected)
+            ? ((short)rs->cm_id->recv_cq_channel->comp_channel.Event >> 2)
+            : ((short)rs->cm_id->channel->channel.Event >> 2);
+        rfds[i].events = POLLIN;
+        rfds[i].revents = 0;
+    }
+    return 0;
+}
+
+/*
+ * Translate events flagged in the shadow array rfds back into fds.
+ *
+ * Only entries whose shadow revents is non-zero are looked at.  For an
+ * rsocket the pending CQ event is consumed and the socket re-evaluated
+ * with rs_poll_rs(); for any other descriptor the shadow revents is copied.
+ * Returns the number of entries that ended up with a non-zero revents.
+ */
+static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
+{
+    struct rsocket *rs;
+    int i, cnt = 0;
+
+    for (i = 0; i < (int)nfds; i++) {
+        if (rfds[i].revents) {
+            rs = (struct rsocket *)idm_lookup(&idm, (int)fds[i].fd);
+            if (!rs) {
+                fds[i].revents = rfds[i].revents;
+            } else {
+                rs_get_cq_event(rs);
+                fds[i].revents = (SHORT)rs_poll_rs(rs, fds[i].events, 1, rs_poll_all);
+            }
+
+            if (fds[i].revents)
+                cnt++;
+        }
+    }
+    return cnt;
+}
+
+/*
+ * poll() for rsockets.
+ *
+ * We need to poll *all* fd's that the user specifies at least once.
+ * Note that we may receive events on an rsocket that may not be reported
+ * to the user (e.g. connection events or credit updates). Process those
+ * events, then return to polling until we find ones of interest.
+ *
+ * Phase 1 checks the descriptors repeatedly without blocking for up to
+ * polling_time microseconds and returns as soon as any entry has events,
+ * or after the first pass when timeout is 0.
+ * Phase 2 arms the descriptors in a per-thread shadow array and converts
+ * shadow events back until some entry reports events.  The WSAPoll() wait
+ * between the two steps is compiled out (#if 0), so as written this phase
+ * does not block and does not consult timeout.
+ *
+ * Returns the number of entries with events, or ERR(ENOMEM) if the shadow
+ * array cannot be allocated.
+ */
+__declspec(dllexport)
+int rpoll(struct pollfd *fds, nfds_t nfds, int timeout)
+{
+ struct timeval s, e;
+ struct pollfd *rfds;
+ uint32_t poll_time = 0;
+ int ret;
+
+ wsa_setlasterror(0);
+ do {
+ ret = rs_poll_check(fds, nfds);
+ if (ret || !timeout)
+ return ret;
+
+ if (!poll_time) /* first miss: take the reference timestamp */
+ gettimeofday(&s, NULL);
+
+ gettimeofday(&e, NULL);
+ poll_time = (e.tv_sec - s.tv_sec) * 1000000 +
+ (e.tv_usec - s.tv_usec) + 1;
+ } while (poll_time <= polling_time);
+
+ rfds = rs_fds_alloc(nfds);
+ if (!rfds)
+ return ERR(ENOMEM);
+
+ do {
+ ret = rs_poll_arm(rfds, fds, nfds);
+ if (ret) /* some rsocket already has events */
+ break;
+#if 0
+ ret = WSAPoll(rfds, nfds, timeout);
+ if (ret <= 0)
+ break;
+#endif
+ ret = rs_poll_events(rfds, fds, nfds);
+ } while (!ret);
+
+ return ret;
+}
+
+/*
+ * Build a pollfd array from select()-style descriptor sets.
+ *
+ * Every descriptor in [0, *nfds) that is a member of at least one of the
+ * given sets gets one entry: POLLIN if it is in readfds, POLLOUT if it is
+ * in writefds, and no requested events if it is only in exceptfds.
+ * Membership, not the descriptor value, decides whether an entry is
+ * emitted, so descriptor 0 is handled like any other and no event bits
+ * carry over from one descriptor to the next.
+ *
+ * On return *nfds holds the number of entries.  The array is allocated
+ * with calloc() (at least one element, so an empty request is not mistaken
+ * for an allocation failure) and must be released by the caller with
+ * free().  Returns NULL if the allocation fails.
+ */
+static struct pollfd *
+rs_select_to_poll(int *nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds)
+{
+    struct pollfd *fds;
+    int fd, wanted, i = 0;
+    short events;
+
+    fds = (struct pollfd *)calloc(*nfds > 0 ? *nfds : 1, sizeof *fds);
+    if (!fds)
+        return NULL;
+
+    for (fd = 0; fd < *nfds; fd++) {
+        events = 0;
+        wanted = 0;
+
+        if (readfds && FD_ISSET(fd, readfds)) {
+            events |= POLLIN;
+            wanted = 1;
+        }
+
+        if (writefds && FD_ISSET(fd, writefds)) {
+            events |= POLLOUT;
+            wanted = 1;
+        }
+
+        if (exceptfds && FD_ISSET(fd, exceptfds))
+            wanted = 1;
+
+        if (wanted) {
+            fds[i].fd = fd;
+            fds[i].events = events;
+            i++;
+        }
+    }
+
+    *nfds = i;
+    return fds;
+}
+
+/*
+ * Convert poll results back into select()-style descriptor sets.
+ *
+ * A descriptor is added to readfds for POLLIN or POLLHUP, to writefds for
+ * POLLOUT and to exceptfds for any other event bit.  Sets that are NULL
+ * are skipped.  Returns the total number of set memberships added, so a
+ * descriptor can be counted more than once.
+ */
+static int
+rs_poll_to_select(int nfds, struct pollfd *fds, fd_set *readfds,
+                  fd_set *writefds, fd_set *exceptfds)
+{
+    int idx, hits = 0;
+    int rev;
+
+    for (idx = 0; idx < nfds; idx++) {
+        rev = fds[idx].revents;
+
+        if (readfds && (rev & (POLLIN | POLLHUP))) {
+            FD_SET(fds[idx].fd, readfds);
+            hits++;
+        }
+
+        if (writefds && (rev & POLLOUT)) {
+            FD_SET(fds[idx].fd, writefds);
+            hits++;
+        }
+
+        if (exceptfds && (rev & ~(POLLIN | POLLOUT))) {
+            FD_SET(fds[idx].fd, exceptfds);
+            hits++;
+        }
+    }
+    return hits;
+}
+
+/*
+ * Convert a select() timeout into poll() milliseconds.
+ * A NULL timeout yields -1.
+ */
+static int rs_convert_timeout(struct timeval *timeout)
+{
+    if (!timeout)
+        return -1;
+
+    return timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
+}
+
+/*
+ * select() for rsockets, implemented on top of rpoll().
+ *
+ * The descriptor sets are converted to a pollfd array, polled, cleared
+ * and, when rpoll() reported events, refilled from the poll results.
+ * Returns the rs_poll_to_select() count in that case, otherwise the
+ * rpoll() result; ERR(ENOMEM) if the pollfd array cannot be allocated.
+ */
+__declspec(dllexport)
+int rselect(int nfds, fd_set *readfds, fd_set *writefds,
+            fd_set *exceptfds, struct timeval *timeout)
+{
+    struct pollfd *pollset;
+    int ready;
+
+    wsa_setlasterror(0);
+    pollset = rs_select_to_poll(&nfds, readfds, writefds, exceptfds);
+    if (pollset == NULL)
+        return ERR(ENOMEM);
+
+    ready = rpoll(pollset, nfds, rs_convert_timeout(timeout));
+
+    /* The input sets double as output sets: start from empty. */
+    if (readfds)
+        FD_ZERO(readfds);
+    if (writefds)
+        FD_ZERO(writefds);
+    if (exceptfds)
+        FD_ZERO(exceptfds);
+
+    if (ready > 0)
+        ready = rs_poll_to_select(nfds, pollset, readfds, writefds, exceptfds);
+
+    free(pollset);
+    return ready;
+}
+
+/*
+ * Shut down one or both directions of an rsocket.
+ *
+ * For graceful disconnect, notify the remote side that we're
+ * disconnecting and wait until all outstanding sends complete.
+ *
+ * SHUT_RD only clears the local rs_connect_rd bit and returns.  For the
+ * other modes a control message is sent on a connected socket:
+ * RS_CTRL_DISCONNECT for SHUT_RDWR, or for SHUT_WR when the read side is
+ * already closed, else RS_CTRL_SHUTDOWN.  If no control credit is free the
+ * call first waits for one.  A socket flagged O_NONBLOCK is switched with
+ * rs_set_nonblocking(rs, 0) for the duration and switched back at the end
+ * if it is still connected.
+ *
+ * Returns 0, except when waiting for a control credit fails; that result
+ * is returned with errno forwarded to wsa_setlasterror().
+ */
+__declspec(dllexport)
+int rshutdown(int socket, int how)
+{
+ struct rsocket *rs;
+ int ctrl, ret = 0;
+
+ wsa_setlasterror(0);
+ rs = (struct rsocket *)idm_at(&idm, socket);
+ if (how == SHUT_RD) {
+ rs->state &= ~rs_connect_rd;
+ return 0;
+ }
+
+ if (rs->fd_flags & O_NONBLOCK)
+ rs_set_nonblocking(rs, 0);
+
+ if (rs->state & rs_connected) {
+ if (how == SHUT_RDWR) {
+ ctrl = RS_CTRL_DISCONNECT;
+ rs->state &= ~(rs_connect_rd | rs_connect_wr);
+ } else {
+ rs->state &= ~rs_connect_wr;
+ ctrl = (rs->state & rs_connect_rd) ?
+ RS_CTRL_SHUTDOWN : RS_CTRL_DISCONNECT;
+ }
+ if (!rs->ctrl_avail) {
+ ret = rs_process_cq(rs, 0, rs_conn_can_send_ctrl);
+ if (ret)
+ {
+ wsa_setlasterror(errno);
+ return ret; /* NOTE(review): returns without the rs_set_nonblocking(rs, 1) call below */
+ }
+ }
+ if ((rs->state & rs_connected) && rs->ctrl_avail) {
+ rs->ctrl_avail--;
+ ret = rs_post_write(rs, NULL, 0,
+ rs_msg_set(RS_OP_CTRL, ctrl), 0, 0, 0); /* this result is not returned to the caller */
+ }
+ }
+
+ if (rs->state & rs_connected)
+ rs_process_cq(rs, 0, rs_conn_all_sends_done);
+
+ if ((rs->fd_flags & O_NONBLOCK) && (rs->state & rs_connected))
+ rs_set_nonblocking(rs, 1);
+
+ return 0;
+}
+
+/*
+ * Close an rsocket.
+ *
+ * A socket that is still connected is shut down in both directions first;
+ * the rsocket is then released with rs_free().  Always returns 0.
+ */
+__declspec(dllexport)
+int rclose(int socket)
+{
+    struct rsocket *sock;
+
+    wsa_setlasterror(0);
+    sock = (struct rsocket *)idm_at(&idm, socket);
+
+    if (sock->state & rs_connected)
+        rshutdown(socket, SHUT_RDWR);
+    rs_free(sock);
+
+    return 0;
+}
+
+/*
+ * Copy a socket address into a caller-supplied buffer.
+ *
+ * At most *len bytes are copied.  *len is then set to the full size of the
+ * address type: sizeof(struct sockaddr_in) for AF_INET, otherwise
+ * sizeof(struct sockaddr_in6).
+ */
+static void rs_copy_addr(struct sockaddr *dst, struct sockaddr *src, socklen_t *len)
+{
+    socklen_t copy_len;
+
+    if (src->sa_family != AF_INET) {
+        copy_len = min(*len, sizeof(struct sockaddr_in6));
+        *len = sizeof(struct sockaddr_in6);
+    } else {
+        copy_len = min(*len, sizeof(struct sockaddr_in));
+        *len = sizeof(struct sockaddr_in);
+    }
+
+    memcpy(dst, src, copy_len);
+}
+
+/*
+ * getpeername() for rsockets: copy the cm_id's peer address to addr and
+ * update *addrlen as described for rs_copy_addr().  Always returns 0.
+ */
+__declspec(dllexport)
+int rgetpeername(int socket, struct sockaddr *addr, socklen_t *addrlen)
+{
+    struct rsocket *sock;
+
+    wsa_setlasterror(0);
+    sock = (struct rsocket *)idm_at(&idm, socket);
+    rs_copy_addr(addr, rdma_get_peer_addr(sock->cm_id), addrlen);
+
+    return 0;
+}
+
+/*
+ * getsockname() for rsockets: copy the cm_id's local address to addr and
+ * update *addrlen as described for rs_copy_addr().  Always returns 0.
+ */
+__declspec(dllexport)
+int rgetsockname(int socket, struct sockaddr *addr, socklen_t *addrlen)
+{
+    struct rsocket *sock;
+
+    wsa_setlasterror(0);
+    sock = (struct rsocket *)idm_at(&idm, socket);
+    rs_copy_addr(addr, rdma_get_local_addr(sock->cm_id), addrlen);
+
+    return 0;
+}
+
+/*
+ * Accept a pending connection request on a listening rsocket.
+ *
+ * Nonblocking is usually not inherited between sockets, but we need to
+ * inherit it here to establish the connection only.  This is needed to
+ * prevent rdma_accept from blocking until the remote side finishes
+ * establishing the connection.  If we were to allow rdma_accept to block,
+ * then a single thread cannot establish a connection with itself, or
+ * two threads which try to connect to each other can deadlock trying to
+ * form a connection.
+ *
+ * Data transfers on the new socket remain blocking unless the user
+ * specifies otherwise through rfcntl.
+ *
+ * Returns the index of the new rsocket.  The new socket is either fully
+ * connected (rs_connect_rdwr) or, if rdma_accept() would block, left in
+ * rs_accepting for rs_do_connect() to finish.  When addr and addrlen are
+ * both given, the peer address is stored there.  On failure the new
+ * rsocket is freed and the failing call's result is returned; a request
+ * whose protocol version is not 1 fails with ENOTSUP.
+ */
+__declspec(dllexport)
+int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
+{
+    struct rsocket *rs, *new_rs;
+    struct rdma_conn_param param;
+    struct rs_conn_data *creq, cresp;
+    int ret;
+
+    wsa_setlasterror(0);
+    rs = (struct rsocket *)idm_at(&idm, socket);
+    new_rs = rs_alloc(rs);
+    if (!new_rs)
+        return ERR(ENOMEM);
+
+    ret = rdma_get_request(rs->cm_id, &new_rs->cm_id);
+    if (ret)
+        goto err;
+
+    ret = rs_insert(new_rs);
+    if (ret < 0)
+        goto err;
+
+    /* The connect request travels in the CM event's private data. */
+    creq = (struct rs_conn_data *) new_rs->cm_id->event->param.conn.private_data;
+    if (creq->version != 1) {
+        ret = ERR(ENOTSUP);
+        goto err;
+    }
+
+    if (rs->fd_flags & O_NONBLOCK)
+        rs_set_nonblocking(new_rs, O_NONBLOCK);
+
+    ret = rs_create_ep(new_rs);
+    if (ret)
+        goto err;
+
+    rs_save_conn_data(new_rs, creq);
+    param = new_rs->cm_id->event->param.conn;
+    rs_set_conn_data(new_rs, &param, &cresp);
+    ret = rdma_accept(new_rs->cm_id, &param);
+    if (!ret)
+        new_rs->state = rs_connect_rdwr;
+    else if (errno == EAGAIN || errno == EWOULDBLOCK)
+        new_rs->state = rs_accepting;
+    else
+        goto err;
+
+    if (addr && addrlen)
+        rgetpeername(new_rs->index, addr, addrlen);
+    return new_rs->index;
+
+err:
+    rs_free(new_rs);
+    return ret;
+}
+
+/*
+ * Socket provider's variant of raccept().
+ *
+ * The provider socket handle is first mapped to its rsocket index through
+ * WPUQuerySocketHandleContext().  A new rsocket is then set up from the
+ * pending connection request exactly as in raccept().  Before the
+ * connection is accepted, the optional condition callback is consulted:
+ * CF_ACCEPT continues, CF_DEFER fails with WSATRY_AGAIN, and CF_REJECT or
+ * any other answer fails with WSAECONNREFUSED.  Finally a socket handle
+ * carrying the new rsocket index as context is created and rdma_accept()
+ * is issued; if that would block, the new rsocket is left in rs_accepting.
+ *
+ * Returns the new socket handle, or INVALID_SOCKET on failure, in which
+ * case the new rsocket and any handle created for it are released.
+ */
+SOCKET WSPAPI WSPAccept(
+    SOCKET socket,
+    struct sockaddr *addr,
+    LPINT addrlen,
+    LPCONDITIONPROC lpfnCondition,
+    DWORD_PTR dwCallbackData,
+    LPINT lpErrno
+    )
+{
+    struct rsocket *rs, *new_rs;
+    struct rdma_conn_param param;
+    struct rs_conn_data *creq, cresp;
+    SOCKET new_socket = INVALID_SOCKET;
+    DWORD_PTR rsock = INVALID_SOCKET;
+    int ret = gMainUpCallTable.lpWPUQuerySocketHandleContext(
+        socket,
+        (PDWORD_PTR)&rsock, // __out PDWORD_PTR lpContext
+        lpErrno
+        );
+    if (SOCKET_ERROR == ret)
+        return INVALID_SOCKET;
+
+    wsa_setlasterror(0);
+    rs = (struct rsocket *)idm_at(&idm, (int)rsock);
+    new_rs = rs_alloc(rs);
+    if (!new_rs)
+        return ERR(ENOMEM);
+
+    ret = rdma_get_request(rs->cm_id, &new_rs->cm_id);
+    if (ret)
+        goto err;
+
+    ret = rs_insert(new_rs);
+    if (ret < 0)
+        goto err;
+
+    /* The connect request travels in the CM event's private data. */
+    creq = (struct rs_conn_data *) new_rs->cm_id->event->param.conn.private_data;
+    if (creq->version != 1) {
+        ret = ERR(ENOTSUP);
+        goto err;
+    }
+
+    if (rs->fd_flags & O_NONBLOCK)
+        rs_set_nonblocking(new_rs, O_NONBLOCK);
+
+    ret = rs_create_ep(new_rs);
+    if (ret)
+        goto err;
+
+    rs_save_conn_data(new_rs, creq);
+    param = new_rs->cm_id->event->param.conn;
+    rs_set_conn_data(new_rs, &param, &cresp);
+
+    if (addr && addrlen)
+        rgetpeername(new_rs->index, addr, addrlen);
+
+    if (lpfnCondition) {
+        struct sockaddr local_addr;
+        socklen_t local_addrlen = sizeof(local_addr);
+        WSABUF caller_id;
+        WSABUF callee_id;
+        WSABUF callee_data;
+
+        /* Set the caller and callee data buffer */
+        /* NOTE(review): addr may be NULL here; the callback then gets a NULL caller buffer. */
+        caller_id.buf = (char *)addr;
+        caller_id.len = sizeof(*addr);
+
+        rs_copy_addr(&local_addr, rdma_get_local_addr(new_rs->cm_id), &local_addrlen);
+
+        callee_id.buf = (char *)&local_addr;
+        callee_id.len = local_addrlen;
+
+        callee_data.buf = NULL;
+        callee_data.len = 0;
+
+        switch(lpfnCondition(&caller_id, NULL, NULL, NULL, &callee_id, &callee_data, NULL, dwCallbackData)) {
+        default:
+            /* Should never happen */
+            /* Fall through. */
+        case CF_REJECT:
+            *lpErrno = WSAECONNREFUSED;
+            ret = (int)INVALID_SOCKET;
+            goto err;
+        case CF_DEFER:
+            *lpErrno = WSATRY_AGAIN;
+            ret = (int)INVALID_SOCKET;
+            goto err;
+        case CF_ACCEPT:
+            break;
+        }
+    }
+    new_socket = gMainUpCallTable.lpWPUCreateSocketHandle(
+        gProtocolInfo.dwCatalogEntryId,
+        new_rs->index, // __in DWORD_PTR dwContext
+        lpErrno
+        );
+    if (INVALID_SOCKET == new_socket) {
+        //??? *lpErrno = WSATRY_AGAIN;
+        goto err;
+    }
+    ret = rdma_accept(new_rs->cm_id, &param);
+    if (!ret)
+        new_rs->state = rs_connect_rdwr;
+    else if (errno == EAGAIN || errno == EWOULDBLOCK)
+        new_rs->state = rs_accepting;
+    else
+        goto err;
+
+    return new_socket;
+
+err:
+    rs_free(new_rs);
+    if (new_socket != INVALID_SOCKET)
+        gMainUpCallTable.lpWPUCloseSocketHandle(new_socket, lpErrno);
+
+    return INVALID_SOCKET;
+}
+
+/*
+ * setsockopt() for rsockets.
+ *
+ * Supported levels and options:
+ *   SOL_SOCKET   SO_REUSEADDR (passed to rdma_set_option), SO_RCVBUF and
+ *                SO_SNDBUF (twice the requested size is stored, and only
+ *                while the buffer is not allocated yet), SO_LINGER,
+ *                SO_KEEPALIVE, SO_OOBINLINE
+ *   IPPROTO_TCP  TCP_NODELAY, TCP_MAXSEG (accepted, nothing stored)
+ *   IPPROTO_IPV6 IPV6_V6ONLY (passed to rdma_set_option)
+ *   SOL_RDMA     RDMA_SQSIZE, RDMA_RQSIZE, RDMA_INLINE; rejected with
+ *                EINVAL once the socket state is >= rs_opening
+ * Everything else fails with ENOTSUP.
+ *
+ * For the socket, TCP and IPv6 levels a successful call also records the
+ * on/off value as bit (1 << optname) in the per-level option mask.
+ * NOTE(review): that shift is undefined for optname >= 32; the encoding is
+ * kept unchanged here because the matching reader uses the same
+ * expression - both need to be revisited together.
+ *
+ * Returns 0 on success, otherwise the failing call's result.
+ */
+__declspec(dllexport)
+int rsetsockopt(int socket, int level, int optname,
+                const void *optval, socklen_t optlen)
+{
+    struct rsocket *rs;
+    int ret, opt_on = 0;
+    uint64_t *opts = NULL;
+
+    ret = ERR(ENOTSUP);
+    rs = (struct rsocket *)idm_at(&idm, socket);
+    switch (level) {
+    case SOL_SOCKET:
+        opts = &rs->so_opts;
+        switch (optname) {
+        case SO_REUSEADDR:
+            ret = rdma_set_option(rs->cm_id, RDMA_OPTION_ID,
+                                  RDMA_OPTION_ID_REUSEADDR,
+                                  (void *) optval, optlen);
+            /* Some failures of the underlying option are tolerated. */
+            if (ret && ((errno == ENOSYS) || ((rs->state != rs_init) &&
+                rs->cm_id->context &&
+                (rs->cm_id->verbs->device->transport_type == IBV_TRANSPORT_IB))))
+                ret = 0;
+            opt_on = *(int *) optval;
+            break;
+        case SO_RCVBUF:
+            if (!rs->rbuf)
+                rs->rbuf_size = (*(uint32_t *) optval) << 1;
+            ret = 0;
+            break;
+        case SO_SNDBUF:
+            if (!rs->sbuf)
+                rs->sbuf_size = (*(uint32_t *) optval) << 1;
+            ret = 0;
+            break;
+        case SO_LINGER:
+            /* Invert value so default so_opt = 0 is on */
+            opt_on = !((struct linger *) optval)->l_onoff;
+            ret = 0;
+            break;
+        case SO_KEEPALIVE:
+            opt_on = *(int *) optval;
+            ret = 0;
+            break;
+        case SO_OOBINLINE:
+            opt_on = *(int *) optval;
+            ret = 0;
+            break;
+        default:
+            break;
+        }
+        break;
+    case IPPROTO_TCP:
+        opts = &rs->tcp_opts;
+        switch (optname) {
+        case TCP_NODELAY:
+            opt_on = *(int *) optval;
+            ret = 0;
+            break;
+        case TCP_MAXSEG:
+            ret = 0;
+            break;
+        default:
+            break;
+        }
+        break;
+    case IPPROTO_IPV6:
+        opts = &rs->ipv6_opts;
+        switch (optname) {
+        case IPV6_V6ONLY:
+            ret = rdma_set_option(rs->cm_id, RDMA_OPTION_ID,
+                                  RDMA_OPTION_ID_AFONLY,
+                                  (void *) optval, optlen);
+            opt_on = *(int *) optval;
+            break;
+        default:
+            break;
+        }
+        /* Must not run into the SOL_RDMA handling below. */
+        break;
+    case SOL_RDMA:
+        if (rs->state >= rs_opening) {
+            ret = ERR(EINVAL);
+            break;
+        }
+
+        switch (optname) {
+        case RDMA_SQSIZE:
+            rs->sq_size = min((*(uint32_t *) optval), RS_QP_MAX_SIZE);
+            ret = 0;
+            break;
+        case RDMA_RQSIZE:
+            rs->rq_size = min((*(uint32_t *) optval), RS_QP_MAX_SIZE);
+            ret = 0;
+            break;
+        case RDMA_INLINE:
+            rs->sq_inline = min(*(uint32_t *) optval, RS_QP_MAX_SIZE);
+            if (rs->sq_inline < RS_MIN_INLINE)
+                rs->sq_inline = RS_MIN_INLINE;
+            ret = 0;
+            break;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+
+    if (!ret && opts) {
+        if (opt_on)
+            *opts |= (uint64_t)(1 << optname);
+        else
+            *opts &= ~(1 << optname);
+    }
+
+    return ret;
+}
+
+ /**
+  * \brief Read back an option of the rsocket stored at index \a socket.
+  *
+  * \param socket   Index handed to idm_at() to find the rsocket.
+  * \param level    SOL_SOCKET, IPPROTO_TCP, IPPROTO_IPV6 or SOL_RDMA.
+  * \param optname  Option name within \a level.
+  * \param optval   Receives the value: an int, or a struct linger for
+  *                 SO_LINGER.
+  * \param optlen   Receives the size of the value written to \a optval.
+  *
+  * \return ERR(status), where status is 0 for a handled option and ENOTSUP
+  *         otherwise. NOTE(review): ERR is defined outside this chunk -
+  *         confirm that it evaluates to 0 for a zero argument.
+  */
+ __declspec(dllexport)
+ int rgetsockopt(int socket, int level, int optname,
+                 void *optval, socklen_t *optlen)
+ {
+     struct rsocket *rs;
+     struct linger *lng = (struct linger *) optval;
+     int *ival = (int *) optval;
+     int status = 0;
+
+     wsa_setlasterror(0);
+     rs = (struct rsocket *)idm_at(&idm, socket);
+
+     if (level == SOL_SOCKET) {
+         switch (optname) {
+         case SO_REUSEADDR:
+         case SO_KEEPALIVE:
+         case SO_OOBINLINE:
+             /* Plain on/off flags: report the stored bit as 0 or 1. */
+             *ival = !!(rs->so_opts & (uint64_t)(1 << optname));
+             *optlen = sizeof(int);
+             break;
+         case SO_RCVBUF:
+             *ival = rs->rbuf_size;
+             *optlen = sizeof(int);
+             break;
+         case SO_SNDBUF:
+             *ival = rs->sbuf_size;
+             *optlen = sizeof(int);
+             break;
+         case SO_LINGER:
+             /* The bit is stored inverted, so a cleared bit means "on". */
+             lng->l_onoff = !(rs->so_opts & (uint64_t)(1 << optname));
+             lng->l_linger = 0;
+             *optlen = sizeof(struct linger);
+             break;
+         case SO_ERROR:
+             /* Reading the pending error also resets it. */
+             *ival = rs->err;
+             *optlen = sizeof(int);
+             rs->err = 0;
+             break;
+         default:
+             status = ENOTSUP;
+             break;
+         }
+     } else if (level == IPPROTO_TCP) {
+         switch (optname) {
+         case TCP_NODELAY:
+             *ival = !!(rs->tcp_opts & (uint64_t)(1 << optname));
+             *optlen = sizeof(int);
+             break;
+         case TCP_MAXSEG:
+             /* Derived from the path record MTU when a path is known. */
+             if (rs->cm_id && rs->cm_id->route.num_paths)
+                 *ival = 1 << (7 + rs->cm_id->route.path_rec->mtu);
+             else
+                 *ival = 2048;
+             *optlen = sizeof(int);
+             break;
+         default:
+             status = ENOTSUP;
+             break;
+         }
+     } else if (level == IPPROTO_IPV6) {
+         switch (optname) {
+         case IPV6_V6ONLY:
+             *ival = !!(rs->ipv6_opts & (uint64_t)(1 << optname));
+             *optlen = sizeof(int);
+             break;
+         default:
+             status = ENOTSUP;
+             break;
+         }
+     } else if (level == SOL_RDMA) {
+         switch (optname) {
+         case RDMA_SQSIZE:
+             *ival = rs->sq_size;
+             *optlen = sizeof(int);
+             break;
+         case RDMA_RQSIZE:
+             *ival = rs->rq_size;
+             *optlen = sizeof(int);
+             break;
+         case RDMA_INLINE:
+             *ival = rs->sq_inline;
+             *optlen = sizeof(int);
+             break;
+         default:
+             status = ENOTSUP;
+             break;
+         }
+     } else {
+         status = ENOTSUP;
+     }
+
+     return ERR(status);
+ }
+
+ /**
+  * \brief fcntl()-style control of the rsocket stored at index \a socket.
+  *
+  * Supported commands:
+  *  - F_GETFL: returns rs->fd_flags.
+  *  - F_SETFL: takes one long argument; if it contains O_NONBLOCK the
+  *    socket is switched via rs_set_nonblocking(), and on success the
+  *    argument is OR-ed into rs->fd_flags (bits are never cleared here).
+  *
+  * \param socket  Index handed to idm_at() to find the rsocket.
+  * \param cmd     Command, see above.
+  *
+  * \return The flags for F_GETFL, the rs_set_nonblocking() result (or 0)
+  *         for F_SETFL, and ERR(ENOTSUP) for any other command.
+  */
+ __declspec(dllexport)
+ int rfcntl(int socket, int cmd, ... /* arg */ )
+ {
+     struct rsocket *rs;
+     va_list args;
+     long param;
+     int ret = 0;
+
+     wsa_setlasterror(0);
+     rs = (struct rsocket *)idm_at(&idm, socket);
+     va_start(args, cmd);
+     switch (cmd) {
+     case F_GETFL:
+         /*
+          * Do not return from here: every va_start() must be paired with
+          * va_end() before the function returns.
+          */
+         ret = (int) rs->fd_flags;
+         break;
+     case F_SETFL:
+         param = va_arg(args, long);
+         if (param & O_NONBLOCK)
+             ret = rs_set_nonblocking(rs, O_NONBLOCK);
+
+         if (!ret)
+             rs->fd_flags |= param;
+         break;
+     default:
+         ret = ERR(ENOTSUP);
+         break;
+     }
+     va_end(args);
+     return ret;
+ }
+
+ /**
+  * \brief ioctlsocket()-style control of the rsocket stored at index
+  *        \a socket.
+  *
+  * Only FIONBIO is implemented: a non-zero *argp selects non-blocking
+  * mode, zero selects blocking mode. rs->fd_flags is updated to match the
+  * requested mode before rs_set_nonblocking() is called.
+  *
+  * \param socket  Index handed to idm_at() to find the rsocket.
+  * \param cmd     Command; FIONREAD, SIOCATMARK and everything else are
+  *                rejected.
+  * \param argp    For FIONBIO, points to the requested mode.
+  *
+  * \return The rs_set_nonblocking() result for FIONBIO, ERR(ENOTSUP)
+  *         otherwise.
+  */
+ __declspec(dllexport)
+ int rioctlsocket(int socket, long cmd, u_long* argp)
+ {
+     struct rsocket *rs;
+
+     wsa_setlasterror(0);
+     rs = (struct rsocket *)idm_at(&idm, socket);
+     switch (cmd) {
+     case FIONBIO:
+         if (*argp)
+             rs->fd_flags |= O_NONBLOCK;
+         else
+             /*
+              * Previously the flag was only ever set, so a socket could
+              * not be reported as blocking again after FIONBIO with 0.
+              */
+             rs->fd_flags &= ~O_NONBLOCK;
+         return rs_set_nonblocking(rs, *argp ? O_NONBLOCK : 0);
+     case FIONREAD:
+     case SIOCATMARK:
+         return ERR(ENOTSUP);
+     default:
+         return ERR(ENOTSUP);
+     }
+ }
+
+/**
+ * \brief Get current RSockets status information for the calling process
+ * (by calling librdmacm.dll directly).
+ *
+ * \param lppStatusBuffer Pointer to a buffer with an array of RS_STATUS information entries
+ * to be allocated and returned. The caller is responsible for
+ * deallocating that buffer via free() when it is no longer needed.
+ *
+ * \return The number of RS_STATUS entries contained in the status buffer
+ * returned by *lppStatusBuffer.
+ */
+int rsGetStatus ( __out LPRS_STATUS *lppStatusBuffer )
+{
+ DWORD i, e;
+ DWORD dwNumEntries = 0;
+ struct rsocket *rs;
+ char *pst = "";
+
+ for (i = 0; i < IDX_MAX_INDEX; i++)
+ {
+ if (idm_lookup(&idm, i))
+ dwNumEntries++;
+ }
+
+ if ( 0 == dwNumEntries ||
+ NULL == (*lppStatusBuffer = (LPRS_STATUS)malloc(dwNumEntries * sizeof(**lppStatusBuffer)))
+ )
+ {
+ return 0;
+ }
+
+ for (
+ i = 0, e = 0;
+ i < IDX_MAX_INDEX && e < dwNumEntries;
+ i++
+ )
+ {
+ if (rs = (struct rsocket *)idm_lookup(&idm, i))
+ {
+ lppStatusBuffer[e]->src_addr = rs->cm_id->route.addr.src_addr;
+ lppStatusBuffer[e]->dst_addr = rs->cm_id->route.addr.dst_addr;
+ switch (rs->state)
+ {
+ case rs_init: pst = "INIT"; break;
+ case rs_bound: pst = "BOUND"; break;
+ case rs_listening: pst = "LISTENING"; break;
+ case rs_opening: pst = "OPENING"; break;
+ case rs_resolving_addr: pst = "RESOLVING_ADDR"; break;
+ case rs_resolving_route:pst = "RESOLVING_ROUTE";break;
+ case rs_connecting: pst = "CONNECTING"; break;
+ case rs_accepting: pst = "ACCEPTING"; break;
+ case rs_connected: pst = "CONNECTED"; break;
+ case rs_connect_wr: pst = "CONNECT_WR"; break;
+ case rs_connect_rd: pst = "CONNECT_RD"; break;
+ case rs_connect_rdwr: pst = "CONNECT_RDWR"; break;
+ case rs_connect_error: pst = "CONNECT_ERROR"; break;
+ case rs_disconnected: pst = "DISCONNECTED"; break;
+ case rs_error: pst = "ERROR"; break;
+ default: pst = "UNKNOWN";
+ }
+ strncpy(lppStatusBuffer[e]->state, pst, sizeof(lppStatusBuffer[e]->state) - 1);
+ lppStatusBuffer[e]->state[sizeof(lppStatusBuffer[e]->state)] = '\0';
+ e++;
+ }
+ }
+
+ return e;
+}
Index: ulp/librdmacm/src/Sources
===================================================================
--- ulp/librdmacm/src/Sources (revision 3419)
+++ ulp/librdmacm/src/Sources (working copy)
@@ -12,10 +12,12 @@
cma.rc \
cma_main.cpp \
cma.cpp \
- addrinfo.cpp
+ addrinfo.cpp \
+ rsocket.cpp \
+ indexer.cpp
INCLUDES = ..\include;..\..\..\inc;..\..\..\inc\user;..\..\libibverbs\include;\
- ..\..\..\inc\user\linux;
+ ..\..\..\inc\user\linux
USER_C_FLAGS = $(USER_C_FLAGS) -DEXPORT_CMA_SYMBOLS
@@ -26,4 +28,5 @@
$(SDK_LIB_PATH)\iphlpapi.lib \
$(TARGETPATH)\*\ibat.lib \
$(TARGETPATH)\*\libibverbs.lib \
- $(TARGETPATH)\*\winverbs.lib
+ $(TARGETPATH)\*\winverbs.lib \
+ $(TARGETPATH)\*\complib.lib
This message and attachment(s) are intended solely for use by the addressee and may contain information that is privileged, confidential or otherwise exempt from disclosure under applicable law.
If you are not the intended recipient or agent thereof responsible for delivering this message to the intended recipient, you are hereby notified that any dissemination, distribution or copying of this communication is strictly prohibited.
If you have received this communication in error, please notify the sender immediately by telephone and with a 'reply' message.
Thank you for your co-operation.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20130110/885b63a8/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: RSocket.zip
Type: application/x-zip-compressed
Size: 42645 bytes
Desc: RSocket.zip
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20130110/885b63a8/attachment.bin>
More information about the ofw
mailing list