[openib-general] [PATCH] update to new FMR API
Roland Dreier
roland at topspin.com
Fri Aug 27 19:50:40 PDT 2004
The patch below updates my branch to the new FMR API. I made a few
cleanups to the API while implementing the core support:
- I called the FMR free function dealloc_fmr rather than free_fmr, to
match alloc_mw/dealloc_mw and alloc_pd/dealloc_pd.
- I got rid of the map_fmr method since we don't do virtual memory
registration in the kernel.
- I added an iova parameter to the map_phys_fmr method since there
has to be a way for the consumer to specify the address.
I also moved a cleaned up version of the 'FMR pool' stuff into
core_fmr_pool.c, since it seems like a useful library for ULPs to have
access to. We can also discuss further changes to this interface.
mthca still doesn't actually implement FMRs.
- Roland
Index: src/linux-kernel/infiniband/ulp/srp/srp_host.c
===================================================================
--- src/linux-kernel/infiniband/ulp/srp/srp_host.c (revision 692)
+++ src/linux-kernel/infiniband/ulp/srp/srp_host.c (working copy)
@@ -3045,8 +3045,8 @@
status = srptp_register_memory(srp_pkt->conn,
sr_list,
- ((unsigned long)sr_list->
- data & (PAGE_SIZE - 1)),
+ ((unsigned long)sr_list->data &
+ (PAGE_SIZE - 1)),
dma_addr_list, dma_addr_index);
if (status == -EAGAIN) {
Index: src/linux-kernel/infiniband/ulp/srp/srp_host.h
===================================================================
--- src/linux-kernel/infiniband/ulp/srp/srp_host.h (revision 692)
+++ src/linux-kernel/infiniband/ulp/srp/srp_host.h (working copy)
@@ -53,7 +53,7 @@
#include "ts_kernel_trace.h"
#include "ts_kernel_thread.h"
#include <ib_verbs.h>
-#include "ts_ib_core.h"
+#include <ib_fmr_pool.h>
#include "ts_ib_dm_client_host.h"
#include "ts_ib_sa_client.h"
#include "ts_ib_cm_types.h"
@@ -234,7 +234,7 @@
u64 r_addr; /* RDMA buffer address to be used by the
* target */
u32 r_key;
- struct ib_fmr *mr_hndl; /* buffer's memory handle */
+ struct ib_pool_fmr *mr_hndl; /* buffer's memory handle */
} srp_host_buf_t;
typedef struct _srp_pkt_t {
Index: src/linux-kernel/infiniband/ulp/srp/srptp.c
===================================================================
--- src/linux-kernel/infiniband/ulp/srp/srptp.c (revision 692)
+++ src/linux-kernel/infiniband/ulp/srp/srptp.c (working copy)
@@ -334,7 +334,7 @@
* fmr_params.dirty_watermark = 256;
*/
- status = ib_fmr_pool_create(hca->pd_hndl,
+ status = ib_create_fmr_pool(hca->pd_hndl,
&fmr_params, &hca->fmr_pool);
if (status != 0) {
@@ -348,7 +348,7 @@
fmr_params.pool_size = 1024;
fmr_params.dirty_watermark = 256;
- status = ib_fmr_pool_create(hca->pd_hndl,
+ status = ib_create_fmr_pool(hca->pd_hndl,
&fmr_params, &hca->fmr_pool);
if (status != 0) {
@@ -370,7 +370,7 @@
while (hca_index >= 0) {
hca = &hca_params[hca_index];
if (hca->fmr_pool)
- ib_fmr_pool_destroy(hca->fmr_pool);
+ ib_destroy_fmr_pool(hca->fmr_pool);
if (hca->pd_hndl)
ib_dealloc_pd(hca->pd_hndl);
module_put(hca->ca_hndl->owner);
@@ -397,7 +397,7 @@
if (!hca_params[i].valid)
continue;
- status = ib_fmr_pool_destroy(hca->fmr_pool);
+ status = ib_destroy_fmr_pool(hca->fmr_pool);
if (status != 0)
TS_REPORT_STAGE(MOD_SRPTP,
@@ -801,7 +801,7 @@
{
int status;
u32 l_key;
- u64 start_address = (unsigned long) buf->data;
+ u64 start_address = (unsigned long) buf->data - offset;
if (buf == NULL) {
@@ -813,17 +813,20 @@
TS_REPORT_DATA(MOD_SRPTP, "iova %llx, iova_offset %x length 0x%x",
start_address, offset, buf->size);
- status = ib_fmr_register_physical(conn->target->port->hca->fmr_pool,
- buffer_list, list_len,
- &start_address, offset,
- &buf->mr_hndl, &l_key, &buf->r_key);
+ buf->mr_hndl = ib_fmr_pool_map_phys(conn->target->port->hca->fmr_pool,
+ buffer_list, list_len,
+ &start_address);
- if (status) {
+ if (IS_ERR(buf->mr_hndl)) {
+ status = PTR_ERR(buf->mr_hndl);
TS_REPORT_DATA(MOD_SRPTP, "Memory registration failed: %d",
status);
return (status);
}
+ l_key = buf->mr_hndl->fmr->lkey;
+ buf->r_key = buf->mr_hndl->fmr->rkey;
+
TS_REPORT_DATA(MOD_SRPTP,
"l_key %x, r_key %x, mr_hndl %x",
l_key, buf->r_key, buf->mr_hndl);
@@ -837,7 +840,7 @@
TS_REPORT_DATA(MOD_SRPTP, "releasing mr_hndl %x", buf->mr_hndl);
- status = ib_fmr_deregister(buf->mr_hndl);
+ status = ib_fmr_pool_unmap(buf->mr_hndl);
if (status != 0) {
TS_REPORT_WARN(MOD_SRPTP, "de-registration failed: %d", status);
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_conn.c
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_conn.c (revision 692)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_conn.c (working copy)
@@ -1895,7 +1895,7 @@
static s32 _sdp_device_table_init(struct sdev_root *dev_root)
{
#ifdef _TS_SDP_AIO_SUPPORT
- tTS_IB_FMR_POOL_PARAM_STRUCT fmr_param_s;
+ struct ib_fmr_pool_param fmr_param_s;
#endif
struct ib_phys_buf buffer_list;
struct ib_device_properties node_info;
@@ -2012,7 +2012,7 @@
/*
* create SDP memory pool
*/
- result = ib_fmr_pool_create(hca->pd,
+ result = ib_create_fmr_pool(hca->pd,
&fmr_param_s,
&hca->fmr_pool);
if (0 > result) {
@@ -2096,7 +2096,7 @@
if (NULL != hca->fmr_pool) {
- (void)ib_fmr_pool_destroy(hca->fmr_pool);
+ (void)ib_destroy_fmr_pool(hca->fmr_pool);
}
if (hca->mem_h) {
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_iocb.c
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_iocb.c (revision 692)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_iocb.c (working copy)
@@ -72,28 +72,28 @@
/*
* prime io address with physical address of first byte?
*/
- iocb->io_addr = iocb->page_array[0] + iocb->page_offset;
+ iocb->io_addr = iocb->page_array[0];
/*
* register IOCBs physical memory
*/
- result = ib_fmr_register_physical(conn->fmr_pool,
+ iocb->mem = ib_fmr_pool_map_phys(conn->fmr_pool,
(u64 *) iocb->page_array,
iocb->page_count,
- (u64 *) & iocb->io_addr,
- iocb->page_offset,
- &iocb->mem,
- &iocb->l_key, &iocb->r_key);
- if (0 != result) {
- if (-EAGAIN != result) {
+ &iocb->io_addr);
+ if (IS_ERR(iocb->mem)) {
+ if (-EAGAIN != PTR_ERR(iocb->mem)) {
TS_TRACE(MOD_LNX_SDP, T_VERY_VERBOSE, TRACE_FLOW_WARN,
"POST: Error <%d> registering physical memory. <%d:%d:%d>",
- result, iocb->len, iocb->page_count,
+ PTR_ERR(iocb->mem), iocb->len, iocb->page_count,
iocb->page_offset);
}
goto error_register;
}
+
+ iocb->l_key = iocb->mem->fmr->lkey;
+ iocb->r_key = iocb->mem->fmr->rkey;
/*
* some data may have already been consumed, adjust the io address
* to take this into account
@@ -121,7 +121,7 @@
if (NULL != iocb->page_array) {
- result = ib_fmr_deregister(iocb->mem);
+ result = ib_fmr_pool_unmap(iocb->mem);
if (0 > result) {
TS_TRACE(MOD_LNX_SDP, T_VERBOSE, TRACE_FLOW_WARN,
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_iocb.h
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_iocb.h (revision 692)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_iocb.h (working copy)
@@ -26,6 +26,8 @@
#include <linux/config.h>
+#include <ib_fmr_pool.h>
+
/*
* topspin specific includes.
*/
@@ -85,7 +87,7 @@
/*
* IB specific information for zcopy.
*/
- struct ib_fmr *mem; /* memory region handle */
+ struct ib_pool_fmr *mem; /* memory region handle */
u32 l_key; /* local access key */
u32 r_key; /* remote access key */
u64 io_addr; /* virtual IO address */
Index: src/linux-kernel/infiniband/include/ib_verbs.h
===================================================================
--- src/linux-kernel/infiniband/include/ib_verbs.h (revision 692)
+++ src/linux-kernel/infiniband/include/ib_verbs.h (working copy)
@@ -369,6 +369,12 @@
int mw_access_flags;
};
+struct ib_fmr_attr {
+ int max_pages;
+ int max_maps;
+ u8 page_size;
+};
+
struct ib_pd {
struct ib_device *device;
atomic_t usecnt; /* count all resources */
@@ -412,9 +418,15 @@
u32 rkey;
};
+struct ib_fmr {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct list_head list;
+ u32 lkey;
+ u32 rkey;
+};
+
struct ib_device {
- IB_DECLARE_MAGIC
-
struct module *owner;
struct pci_dev *dma_device;
@@ -490,10 +502,14 @@
struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
int (*dealloc_mw)(struct ib_mw *mw);
- ib_fmr_create_func fmr_create;
- ib_fmr_destroy_func fmr_destroy;
- ib_fmr_map_func fmr_map;
- ib_fmr_unmap_func fmr_unmap;
+ struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+ int (*map_phys_fmr)(struct ib_fmr *fmr,
+ u64 *page_list, int list_len,
+ u64 iova);
+ int (*unmap_fmr)(struct list_head *fmr_list);
+ int (*dealloc_fmr)(struct ib_fmr *fmr);
int (*attach_mcast)(struct ib_qp *qp,
union ib_gid *gid,
u16 lid);
@@ -612,7 +628,7 @@
struct ib_mw *mw,
struct ib_mw_bind *mw_bind)
{
- /* XXX reference counting in mw? */
+ /* XXX reference counting in corresponding MR? */
return mw->device->bind_mw ?
mw->device->bind_mw(qp, mw, mw_bind) :
-ENOSYS;
@@ -620,6 +636,20 @@
int ib_dealloc_mw(struct ib_mw *mw);
+struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+
+static inline int ib_map_phys_fmr(struct ib_fmr *fmr,
+ u64 *page_list, int list_len,
+ u64 iova)
+{
+ return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova);
+}
+
+int ib_unmap_fmr(struct list_head *fmr_list);
+int ib_dealloc_fmr(struct ib_fmr *fmr);
+
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
Index: src/linux-kernel/infiniband/include/ts_ib_sma_provider_types.h
===================================================================
--- src/linux-kernel/infiniband/include/ts_ib_sma_provider_types.h (revision 692)
+++ src/linux-kernel/infiniband/include/ts_ib_sma_provider_types.h (working copy)
@@ -31,7 +31,6 @@
# include <stddef.h> /* for size_t */
#endif
-#include "ts_ib_magic.h"
#include <ib_verbs.h>
#include "ts_ib_mad_types.h"
#include "ts_ib_mad_smi_types.h"
@@ -291,8 +290,6 @@
/* The provider structure that a device-specific SMA needs to fill in. */
struct ib_sma_provider {
- IB_DECLARE_MAGIC
-
struct ib_device *device;
tTS_IB_SMA_PROVIDER_FLAGS flags;
void *sma; // Generic SMA use
Index: src/linux-kernel/infiniband/include/ts_ib_pma_provider_types.h
===================================================================
--- src/linux-kernel/infiniband/include/ts_ib_pma_provider_types.h (revision 692)
+++ src/linux-kernel/infiniband/include/ts_ib_pma_provider_types.h (working copy)
@@ -31,7 +31,6 @@
# include <stddef.h> /* for size_t */
#endif
-#include "ts_ib_magic.h"
#include <ib_verbs.h>
#include "ts_ib_mad_types.h"
@@ -119,8 +118,6 @@
/* The provider structure that a device-specific PMA needs to fill in. */
struct ib_pma_provider {
- IB_DECLARE_MAGIC
-
struct ib_device *device;
tTS_IB_PMA_PROVIDER_FLAGS flags;
void *pma; // Generic PMA use
Index: src/linux-kernel/infiniband/include/ts_ib_core_types.h
===================================================================
--- src/linux-kernel/infiniband/include/ts_ib_core_types.h (revision 692)
+++ src/linux-kernel/infiniband/include/ts_ib_core_types.h (working copy)
@@ -33,8 +33,6 @@
# include <stdint.h>
#endif
-#include <ts_ib_magic.h>
-
/* basic type definitions */
enum {
@@ -210,36 +208,6 @@
IB_PKEY_CHANGE,
};
-struct ib_async_obj {
- void * free_ptr;
- spinlock_t lock;
- int pending;
- int dead;
-};
-
-struct ib_fmr_pool; /* actual definition in core_fmr.c */
-struct ib_pd;
-
-struct ib_fmr {
- IB_DECLARE_MAGIC
- struct ib_device *device;
- void *private;
- struct ib_fmr_pool *pool;
- u32 lkey;
- u32 rkey;
- int ref_count;
- int remap_count;
- struct list_head list;
- tTS_HASH_NODE_STRUCT cache_node;
- u64 io_virtual_address;
- u64 iova_offset;
- int page_list_len;
- u64 page_list[0];
-};
-
-typedef void (*ib_fmr_flush_func)(struct ib_fmr_pool *pool,
- void *arg);
-
struct ib_async_event_handler; /* actual definition in core_async.c */
struct ib_async_event_record {
@@ -271,10 +239,6 @@
IB_DEVICE_SYSTEM_IMAGE_GUID = 1 << 0
};
-enum ib_memory_access {
- IB_ACCESS_ENABLE_WINDOW = 1 << 4
-};
-
/* structures */
enum {
@@ -294,16 +258,6 @@
tTS_IB_GUID system_image_guid;
};
-struct ib_fmr_pool_param {
- int max_pages_per_fmr;
- enum ib_memory_access access;
- int pool_size;
- int dirty_watermark;
- ib_fmr_flush_func flush_function;
- void *flush_arg;
- int cache:1;
-};
-
struct ib_sm_path {
u16 sm_lid;
tTS_IB_SL sm_sl;
@@ -332,21 +286,6 @@
tTS_IB_PORT port,
int index,
tTS_IB_GID gid);
-typedef int (*ib_fmr_create_func)(struct ib_pd *pd,
- enum ib_memory_access access,
- int max_pages,
- int max_remaps,
- struct ib_fmr *fmr);
-typedef int (*ib_fmr_destroy_func)(struct ib_fmr *fmr);
-typedef int (*ib_fmr_map_func)(struct ib_fmr *fmr,
- u64 *page_list,
- int list_len,
- u64 *io_virtual_address,
- u64 iova_offset,
- u32 *lkey,
- u32 *rkey);
-typedef int (*ib_fmr_unmap_func)(struct ib_device *device,
- struct list_head *fmr_list);
struct ib_mad;
Index: src/linux-kernel/infiniband/include/ts_ib_magic.h
===================================================================
--- src/linux-kernel/infiniband/include/ts_ib_magic.h (revision 652)
+++ src/linux-kernel/infiniband/include/ts_ib_magic.h (working copy)
@@ -1,69 +0,0 @@
-/*
- This software is available to you under a choice of one of two
- licenses. You may choose to be licensed under the terms of the GNU
- General Public License (GPL) Version 2, available at
- <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD
- license, available in the LICENSE.TXT file accompanying this
- software. These details are also available at
- <http://openib.org/license.html>.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
- Copyright (c) 2004 Topspin Communications. All rights reserved.
-
- $Id$
-*/
-
-#ifndef _IB_MAGIC_H
-#define _IB_MAGIC_H
-
-#include "ts_kernel_trace.h"
-
-#define IB_MAGIC_INVALID 0xbadf00d
-#define IB_MAGIC_DEVICE 0x11f11f
-#define IB_MAGIC_ADDRESS 0x33f11f
-#define IB_MAGIC_QP 0x44f11f
-#define IB_MAGIC_CQ 0x55f11f
-#define IB_MAGIC_MR 0x66f11f
-#define IB_MAGIC_FMR 0x77f11f
-#define IB_MAGIC_FMR_POOL 0x88f11f
-#define IB_MAGIC_ASYNC 0x99f11f
-#define IB_MAGIC_FILTER 0xaaf11f
-#define IB_MAGIC_SMA 0xbbf11f
-#define IB_MAGIC_PMA 0xccf11f
-#define IB_MAGIC_MW 0xddf11f
-
-#define IB_DECLARE_MAGIC \
- unsigned long magic;
-#define IB_GET_MAGIC(ptr) \
- (*(unsigned long *) (ptr))
-#define IB_SET_MAGIC(ptr, type) \
- do { \
- IB_GET_MAGIC(ptr) = IB_MAGIC_##type; \
- } while (0)
-#define IB_CLEAR_MAGIC(ptr) \
- do { \
- IB_GET_MAGIC(ptr) = IB_MAGIC_INVALID; \
- } while (0)
-#define IB_CHECK_MAGIC(ptr, type) \
- do { \
- if (!ptr) { \
- return -EINVAL; \
- } \
- if (IB_GET_MAGIC(ptr) != IB_MAGIC_##type) { \
- TS_REPORT_WARN(MOD_KERNEL_IB, "Bad magic 0x%lx at %p for %s", \
- IB_GET_MAGIC(ptr), ptr, #type); \
- return -EINVAL; \
- } \
- } while (0)
-#define IB_TEST_MAGIC(ptr, type) \
- (IB_GET_MAGIC(ptr) == IB_MAGIC_##type)
-
-#endif /* _IB_MAGIC_H */
Index: src/linux-kernel/infiniband/include/ts_ib_core.h
===================================================================
--- src/linux-kernel/infiniband/include/ts_ib_core.h (revision 692)
+++ src/linux-kernel/infiniband/include/ts_ib_core.h (working copy)
@@ -63,21 +63,6 @@
int index,
tTS_IB_GID gid);
-int ib_fmr_pool_create(struct ib_pd *pd,
- struct ib_fmr_pool_param *params,
- struct ib_fmr_pool **pool);
-int ib_fmr_pool_destroy(struct ib_fmr_pool *pool);
-int ib_fmr_pool_force_flush(struct ib_fmr_pool *pool);
-int ib_fmr_register_physical(struct ib_fmr_pool *pool,
- uint64_t *page_list,
- int list_len,
- uint64_t *io_virtual_address,
- uint64_t iova_offset,
- struct ib_fmr **fmr,
- u32 *lkey,
- u32 *rkey);
-int ib_fmr_deregister(struct ib_fmr *fmr);
-
int ib_async_event_handler_register(struct ib_async_event_record *record,
ib_async_event_handler_func function,
void *arg,
Index: src/linux-kernel/infiniband/include/ib_fmr_pool.h
===================================================================
--- src/linux-kernel/infiniband/include/ib_fmr_pool.h (revision 0)
+++ src/linux-kernel/infiniband/include/ib_fmr_pool.h (revision 0)
@@ -0,0 +1,69 @@
+/*
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available at
+ * <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD
+ * license, available in the LICENSE.TXT file accompanying this
+ * software. These details are also available at
+ * <http://openib.org/license.html>.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright (c) 2004 Topspin Communications. All rights reserved.
+ *
+ * $Id$
+ */
+
+#if !defined(IB_FMR_POOL_H)
+#define IB_FMR_POOL_H
+
+#include <ib_verbs.h>
+
+struct ib_fmr_pool;
+
+struct ib_fmr_pool_param {
+ int max_pages_per_fmr;
+ enum ib_access_flags access;
+ int pool_size;
+ int dirty_watermark;
+ void (*flush_function)(struct ib_fmr_pool *pool,
+ void * arg);
+ void *flush_arg;
+ unsigned cache:1;
+};
+
+struct ib_pool_fmr {
+ struct ib_fmr *fmr;
+ struct ib_fmr_pool *pool;
+ struct list_head list;
+ struct hlist_node cache_node;
+ int ref_count;
+ int remap_count;
+ u64 io_virtual_address;
+ int page_list_len;
+ u64 page_list[0];
+};
+
+int ib_create_fmr_pool(struct ib_pd *pd,
+ struct ib_fmr_pool_param *params,
+ struct ib_fmr_pool **pool_handle);
+
+int ib_destroy_fmr_pool(struct ib_fmr_pool *pool);
+
+int ib_flush_fmr_pool(struct ib_fmr_pool *pool);
+
+struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
+ u64 *page_list,
+ int list_len,
+ u64 *io_virtual_address);
+
+int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr);
+
+#endif /* IB_FMR_POOL_H */
Property changes on: src/linux-kernel/infiniband/include/ib_fmr_pool.h
___________________________________________________________________
Name: svn:keywords
+ Id
Index: src/linux-kernel/infiniband/core/Makefile
===================================================================
--- src/linux-kernel/infiniband/core/Makefile (revision 692)
+++ src/linux-kernel/infiniband/core/Makefile (working copy)
@@ -42,6 +42,7 @@
core_cq.o \
core_mr.o \
core_fmr.o \
+ core_fmr_pool.o \
core_mw.o \
core_mcast.o \
core_async.o \
Index: src/linux-kernel/infiniband/core/mad_ib.c
===================================================================
--- src/linux-kernel/infiniband/core/mad_ib.c (revision 692)
+++ src/linux-kernel/infiniband/core/mad_ib.c (working copy)
@@ -148,8 +148,6 @@
{
struct ib_mad *buf;
- IB_CHECK_MAGIC(mad->device, DEVICE);
-
buf = kmem_cache_alloc(mad_cache,
(in_atomic() || irqs_disabled()) ?
GFP_ATOMIC : GFP_KERNEL);
Index: src/linux-kernel/infiniband/core/core_cache.c
===================================================================
--- src/linux-kernel/infiniband/core/core_cache.c (revision 692)
+++ src/linux-kernel/infiniband/core/core_cache.c (working copy)
@@ -37,8 +37,6 @@
{
struct ib_device_private *priv;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
memcpy(node_guid, priv->node_guid, sizeof (tTS_IB_GUID));
@@ -53,8 +51,6 @@
struct ib_device_private *priv;
unsigned int seq;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (port < priv->start_port || port > priv->end_port)
@@ -78,8 +74,6 @@
struct ib_device_private *priv;
unsigned int seq;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (port < priv->start_port || port > priv->end_port)
@@ -103,8 +97,6 @@
struct ib_device_private *priv;
unsigned int seq;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (port < priv->start_port || port > priv->end_port)
@@ -129,8 +121,6 @@
struct ib_device_private *priv;
unsigned int seq;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (port < priv->start_port || port > priv->end_port)
@@ -217,8 +207,6 @@
struct ib_device_private *priv;
unsigned int seq;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (port < priv->start_port || port > priv->end_port)
@@ -246,8 +234,6 @@
int i;
int found;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (port < priv->start_port || port > priv->end_port)
Index: src/linux-kernel/infiniband/core/core_fmr.c
===================================================================
--- src/linux-kernel/infiniband/core/core_fmr.c (revision 692)
+++ src/linux-kernel/infiniband/core/core_fmr.c (working copy)
@@ -21,528 +21,58 @@
$Id$
*/
-#include "core_priv.h"
-
-#include "ts_kernel_trace.h"
-#include "ts_kernel_services.h"
-#include "ts_kernel_thread.h"
-#include "ts_kernel_hash.h"
-
-#include <linux/version.h>
-#include <linux/module.h>
-
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
-#if defined(IB_FMR_NODEBUG)
-# define IB_COMPILE_FMR_DEBUGGING_CODE 0
-#else
-# define IB_COMPILE_FMR_DEBUGGING_CODE 1
-#endif
+#include "core_priv.h"
-enum {
- TS_IB_FMR_MAX_REMAPS = 32,
-
- TS_IB_FMR_HASH_BITS = 8,
- TS_IB_FMR_HASH_SIZE = 1 << TS_IB_FMR_HASH_BITS,
- TS_IB_FMR_HASH_MASK = TS_IB_FMR_HASH_SIZE - 1
-};
-
-/*
- If an FMR is not in use, then the list member will point to either
- its pool's free_list (if the FMR can be mapped again; that is,
- remap_count < TS_IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the
- FMR needs to be unmapped before being remapped). In either of these
- cases it is a bug if the ref_count is not 0. In other words, if
- ref_count is > 0, then the list member must not be linked into
- either free_list or dirty_list.
-
- The cache_node member is used to link the FMR into a cache bucket
- (if caching is enabled). This is independent of the reference count
- of the FMR. When a valid FMR is released, its ref_count is
- decremented, and if ref_count reaches 0, the FMR is placed in either
- free_list or dirty_list as appropriate. However, it is not removed
- from the cache and may be "revived" if a call to
- ib_fmr_register_physical() occurs before the FMR is remapped. In
- this case we just increment the ref_count and remove the FMR from
- free_list/dirty_list.
-
- Before we remap an FMR from free_list, we remove it from the cache
- (to prevent another user from obtaining a stale FMR). When an FMR
- is released, we add it to the tail of the free list, so that our
- cache eviction policy is "least recently used."
-
- All manipulation of ref_count, list and cache_node is protected by
- pool_lock to maintain consistency.
-*/
-
-struct ib_fmr_pool {
- IB_DECLARE_MAGIC
- struct ib_device *device;
-
- spinlock_t pool_lock;
-
- int pool_size;
- int max_pages;
- int dirty_watermark;
- int dirty_len;
- struct list_head free_list;
- struct list_head dirty_list;
- tTS_HASH_HEAD cache_bucket;
-
- tTS_KERNEL_THREAD thread;
-
- ib_fmr_flush_func flush_function;
- void *flush_arg;
-
- atomic_t req_ser;
- atomic_t flush_ser;
-
- wait_queue_head_t thread_wait;
- wait_queue_head_t force_wait;
-};
-
-static inline u32 ib_fmr_hash(u64 first_page)
+struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
+ int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
{
- return tsKernelHashFunction((u32) (first_page >> PAGE_SHIFT),
- TS_IB_FMR_HASH_MASK);
-}
-
-/* Caller must hold pool_lock */
-static inline struct ib_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
- u64 *page_list,
- int page_list_len,
- u64 io_virtual_address,
- u64 iova_offset)
-{
- tTS_HASH_HEAD bucket;
struct ib_fmr *fmr;
- if (!pool->cache_bucket) {
- return NULL;
- }
+ if (!pd->device->alloc_fmr)
+ return ERR_PTR(-ENOSYS);
- bucket = &pool->cache_bucket[ib_fmr_hash(*page_list)];
-
- TS_KERNEL_HASH_FOR_EACH_ENTRY(fmr, bucket, cache_node)
- if (io_virtual_address == fmr->io_virtual_address &&
- iova_offset == fmr->iova_offset &&
- page_list_len == fmr->page_list_len &&
- !memcmp(page_list, fmr->page_list, page_list_len * sizeof *page_list))
- return fmr;
-
- return NULL;
-}
-
-/* Caller must hold pool_lock */
-static inline void ib_fmr_cache_store(struct ib_fmr_pool *pool,
- struct ib_fmr *fmr)
-{
- tsKernelHashNodeAdd(&fmr->cache_node,
- &pool->cache_bucket[ib_fmr_hash(fmr->page_list[0])]);
-}
-
-/* Caller must hold pool_lock */
-static inline void ib_fmr_cache_remove(struct ib_fmr *fmr)
-{
- if (!tsKernelHashNodeUnhashed(&fmr->cache_node))
- tsKernelHashNodeRemove(&fmr->cache_node);
-}
-
-static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
-{
- int ret;
- struct list_head *ptr;
- struct ib_fmr *fmr;
- LIST_HEAD(unmap_list);
-
- spin_lock_irq(&pool->pool_lock);
-
- list_for_each(ptr, &pool->dirty_list) {
- fmr = list_entry(ptr, struct ib_fmr, list);
-
- ib_fmr_cache_remove(fmr);
- fmr->remap_count = 0;
-
- if (IB_COMPILE_FMR_DEBUGGING_CODE) {
- if (fmr->ref_count !=0) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "Unmapping FMR 0x%08x with ref count %d",
- fmr, fmr->ref_count);
- }
- }
+ fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
+ if (!IS_ERR(fmr)) {
+ fmr->device = pd->device;
+ fmr->pd = pd;
+ atomic_inc(&pd->usecnt);
}
- list_splice(&pool->dirty_list, &unmap_list);
- INIT_LIST_HEAD(&pool->dirty_list);
- pool->dirty_len = 0;
-
- spin_unlock_irq(&pool->pool_lock);
-
- if (list_empty(&unmap_list)) {
- return;
- }
-
- ret = pool->device->fmr_unmap(pool->device, &unmap_list);
- if (ret) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "fmr_unmap for %s returns %d",
- pool->device->name, ret);
- }
-
- spin_lock_irq(&pool->pool_lock);
- list_splice(&unmap_list, &pool->free_list);
- spin_unlock_irq(&pool->pool_lock);
+ return fmr;
}
+EXPORT_SYMBOL(ib_alloc_fmr);
-static void ib_fmr_cleanup_thread(void *pool_ptr)
+int ib_unmap_fmr(struct list_head *fmr_list)
{
- struct ib_fmr_pool *pool = pool_ptr;
- int ret;
+ struct ib_fmr *fmr;
- while (!signal_pending(current)) {
- ret = wait_event_interruptible(pool->thread_wait,
- (pool->dirty_len >=
- pool->dirty_watermark) ||
- (atomic_read(&pool->flush_ser) -
- atomic_read(&pool->req_ser) < 0));
+ if (list_empty(fmr_list))
+ return 0;
- TS_TRACE(MOD_KERNEL_IB, T_VERY_VERBOSE, TRACE_KERNEL_IB_GEN,
- "cleanup thread woken up, dirty len = %d",
- pool->dirty_len);
-
- if (ret)
- break;
-
- ib_fmr_batch_release(pool);
-
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
- }
-
- TS_REPORT_CLEANUP(MOD_KERNEL_IB, "FMR cleanup thread exiting");
+ fmr = list_entry(fmr_list->next, struct ib_fmr, list);
+ return fmr->device->unmap_fmr(fmr_list);
}
+EXPORT_SYMBOL(ib_unmap_fmr);
-int ib_fmr_pool_create(struct ib_pd *pd,
- struct ib_fmr_pool_param *params,
- struct ib_fmr_pool **pool_handle)
+int ib_dealloc_fmr(struct ib_fmr *fmr)
{
- struct ib_device *device;
- struct ib_fmr_pool *pool;
- int i;
+ struct ib_pd *pd;
int ret;
- if (!params) {
- return -EINVAL;
- }
+ pd = fmr->pd;
+ ret = fmr->device->dealloc_fmr(fmr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
- device = pd->device;
- if (!device->fmr_create ||
- !device->fmr_destroy ||
- !device->fmr_map ||
- !device->fmr_unmap) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "Device %s does not support fast memory regions",
- device->name);
- return -ENOSYS;
- }
-
- pool = kmalloc(sizeof *pool, GFP_KERNEL);
- if (!pool) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "couldn't allocate pool struct");
- return -ENOMEM;
- }
-
- pool->cache_bucket = NULL;
-
- pool->flush_function = params->flush_function;
- pool->flush_arg = params->flush_arg;
-
- INIT_LIST_HEAD(&pool->free_list);
- INIT_LIST_HEAD(&pool->dirty_list);
-
- if (params->cache) {
- pool->cache_bucket =
- kmalloc(TS_IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, GFP_KERNEL);
- if (!pool->cache_bucket) {
- TS_REPORT_WARN(MOD_KERNEL_IB, "Failed to allocate cache in pool");
- ret = -ENOMEM;
- goto out_free_pool;
- }
-
- for (i = 0; i < TS_IB_FMR_HASH_SIZE; ++i) {
- tsKernelHashHeadInit(&pool->cache_bucket[i]);
- }
- }
-
- pool->device = device;
- pool->pool_size = 0;
- pool->max_pages = params->max_pages_per_fmr;
- pool->dirty_watermark = params->dirty_watermark;
- pool->dirty_len = 0;
- spin_lock_init(&pool->pool_lock);
- atomic_set(&pool->req_ser, 0);
- atomic_set(&pool->flush_ser, 0);
- init_waitqueue_head(&pool->thread_wait);
- init_waitqueue_head(&pool->force_wait);
-
- ret = tsKernelThreadStart("ts_fmr",
- ib_fmr_cleanup_thread,
- pool,
- &pool->thread);
-
- if (ret) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "couldn't start cleanup thread");
- goto out_free_pool;
- }
-
- {
- struct ib_fmr *fmr;
-
- for (i = 0; i < params->pool_size; ++i) {
- fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64),
- GFP_KERNEL);
- if (!fmr) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "failed to allocate fmr struct for FMR %d", i);
- goto out_fail;
- }
-
- fmr->device = device;
- fmr->pool = pool;
- fmr->remap_count = 0;
- fmr->ref_count = 0;
- fmr->cache_node.pprev = NULL;
-
- if (device->fmr_create(pd,
- params->access,
- params->max_pages_per_fmr,
- TS_IB_FMR_MAX_REMAPS,
- fmr)) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "fmr_create failed for FMR %d", i);
- kfree(fmr);
- goto out_fail;
- }
-
- IB_SET_MAGIC(fmr, FMR);
- list_add_tail(&fmr->list, &pool->free_list);
- ++pool->pool_size;
- }
- }
-
- IB_SET_MAGIC(pool, FMR_POOL);
- *pool_handle = pool;
- return 0;
-
- out_free_pool:
- kfree(pool->cache_bucket);
- kfree(pool);
-
return ret;
-
- out_fail:
- IB_SET_MAGIC(pool, FMR_POOL);
- ib_fmr_pool_destroy(pool);
- *pool_handle = NULL;
-
- return -ENOMEM;
}
-EXPORT_SYMBOL(ib_fmr_pool_create);
+EXPORT_SYMBOL(ib_dealloc_fmr);
-int ib_fmr_pool_destroy(struct ib_fmr_pool *pool)
-{
- struct list_head *ptr;
- struct list_head *tmp;
- struct ib_fmr *fmr;
- int i;
-
- IB_CHECK_MAGIC(pool, FMR_POOL);
-
- tsKernelThreadStop(pool->thread);
- ib_fmr_batch_release(pool);
-
- i = 0;
- list_for_each_safe(ptr, tmp, &pool->free_list) {
- fmr = list_entry(ptr, struct ib_fmr, list);
- pool->device->fmr_destroy(fmr);
-
- list_del(ptr);
- kfree(fmr);
- ++i;
- }
-
- if (i < pool->pool_size) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "pool still has %d regions registered",
- pool->pool_size - i);
- }
-
- kfree(pool->cache_bucket);
- kfree(pool);
-
- return 0;
-}
-EXPORT_SYMBOL(ib_fmr_pool_destroy);
-
-int ib_fmr_pool_force_flush(struct ib_fmr_pool *pool)
-{
- int serial;
-
- atomic_inc(&pool->req_ser);
- /* It's OK if someone else bumps req_ser again here -- we'll
- just wait a little longer. */
- serial = atomic_read(&pool->req_ser);
-
- wake_up_interruptible(&pool->thread_wait);
-
- if (wait_event_interruptible(pool->force_wait,
- atomic_read(&pool->flush_ser) -
- atomic_read(&pool->req_ser) >= 0))
- return -EINTR;
-
- return 0;
-}
-EXPORT_SYMBOL(ib_fmr_pool_force_flush);
-
-int ib_fmr_register_physical(struct ib_fmr_pool *pool_handle,
- u64 *page_list,
- int list_len,
- u64 *io_virtual_address,
- u64 iova_offset,
- struct ib_fmr **fmr_handle,
- u32 *lkey,
- u32 *rkey)
-{
- struct ib_fmr_pool *pool = pool_handle;
- struct ib_fmr *fmr;
- unsigned long flags;
- int result;
-
- IB_CHECK_MAGIC(pool, FMR_POOL);
-
- if (list_len < 1 || list_len > pool->max_pages) {
- return -EINVAL;
- }
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- fmr = ib_fmr_cache_lookup(pool,
- page_list,
- list_len,
- *io_virtual_address,
- iova_offset);
- if (fmr) {
- /* found in cache */
- ++fmr->ref_count;
- if (fmr->ref_count == 1) {
- list_del(&fmr->list);
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- *lkey = fmr->lkey;
- *rkey = fmr->rkey;
- *fmr_handle = fmr;
-
- return 0;
- }
-
- if (list_empty(&pool->free_list)) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- return -EAGAIN;
- }
-
- fmr = list_entry(pool->free_list.next, struct ib_fmr, list);
- list_del(&fmr->list);
- ib_fmr_cache_remove(fmr);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- result = pool->device->fmr_map(fmr,
- page_list,
- list_len,
- io_virtual_address,
- iova_offset,
- lkey,
- rkey);
-
- if (result) {
- spin_lock_irqsave(&pool->pool_lock, flags);
- list_add(&fmr->list, &pool->free_list);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "fmr_map returns %d",
- result);
- *fmr_handle = NULL;
-
- return -EINVAL;
- }
-
- ++fmr->remap_count;
- fmr->ref_count = 1;
-
- *fmr_handle = fmr;
-
- if (pool->cache_bucket) {
- fmr->lkey = *lkey;
- fmr->rkey = *rkey;
- fmr->io_virtual_address = *io_virtual_address;
- fmr->iova_offset = iova_offset;
- fmr->page_list_len = list_len;
- memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- ib_fmr_cache_store(pool, fmr);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- }
-
- return 0;
-}
-EXPORT_SYMBOL(ib_fmr_register_physical);
-
-int ib_fmr_deregister(struct ib_fmr *fmr_handle)
-{
- struct ib_fmr *fmr = fmr_handle;
- struct ib_fmr_pool *pool;
- unsigned long flags;
-
- IB_CHECK_MAGIC(fmr, FMR);
-
- pool = fmr->pool;
-
- spin_lock_irqsave(&pool->pool_lock, flags);
-
- --fmr->ref_count;
- if (!fmr->ref_count) {
- if (fmr->remap_count < TS_IB_FMR_MAX_REMAPS) {
- list_add_tail(&fmr->list, &pool->free_list);
- } else {
- list_add_tail(&fmr->list, &pool->dirty_list);
- ++pool->dirty_len;
- wake_up_interruptible(&pool->thread_wait);
- }
- }
-
- if (IB_COMPILE_FMR_DEBUGGING_CODE) {
- if (fmr->ref_count < 0) {
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "FMR %p has ref count %d < 0",
- fmr, fmr->ref_count);
- }
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return 0;
-}
-EXPORT_SYMBOL(ib_fmr_deregister);
-
/*
Local Variables:
c-file-style: "linux"
Index: src/linux-kernel/infiniband/core/core_priv.h
===================================================================
--- src/linux-kernel/infiniband/core/core_priv.h (revision 692)
+++ src/linux-kernel/infiniband/core/core_priv.h (working copy)
@@ -79,7 +79,6 @@
void ib_remove_proc_dir(void);
void ib_completion_thread(struct list_head *entry, void *device_ptr);
void ib_async_thread(struct list_head *entry, void *device_ptr);
-void ib_async_obj_init(struct ib_async_obj *async_obj, void *free_ptr);
#endif /* _CORE_PRIV_H */
Index: src/linux-kernel/infiniband/core/core_async.c
===================================================================
--- src/linux-kernel/infiniband/core/core_async.c (revision 692)
+++ src/linux-kernel/infiniband/core/core_async.c (working copy)
@@ -33,7 +33,6 @@
#include <linux/slab.h>
struct ib_async_event_handler {
- IB_DECLARE_MAGIC
struct ib_async_event_record record;
ib_async_event_handler_func function;
void *arg;
@@ -75,14 +74,6 @@
[IB_PKEY_CHANGE] = { PORT, "P_Key Change" }
};
-void ib_async_obj_init(struct ib_async_obj *async_obj, void *free_ptr)
-{
- spin_lock_init(&async_obj->lock);
- async_obj->free_ptr = free_ptr;
- async_obj->pending = 0;
- async_obj->dead = 0;
-}
-
int ib_async_event_handler_register(struct ib_async_event_record *record,
ib_async_event_handler_func function,
void *arg,
@@ -92,8 +83,6 @@
int ret;
unsigned long flags;
- IB_CHECK_MAGIC(record->device, DEVICE);
-
if (record->event < 0 || record->event >= ARRAY_SIZE(event_table)) {
TS_REPORT_WARN(MOD_KERNEL_IB,
"Attempt to register handler for invalid async event %d",
@@ -137,7 +126,6 @@
break;
}
- IB_SET_MAGIC(handler, ASYNC);
*handle = handler;
return 0;
@@ -152,13 +140,10 @@
struct ib_async_event_handler *handler = handle;
unsigned long flags;
- IB_CHECK_MAGIC(handle, ASYNC);
-
spin_lock_irqsave(handler->list_lock, flags);
list_del(&handler->list);
spin_unlock_irqrestore(handler->list_lock, flags);
- IB_CLEAR_MAGIC(handle);
kfree(handle);
return 0;
}
@@ -168,7 +153,6 @@
{
struct ib_async_event_list *event;
struct ib_device_private *priv = event_record->device->core;
- struct ib_async_obj *async_obj = NULL;
unsigned long flags = 0; /* initialize to shut up gcc */
switch (event_table[event_record->event].mod) {
@@ -176,12 +160,6 @@
break;
}
- if (async_obj) {
- spin_lock_irqsave(&async_obj->lock, flags);
- if (async_obj->dead)
- goto out;
- }
-
event = kmalloc(sizeof *event, GFP_ATOMIC);
if (!event) {
return;
@@ -190,12 +168,6 @@
event->record = *event_record;
tsKernelQueueThreadAdd(priv->async_thread, &event->list);
- if (async_obj)
- ++async_obj->pending;
-
-out:
- if (async_obj)
- spin_unlock_irqrestore(&async_obj->lock, flags);
}
EXPORT_SYMBOL(ib_async_event_dispatch);
@@ -212,7 +184,6 @@
struct ib_async_event_handler *handler;
ib_async_event_handler_func function;
void *arg;
- struct ib_async_obj *async_obj = NULL;
event = list_entry(entry, struct ib_async_event_list, list);
priv = ((struct ib_device *) event->record.device)->core;
@@ -257,12 +228,6 @@
spin_lock_irq(handler_lock);
- if (async_obj) {
- spin_lock(&async_obj->lock);
- if (async_obj->dead)
- goto skip;
- }
-
list_for_each_safe(pos, n, handler_list) {
handler = list_entry(pos, struct ib_async_event_handler, list);
if (handler->record.event == event->record.event) {
@@ -275,14 +240,6 @@
}
}
-skip:
- if (async_obj) {
- --async_obj->pending;
- if (async_obj->dead && !async_obj->pending)
- kfree(async_obj->free_ptr);
- spin_unlock(&async_obj->lock);
- }
-
spin_unlock_irq(handler_lock);
kfree(event);
}
Index: src/linux-kernel/infiniband/core/core_fmr_pool.c
===================================================================
--- src/linux-kernel/infiniband/core/core_fmr_pool.c (revision 0)
+++ src/linux-kernel/infiniband/core/core_fmr_pool.c (revision 0)
@@ -0,0 +1,470 @@
+/*
+ This software is available to you under a choice of one of two
+ licenses. You may choose to be licensed under the terms of the GNU
+ General Public License (GPL) Version 2, available at
+ <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD
+ license, available in the LICENSE.TXT file accompanying this
+ software. These details are also available at
+ <http://openib.org/license.html>.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Copyright (c) 2004 Topspin Communications. All rights reserved.
+
+ $Id$
+*/
+
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/jhash.h>
+#include <linux/kthread.h>
+
+#include <ib_fmr_pool.h>
+
+#include "core_priv.h"
+
/* Pool-wide tuning constants. */
enum {
	IB_FMR_MAX_REMAPS = 32,                      /* remaps allowed before an FMR must be unmapped */

	IB_FMR_HASH_BITS  = 8,                       /* log2 of cache bucket count */
	IB_FMR_HASH_SIZE  = 1 << IB_FMR_HASH_BITS,
	IB_FMR_HASH_MASK  = IB_FMR_HASH_SIZE - 1
};
+
+/*
+ If an FMR is not in use, then the list member will point to either
+ its pool's free_list (if the FMR can be mapped again; that is,
+ remap_count < IB_FMR_MAX_REMAPS) or its pool's dirty_list (if the
+ FMR needs to be unmapped before being remapped). In either of these
+ cases it is a bug if the ref_count is not 0. In other words, if
+ ref_count is > 0, then the list member must not be linked into
+ either free_list or dirty_list.
+
+ The cache_node member is used to link the FMR into a cache bucket
+ (if caching is enabled). This is independent of the reference count
+ of the FMR. When a valid FMR is released, its ref_count is
+ decremented, and if ref_count reaches 0, the FMR is placed in either
+ free_list or dirty_list as appropriate. However, it is not removed
+ from the cache and may be "revived" if a call to
+ ib_fmr_register_physical() occurs before the FMR is remapped. In
+ this case we just increment the ref_count and remove the FMR from
+ free_list/dirty_list.
+
+ Before we remap an FMR from free_list, we remove it from the cache
+ (to prevent another user from obtaining a stale FMR). When an FMR
+ is released, we add it to the tail of the free list, so that our
+ cache eviction policy is "least recently used."
+
+ All manipulation of ref_count, list and cache_node is protected by
+ pool_lock to maintain consistency.
+*/
+
/*
 * Internal state of one FMR pool.  See the block comment above for the
 * lifetime rules tying an FMR's ref_count, list, and cache_node
 * together; all of that state is protected by pool_lock.
 */
struct ib_fmr_pool {
	spinlock_t        pool_lock;       /* protects free/dirty lists and FMR ref counts */

	int               pool_size;       /* number of FMRs successfully allocated */
	int               max_pages;       /* max pages per single mapping */
	int               dirty_watermark; /* dirty_len that triggers a background flush */
	int               dirty_len;       /* current number of FMRs awaiting unmap */
	struct list_head  free_list;       /* idle FMRs that may still be remapped */
	struct list_head  dirty_list;      /* idle FMRs that must be unmapped first */
	struct hlist_head *cache_bucket;   /* mapping lookup cache; NULL if caching disabled */

	/* Optional consumer callback invoked after each completed flush. */
	void              (*flush_function)(struct ib_fmr_pool *pool,
					    void *arg);
	void              *flush_arg;

	struct task_struct *thread;        /* cleanup thread doing batched unmaps */

	atomic_t          req_ser;         /* flush requests issued (ib_flush_fmr_pool) */
	atomic_t          flush_ser;       /* flushes completed by the cleanup thread */

	wait_queue_head_t force_wait;      /* waiters blocked in ib_flush_fmr_pool() */
};
+
+static inline u32 ib_fmr_hash(u64 first_page)
+{
+ return jhash_2words((u32) first_page,
+ (u32) (first_page >> 32),
+ 0);
+}
+
+/* Caller must hold pool_lock */
+static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
+ u64 *page_list,
+ int page_list_len,
+ u64 io_virtual_address)
+{
+ struct hlist_head *bucket;
+ struct ib_pool_fmr *fmr;
+ struct hlist_node *pos;
+
+ if (!pool->cache_bucket)
+ return NULL;
+
+ bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
+
+ hlist_for_each_entry(fmr, pos, bucket, cache_node)
+ if (io_virtual_address == fmr->io_virtual_address &&
+ page_list_len == fmr->page_list_len &&
+ !memcmp(page_list, fmr->page_list,
+ page_list_len * sizeof *page_list))
+ return fmr;
+
+ return NULL;
+}
+
+static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
+{
+ int ret;
+ struct ib_pool_fmr *fmr;
+ LIST_HEAD(unmap_list);
+ LIST_HEAD(fmr_list);
+
+ spin_lock_irq(&pool->pool_lock);
+
+ list_for_each_entry(fmr, &pool->dirty_list, list) {
+ hlist_del_init(&fmr->cache_node);
+ fmr->remap_count = 0;
+ list_add_tail(&fmr->fmr->list, &fmr_list);
+
+#ifdef DEBUG
+ if (fmr->ref_count !=0) {
+ printk(KERN_WARNING "Unmapping FMR 0x%08x with ref count %d",
+ fmr, fmr->ref_count);
+ }
+#endif
+ }
+
+ list_splice(&pool->dirty_list, &unmap_list);
+ INIT_LIST_HEAD(&pool->dirty_list);
+ pool->dirty_len = 0;
+
+ spin_unlock_irq(&pool->pool_lock);
+
+ if (list_empty(&unmap_list)) {
+ return;
+ }
+
+ ret = ib_unmap_fmr(&fmr_list);
+ if (ret)
+ printk(KERN_WARNING "ib_unmap_fmr returned %d", ret);
+
+ spin_lock_irq(&pool->pool_lock);
+ list_splice(&unmap_list, &pool->free_list);
+ spin_unlock_irq(&pool->pool_lock);
+}
+
/*
 * Kernel thread performing batched FMR unmaps for one pool.
 *
 * Flushes whenever the dirty list has passed the watermark or an
 * explicit flush was requested (req_ser ahead of flush_ser), then
 * sleeps until woken by ib_fmr_pool_unmap()/ib_flush_fmr_pool().
 * Exits when kthread_stop() is called.
 */
static int ib_fmr_cleanup_thread(void *pool_ptr)
{
	struct ib_fmr_pool *pool = pool_ptr;

	do {
		if (pool->dirty_len >= pool->dirty_watermark ||
		    atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
			ib_fmr_batch_release(pool);

			/* Publish completion and wake ib_flush_fmr_pool() waiters. */
			atomic_inc(&pool->flush_ser);
			wake_up_interruptible(&pool->force_wait);

			if (pool->flush_function)
				pool->flush_function(pool, pool->flush_arg);
		}

		/* Set state before re-checking conditions so a concurrent
		   wake_up_process() is not lost; skip the sleep if more
		   work arrived while flushing. */
		set_current_state(TASK_INTERRUPTIBLE);
		if (pool->dirty_len < pool->dirty_watermark &&
		    atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
		    !kthread_should_stop())
			schedule();
		__set_current_state(TASK_RUNNING);
	} while (!kthread_should_stop());

	return 0;
}
+
+int ib_create_fmr_pool(struct ib_pd *pd,
+ struct ib_fmr_pool_param *params,
+ struct ib_fmr_pool **pool_handle)
+{
+ struct ib_device *device;
+ struct ib_fmr_pool *pool;
+ int i;
+ int ret;
+
+ if (!params) {
+ return -EINVAL;
+ }
+
+ device = pd->device;
+ if (!device->alloc_fmr ||
+ !device->dealloc_fmr ||
+ !device->map_phys_fmr ||
+ !device->unmap_fmr) {
+ printk(KERN_WARNING "Device %s does not support fast memory regions",
+ device->name);
+ return -ENOSYS;
+ }
+
+ pool = kmalloc(sizeof *pool, GFP_KERNEL);
+ if (!pool) {
+ printk(KERN_WARNING "couldn't allocate pool struct");
+ return -ENOMEM;
+ }
+
+ pool->cache_bucket = NULL;
+
+ pool->flush_function = params->flush_function;
+ pool->flush_arg = params->flush_arg;
+
+ INIT_LIST_HEAD(&pool->free_list);
+ INIT_LIST_HEAD(&pool->dirty_list);
+
+ if (params->cache) {
+ pool->cache_bucket =
+ kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
+ GFP_KERNEL);
+ if (!pool->cache_bucket) {
+ printk(KERN_WARNING "Failed to allocate cache in pool");
+ ret = -ENOMEM;
+ goto out_free_pool;
+ }
+
+ for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
+ INIT_HLIST_HEAD(pool->cache_bucket + i);
+ }
+
+ pool->pool_size = 0;
+ pool->max_pages = params->max_pages_per_fmr;
+ pool->dirty_watermark = params->dirty_watermark;
+ pool->dirty_len = 0;
+ spin_lock_init(&pool->pool_lock);
+ atomic_set(&pool->req_ser, 0);
+ atomic_set(&pool->flush_ser, 0);
+ init_waitqueue_head(&pool->force_wait);
+
+ pool->thread = kthread_create(ib_fmr_cleanup_thread,
+ pool,
+ "ib_fmr(%s)",
+ device->name);
+ if (IS_ERR(pool->thread)) {
+ printk(KERN_WARNING "couldn't start cleanup thread");
+ ret = PTR_ERR(pool->thread);
+ goto out_free_pool;
+ }
+
+ {
+ struct ib_pool_fmr *fmr;
+ struct ib_fmr_attr attr = {
+ .max_pages = params->max_pages_per_fmr,
+ .max_maps = IB_FMR_MAX_REMAPS,
+ .page_size = PAGE_SHIFT
+ };
+
+ for (i = 0; i < params->pool_size; ++i) {
+ fmr = kmalloc(sizeof *fmr + params->max_pages_per_fmr * sizeof (u64),
+ GFP_KERNEL);
+ if (!fmr) {
+ printk(KERN_WARNING "failed to allocate fmr struct for FMR %d", i);
+ goto out_fail;
+ }
+
+ fmr->pool = pool;
+ fmr->remap_count = 0;
+ fmr->ref_count = 0;
+ INIT_HLIST_NODE(&fmr->cache_node);
+
+ fmr->fmr = ib_alloc_fmr(pd, params->access, &attr);
+ if (IS_ERR(fmr->fmr)) {
+ printk(KERN_WARNING "fmr_create failed for FMR %d", i);
+ kfree(fmr);
+ goto out_fail;
+ }
+
+ list_add_tail(&fmr->list, &pool->free_list);
+ ++pool->pool_size;
+ }
+ }
+
+ *pool_handle = pool;
+ return 0;
+
+ out_free_pool:
+ kfree(pool->cache_bucket);
+ kfree(pool);
+
+ return ret;
+
+ out_fail:
+ ib_destroy_fmr_pool(pool);
+ *pool_handle = NULL;
+
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(ib_create_fmr_pool);
+
+int ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
+{
+ struct ib_pool_fmr *fmr;
+ struct ib_pool_fmr *tmp;
+ int i;
+
+ kthread_stop(pool->thread);
+ ib_fmr_batch_release(pool);
+
+ i = 0;
+ list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
+ ib_dealloc_fmr(fmr->fmr);
+ list_del(&fmr->list);
+ kfree(fmr);
+ ++i;
+ }
+
+ if (i < pool->pool_size)
+ printk(KERN_WARNING "pool still has %d regions registered",
+ pool->pool_size - i);
+
+ kfree(pool->cache_bucket);
+ kfree(pool);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_destroy_fmr_pool);
+
+int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
+{
+ int serial;
+
+ atomic_inc(&pool->req_ser);
+ /* It's OK if someone else bumps req_ser again here -- we'll
+ just wait a little longer. */
+ serial = atomic_read(&pool->req_ser);
+
+ wake_up_process(pool->thread);
+
+ if (wait_event_interruptible(pool->force_wait,
+ atomic_read(&pool->flush_ser) -
+ atomic_read(&pool->req_ser) >= 0))
+ return -EINTR;
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_flush_fmr_pool);
+
+struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
+ u64 *page_list,
+ int list_len,
+ u64 *io_virtual_address)
+{
+ struct ib_fmr_pool *pool = pool_handle;
+ struct ib_pool_fmr *fmr;
+ unsigned long flags;
+ int result;
+
+ if (list_len < 1 || list_len > pool->max_pages)
+ return ERR_PTR(-EINVAL);
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ fmr = ib_fmr_cache_lookup(pool,
+ page_list,
+ list_len,
+ *io_virtual_address);
+ if (fmr) {
+ /* found in cache */
+ ++fmr->ref_count;
+ if (fmr->ref_count == 1) {
+ list_del(&fmr->list);
+ }
+
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return fmr;
+ }
+
+ if (list_empty(&pool->free_list)) {
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
+ list_del(&fmr->list);
+ hlist_del_init(&fmr->cache_node);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
+ *io_virtual_address);
+
+ if (result) {
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ list_add(&fmr->list, &pool->free_list);
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ printk(KERN_WARNING "fmr_map returns %d",
+ result);
+
+ return ERR_PTR(result);
+ }
+
+ ++fmr->remap_count;
+ fmr->ref_count = 1;
+
+ if (pool->cache_bucket) {
+ fmr->io_virtual_address = *io_virtual_address;
+ fmr->page_list_len = list_len;
+ memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+ hlist_add_head(&fmr->cache_node,
+ pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+ }
+
+ return fmr;
+}
+EXPORT_SYMBOL(ib_fmr_pool_map_phys);
+
+int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
+{
+ struct ib_fmr_pool *pool;
+ unsigned long flags;
+
+ pool = fmr->pool;
+
+ spin_lock_irqsave(&pool->pool_lock, flags);
+
+ --fmr->ref_count;
+ if (!fmr->ref_count) {
+ if (fmr->remap_count < IB_FMR_MAX_REMAPS) {
+ list_add_tail(&fmr->list, &pool->free_list);
+ } else {
+ list_add_tail(&fmr->list, &pool->dirty_list);
+ ++pool->dirty_len;
+ wake_up_process(pool->thread);
+ }
+ }
+
+#ifdef DEBUG
+ if (fmr->ref_count < 0)
+ printk(KERN_WARNING "FMR %p has ref count %d < 0",
+ fmr, fmr->ref_count);
+#endif
+
+ spin_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_fmr_pool_unmap);
+
+/*
+ Local Variables:
+ c-file-style: "linux"
+ indent-tabs-mode: t
+ End:
+*/
Property changes on: src/linux-kernel/infiniband/core/core_fmr_pool.c
___________________________________________________________________
Name: svn:keywords
+ Id
Index: src/linux-kernel/infiniband/core/mad_priv.h
===================================================================
--- src/linux-kernel/infiniband/core/mad_priv.h (revision 692)
+++ src/linux-kernel/infiniband/core/mad_priv.h (working copy)
@@ -81,7 +81,6 @@
};
struct ib_mad_filter_list {
- IB_DECLARE_MAGIC
struct ib_mad_filter filter;
ib_mad_dispatch_func function;
void *arg;
Index: src/linux-kernel/infiniband/core/mad_filter.c
===================================================================
--- src/linux-kernel/infiniband/core/mad_filter.c (revision 692)
+++ src/linux-kernel/infiniband/core/mad_filter.c (working copy)
@@ -334,7 +334,6 @@
filter->arg = arg;
filter->matches = 0;
filter->in_callback = 0;
- IB_SET_MAGIC(filter, FILTER);
if (down_interruptible(&filter_sem)) {
kfree(filter);
@@ -354,8 +353,6 @@
{
struct ib_mad_filter_list *filter = handle;
- IB_CHECK_MAGIC(filter, FILTER);
-
if (down_interruptible(&filter_sem))
return -EINTR;
@@ -371,7 +368,6 @@
up(&filter_sem);
- IB_CLEAR_MAGIC(filter);
kfree(filter);
return 0;
}
Index: src/linux-kernel/infiniband/core/core_device.c
===================================================================
--- src/linux-kernel/infiniband/core/core_device.c (revision 692)
+++ src/linux-kernel/infiniband/core/core_device.c (working copy)
@@ -215,8 +215,6 @@
goto out_stop_async;
}
- IB_SET_MAGIC(device, DEVICE);
-
list_add_tail(&device->core_list, &device_list);
{
struct list_head *ptr;
@@ -253,8 +251,6 @@
{
struct ib_device_private *priv;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (tsKernelQueueThreadStop(priv->async_thread)) {
@@ -279,7 +275,6 @@
}
up(&device_lock);
- IB_CLEAR_MAGIC(device);
kfree(priv->port_data);
kfree(priv);
@@ -352,8 +347,6 @@
int ib_device_properties_get(struct ib_device *device,
struct ib_device_properties *properties)
{
- IB_CHECK_MAGIC(device, DEVICE);
-
return device->device_query ? device->device_query(device, properties) : -ENOSYS;
}
EXPORT_SYMBOL(ib_device_properties_get);
@@ -361,8 +354,6 @@
int ib_device_properties_set(struct ib_device *device,
struct ib_device_changes *properties)
{
- IB_CHECK_MAGIC(device, DEVICE);
-
return device->device_modify ? device->device_modify(device, properties) : -ENOSYS;
}
@@ -370,8 +361,6 @@
tTS_IB_PORT port,
struct ib_port_properties *properties)
{
- IB_CHECK_MAGIC(device, DEVICE);
-
return device->port_query ? device->port_query(device, port, properties) : -ENOSYS;
}
EXPORT_SYMBOL(ib_port_properties_get);
@@ -384,8 +373,6 @@
struct ib_port_changes prop_set;
unsigned long flags;
- IB_CHECK_MAGIC(device, DEVICE);
-
priv = device->core;
if (port < priv->start_port || port > priv->end_port) {
@@ -455,8 +442,6 @@
int index,
u16 *pkey)
{
- IB_CHECK_MAGIC(device, DEVICE);
-
return device->pkey_query ? device->pkey_query(device, port, index, pkey) : -ENOSYS;
}
EXPORT_SYMBOL(ib_pkey_entry_get);
@@ -466,8 +451,6 @@
int index,
tTS_IB_GID gid)
{
- IB_CHECK_MAGIC(device, DEVICE);
-
return device->gid_query ? device->gid_query(device, port, index, gid) : -ENOSYS;
}
EXPORT_SYMBOL(ib_gid_entry_get);
More information about the general
mailing list