[openib-general] [PATCH] MR API changes
Roland Dreier
roland at topspin.com
Fri Jul 30 06:34:05 PDT 2004
This set of changes (also committed to gen2/branches/roland-merge)
changes the MR API. The code in mthca_reg_phys_mr() for figuring
out alignment/page size is a little ugly and may still have bugs but
it works for the case we actually use: register all of lowmem in one
HCA page.
- R.
Index: src/linux-kernel/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- src/linux-kernel/infiniband/ulp/ipoib/ipoib_verbs.c (revision 530)
+++ src/linux-kernel/infiniband/ulp/ipoib/ipoib_verbs.c (working copy)
@@ -220,31 +220,19 @@
{
/* XXX we assume physical memory starts at address 0. */
- struct ib_physical_buffer buffer_list = {
- .address = 0,
- .size = 1
+ struct ib_phys_buf buffer_list = {
+ .addr = 0,
+ .size = (unsigned long) high_memory - PAGE_OFFSET
};
uint64_t dummy_iova = 0;
- unsigned long tsize = (unsigned long)high_memory - PAGE_OFFSET;
u32 rkey;
- /*
- * Make our region have size the size of low memory rounded
- * up to the next power of 2 (so we use as few TPT entries
- * as possible)
- */
- while (tsize) {
- buffer_list.size <<= 1;
- tsize >>= 1;
- }
-
- if (ib_memory_register_physical(priv->pd, &buffer_list,
- 1, /* list_len */
- &dummy_iova, buffer_list.size,
- 0, /* iova_offset */
- IB_ACCESS_LOCAL_WRITE,
- &priv->mr,
- &priv->lkey, &rkey)) {
+ priv->mr = ib_reg_phys_mr(priv->pd, &buffer_list,
+ 1, /* list_len */
+ IB_MR_LOCAL_WRITE,
+ &dummy_iova,
+ &priv->lkey, &rkey);
+ if (IS_ERR(priv->mr)) {
TS_REPORT_FATAL(MOD_IB_NET,
"%s: ib_memory_register_physical failed",
dev->name);
@@ -276,9 +264,9 @@
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
- if (ib_memory_deregister(priv->mr))
+ if (ib_dereg_mr(priv->mr))
TS_REPORT_WARN(MOD_IB_NET,
- "%s: ib_memory_deregister failed", dev->name);
+ "%s: ib_dereg_mr failed", dev->name);
if (ib_destroy_cq(priv->cq))
TS_REPORT_WARN(MOD_IB_NET,
Index: src/linux-kernel/infiniband/ulp/srp/srp_host.c
===================================================================
--- src/linux-kernel/infiniband/ulp/srp/srp_host.c (revision 531)
+++ src/linux-kernel/infiniband/ulp/srp/srp_host.c (working copy)
@@ -522,10 +522,11 @@
*/
int srp_host_alloc_pkts(srp_target_t * target)
{
- int pkt_num;
+ struct ib_phys_buf buffer_list;
+ u64 iova;
srp_pkt_t *srp_pkt;
void *srp_pkt_data;
- int status;
+ int pkt_num;
int max_num_pkts;
srp_host_hca_params_t *hca;
int cq_entries;
@@ -605,20 +606,25 @@
if (ib_req_notify_cq(target->cqr_hndl[hca_index], IB_CQ_NEXT_COMP))
goto CQ_MR_FAIL;
- status = ib_memory_register(hca->pd_hndl,
- target->srp_pkt_data_area,
- max_num_pkts * srp_cmd_pkt_size,
- IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ,
- &target->
- srp_pkt_data_mhndl[hca_index],
- &target->l_key[hca_index],
- &target->r_key[hca_index]);
+ /* XXX UGH -- use pci_alloc_consistent? */
+ buffer_list.addr = virt_to_phys(target->srp_pkt_data_area);
+ buffer_list.size = max_num_pkts * srp_cmd_pkt_size;
+ iova = (unsigned long) target->srp_pkt_data_area;
- if (status != 0) {
+ target->srp_pkt_data_mhndl[hca_index] =
+ ib_reg_phys_mr(hca->pd_hndl, &buffer_list,
+ 1, /*list_len */
+ IB_MR_LOCAL_WRITE |
+ IB_MR_REMOTE_READ,
+ &iova,
+ &target->l_key[hca_index],
+ &target->r_key[hca_index]);
+
+ if (IS_ERR(target->srp_pkt_data_mhndl[hca_index])) {
TS_REPORT_FATAL(MOD_SRPTP,
"Memory registration failed: %d",
- status);
+ PTR_ERR(target->srp_pkt_data_mhndl[hca_index]));
+ target->srp_pkt_data_mhndl[hca_index] = NULL;
goto CQ_MR_FAIL;
}
}
@@ -660,8 +666,8 @@
if (hca->valid == FALSE)
break;
- ib_memory_deregister(target->srp_pkt_data_mhndl[hca_index]);
-
+ if (target->srp_pkt_data_mhndl[hca_index])
+ ib_dereg_mr(target->srp_pkt_data_mhndl[hca_index]);
if (target->cqr_hndl[hca_index])
ib_destroy_cq(target->cqr_hndl[hca_index]);
if (target->cqs_hndl[hca_index])
@@ -695,7 +701,7 @@
if (hca->valid == FALSE)
break;
- ib_memory_deregister(target->srp_pkt_data_mhndl[hca_index]);
+ ib_dereg_mr(target->srp_pkt_data_mhndl[hca_index]);
ib_destroy_cq(target->cqr_hndl[hca_index]);
ib_destroy_cq(target->cqs_hndl[hca_index]);
Index: src/linux-kernel/infiniband/ulp/srp/srptp.c
===================================================================
--- src/linux-kernel/infiniband/ulp/srp/srptp.c (revision 530)
+++ src/linux-kernel/infiniband/ulp/srp/srptp.c (working copy)
@@ -50,9 +50,9 @@
static void cq_send_handler(struct ib_wc *cq_entry, srp_target_t *target)
{
switch (cq_entry->status) {
- case IB_COMPLETION_STATUS_SUCCESS:
+ case IB_WC_SUCCESS:
- if (cq_entry->opcode != IB_COMPLETION_OP_SEND) {
+ if (cq_entry->opcode != IB_WC_SEND) {
TS_REPORT_FATAL(MOD_SRPTP, "Wrong Opcode");
return;
}
@@ -60,7 +60,7 @@
srp_send_done(cq_entry->wr_id, target);
break;
- case IB_COMPLETION_STATUS_WORK_REQUEST_FLUSHED_ERROR:
+ case IB_WC_WR_FLUSH_ERR:
TS_REPORT_STAGE(MOD_SRPTP,
"Send WR_FLUSH_ERR wr_id %d",
cq_entry->wr_id);
@@ -96,9 +96,9 @@
int status;
switch (cq_entry->status) {
- case IB_COMPLETION_STATUS_SUCCESS:
+ case IB_WC_SUCCESS:
- if (cq_entry->opcode != IB_COMPLETION_OP_RECEIVE) {
+ if (cq_entry->opcode != IB_WC_RECV) {
TS_REPORT_FATAL(MOD_SRPTP, "Wrong Opcode");
return;
}
@@ -113,7 +113,7 @@
srp_recv(cq_entry->wr_id, target);
break;
- case IB_COMPLETION_STATUS_WORK_REQUEST_FLUSHED_ERROR:
+ case IB_WC_WR_FLUSH_ERR:
TS_REPORT_STAGE(MOD_SRPTP,
"Recv WR_FLUSH_ERR wr_id %d",
cq_entry->wr_id);
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_write.c
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_write.c (revision 530)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_write.c (working copy)
@@ -49,10 +49,10 @@
/*
* error handling
*/
- if (IB_COMPLETION_STATUS_SUCCESS != comp->status) {
+ if (IB_WC_SUCCESS != comp->status) {
switch (comp->status) {
- case IB_COMPLETION_STATUS_WORK_REQUEST_FLUSHED_ERROR:
+ case IB_WC_WR_FLUSH_ERR:
/*
* clear posted buffers from error'd queue
*/
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_rcvd.c
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_rcvd.c (revision 530)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_rcvd.c (working copy)
@@ -1376,10 +1376,10 @@
/*
* error handling
*/
- if (IB_COMPLETION_STATUS_SUCCESS != comp->status) {
+ if (IB_WC_SUCCESS != comp->status) {
switch (comp->status) {
- case IB_COMPLETION_STATUS_WORK_REQUEST_FLUSHED_ERROR:
+ case IB_WC_WR_FLUSH_ERR:
/*
* clear posted buffers from error'd queue
*/
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_read.c
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_read.c (revision 530)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_read.c (working copy)
@@ -126,10 +126,10 @@
/*
* error handling
*/
- if (IB_COMPLETION_STATUS_SUCCESS != comp->status) {
+ if (IB_WC_SUCCESS != comp->status) {
switch (comp->status) {
- case IB_COMPLETION_STATUS_WORK_REQUEST_FLUSHED_ERROR:
+ case IB_WC_WR_FLUSH_ERR:
/*
* clear posted buffers from error'd queue
*/
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_conn.c
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_conn.c (revision 530)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_conn.c (working copy)
@@ -1893,7 +1893,7 @@
#ifdef _TS_SDP_AIO_SUPPORT
tTS_IB_FMR_POOL_PARAM_STRUCT fmr_param_s;
#endif
- struct ib_physical_buffer buffer_list;
+ struct ib_phys_buf buffer_list;
struct ib_device_properties node_info;
struct ib_device *hca_handle;
struct sdev_hca_port *port;
@@ -1971,22 +1971,19 @@
/*
* memory registration
*/
- buffer_list.address = 0;
+ buffer_list.addr = 0;
buffer_list.size = (unsigned long)high_memory - PAGE_OFFSET;
hca->iova = 0;
- result = ib_memory_register_physical(hca->pd,
- &buffer_list,
- 1, /* list_len */
- &hca->iova,
- (unsigned long)(high_memory - PAGE_OFFSET),
- 0, /* iova_offset */
- IB_ACCESS_LOCAL_WRITE,
- &hca->mem_h,
- &hca->l_key, &hca->r_key);
- if (0 != result) {
-
+ hca->mem_h = ib_reg_phys_mr(hca->pd,
+ &buffer_list,
+ 1, /* list_len */
+ IB_ACCESS_LOCAL_WRITE,
+ &hca->iova,
+ &hca->l_key, &hca->r_key);
+ if (IS_ERR(hca->mem_h)) {
+ result = PTR_ERR(hca->mem_h);
TS_TRACE(MOD_LNX_SDP, T_TERSE, TRACE_FLOW_FATAL,
"INIT: Error <%d> registering HCA <%x:%d> memory.",
result, hca_handle, hca_count);
@@ -2097,7 +2094,7 @@
if (hca->mem_h) {
- (void)ib_memory_deregister(hca->mem_h);
+ (void)ib_dereg_mr(hca->mem_h);
}
if (hca->pd) {
Index: src/linux-kernel/infiniband/ulp/sdp/sdp_sent.c
===================================================================
--- src/linux-kernel/infiniband/ulp/sdp/sdp_sent.c (revision 530)
+++ src/linux-kernel/infiniband/ulp/sdp/sdp_sent.c (working copy)
@@ -445,10 +445,10 @@
/*
* error handling
*/
- if (IB_COMPLETION_STATUS_SUCCESS != comp->status) {
+ if (IB_WC_SUCCESS != comp->status) {
switch (comp->status) {
- case IB_COMPLETION_STATUS_WORK_REQUEST_FLUSHED_ERROR:
+ case IB_WC_WR_FLUSH_ERR:
/*
* clear posted buffers from error'd queue
*/
Index: src/linux-kernel/infiniband/include/ib_verbs.h
===================================================================
--- src/linux-kernel/infiniband/include/ib_verbs.h (revision 530)
+++ src/linux-kernel/infiniband/include/ib_verbs.h (working copy)
@@ -70,7 +70,11 @@
IB_WC_COMP_SWAP,
IB_WC_FETCH_ADD,
IB_WC_BIND_MW,
- IB_WC_RECV,
+/*
+ * Set value of IB_WC_RECV so consumers can test if a completion is a
+ * receive by testing (opcode & IB_WC_RECV).
+ */
+ IB_WC_RECV = 1 << 7,
IB_WC_RECV_RDMA_WITH_IMM
};
@@ -95,9 +99,37 @@
IB_CQ_NEXT_COMP
};
+enum ib_mr_access_flags {
+ IB_MR_LOCAL_WRITE = 1,
+ IB_MR_REMOTE_WRITE = (1<<1),
+ IB_MR_REMOTE_READ = (1<<2),
+ IB_MR_REMOTE_ATOMIC = (1<<3),
+ IB_MR_MW_BIND = (1<<4)
+};
+
+struct ib_phys_buf {
+ u64 addr;
+ u64 size;
+};
+
+struct ib_mr_attr {
+ struct ib_pd *pd;
+ u64 device_virt_addr;
+ u64 size;
+ int mr_access_flags;
+ u32 lkey;
+ u32 rkey;
+};
+
+enum ib_mr_rereg_flags {
+ IB_MR_REREG_TRANS = 1,
+ IB_MR_REREG_PD = (1<<1),
+ IB_MR_REREG_ACCESS = (1<<2)
+};
+
struct ib_pd {
struct ib_device *device;
- atomic_t usecnt;
+ atomic_t usecnt; /* count all resources */
};
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
@@ -107,9 +139,15 @@
ib_comp_handler comp_handler;
void * context;
int cqe;
- atomic_t usecnt;
+ atomic_t usecnt; /* count number of work queues */
};
+struct ib_mr {
+ struct ib_device *device;
+ struct ib_pd *pd;
+ atomic_t usecnt; /* count number of MWs */
+};
+
struct ib_device {
IB_DECLARE_MAGIC
@@ -153,9 +191,25 @@
enum ib_cq_notify cq_notify);
int (*req_ncomp_notif)(struct ib_cq *cq,
int wc_cnt);
- ib_mr_register_func mr_register;
- ib_mr_register_physical_func mr_register_physical;
- ib_mr_deregister_func mr_deregister;
+ struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start,
+ u32 *lkey,
+ u32 *rkey);
+ int (*query_mr)(struct ib_mr *mr,
+ struct ib_mr_attr *mr_attr);
+ int (*dereg_mr)(struct ib_mr *mr);
+ int (*rereg_phys_mr)(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start,
+ u32 *lkey,
+ u32 *rkey);
ib_mw_create_func mw_create;
ib_mw_destroy_func mw_destroy;
ib_mw_bind_func mw_bind;
@@ -214,6 +268,27 @@
int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt);
+struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start,
+ u32 *lkey,
+ u32 *rkey);
+
+int ib_rereg_phys_mr(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start,
+ u32 *lkey,
+ u32 *rkey);
+
+int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
+int ib_dereg_mr(struct ib_mr *mr);
+
#endif /* __KERNEL __ */
/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
Index: src/linux-kernel/infiniband/include/ts_ib_core_types.h
===================================================================
--- src/linux-kernel/infiniband/include/ts_ib_core_types.h (revision 530)
+++ src/linux-kernel/infiniband/include/ts_ib_core_types.h (working copy)
@@ -211,40 +211,6 @@
IB_OP_MEMORY_WINDOW_BIND
};
-enum ib_completion_op {
- IB_COMPLETION_OP_RECEIVE,
- IB_COMPLETION_OP_RDMA_WRITE_RECEIVE,
- IB_COMPLETION_OP_SEND,
- IB_COMPLETION_OP_RDMA_WRITE,
- IB_COMPLETION_OP_RDMA_READ,
- IB_COMPLETION_OP_COMPARE_SWAP,
- IB_COMPLETION_OP_FETCH_ADD,
- IB_COMPLETION_OP_MEMORY_WINDOW_BIND,
-};
-
-enum ib_completion_status {
- IB_COMPLETION_STATUS_SUCCESS,
- IB_COMPLETION_STATUS_LOCAL_LENGTH_ERROR,
- IB_COMPLETION_STATUS_LOCAL_QP_OPERATION_ERROR,
- IB_COMPLETION_STATUS_LOCAL_EEC_OPERATION_ERROR,
- IB_COMPLETION_STATUS_LOCAL_PROTECTION_ERROR,
- IB_COMPLETION_STATUS_WORK_REQUEST_FLUSHED_ERROR,
- IB_COMPLETION_STATUS_MEMORY_WINDOW_BIND_ERROR,
- IB_COMPLETION_STATUS_BAD_RESPONSE_ERROR,
- IB_COMPLETION_STATUS_LOCAL_ACCESS_ERROR,
- IB_COMPLETION_STATUS_REMOTE_INVALID_REQUEST_ERROR,
- IB_COMPLETION_STATUS_REMOTE_ACCESS_ERORR,
- IB_COMPLETION_STATUS_REMOTE_OPERATION_ERROR,
- IB_COMPLETION_STATUS_TRANSPORT_RETRY_COUNTER_EXCEEDED,
- IB_COMPLETION_STATUS_RNR_RETRY_COUNTER_EXCEEDED,
- IB_COMPLETION_STATUS_LOCAL_RDD_VIOLATION_ERROR,
- IB_COMPLETION_STATUS_REMOTE_INVALID_RD_REQUEST,
- IB_COMPLETION_STATUS_REMOTE_ABORTED_ERROR,
- IB_COMPLETION_STATUS_INVALID_EEC_NUMBER,
- IB_COMPLETION_STATUS_INVALID_EEC_STATE,
- IB_COMPLETION_STATUS_UNKNOWN_ERROR
-};
-
enum ib_async_event {
IB_QP_PATH_MIGRATED,
IB_EEC_PATH_MIGRATED,
@@ -287,14 +253,6 @@
void *private;
};
-struct ib_mr {
- IB_DECLARE_MAGIC
- struct ib_device *device;
- u32 lkey;
- u32 rkey;
- void *private;
-};
-
struct ib_fmr_pool; /* actual definition in core_fmr.c */
struct ib_fmr {
@@ -590,11 +548,6 @@
int fence:1;
};
-struct ib_physical_buffer {
- u64 address;
- u64 size;
-};
-
struct ib_fmr_pool_param {
int max_pages_per_fmr;
enum ib_memory_access access;
@@ -658,20 +611,6 @@
typedef int (*ib_receive_post_func)(struct ib_qp *qp,
struct ib_receive_param *param,
int num_work_requests);
-typedef int (*ib_mr_register_func)(struct ib_pd *pd,
- void *start_address,
- u64 buffer_size,
- enum ib_memory_access access,
- struct ib_mr *mr);
-typedef int (*ib_mr_register_physical_func)(struct ib_pd *pd,
- struct ib_physical_buffer *buffer_list,
- int list_len,
- u64 *io_virtual_address,
- u64 buffer_size,
- u64 iova_offset,
- enum ib_memory_access access,
- struct ib_mr *mr);
-typedef int (*ib_mr_deregister_func)(struct ib_mr *mr);
typedef int (*ib_mw_create_func)(struct ib_pd *pd,
struct ib_mw **mw,
u32 *rkey);
Index: src/linux-kernel/infiniband/include/ts_ib_core.h
===================================================================
--- src/linux-kernel/infiniband/include/ts_ib_core.h (revision 530)
+++ src/linux-kernel/infiniband/include/ts_ib_core.h (working copy)
@@ -100,25 +100,6 @@
return qp->device->receive_post(qp, param, num_work_requests);
}
-int ib_memory_register(struct ib_pd *pd,
- void *start_address,
- uint64_t buffer_size,
- enum ib_memory_access access,
- struct ib_mr **memory,
- u32 *lkey,
- u32 *rkey);
-int ib_memory_register_physical(struct ib_pd *pd,
- struct ib_physical_buffer *buffer_list,
- int list_len,
- uint64_t *io_virtual_address,
- uint64_t buffer_size,
- uint64_t iova_offset,
- enum ib_memory_access access,
- struct ib_mr **mr,
- u32 *lkey,
- u32 *rkey);
-int ib_memory_deregister(struct ib_mr *memory);
-
int ib_mw_create(struct ib_pd *pd,
struct ib_mw **mw,
u32 *rkey);
Index: src/linux-kernel/infiniband/core/mad_ib.c
===================================================================
--- src/linux-kernel/infiniband/core/mad_ib.c (revision 530)
+++ src/linux-kernel/infiniband/core/mad_ib.c (working copy)
@@ -23,7 +23,6 @@
*/
#include "mad_priv.h"
-#include "mad_mem_compat.h"
#include "ts_kernel_trace.h"
#include "ts_kernel_services.h"
Index: src/linux-kernel/infiniband/core/mad_mem_compat.h
===================================================================
--- src/linux-kernel/infiniband/core/mad_mem_compat.h (revision 521)
+++ src/linux-kernel/infiniband/core/mad_mem_compat.h (working copy)
@@ -1,71 +0,0 @@
-/*
- This software is available to you under a choice of one of two
- licenses. You may choose to be licensed under the terms of the GNU
- General Public License (GPL) Version 2, available at
- <http://www.fsf.org/copyleft/gpl.html>, or the OpenIB.org BSD
- license, available in the LICENSE.TXT file accompanying this
- software. These details are also available at
- <http://openib.org/license.html>.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
- Copyright (c) 2004 Topspin Communications. All rights reserved.
-
- $Id$
-*/
-
-#ifndef _MAD_MEM_COMPAT_H
-#define _MAD_MEM_COMPAT_H
-
-/* Need the definition of high_memory: */
-#include <linux/mm.h>
-
-static inline int ib_mad_register_memory(struct ib_pd *pd,
- struct ib_mr **mr,
- u32 *lkey)
-{
- u32 rkey;
- u64 iova = 0;
- struct ib_physical_buffer buffer_list;
- int result;
-
- buffer_list.address = 0;
-
- /* make our region have size the size of low memory rounded up to
- the next power of 2, so we use as few TPT entries as possible
- and don't confuse the verbs driver when lowmem has an odd size
- (cf bug 1921) */
- for (buffer_list.size = 1;
- buffer_list.size < (unsigned long) high_memory - PAGE_OFFSET;
- buffer_list.size <<= 1) {
- /* nothing */
- }
-
- result = ib_memory_register_physical(pd,
- &buffer_list,
- 1, /* list_len */
- &iova,
- buffer_list.size,
- 0, /* iova_offset */
- IB_ACCESS_LOCAL_WRITE,
- mr,
- lkey,
- &rkey);
- if (result)
- TS_REPORT_WARN(MOD_KERNEL_IB,
- "ib_memory_register_physical failed "
- "size 0x%016" TS_U64_FMT "x, iova 0x%016" TS_U64_FMT "x"
- " (return code %d)",
- buffer_list.size, iova, result);
-
- return result;
-}
-
-#endif /* _MAD_COMPAT_H */
Index: src/linux-kernel/infiniband/core/core_cq.c
===================================================================
--- src/linux-kernel/infiniband/core/core_cq.c (revision 530)
+++ src/linux-kernel/infiniband/core/core_cq.c (working copy)
@@ -21,17 +21,13 @@
$Id$
*/
-#include "core_priv.h"
-
-#include "ts_kernel_trace.h"
-#include "ts_kernel_services.h"
-
#include <linux/version.h>
#include <linux/module.h>
#include <linux/errno.h>
-#include <linux/slab.h>
+#include "core_priv.h"
+
struct ib_cq *ib_create_cq(struct ib_device *device,
ib_comp_handler comp_handler,
void *cq_context, int *cqe)
Index: src/linux-kernel/infiniband/core/core_mr.c
===================================================================
--- src/linux-kernel/infiniband/core/core_mr.c (revision 521)
+++ src/linux-kernel/infiniband/core/core_mr.c (working copy)
@@ -21,118 +21,79 @@
$Id$
*/
-#include "core_priv.h"
-
-#include "ts_kernel_trace.h"
-#include "ts_kernel_services.h"
-
#include <linux/version.h>
#include <linux/module.h>
#include <linux/errno.h>
-#include <linux/slab.h>
-int ib_memory_register(struct ib_pd *pd,
- void *start_address,
- u64 buffer_size,
- enum ib_memory_access access,
- struct ib_mr **mr_handle,
- u32 *lkey,
- u32 *rkey)
+#include "core_priv.h"
+
+struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start,
+ u32 *lkey,
+ u32 *rkey)
{
struct ib_mr *mr;
- int ret;
- if (!pd->device->mr_register) {
- return -ENOSYS;
- }
+ mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf,
+ mr_access_flags, iova_start, lkey, rkey);
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
- if (!mr) {
- return -ENOMEM;
- }
-
- ret = pd->device->mr_register(pd, start_address, buffer_size, access, mr);
-
- if (!ret) {
- IB_SET_MAGIC(mr, MR);
+ if (!IS_ERR(mr)) {
mr->device = pd->device;
- *mr_handle = mr;
- *lkey = mr->lkey;
- *rkey = mr->rkey;
- } else {
- kfree(mr);
+ mr->pd = pd;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
}
- return ret;
+ return mr;
}
-EXPORT_SYMBOL(ib_memory_register);
+EXPORT_SYMBOL(ib_reg_phys_mr);
-int ib_memory_register_physical(struct ib_pd *pd,
- struct ib_physical_buffer *buffer_list,
- int list_len,
- u64 *io_virtual_address,
- u64 buffer_size,
- u64 iova_offset,
- enum ib_memory_access access,
- struct ib_mr **mr_handle,
- u32 *lkey,
- u32 *rkey)
+int ib_rereg_phys_mr(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *phys_buf_array,
+ int num_phys_buf,
+ int mr_access_flags,
+ u64 *iova_start,
+ u32 *lkey,
+ u32 *rkey)
{
- struct ib_mr *mr;
- int ret;
+ return mr->device->rereg_phys_mr ?
+ mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd,
+ phys_buf_array, num_phys_buf,
+ mr_access_flags, iova_start,
+ lkey, rkey) :
+ -ENOSYS;
+}
+EXPORT_SYMBOL(ib_rereg_phys_mr);
- if (!pd->device->mr_register_physical) {
- return -ENOSYS;
- }
-
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
- if (!mr) {
- return -ENOMEM;
- }
-
- ret = pd->device->mr_register_physical(pd,
- buffer_list,
- list_len,
- io_virtual_address,
- buffer_size,
- iova_offset,
- access,
- mr);
-
- if (!ret) {
- IB_SET_MAGIC(mr, MR);
- mr->device = pd->device;
- *mr_handle = mr;
- *lkey = mr->lkey;
- *rkey = mr->rkey;
- } else {
- kfree(mr);
- }
-
- return ret;
+int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
+{
+ return mr->device->query_mr ?
+ mr->device->query_mr(mr, mr_attr) : -ENOSYS;
}
-EXPORT_SYMBOL(ib_memory_register_physical);
+EXPORT_SYMBOL(ib_query_mr);
-int ib_memory_deregister(struct ib_mr *mr)
+int ib_dereg_mr(struct ib_mr *mr)
{
- int ret;
+ struct ib_pd *pd;
+ int ret;
- IB_CHECK_MAGIC(mr, MR);
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
- if (!mr->device->mr_deregister) {
- return -ENOSYS;
- }
+ pd = mr->pd;
+ ret = mr->device->dereg_mr(mr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
- ret = mr->device->mr_deregister(mr);
- if (!ret) {
- IB_CLEAR_MAGIC(mr);
- kfree(mr);
- }
-
return ret;
}
-EXPORT_SYMBOL(ib_memory_deregister);
+EXPORT_SYMBOL(ib_dereg_mr);
/*
Local Variables:
Index: src/linux-kernel/infiniband/core/mad_main.c
===================================================================
--- src/linux-kernel/infiniband/core/mad_main.c (revision 530)
+++ src/linux-kernel/infiniband/core/mad_main.c (working copy)
@@ -24,13 +24,7 @@
#include <linux/config.h>
#include "mad_priv.h"
-#include "mad_mem_compat.h"
-#if defined(CONFIG_INFINIBAND_MELLANOX_HCA) || \
- defined(CONFIG_INFINIBAND_MELLANOX_HCA_MODULE)
-#include "ts_ib_tavor_provider.h"
-#endif
-
#include "ts_kernel_trace.h"
#include "ts_kernel_services.h"
@@ -40,6 +34,9 @@
#include <linux/init.h>
#include <linux/errno.h>
+/* Need the definition of high_memory: */
+#include <linux/mm.h>
+
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
#endif
@@ -50,6 +47,31 @@
kmem_cache_t *mad_cache;
+static inline int ib_mad_register_memory(struct ib_pd *pd,
+ struct ib_mr **mr,
+ u32 *lkey)
+{
+ u32 rkey;
+ u64 iova = 0;
+ struct ib_phys_buf buffer_list = {
+ .addr = 0,
+ .size = (unsigned long) high_memory - PAGE_OFFSET
+ };
+
+ *mr = ib_reg_phys_mr(pd, &buffer_list,
+ 1, /* list_len */
+ IB_MR_LOCAL_WRITE,
+ &iova, lkey, &rkey);
+ if (IS_ERR(*mr))
+ TS_REPORT_WARN(MOD_KERNEL_IB,
+ "ib_reg_phys_mr failed "
+ "size 0x%016" TS_U64_FMT "x, iova 0x%016" TS_U64_FMT "x"
+ " (return code %d)",
+ buffer_list.size, iova, PTR_ERR(*mr));
+
+ return IS_ERR(*mr) ? PTR_ERR(*mr) : 0;
+}
+
static int ib_mad_qp_create(struct ib_device *device,
tTS_IB_PORT port,
u32 qpn)
@@ -277,7 +299,7 @@
}
error_free_mr:
- ib_memory_deregister(priv->mr);
+ ib_dereg_mr(priv->mr);
error_free_cq:
ib_destroy_cq(priv->cq);
@@ -317,7 +339,7 @@
}
}
- ib_memory_deregister(priv->mr);
+ ib_dereg_mr(priv->mr);
ib_destroy_cq(priv->cq);
ib_dealloc_pd(priv->pd);
Index: src/linux-kernel/infiniband/core/core_device.c
===================================================================
--- src/linux-kernel/infiniband/core/core_device.c (revision 530)
+++ src/linux-kernel/infiniband/core/core_device.c (working copy)
@@ -61,8 +61,8 @@
IB_MANDATORY_FUNC(destroy_cq),
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
- IB_MANDATORY_FUNC(mr_register_physical),
- IB_MANDATORY_FUNC(mr_deregister)
+ IB_MANDATORY_FUNC(reg_phys_mr),
+ IB_MANDATORY_FUNC(dereg_mr)
};
int i;
Index: src/linux-kernel/infiniband/core/core_pd.c
===================================================================
--- src/linux-kernel/infiniband/core/core_pd.c (revision 521)
+++ src/linux-kernel/infiniband/core/core_pd.c (working copy)
@@ -21,17 +21,13 @@
$Id$
*/
-#include "core_priv.h"
-
-#include "ts_kernel_trace.h"
-#include "ts_kernel_services.h"
-
#include <linux/version.h>
#include <linux/module.h>
#include <linux/errno.h>
-#include <linux/slab.h>
+#include "core_priv.h"
+
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
Index: src/linux-kernel/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/mthca_dev.h (revision 530)
+++ src/linux-kernel/infiniband/hw/mthca/mthca_dev.h (working copy)
@@ -291,8 +291,7 @@
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
u64 *buffer_list, int buffer_size_shift,
int list_len, u64 iova, u64 total_size,
- u64 iova_offset, u32 access,
- struct mthca_mr *mr);
+ u32 access, struct mthca_mr *mr);
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
Index: src/linux-kernel/infiniband/hw/mthca/mthca_provider.c
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/mthca_provider.c (revision 530)
+++ src/linux-kernel/infiniband/hw/mthca/mthca_provider.c (working copy)
@@ -415,79 +415,122 @@
return 0;
}
-static int mthca_mr_register_physical(struct ib_pd *pd,
- struct ib_physical_buffer *buffer_list,
- int list_len,
- uint64_t *io_virtual_address,
- uint64_t buffer_size,
- uint64_t iova_offset,
- enum ib_memory_access acc,
- struct ib_mr *mr)
+static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start,
+ u32 *lkey,
+ u32 *rkey)
{
+ struct mthca_mr *mr;
u64 *page_list;
+ u64 total_size;
+ u64 mask;
int shift;
int npages;
u32 access;
int err = -ENOMEM;
- int i;
+ int i, j, n;
- /* We only support one buffer for now */
- if (list_len > 1 || buffer_list[0].size != buffer_size)
- return -EINVAL;
+ /* First check that we have enough alignment */
+ if ((*iova_start & PAGE_MASK) != (buffer_list[0].addr & PAGE_MASK))
+ return ERR_PTR(-EINVAL);
- for (shift = 1; shift < 31 && 1ULL << shift < buffer_size; ++shift)
- ; /* nothing */
+ if (num_phys_buf > 1 &&
+ ((buffer_list[0].addr + buffer_list[0].size) & PAGE_MASK))
+ return ERR_PTR(-EINVAL);
- npages = (buffer_size + (1ULL << shift) - 1) >> shift;
+ mask = 0;
+ total_size = 0;
+ for (i = 0; i < num_phys_buf; ++i) {
+ if (buffer_list[i].addr & PAGE_MASK)
+ return ERR_PTR(-EINVAL);
+ if (i != 0 && i != num_phys_buf - 1 &&
+ (buffer_list[i].size & PAGE_MASK))
+ return ERR_PTR(-EINVAL);
+ total_size += buffer_list[i].size;
+ if (i > 0)
+ mask |= buffer_list[i].addr;
+ }
+
+ /* Find largest page shift we can use to cover buffers */
+ for (shift = PAGE_SHIFT; shift < 31; ++shift)
+ if (num_phys_buf > 1) {
+ if ((1ULL << shift) & mask)
+ break;
+ } else {
+ if (1ULL << shift >=
+ buffer_list[0].size +
+ (buffer_list[0].addr & ((1ULL << shift) - 1)))
+ break;
+ }
+
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
+ buffer_list[0].addr &= ~0ull << shift;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
+
if (!npages)
- return 0;
+ return (struct ib_mr *) mr;
page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
- if (!page_list)
- return -ENOMEM;
+ if (!page_list) {
+ kfree(mr);
+ return ERR_PTR(-ENOMEM);
+ }
- for (i = 0; i < npages; ++i)
- page_list[i] = buffer_list[0].address + ((u64) i << shift);
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
+ ++j)
+ page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
- mr->private = kmalloc(sizeof (struct mthca_mr), GFP_KERNEL);
- if (!mr->private)
- goto out;
-
access =
- (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) |
+ (acc & IB_MR_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) |
+ (acc & IB_MR_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
+ (acc & IB_MR_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) |
+ (acc & IB_MR_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) |
MTHCA_MPT_FLAG_LOCAL_READ;
mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
"in PD %x; shift %d, npages %d.\n",
- (unsigned long long) buffer_list[0].address,
- (unsigned long long) *io_virtual_address,
+ (unsigned long long) buffer_list[0].addr,
+ (unsigned long long) *iova_start,
((struct mthca_pd *) pd)->pd_num,
shift, npages);
err = mthca_mr_alloc_phys(to_mdev(pd->device),
((struct mthca_pd *) pd)->pd_num,
page_list, shift, npages,
- *io_virtual_address, buffer_size,
- iova_offset, access, mr->private);
+ *iova_start, total_size,
+ access, mr);
- if (err)
- kfree(mr->private);
+ if (err) {
+ kfree(mr);
+ mr = ERR_PTR(err);
+ goto out;
+ }
- mr->lkey = mr->rkey = ((struct mthca_mr *) mr->private)->key;
+ *lkey = *rkey = mr->key;
- out:
+out:
kfree(page_list);
- return err;
+ return (struct ib_mr *) mr;
}
-static int mthca_mr_deregister(struct ib_mr *mr)
+static int mthca_dereg_mr(struct ib_mr *mr)
{
- mthca_free_mr(to_mdev(mr->device), mr->private);
- kfree(mr->private);
+ mthca_free_mr(to_mdev(mr->device), (struct mthca_mr *) mr);
+ kfree(mr);
return 0;
}
@@ -516,8 +559,8 @@
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.req_notify_cq = mthca_req_notify_cq;
- dev->ib_dev.mr_register_physical = mthca_mr_register_physical;
- dev->ib_dev.mr_deregister = mthca_mr_deregister;
+ dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
+ dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.multicast_attach = mthca_multicast_attach;
dev->ib_dev.multicast_detach = mthca_multicast_detach;
dev->ib_dev.mad_process = mthca_process_mad;
Index: src/linux-kernel/infiniband/hw/mthca/mthca_provider.h
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/mthca_provider.h (revision 541)
+++ src/linux-kernel/infiniband/hw/mthca/mthca_provider.h (working copy)
@@ -39,6 +39,7 @@
};
struct mthca_mr {
+ struct ib_mr ibmr;
u32 key;
int order;
u32 first_seg;
Index: src/linux-kernel/infiniband/hw/mthca/mthca_cq.c
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/mthca_cq.c (revision 543)
+++ src/linux-kernel/infiniband/hw/mthca/mthca_cq.c (working copy)
@@ -636,7 +636,7 @@
err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
dma_list, shift, npages,
- 0, size, 0,
+ 0, size,
MTHCA_MPT_FLAG_LOCAL_WRITE |
MTHCA_MPT_FLAG_LOCAL_READ,
&cq->mr);
Index: src/linux-kernel/infiniband/hw/mthca/mthca_eq.c
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/mthca_eq.c (revision 521)
+++ src/linux-kernel/infiniband/hw/mthca/mthca_eq.c (working copy)
@@ -389,7 +389,7 @@
err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num,
dma_list, PAGE_SHIFT, npages,
- 0, npages * PAGE_SIZE, 0,
+ 0, npages * PAGE_SIZE,
MTHCA_MPT_FLAG_LOCAL_WRITE |
MTHCA_MPT_FLAG_LOCAL_READ,
&eq->mr);
Index: src/linux-kernel/infiniband/hw/mthca/TODO
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/TODO (revision 521)
+++ src/linux-kernel/infiniband/hw/mthca/TODO (working copy)
@@ -3,10 +3,8 @@
immediate operations in send queues.
APM support: ib_mthca's QP modify does not set alternate path
fields in QP context.
- Full support for asynchronous events: only port change
- asynchronous events will be dispatched.
- Full support for physical memory registration: Physical memory
- registration is limited to a single buffer.
+ Full support for asynchronous events: dispatching CQ and various
+ unaffiliated errors still needs to be implemented.
UD address vectors in HCA memory with DDR hidden: Even if the HCA
does not expose its memory via a PCI BAR, the ACCESS_DDR
firmware command could be used to write UD address vectors
@@ -19,8 +17,6 @@
FMR support: ib_mthca does not support Mellanox-style "fast
memory regions" as used by SDP and SRP.
SRQ support: ib_mthca does not support shared receive queues.
- Virtual memory registration: the register virtual memory region
- verb is not implemented.
Native Arbel support: Support for Arbel in native mode needs to be
added, including at least memory-free mode and base memory
management extensions. Or should native Arbel and Tavor have
Index: src/linux-kernel/infiniband/hw/mthca/mthca_mr.c
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/mthca_mr.c (revision 521)
+++ src/linux-kernel/infiniband/hw/mthca/mthca_mr.c (working copy)
@@ -161,8 +161,7 @@
int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
u64 *buffer_list, int buffer_size_shift,
int list_len, u64 iova, u64 total_size,
- u64 iova_offset, u32 access,
- struct mthca_mr *mr)
+ u32 access, struct mthca_mr *mr)
{
void *mailbox;
u64 *mtt_entry;
Index: src/linux-kernel/infiniband/hw/mthca/mthca_qp.c
===================================================================
--- src/linux-kernel/infiniband/hw/mthca/mthca_qp.c (revision 541)
+++ src/linux-kernel/infiniband/hw/mthca/mthca_qp.c (working copy)
@@ -816,7 +816,7 @@
err = mthca_mr_alloc_phys(dev, qp->pd->pd_num,
dma_list, shift, npages,
- 0, size, 0,
+ 0, size,
MTHCA_MPT_FLAG_LOCAL_WRITE |
MTHCA_MPT_FLAG_LOCAL_READ,
&qp->mr);
More information about the general
mailing list