[ofa-general] [PATCH] drivers/infiniband/ulp/srpt: Fix target data corruption

davem at systemfabricworks.com
Fri Jan 11 08:28:15 PST 2008


   Change the local buffer allocator to use a spin-lock-protected linked
   list (a freelist) instead of an array of atomic_t used/free flags.
   The atomic_t code had a window between the test (atomic_read) and the
   set (atomic_inc), so two threads could claim the same element.  This
   race has been observed in practice, with the result that the same
   data buffer was used for more than one SCSI operation, either writing
   the wrong data to the disk or sending the wrong data to the
   initiator.

   While converting the allocator, the patch also validates the mem_size
   parameter (raising it to at least PAGE_SIZE and rounding it up to a
   power of two), rounds the element-array size up to a power of two
   before the get_order() allocations, and zero-initializes the MAD
   reg_req before its fields are set.

Signed-off-by: Robert Pearson <rpearson at systemfabricworks.com>
Signed-off-by: David A. McMillen <davem at systemfabricworks.com>
---
 ib_srpt.c |  113 +++++++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 73 insertions(+), 40 deletions(-)
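
To make the race concrete: the old allocator's test (atomic_read) and
set (atomic_inc) were two separate operations, so nothing prevented two
threads from both observing used == 0 on the same element and both
claiming it.  Below is a minimal, self-contained userspace sketch of
the two patterns; every name in it is made up for illustration, and it
is not the driver code:

	/* build: cc -pthread race-sketch.c */
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define NELEM 16

	struct elem {
		atomic_int used;
		struct elem *next;
	};

	static struct elem pool[NELEM];

	/* BROKEN shape (the old code): the test and the set are two
	 * separate steps, so two threads can both see used == 0 for
	 * the same element and both take it. */
	static struct elem *get_racy(void)
	{
		for (int i = 0; i < NELEM; i++)
			if (atomic_load(&pool[i].used) == 0) {
				atomic_fetch_add(&pool[i].used, 1);
				return &pool[i];
			}
		return NULL;
	}

	/* FIXED shape (this patch): pop the head of a freelist inside
	 * one critical section, so "is an element free?" and "take it"
	 * are indivisible. */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static struct elem *freelist;

	static struct elem *get_locked(void)
	{
		struct elem *e;

		pthread_mutex_lock(&lock);
		e = freelist;
		if (e)
			freelist = e->next;
		pthread_mutex_unlock(&lock);
		return e;
	}

	int main(void)
	{
		for (int i = NELEM - 1; i >= 0; i--) {
			pool[i].next = freelist;
			freelist = &pool[i];
		}
		printf("racy: %p  locked: %p\n",
		       (void *)get_racy(), (void *)get_locked());
		return 0;
	}

The race in get_racy() is timing-dependent and will not reproduce on
demand; the point is the shape of the two allocators.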

diff --git a/ib_srpt.c b/ib_srpt.c
index 1fb9d56..44cb98a 100644
--- a/ib_srpt.c
+++ b/ib_srpt.c
@@ -56,10 +56,14 @@ MODULE_LICENSE("Dual BSD/GPL");
 
 struct mem_elem {
 	struct page *page;
+	struct mem_elem *next;
+	int ndx;
 	u32 len;
-	atomic_t used;
 };
 
+static spinlock_t mempool_lock;
+static struct mem_elem *mempool_freelist;
+
 struct srpt_thread {
 	spinlock_t thread_lock;
 	struct list_head thread_ioctx_list;
@@ -70,7 +74,7 @@ static u64 mellanox_ioc_guid = 0;
 static struct list_head srpt_devices;
 static int mem_size = 32768;
 static int mem_elements = 4096;
-static atomic_t mem_avail;
+static int mem_avail;
 static int cur_pos = 1;
 static struct mem_elem *srpt_mempool = NULL;
 static int thread = 1;
@@ -100,26 +104,22 @@ static struct ib_client srpt_client = {
 
 static struct mem_elem *srpt_get_mem_elem(int *index)
 {
-	int i, end_pos;
+	struct mem_elem *elem;
 
 	*index = 0;
-	end_pos = mem_elements;
-	for (i = cur_pos; i <= end_pos; ++i) {
-		if (i == mem_elements) {
-			end_pos = cur_pos;
-			i = 1;
-		}
 
-		if (atomic_read(&srpt_mempool[i].used) == 0) {
-			atomic_inc(&srpt_mempool[i].used);
-			smp_mb__after_atomic_inc();
-			*index = i;
-			cur_pos = i + 1;
-			atomic_dec(&mem_avail);
-			return &srpt_mempool[i];
-		}
+	spin_lock(&mempool_lock);
+	elem = mempool_freelist;
+	if (elem) {
+		mempool_freelist = elem->next;
+		mem_avail--;
+		*index = elem->ndx;
+	} else {
+		*index = 0;
 	}
-	return NULL;
+	spin_unlock(&mempool_lock);
+
+	return elem;
 }
 
 static int srpt_free_mem_elem(int index)
@@ -127,17 +127,42 @@ static int srpt_free_mem_elem(int index)
 	if (!index || index >= mem_elements)
 		return -EINVAL;
 
-	atomic_dec(&srpt_mempool[index].used);
-	smp_mb__after_atomic_dec();
-	atomic_inc(&mem_avail);
+	spin_lock(&mempool_lock);
+	srpt_mempool[index].next = mempool_freelist;
+	mempool_freelist = &srpt_mempool[index];
+	mem_avail++;
+	spin_unlock(&mempool_lock);
 	return 0;
 }
 
 static int srpt_mempool_create(void)
 {
-	int i, order;
+	int i, order, array_size;
 
-	order = get_order(mem_elements * sizeof(struct mem_elem));
+	if (mem_elements <= 0)
+		return -ENOMEM;
+	array_size = mem_elements * sizeof(struct mem_elem);
+	while (array_size & (array_size - 1))
+		array_size += array_size & ~(array_size - 1);
+	/* array_size now first power of 2 >= actual array size */
+
+	if (mem_size < PAGE_SIZE) {
+		printk(KERN_ERR PFX "mem_size parameter changed from"
+			" %d to %lu (PAGE_SIZE)\n", mem_size, PAGE_SIZE);
+		mem_size = PAGE_SIZE;
+	}
+	i = mem_size;
+	while (i & (i - 1))
+		i += i & ~(i - 1);
+	if (i != mem_size) {
+		printk(KERN_ERR PFX "mem_size parameter rounded up from"
+			" %d to %d\n", mem_size, i);
+		mem_size = i;
+	}
+	/* mem_size now also a power of 2 >= PAGE_SIZE */
+
+	spin_lock_init(&mempool_lock);
+	order = get_order(array_size);
 	srpt_mempool = (struct mem_elem *)  __get_free_pages(GFP_KERNEL, order);
 	if (!srpt_mempool)
 		return -ENOMEM;
@@ -151,32 +176,39 @@ static int srpt_mempool_create(void)
 			       mem_size, i);
 			goto free_mem;
 		}
-		atomic_set(&srpt_mempool[i].used, 0);
+		srpt_mempool[i].ndx = i;
+		srpt_mempool[i].next = mempool_freelist;
+		mempool_freelist = &srpt_mempool[i];
 	}
 
-	atomic_set(&mem_avail, mem_elements);
+	mem_avail = mem_elements;
 	return 0;
 
       free_mem:
 	while (i > 1)
 		__free_pages(srpt_mempool[--i].page, order);
 
-	free_pages((unsigned long) srpt_mempool,
-		   get_order(mem_elements * sizeof(struct mem_elem)));
+	free_pages((unsigned long) srpt_mempool, get_order(array_size));
 
 	return -ENOMEM;
 }
 
 static void srpt_mempool_destroy(void)
 {
-	int i, order;
+	int i, order, array_size;
+
+	if (srpt_mempool == NULL) return;
+
+	array_size = mem_elements * sizeof(struct mem_elem);
+	while (array_size & (array_size - 1))
+		array_size += array_size & ~(array_size - 1);
+	/* array_size now first power of 2 >= actual array size */
 
 	order = get_order(mem_size);
 	for (i = 1; i < mem_elements; ++i)
 		__free_pages(srpt_mempool[i].page, order);
 
-	free_pages((unsigned long) srpt_mempool,
-		   get_order(mem_elements * sizeof(struct mem_elem)));
+	free_pages((unsigned long) srpt_mempool, get_order(array_size));
 }
 
 static void srpt_event_handler(struct ib_event_handler *handler,
@@ -450,6 +482,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
 		goto err_query_port;
 
 	if (!sport->mad_agent) {
+		memset(&reg_req, 0, sizeof reg_req);
 		reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
 		reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
 		set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
@@ -900,7 +933,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
 				dir = SCST_DATA_WRITE;
 			else
 				dir = SCST_DATA_NONE;
-		} else 
+		} else
 			dir = SCST_DATA_NONE;
 
 		scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) & srp_cmd->lun,
@@ -939,7 +972,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
 		/*
 		 * if we have buffer in mem pool and
 		 * IO is > mem_size - we will allocate the memory buffer
-		 * for data xfer to avoid big scatterlist with 4KB each 
+		 * for data xfer to avoid big scatterlist with 4KB each
 		 * allocated by scst memory module.
 		 */
 		if (mem_elements && (ioctx->data_len >= mem_size))
@@ -1682,7 +1715,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 	dma_len = sg_dma_len(&scat[0]);
 	riu = ioctx->rdma_ius;
 
-	/* 
+	/*
 	 * For each remote desc - calculate the #ib_sge.
 	 * If #ib_sge < SRPT_DEF_SG_TABLESIZE per rdma operation then
 	 *      each remote desc rdma_iu is required a rdma wr;
@@ -2116,8 +2149,8 @@ static int srpt_alloc_data_buf(struct scst_cmd *scmnd)
 	tsize = scst_cmd_get_bufflen(scmnd);
 	nrdma = tsize / mem_size + ioctx->n_rbuf;
 
-	if (nrdma > atomic_read(&mem_avail)) {
-		printk(KERN_ERR PFX "!!ALERT mem_avail= %d\n", atomic_read(&mem_avail));
+	if (nrdma > mem_avail) {
+		printk(KERN_ERR PFX "!!ALERT mem_avail= %d\n", mem_avail);
 		goto out;
 	}
 
@@ -2161,7 +2194,7 @@ static int srpt_alloc_data_buf(struct scst_cmd *scmnd)
 
       release_mem:
 	printk(KERN_WARNING PFX "ALERT! alloc_mem mem_avail= %d n_mem_elem=%d\n",
-	       atomic_read(&mem_avail), i);
+	       mem_avail, i);
 
 	riu = ioctx->rdma_ius;
 	while (i)
@@ -2295,7 +2328,7 @@ static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
 static ssize_t show_mem_info(struct class_device *class_dev, char *buf)
 {
 	return sprintf(buf, "mem_avail= %d mem_elements= %d mem_size= %d\n",
-		       atomic_read(&mem_avail), mem_elements, mem_size);
+		       mem_avail, mem_elements, mem_size);
 }
 
 static CLASS_DEVICE_ATTR(mem_info, S_IRUGO, show_mem_info, NULL);
@@ -2362,10 +2395,10 @@ static void srpt_add_one(struct ib_device *device)
 		(unsigned long long) mellanox_ioc_guid,
 		(unsigned long long) mellanox_ioc_guid);
 
-	/* 
+	/*
 	 * We do not have a consistent service_id (ie. also id_ext of target_id)
-	 * to identify this target. We currently use the guid of the first HCA 
-	 * in the system as service_id; therefore, the target_id will change 
+	 * to identify this target. We currently use the guid of the first HCA
+	 * in the system as service_id; therefore, the target_id will change
 	 * if this HCA is gone bad and replaced by different HCA
 	 */
 	if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
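
A note on the round-up loops added to srpt_mempool_create() and
srpt_mempool_destroy() above: repeatedly adding a value's lowest set
bit, x & ~(x - 1), clears that bit and carries upward, so the loop
terminates exactly at the next power of two, a convenient shape to
hand to get_order()/__get_free_pages().  A standalone sketch (the
helper name is hypothetical, not part of the patch):

	#include <assert.h>

	static unsigned int round_up_pow2(unsigned int x)
	{
		/* x & (x - 1) is nonzero iff x is not a power of 2;
		 * x & ~(x - 1) isolates the lowest set bit. */
		while (x & (x - 1))
			x += x & ~(x - 1);
		return x;
	}

	int main(void)
	{
		assert(round_up_pow2(3) == 4);
		assert(round_up_pow2(4096) == 4096);
		assert(round_up_pow2(5000) == 8192);
		return 0;
	}

The complement matters here: adding x & (x - 1) (x with its lowest bit
cleared) skips past the next power of two instead of landing on it,
which is why both rounding loops in the patch use ~(x - 1).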


