[openib-general] [PATCH] iser: fixes for RDMA unaligned SGs, cleanups around SG handling

Or Gerlitz ogerlitz at voltaire.com
Thu Feb 16 05:01:35 PST 2006


------------------------------------------------------------------------
r5427 | ogerlitz | 2006-02-16 14:55:12 +0200 (Thu, 16 Feb 2006) | 4 lines

various cleanups in the SG handling code

Signed-off-by: Or Gerlitz <ogerlitz at voltaire.com>

------------------------------------------------------------------------
r5426 | ogerlitz | 2006-02-16 14:52:23 +0200 (Thu, 16 Feb 2006) | 4 lines

fixes for the rare case of SGs which are unaligned for RDMA

Signed-off-by: Or Gerlitz <ogerlitz at voltaire.com>

------------------------------------------------------------------------
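
For reference, here is a minimal sketch (not part of the patch) of the
allocation policy r5426 introduces for the bounce buffer used when the
SG list is unaligned for RDMA: buffers up to ISER_KMALLOC_THRESHOLD come
from kmalloc(), anything larger from the page allocator.  The helper
names iser_bounce_alloc()/iser_bounce_free() are hypothetical, and
get_order() is used here as the standard equivalent of the
long_log2(roundup_pow_of_two(len)) - PAGE_SHIFT expression in the diff.

#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/page.h>

#define ISER_KMALLOC_THRESHOLD 0x20000	/* 128K - kmalloc limit */

/* hypothetical helper, not in the patch: allocate a copy buffer of len bytes */
static void *iser_bounce_alloc(unsigned long len)
{
	if (len > ISER_KMALLOC_THRESHOLD)
		return (void *)__get_free_pages(GFP_KERNEL | __GFP_NOFAIL,
						get_order(len));
	return kmalloc(len, GFP_KERNEL | __GFP_NOFAIL);
}

/* hypothetical helper, not in the patch: free through the matching allocator */
static void iser_bounce_free(void *buf, unsigned long len)
{
	if (len > ISER_KMALLOC_THRESHOLD)
		free_pages((unsigned long)buf, get_order(len));
	else
		kfree(buf);
}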

Index: iser_memory.c
===================================================================
--- iser_memory.c	(revision 5415)
+++ iser_memory.c	(revision 5426)
@@ -40,16 +40,7 @@
 
 #include "iscsi_iser.h"
 
-/**
- * iser_page_to_virt - Translates page descriptor to virtual kernel address
- * returns virtual kernel address
- */
-inline void *
-iser_page_to_virt(struct page *page)
-{
-    return phys_to_virt(page_to_phys(page));
-}
-
+#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
 /**
  * Decrements the reference count for the
  * registered buffer & releases it
@@ -141,22 +132,26 @@ void iser_start_rdma_unaligned_sg(struct
 	struct iser_data_buf *p_mem = &p_iser_task->data[cmd_dir];
 	unsigned long  cmd_data_len = iser_sg_size(p_mem);
 
-	mem = kmalloc(cmd_data_len, GFP_KERNEL | __GFP_NOFAIL);
+	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
+		mem = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOFAIL,
+			long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
+	else
+		mem = kmalloc(cmd_data_len, GFP_KERNEL | __GFP_NOFAIL);
+
 	if (mem == NULL) {
 		iser_bug("Failed to allocate mem size %d %d for copying sglist\n",
 			 p_mem->size,(int)cmd_data_len);
 	}
 
 	if (cmd_dir == ISER_DIR_OUT) {
-		/* copy the sglist to p */
-		/* iser_data_buf_memcpy() */
+		/* copy the unaligned sg to the buffer which is used for RDMA */
 		struct scatterlist *p_sg = (struct scatterlist *)p_mem->p_buf;
 		int i;
 		char *p;
 
 		for (p = mem, i = 0; i < p_mem->size; i++) {
 			memcpy(p,
-			       iser_page_to_virt(p_sg[i].page)+ p_sg[i].offset,
+			       page_address(p_sg[i].page) + p_sg[i].offset,
 			       p_sg[i].length);
 			p += p_sg[i].length;
 		}
@@ -207,17 +202,21 @@ void iser_finalize_rdma_unaligned_sg(str
 		dma_unmap_single(dma_device, dma_addr, size, DMA_FROM_DEVICE);
 		/* copy back read RDMA to unaligned sg */
 		mem	= p_mem_copy->p_buf;
-		p_sg	= (struct scatterlist *)&p_iser_task->data[ISER_DIR_IN].p_buf;
+		p_sg	= (struct scatterlist *)p_iser_task->data[ISER_DIR_IN].p_buf;
 		sg_size = p_iser_task->data[ISER_DIR_IN].size;
 
 		for (p = mem, i = 0; i < sg_size; i++){
-			memcpy(iser_page_to_virt(p_sg[i].page)+p_sg[i].offset,
+			memcpy(page_address(p_sg[i].page) + p_sg[i].offset,
 			       p,
 			       p_sg[i].length);
 			p += p_sg[i].length;
 		}
 
-		kfree(p_mem_copy->p_buf);
+		if (size > ISER_KMALLOC_THRESHOLD)
+			free_pages((unsigned long)p_mem_copy->p_buf,
+				long_log2(roundup_pow_of_two((int)size)) - PAGE_SHIFT);
+		else
+			kfree(p_mem_copy->p_buf);
 		p_mem_copy->p_buf = NULL;
 	}
 
@@ -226,7 +225,11 @@ void iser_finalize_rdma_unaligned_sg(str
 		size	 = p_mem_copy->size;
 		dma_addr = p_mem_copy->dma_addr;
 		dma_unmap_single(dma_device, dma_addr, size, DMA_TO_DEVICE);
-		kfree(p_mem_copy->p_buf);
+		if (size > ISER_KMALLOC_THRESHOLD)
+			free_pages((unsigned long)p_mem_copy->p_buf,
+				long_log2(roundup_pow_of_two((int)size)) - PAGE_SHIFT);
+		else
+			kfree(p_mem_copy->p_buf);
 		p_mem_copy->p_buf = NULL;
 	}
 }

Index: iscsi_iser.h
===================================================================
--- iscsi_iser.h	(revision 5426)
+++ iscsi_iser.h	(revision 5427)
@@ -173,11 +173,11 @@ enum iser_buf_type {
 };
 
 struct iser_data_buf {
-	void               *p_buf;
-	unsigned int       size;
-	enum iser_buf_type type;
-	dma_addr_t         dma_addr;
-	unsigned int       dma_nents;
+	enum iser_buf_type type;      /* single or scatterlist                */
+	void               *p_buf;    /* single -> data  scatterlist -> sg    */
+	unsigned int       size;      /* data len for single, nentries for sg */
+	dma_addr_t         dma_addr;  /* returned by dma_map_single           */
+	unsigned int       dma_nents; /* returned by dma_map_sg               */
 };
 
 /* fwd declarations */
Index: iser_memory.c
===================================================================
--- iser_memory.c	(revision 5426)
+++ iser_memory.c	(revision 5427)
@@ -98,29 +98,21 @@ void iser_reg_single(struct iser_adaptor
 	p_regd_buf->direction = direction;
 }
 
-static int iser_sg_subset_len(struct iser_data_buf *p_data,
-			      int skip_entries,
-			      int count_entries)
+
+/**
+ * iser_sg_size - returns the total data length in an sg list
+ */
+int iser_sg_size(struct iser_data_buf *p_data)
 {
 	struct scatterlist *p_sg = (struct scatterlist *)p_data->p_buf;
-	int i, last_entry, total_len = 0;
+	int i, total_len = 0;
 
-	last_entry = skip_entries + count_entries;
-	for (i = skip_entries; i < last_entry; i++)
+	for (i = 0; i < p_data->dma_nents; i++)
 		total_len += sg_dma_len(&p_sg[i]);
 	return total_len;
 }
 
 /**
- * iser_sg_size - returns the total data length in sg list
- */
-int iser_sg_size(struct iser_data_buf *p_mem)
-{
-	return
-		iser_sg_subset_len(p_mem, 0, p_mem->dma_nents);
-}
-
-/**
  * iser_start_rdma_unaligned_sg
  */
 void iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task  *p_iser_task,
@@ -130,7 +122,7 @@ void iser_start_rdma_unaligned_sg(struct
 	struct device *dma_device;
 	char *mem = NULL;
 	struct iser_data_buf *p_mem = &p_iser_task->data[cmd_dir];
-	unsigned long  cmd_data_len = iser_sg_size(p_mem);
+	unsigned long  cmd_data_len = p_iser_task->data_len[cmd_dir];
 
 	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
                mem = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOFAIL,
@@ -247,8 +239,7 @@ void iser_finalize_rdma_unaligned_sg(str
  * consecutive elements. Also, it handles one entry SG.
  */
 static int iser_sg_to_page_vec(struct iser_data_buf *p_data,
-			       struct iser_page_vec *page_vec,
-			       int skip, int cnt)
+			       struct iser_page_vec *page_vec)
 {
 	struct scatterlist *p_sg = (struct scatterlist *)p_data->p_buf;
 	dma_addr_t first_addr, last_addr, page;
@@ -259,9 +250,9 @@ static int iser_sg_to_page_vec(struct is
 
 	/* compute the offset of first element */
 	/* FIXME page_vec->offset type should be dma_addr_t */
-	page_vec->offset = (u64) p_sg[skip].offset;
+	page_vec->offset = (u64) p_sg[0].offset;
 
-	for (i = skip; i < skip + cnt; i++) {
+	for (i = 0; i < p_data->dma_nents; i++) {
 		total_sz += sg_dma_len(&p_sg[i]);
 
 		first_addr = sg_dma_address(&p_sg[i]);
@@ -271,7 +262,7 @@ static int iser_sg_to_page_vec(struct is
 		end_aligned   = !(last_addr  & ~PAGE_MASK);
 
 		/* continue to collect page fragments till aligned or SG ends */
-		while (!end_aligned && (i + 1 < skip + cnt)) {
+		while (!end_aligned && (i + 1 < p_data->dma_nents)) {
 			i++;
 			total_sz += sg_dma_len(&p_sg[i]);
 			last_addr = sg_dma_address(&p_sg[i]) + sg_dma_len(&p_sg[i]);
@@ -330,19 +321,16 @@ static int iser_single_to_page_vec(struc
  * the number of entries which are aligned correctly. Supports the case where
  * consecutive SG elements are actually fragments of the same physical page.
  */
-static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *p_data,
-					      int skip)
+static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *p_data)
 {
 	struct scatterlist *p_sg;
 	dma_addr_t end_addr, next_addr;
 	int i, cnt;
 	unsigned int ret_len = 0;
 
-	if (p_data->type == ISER_BUF_TYPE_SINGLE)
-		return 1;
 	p_sg = (struct scatterlist *)p_data->p_buf;
 
-	for (cnt = 0, i = skip; i < p_data->dma_nents; i++, cnt++) {
+	for (cnt = 0, i = 0; i < p_data->dma_nents; i++, cnt++) {
 		/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
 		   "offset: %ld sz: %ld\n", i,
 		   (unsigned long)page_to_phys(p_sg[i].page),
@@ -393,15 +381,11 @@ static void iser_data_buf_dump(struct is
  * iser_page_vec_alloc - allocate page_vec covering a given data buffer
  */
 static struct iser_page_vec *iser_page_vec_alloc(struct iser_data_buf *p_data,
-						 int skip, int cnt)
+						 int total_size)
 {
 	struct iser_page_vec *page_vec;
-	int npages, total_size;
+	int npages;
 
-	if (p_data->type == ISER_BUF_TYPE_SINGLE)
-		total_size = p_data->size;
-	else
-		total_size = iser_sg_subset_len(p_data, skip, cnt);
 	npages = total_size / PAGE_SIZE + 2;
 
 	page_vec = kmalloc(sizeof(struct iser_page_vec) +
@@ -431,8 +415,7 @@ static void iser_dump_page_vec(struct is
 }
 
 static void iser_page_vec_build(struct iser_data_buf *p_data,
-				struct iser_page_vec *page_vec,
-				int skip, int cnt)
+				struct iser_page_vec *page_vec)
 {
 	int page_vec_len = 0;
 
@@ -441,9 +424,9 @@ static void iser_page_vec_build(struct i
 		page_vec_len = iser_single_to_page_vec(p_data, page_vec);
 	} else {
 		iser_dbg("Translating sg sz: %d\n", p_data->dma_nents);
-		page_vec_len = iser_sg_to_page_vec(p_data,page_vec, skip,cnt);
-		iser_dbg("sg size %d skip %d cnt %d page_vec_len %d\n",
-			 p_data->dma_nents,skip,cnt,page_vec_len);
+		page_vec_len = iser_sg_to_page_vec(p_data,page_vec);
+		iser_dbg("sg len %d page_vec_len %d\n",
+			 p_data->dma_nents,page_vec_len);
 	}
 	page_vec->length = page_vec_len;
 
@@ -470,7 +453,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_
 	struct iser_data_buf *p_mem = &p_iser_task->data[cmd_dir];
 	struct iser_page_vec *page_vec;
 	struct iser_regd_buf *p_regd_buf;
-	int cnt_to_reg = 0;
+	int aligned_len;
 	int err;
 
 	p_regd_buf = &p_iser_task->rdma_regd[cmd_dir];
@@ -478,31 +461,23 @@ int iser_reg_rdma_mem(struct iscsi_iser_
 	iser_dbg("p_mem %p p_mem->type %d\n", p_mem,p_mem->type);
 
 	if (p_mem->type != ISER_BUF_TYPE_SINGLE) {
-		int aligned_len;
-
-		iser_dbg("converting sg to page_vec\n");
-		aligned_len = iser_data_buf_aligned_len(p_mem,0);
-		if (aligned_len == p_mem->size)
-			cnt_to_reg = aligned_len;
-		else {
-			iser_err("can't reg for rdma, alignment violation\n");
+		aligned_len = iser_data_buf_aligned_len(p_mem);
+		if (aligned_len != p_mem->size) {
+			iser_err("rdma alignment violation %d/%d aligned\n",
+				 aligned_len, p_mem->size);
 			iser_data_buf_dump(p_mem);
 			/* allocate copy buf, if we are writing, copy the */
-			/* unaligned scatterlist, anyway dma map the copy */
+			/* unaligned scatterlist, dma map the copy        */
 			iser_start_rdma_unaligned_sg(p_iser_task, cmd_dir);
-			p_regd_buf->virt_addr = p_iser_task->data_copy[cmd_dir].p_buf;
 			p_mem = &p_iser_task->data_copy[cmd_dir];
 		}
-	} else {
-		iser_dbg("converting single to page_vec\n");
-		p_regd_buf->virt_addr = p_mem->p_buf;
 	}
 
-	page_vec = iser_page_vec_alloc(p_mem,0,cnt_to_reg);
+	page_vec = iser_page_vec_alloc(p_mem, p_iser_task->data_len[cmd_dir]);
 	if(!page_vec)
 		return -ENOMEM;
 
-	iser_page_vec_build(p_mem, page_vec,0,cnt_to_reg);
+	iser_page_vec_build(p_mem, page_vec);
 	err = iser_reg_page_vec(p_iser_conn,page_vec,&p_regd_buf->reg);
 	kfree(page_vec);
 	if(err)



