[openib-general] kernel VM monitor for memory registration caching

Gleb Natapov glebn at voltaire.com
Sun Jul 31 03:31:57 PDT 2005


Hello Pete,

On Fri, Jul 29, 2005 at 01:42:25PM -0400, Pete Wyckoff wrote:
> I'll be happy to discuss the code with anyone who ends up wanting
> to use it or improve upon it.

I glanced over the code and I have a couple of questions/improvements.

 First of all, you have one user_delta per mm that the user can poll from
userspace. Would it be possible to make user_delta part of dreg_region
instead of dreg_context, so that the module sets it whenever a
registration becomes invalid? A field 'invalid' would be added to the buf_info
structure, and a pointer to it would be passed to the kernel at registration
time.
 This way userspace can look up the cache and check whether a registration is
still valid. There is no need to rescan the cache from userspace; we already
scanned it once from the kernel, after all. With your current approach userspace
will need to search for the mr_handle in the cache and invalidate the entry that
holds it.


 You change vm_ops in the vma to catch open/close events. What about the
nopage() method in vm_ops? Don't we have to forward it to the original vm_ops?

Something like the included patch (not even compiled).


--- dreg.c.org	2005-07-31 13:10:17.375403091 +0300
+++ dreg.c	2005-07-31 13:24:35.404872561 +0300
@@ -162,7 +162,10 @@
 
     pr_debug("%s: reg %p vma %p addr %lx\n", __func__, reg, vma, reg->addr);
     if (vma)
+    {
+	kfree (vma->vm_ops);
 	vma->vm_ops = reg->orig_ops;
+    }
     if (reg->addr)
 	mem_deregister(dc, reg);
     list_del(&reg->subordinate_list);
@@ -305,6 +308,7 @@
      * forget about it and do not build a new region for it.
      */
     if (list_empty(&temp_new_subordinate_list)) {
+	kfree (newvma->vm_ops);
 	newvma->vm_ops = orig_ops;
     } else {
 	reg = kmem_cache_alloc(dreg_region_cache, GFP_KERNEL);
@@ -510,7 +514,7 @@
       vma->vm_start, vma->vm_end, reg);
 
     reg->orig_ops = vma->vm_ops;
-    if (vma->vm_ops == &dreg_vm_ops) {
+    if (vma->vm_ops->close == dreg_vm_ops.close) {
 	/* chain off proper owner */
 	struct dreg_region *topreg;
 	pr_debug("%s: marked subordinate\n", __func__);
@@ -523,10 +527,22 @@
 	}
 	list_add(&reg->subordinate_list, &topreg->subordinate_list);
     } else {
+	struct vm_operations_struct *tmp_vm_ops;
 	/* non subordinate */
 	reg->vma = vma;
 	INIT_LIST_HEAD(&reg->subordinate_list);
-	vma->vm_ops = &dreg_vm_ops;  /* own this vma */
+
+	tmp_vm_ops = kmalloc (sizeof (struct vm_operations_struct), GFP_KERNEL);
+	memcpy (tmp_vm_ops, &dreg_vm_ops, sizeof (struct vm_operations_struct));
+	if (vma->vm_ops)
+	{
+		tmp_vm_ops->nopage = vma->vm_ops->nopage;
+#ifdef CONFIG_NUMA
+		tmp_vm_ops->set_policy = vma->vm_ops->set_policy;
+		tmp_vm_ops->get_policy = vma->vm_ops->get_policy;
+#endif
+	}
+	vma->vm_ops = tmp_vm_ops;  /* own this vma */
 	reg->orig_vm_start = vma->vm_start;
 	reg->orig_vm_end = vma->vm_end;
     }
--
			Gleb.



More information about the general mailing list