[openfabrics-ewg] [PATCH ofed-1.1 1/2] ehca: fix ehca device registration

Hoang-Nam Nguyen hnguyen at de.ibm.com
Mon Oct 2 15:50:02 PDT 2006


Hi Michael!
Please consider this ehca patch for ofed-1.1, as it fixes a bug
(crash) that occurred when ib_ehca was loaded after ib_ipoib.
This patch first initializes struct ehca_shca with struct device*, then
creates the internal resources, and finally registers the ehca IB device.
That is the proper sequence we have to implement.

I wanted to create this patch against the ofed git tree branch
ehca_branch, but saw that ehca_main.c there has version SVNEHCA_0012,
which is much older than the version SVNEHCA_0015 in ofed-1.1-rc6.
I tried to do a pull, and git said that the tree is already up to date,
so I don't know what I did wrong. Anyway, I created this patch against
the dir openib-1.1 extracted from ofed-1.1-rc6/SOURCES/openib-1.1.tgz.
I hope that it still works for you.

Thanks!
Nam Nguyen


Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
---


 ehca_main.c |   35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)


diff -Nurp openib-1.1_orig/drivers/infiniband/hw/ehca/ehca_main.c openib-1.1_work/drivers/infiniband/hw/ehca/ehca_main.c
--- openib-1.1_orig/drivers/infiniband/hw/ehca/ehca_main.c 2006-09-20 06:28:56.000000000 -0700
+++ openib-1.1_work/drivers/infiniband/hw/ehca/ehca_main.c 2006-10-02 15:24:48.010001888 -0700
@@ -5,6 +5,7 @@
  *
  *  Authors: Heiko J Schick <schickhj at de.ibm.com>
  *           Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+ *           Joachim Fenkes <fenkes at de.ibm.com>
  *
  *  Copyright (c) 2005 IBM Corporation
  *
@@ -238,7 +239,7 @@ init_node_guid1:
  return ret;
 }
 
-int ehca_register_device(struct ehca_shca *shca)
+int ehca_init_device(struct ehca_shca *shca)
 {
  int ret;
 
@@ -316,11 +317,6 @@ int ehca_register_device(struct ehca_shc
  /* shca->ib_device.process_mad     = ehca_process_mad;     */
  shca->ib_device.mmap      = ehca_mmap;
 
- ret = ib_register_device(&shca->ib_device);
- if (ret)
-  ehca_err(&shca->ib_device,
-    "ib_register_device() failed ret=%x", ret);
-
  return ret;
 }
 
@@ -446,7 +442,7 @@ static ssize_t  ehca_show_##name(struct 
   kfree(rblock);            \
   return 0;            \
  }           \
-                                                                           \
+            \
  data = rblock->name;                                               \
  kfree(rblock);                                                     \
             \
@@ -560,9 +556,9 @@ static int __devinit ehca_probe(struct i
   goto probe1;
  }
 
- ret = ehca_register_device(shca);
+ ret = ehca_init_device(shca);
  if (ret) {
-  ehca_gen_err("Cannot register Infiniband device");
+  ehca_gen_err("Cannot init ehca  device struct");
   goto probe1;
  }
 
@@ -570,7 +566,7 @@ static int __devinit ehca_probe(struct i
  ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
  if (ret) {
   ehca_err(&shca->ib_device, "Cannot create EQ.");
-  goto probe2;
+  goto probe1;
  }
 
  ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
@@ -599,6 +595,13 @@ static int __devinit ehca_probe(struct i
   goto probe5;
  }
 
+ ret = ib_register_device(&shca->ib_device);
+ if (ret) {
+  ehca_err(&shca->ib_device,
+    "ib_register_device() failed ret=%x", ret);
+  goto probe6;
+ }
+
  /* create AQP1 for port 1 */
  if (ehca_open_aqp1 == 1) {
   shca->sport[0].port_state = IB_PORT_DOWN;
@@ -606,7 +609,7 @@ static int __devinit ehca_probe(struct i
   if (ret) {
    ehca_err(&shca->ib_device,
      "Cannot create AQP1 for port 1.");
-   goto probe6;
+   goto probe7;
   }
  }
 
@@ -617,7 +620,7 @@ static int __devinit ehca_probe(struct i
   if (ret) {
    ehca_err(&shca->ib_device,
      "Cannot create AQP1 for port 2.");
-   goto probe7;
+   goto probe8;
   }
  }
 
@@ -629,12 +632,15 @@ static int __devinit ehca_probe(struct i
 
  return 0;
 
-probe7:
+probe8:
  ret = ehca_destroy_aqp1(&shca->sport[0]);
  if (ret)
   ehca_err(&shca->ib_device,
     "Cannot destroy AQP1 for port 1. ret=%x", ret);
 
+probe7:
+ ib_unregister_device(&shca->ib_device);
+
 probe6:
  ret = ehca_dereg_internal_maxmr(shca);
  if (ret)
@@ -659,9 +665,6 @@ probe3:
   ehca_err(&shca->ib_device,
     "Cannot destroy EQ. ret=%x", ret);
 
-probe2:
- ib_unregister_device(&shca->ib_device);
-
 probe1:
  ib_dealloc_device(&shca->ib_device);
 
-------------- next part --------------
diff -Nurp openib-1.1_orig/drivers/infiniband/hw/ehca/ehca_main.c openib-1.1_work/drivers/infiniband/hw/ehca/ehca_main.c
--- openib-1.1_orig/drivers/infiniband/hw/ehca/ehca_main.c	2006-09-20 06:28:56.000000000 -0700
+++ openib-1.1_work/drivers/infiniband/hw/ehca/ehca_main.c	2006-10-02 15:24:48.010001888 -0700
@@ -5,6 +5,7 @@
  *
  *  Authors: Heiko J Schick <schickhj at de.ibm.com>
  *           Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+ *           Joachim Fenkes <fenkes at de.ibm.com>
  *
  *  Copyright (c) 2005 IBM Corporation
  *
@@ -238,7 +239,7 @@ init_node_guid1:
 	return ret;
 }
 
-int ehca_register_device(struct ehca_shca *shca)
+int ehca_init_device(struct ehca_shca *shca)
 {
 	int ret;
 
@@ -316,11 +317,6 @@ int ehca_register_device(struct ehca_shc
 	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
 	shca->ib_device.mmap		    = ehca_mmap;
 
-	ret = ib_register_device(&shca->ib_device);
-	if (ret)
-		ehca_err(&shca->ib_device,
-			 "ib_register_device() failed ret=%x", ret);
-
 	return ret;
 }
 
@@ -446,7 +442,7 @@ static ssize_t  ehca_show_##name(struct 
 		kfree(rblock);					   	   \
 		return 0;					   	   \
 	}								   \
-                                                                           \
+									   \
 	data = rblock->name;                                               \
 	kfree(rblock);                                                     \
 									   \
@@ -560,9 +556,9 @@ static int __devinit ehca_probe(struct i
 		goto probe1;
 	}
 
-	ret = ehca_register_device(shca);
+	ret = ehca_init_device(shca);
 	if (ret) {
-		ehca_gen_err("Cannot register Infiniband device");
+		ehca_gen_err("Cannot init ehca  device struct");
 		goto probe1;
 	}
 
@@ -570,7 +566,7 @@ static int __devinit ehca_probe(struct i
 	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
 	if (ret) {
 		ehca_err(&shca->ib_device, "Cannot create EQ.");
-		goto probe2;
+		goto probe1;
 	}
 
 	ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
@@ -599,6 +595,13 @@ static int __devinit ehca_probe(struct i
 		goto probe5;
 	}
 
+	ret = ib_register_device(&shca->ib_device);
+	if (ret) {
+		ehca_err(&shca->ib_device,
+			 "ib_register_device() failed ret=%x", ret);
+		goto probe6;
+	}
+
 	/* create AQP1 for port 1 */
 	if (ehca_open_aqp1 == 1) {
 		shca->sport[0].port_state = IB_PORT_DOWN;
@@ -606,7 +609,7 @@ static int __devinit ehca_probe(struct i
 		if (ret) {
 			ehca_err(&shca->ib_device,
 				 "Cannot create AQP1 for port 1.");
-			goto probe6;
+			goto probe7;
 		}
 	}
 
@@ -617,7 +620,7 @@ static int __devinit ehca_probe(struct i
 		if (ret) {
 			ehca_err(&shca->ib_device,
 				 "Cannot create AQP1 for port 2.");
-			goto probe7;
+			goto probe8;
 		}
 	}
 
@@ -629,12 +632,15 @@ static int __devinit ehca_probe(struct i
 
 	return 0;
 
-probe7:
+probe8:
 	ret = ehca_destroy_aqp1(&shca->sport[0]);
 	if (ret)
 		ehca_err(&shca->ib_device,
 			 "Cannot destroy AQP1 for port 1. ret=%x", ret);
 
+probe7:
+	ib_unregister_device(&shca->ib_device);
+
 probe6:
 	ret = ehca_dereg_internal_maxmr(shca);
 	if (ret)
@@ -659,9 +665,6 @@ probe3:
 		ehca_err(&shca->ib_device,
 			 "Cannot destroy EQ. ret=%x", ret);
 
-probe2:
-	ib_unregister_device(&shca->ib_device);
-
 probe1:
 	ib_dealloc_device(&shca->ib_device);
 


More information about the ewg mailing list