[openib-general] [PATCH] libibverbs-1.0: fix static linking

Roland Dreier (rdreier) rdreier at cisco.com
Fri Oct 27 16:42:23 PDT 2006


It turns out that static linking of libibverbs never really worked,
which makes me wonder whether the people who insisted on building mthca.a
ever actually tried it.  Anyway, here's a patch that tries to fix
things up, although only one device driver can be linked in at a time
because every driver exports the same entry point.  Comments / test
results appreciated.

I'll check this in now, and if I don't get any bug reports then I'll
put out the latest libibverbs-1.0 tree as libibverbs-1.0.4 around
Tuesday or Wednesday of next week.

(I have a more complicated plan for libibverbs 1.1 that I'm still
coding up)

Thanks,
  Roland

--- libibverbs-1.0/ChangeLog	(revision 9973)
+++ libibverbs-1.0/ChangeLog	(working copy)
@@ -1,3 +1,17 @@
+2006-10-27  Roland Dreier  <rdreier at cisco.com>
+
+	* src/init.c: Revise initialization order to fix static linking.
+	Using dlopen() on a device-specific driver from a statically
+	linked copy of libibverbs will crash, because the driver will
+	bring in dynamic copies of libibverbs and libdl that clash with
+	the copies already linked statically.
+
+	To fix this, we change the way we search for drivers: first we
+	find all uverbs devices and try the driver (if any) that is
+	linked in directly.  If all devices are handled by that driver,
+	then we don't proceed any further.  If not, then we try dynamic
+	loading of drivers and match them against any remaining devices.
+
 2006-10-17  Roland Dreier  <rdreier at cisco.com>
 
 	* include/infiniband/arch.h: Update i386 and x86_64 memory barrier
--- libibverbs-1.0/src/ibverbs.h	(revision 9973)
+++ libibverbs-1.0/src/ibverbs.h	(working copy)
@@ -60,6 +60,12 @@
 
 #define PFX		"libibverbs: "
 
+struct ibv_sysfs_dev {
+	struct sysfs_class_device      *verbs_dev;
+	struct ibv_sysfs_dev	       *next;
+	int				have_driver;
+};
+
 struct ibv_driver {
 	ibv_driver_init_func	init_func;
 	struct ibv_driver      *next;
--- libibverbs-1.0/src/init.c	(revision 9973)
+++ libibverbs-1.0/src/init.c	(working copy)
@@ -52,11 +52,52 @@
 
 HIDDEN int abi_ver;
 
-static char default_path[] = DRIVER_PATH;
+static const char default_path[] = DRIVER_PATH;
 static const char *user_path;
 
+static struct ibv_sysfs_dev *sysfs_dev_list;
 static struct ibv_driver *driver_list;
 
+static void find_sysfs_devs(void)
+{
+	struct sysfs_class *cls;
+	struct dlist *verbs_dev_list;
+	struct sysfs_class_device *verbs_dev;
+	struct ibv_sysfs_dev *dev;
+
+	cls = sysfs_open_class("infiniband_verbs");
+	if (!cls) {
+		fprintf(stderr, PFX "Fatal: couldn't open sysfs class 'infiniband_verbs'.\n");
+		return;
+	}
+
+	verbs_dev_list = sysfs_get_class_devices(cls);
+	if (!verbs_dev_list) {
+		fprintf(stderr, PFX "Fatal: no infiniband class devices found.\n");
+		return;
+	}
+
+	dlist_for_each_data(verbs_dev_list, verbs_dev, struct sysfs_class_device) {
+		dev = malloc(sizeof *dev);
+		if (!dev) {
+			fprintf(stderr, PFX "Warning: couldn't allocate device for %s\n",
+				verbs_dev->name);
+			continue;
+		}
+
+		dev->verbs_dev   = verbs_dev;
+		dev->next        = sysfs_dev_list;
+		dev->have_driver = 0;
+		sysfs_dev_list   = dev;
+	}
+}
+
+__attribute__((weak))
+struct ibv_device *openib_driver_init(struct sysfs_class_device *dev)
+{
+        return NULL;
+}
+
 static void load_driver(char *so_path)
 {
 	void *dlhandle;
@@ -79,7 +120,7 @@ static void load_driver(char *so_path)
 
 	driver = malloc(sizeof *driver);
 	if (!driver) {
-		fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n", so_path);
+		fprintf(stderr, PFX "Warning: couldn't allocate driver for %s\n", so_path);
 		dlclose(dlhandle);
 		return;
 	}
@@ -89,7 +130,7 @@ static void load_driver(char *so_path)
 	driver_list       = driver;
 }
 
-static void find_drivers(char *dir)
+static void find_drivers(const char *dir)
 {
 	size_t len = strlen(dir);
 	glob_t so_glob;
@@ -101,9 +142,9 @@ static void find_drivers(char *dir)
 		return;
 
 	while (len && dir[len - 1] == '/')
-		dir[--len] = '\0';
+		--len;
 
-	asprintf(&pat, "%s/*.so", dir);
+	asprintf(&pat, "%.*s/*.so", (int) len, dir);
 
 	ret = glob(pat, 0, NULL, &so_glob);
 	free(pat);
@@ -120,10 +161,10 @@ static void find_drivers(char *dir)
 	globfree(&so_glob);
 }
 
-static struct ibv_device *init_drivers(struct sysfs_class_device *verbs_dev)
+static struct ibv_device *try_driver(ibv_driver_init_func init_func,
+				     struct sysfs_class_device *verbs_dev)
 {
 	struct sysfs_class_device *ib_dev;
-	struct ibv_driver *driver;
 	struct ibv_device *dev;
 	char ibdev_name[64];
 
@@ -141,24 +182,14 @@ static struct ibv_device *init_drivers(s
 		return NULL;
 	}
 
-	for (driver = driver_list; driver; driver = driver->next) {
-		dev = driver->init_func(verbs_dev);
-		if (dev) {
-			dev->dev    = verbs_dev;
-			dev->ibdev  = ib_dev;
-			dev->driver = driver;
-
-			return dev;
-		}
+	dev = init_func(verbs_dev);
+	if (dev) {
+		dev->dev    = verbs_dev;
+		dev->ibdev  = ib_dev;
+		dev->driver = NULL;
 	}
 
-	fprintf(stderr, PFX "Warning: no userspace device-specific driver found for %s\n"
-		"	driver search path: ", verbs_dev->name);
-	if (user_path)
-		fprintf(stderr, "%s:", user_path);
-	fprintf(stderr, "%s\n", default_path);
-
-	return NULL;
+	return dev;
 }
 
 static int check_abi_version(void)
@@ -191,26 +222,87 @@ static int check_abi_version(void)
 	return 0;
 }
 
+static void add_device(struct ibv_device *dev,
+		       struct ibv_device ***dev_list,
+		       int *num_devices,
+		       int *list_size)
+{
+	struct ibv_device **new_list;
+
+	if (*list_size <= *num_devices) {
+		*list_size = *list_size ? *list_size * 2 : 1;
+		new_list = realloc(*dev_list, *list_size * sizeof (struct ibv_device *));
+		if (!new_list)
+			return;
+		*dev_list = new_list;
+	}
+
+	*dev_list[*num_devices++] = dev;
+}
+
 HIDDEN int ibverbs_init(struct ibv_device ***list)
 {
 	char *wr_path, *dir;
-	struct sysfs_class *cls;
-	struct dlist *verbs_dev_list;
-	struct sysfs_class_device *verbs_dev;
+	struct ibv_sysfs_dev *sysfs_dev, *next_dev;
 	struct ibv_device *device;
-	struct ibv_device **new_list;
+	struct ibv_driver *driver;
 	int num_devices = 0;
 	int list_size = 0;
+	int no_driver = 0;
+	int statically_linked = 0;
 
 	*list = NULL;
 
+	if (check_abi_version())
+		return 0;
+
 	if (ibv_init_mem_map())
 		return 0;
 
+	find_sysfs_devs();
+
+	/*
+	 * First check if a driver statically linked in can support
+	 * all the devices.  This is needed to avoid dlopen() in the
+	 * all-static case (which will break because we end up with
+	 * both a static and dynamic copy of libdl).
+	 */
+	for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = sysfs_dev->next) {
+		device = try_driver(openib_driver_init, sysfs_dev->verbs_dev);
+		if (device) {
+			add_device(device, list, &num_devices, &list_size);
+			sysfs_dev->have_driver = 1;
+		} else
+			++no_driver;
+	}
+
+	if (!no_driver)
+		goto out;
+
+	/*
+	 * Check if we can dlopen() ourselves.  If this fails,
+	 * libibverbs is probably statically linked into the
+	 * executable, and we should just give up, since trying to
+	 * dlopen() a driver module will fail spectacularly (loading a
+	 * driver .so will bring in dynamic copies of libibverbs and
+	 * libdl to go along with the static copies the executable
+	 * has, which quickly leads to a crash).
+	 */
+	{
+		void *hand = dlopen(NULL, RTLD_NOW);
+		if (!hand) {
+			fprintf(stderr, PFX "Warning: dlopen(NULL) failed, "
+				"assuming static linking.\n");
+			statically_linked = 1;
+			goto out;
+		}
+		dlclose(hand);
+	}
+
 	find_drivers(default_path);
 
 	/*
-	 * Only follow use path passed in through the calling user's
+	 * Only use path passed in through the calling user's
 	 * environment if we're not running SUID.
 	 */
 	if (getuid() == geteuid()) {
@@ -222,42 +314,37 @@ HIDDEN int ibverbs_init(struct ibv_devic
 		}
 	}
 
-	/*
-	 * Now check if a driver is statically linked.  Since we push
-	 * drivers onto our driver list, the last driver we find will
-	 * be the first one we try.
-	 */
-	load_driver(NULL);
-
-	cls = sysfs_open_class("infiniband_verbs");
-	if (!cls) {
-		fprintf(stderr, PFX "Fatal: couldn't open sysfs class 'infiniband_verbs'.\n");
-		return 0;
-	}
-
-	if (check_abi_version())
-		return 0;
-
-	verbs_dev_list = sysfs_get_class_devices(cls);
-	if (!verbs_dev_list) {
-		fprintf(stderr, PFX "Fatal: no infiniband class devices found.\n");
-		return 0;
+	for (sysfs_dev = sysfs_dev_list; sysfs_dev; sysfs_dev = sysfs_dev->next) {
+		if (sysfs_dev->have_driver)
+			continue;
+		for (driver = driver_list; driver; driver = driver->next) {
+			device = try_driver(driver->init_func, sysfs_dev->verbs_dev);
+			if (device) {
+				add_device(device, list, &num_devices, &list_size);
+				sysfs_dev->have_driver = 1;
+			}
+		}
 	}
 
-	dlist_for_each_data(verbs_dev_list, verbs_dev, struct sysfs_class_device) {
-		device = init_drivers(verbs_dev);
-		if (device) {
-			if (list_size <= num_devices) {
-				list_size = list_size ? list_size * 2 : 1;
-				new_list = realloc(*list, list_size * sizeof (struct ibv_device *));
-				if (!new_list)
-					goto out;
-				*list = new_list;
+out:
+	for (sysfs_dev = sysfs_dev_list, next_dev = sysfs_dev->next;
+	     sysfs_dev;
+	     sysfs_dev = next_dev, next_dev = sysfs_dev ? sysfs_dev->next : NULL) {
+		if (!sysfs_dev->have_driver) {
+			fprintf(stderr, PFX "Warning: no userspace device-specific "
+				" driver found for %s\n", sysfs_dev->verbs_dev->name);
+			if (statically_linked)
+				fprintf(stderr, "	When linking libibverbs statically, "
+					"driver must be statically linked too.\n");
+			else {
+				fprintf(stderr, "	driver search path: ");
+				if (user_path)
+					fprintf(stderr, "%s:", user_path);
+				fprintf(stderr, "%s\n", default_path);
 			}
-			(*list)[num_devices++] = device;
 		}
+		free(sysfs_dev);
 	}
 
-out:
 	return num_devices;
 }
--- libibverbs-1.0/README	(revision 9973)
+++ libibverbs-1.0/README	(working copy)
@@ -60,6 +60,23 @@ via the file /etc/security/limits.conf. 
 necessary if you are logging in via OpenSSH and your sshd is
 configured to use privilege separation.
 
+Static linking
+--------------
+
+In almost all cases it is better to dynamically link libibverbs into
+an application.  However, if you are forced to use static linking for
+libibverbs, then you will also have to link a device-specific
+userspace driver (such as libmthca, libipathverbs, libehca, etc)
+statically into your application.  This is because of limitations on
+dynamically loading new modules into a static executable.
+
+In particular, a static application can only be linked against a
+single device-specific driver, which means that the application will
+only work with a single type of device.  This limitation will be
+removed in future libibverbs releases, but this will require a change
+to the libibverbs ABI, so it cannot be done as part of the libibverbs
+1.0 release series.
+
 Valgrind support
 ----------------
 




More information about the general mailing list