[openib-general] [PATCH] Convert cache.c to use RCU

Roland Dreier roland at topspin.com
Mon Nov 8 20:52:04 PST 2004


Use RCU instead of seqlocks, and simplify the code.

Index: core/device.c
===================================================================
--- core/device.c	(revision 1178)
+++ core/device.c	(working copy)
@@ -190,8 +190,7 @@
 
 int ib_register_device(struct ib_device *device)
 {
-	struct ib_device_private   *priv;
-	int                         ret;
+	int ret;
 
 	down(&device_sem);
 
@@ -206,51 +205,16 @@
 		goto out;
 	}
 
-	priv = kmalloc(sizeof *priv, GFP_KERNEL);
-	if (!priv) {
-		printk(KERN_WARNING "Couldn't allocate private struct for %s\n",
-			       device->name);
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	*priv = (struct ib_device_private) { 0 };
-
-	if (device->node_type == IB_NODE_SWITCH) {
-		priv->start_port = priv->end_port = 0;
-	} else {
-		priv->start_port = 1;
-		priv->end_port   = device->phys_port_cnt;
-	}
-
-	priv->port_data = kmalloc((priv->end_port + 1) * sizeof (struct ib_port_data),
-				  GFP_KERNEL);
-	if (!priv->port_data) {
-		printk(KERN_WARNING "Couldn't allocate port info for %s\n",
-			       device->name);
-		ret = -ENOMEM;
-		goto out_free;
-	}
-
-	device->core = priv;
-
 	INIT_LIST_HEAD(&device->event_handler_list);
 	INIT_LIST_HEAD(&device->client_data_list);
 	spin_lock_init(&device->event_handler_lock);
 	spin_lock_init(&device->client_data_lock);
 
-	ret = ib_cache_setup(device);
-	if (ret) {
-		printk(KERN_WARNING "Couldn't create device info cache for %s\n",
-			       device->name);
-		goto out_free_port;
-	}
-
 	ret = ib_device_register_sysfs(device);
 	if (ret) {
 		printk(KERN_WARNING "Couldn't register device %s with driver model\n",
 		       device->name);
-		goto out_free_cache;
+		goto out;
 	}
 
 	list_add_tail(&device->core_list, &device_list);
@@ -265,18 +229,6 @@
 				client->add(device);
 	}
 
-	up(&device_sem);
-	return 0;
-
- out_free_cache:
-	ib_cache_cleanup(device);
-
- out_free_port:
-	kfree(priv->port_data);
-
- out_free:
-	kfree(priv);
-
  out:
 	up(&device_sem);
 	return ret;
@@ -285,7 +237,6 @@
 
 void ib_unregister_device(struct ib_device *device)
 {
-	struct ib_device_private *priv = device->core;
 	struct ib_client *client;
 	struct ib_client_data *context, *tmp;
 	unsigned long flags;
@@ -305,11 +256,6 @@
 		kfree(context);
 	spin_unlock_irqrestore(&device->client_data_lock, flags);
 
-	ib_cache_cleanup(device);
-
-	kfree(priv->port_data);
-	kfree(priv);
-
 	device->reg_state = IB_DEV_UNREGISTERED;
 }
 EXPORT_SYMBOL(ib_unregister_device);
@@ -490,11 +436,18 @@
 	if (ret)
 		printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
 
+	ret = ib_cache_setup();
+	if (ret) {
+		printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
+		ib_sysfs_cleanup();
+	}
+
 	return ret;
 }
 
 static void __exit ib_core_cleanup(void)
 {
+	ib_cache_cleanup();
 	ib_sysfs_cleanup();
 }
 
Index: core/cache.c
===================================================================
--- core/cache.c	(revision 1178)
+++ core/cache.c	(working copy)
@@ -23,57 +23,77 @@
 
 #include <linux/version.h>
 #include <linux/module.h>
-
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/rcupdate.h>
 
 #include "core_priv.h"
 
-int ib_cached_lid_get(struct ib_device   *device,
-		      u8                  port,
-		      struct ib_port_lid *port_lid)
-{
-	struct ib_device_private *priv;
-	unsigned int seq;
+struct ib_pkey_cache {
+	struct rcu_head rcu;
+	int             table_len;
+	u16             table[0];
+};
 
-	priv = device->core;
+struct ib_gid_cache {
+	struct rcu_head rcu;
+	int             table_len;
+	union ib_gid    table[0];
+};
 
-	if (port < priv->start_port || port > priv->end_port)
-		return -EINVAL;
+struct ib_update_work {
+	struct work_struct work;
+	struct ib_device  *device;
+	u8                 port_num;
+};
 
-	do {
-		seq = read_seqcount_begin(&priv->port_data[port].lock);
-		memcpy(port_lid,
-		       &priv->port_data[port].port_lid,
-		       sizeof (struct ib_port_lid));
-	} while (read_seqcount_retry(&priv->port_data[port].lock, seq));
+static inline int start_port(struct ib_device *device)
+{
+	return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+}
 
-	return 0;
+static inline int end_port(struct ib_device *device)
+{
+	return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
 }
-EXPORT_SYMBOL(ib_cached_lid_get);
 
+static void rcu_free_pkey(struct rcu_head *head)
+{
+	struct ib_pkey_cache *cache =
+		container_of(head, struct ib_pkey_cache, rcu);
+	kfree(cache);
+}
+
+static void rcu_free_gid(struct rcu_head *head)
+{
+	struct ib_gid_cache *cache =
+		container_of(head, struct ib_gid_cache, rcu);
+	kfree(cache);
+}
+
 int ib_cached_gid_get(struct ib_device *device,
 		      u8                port,
 		      int               index,
 		      union ib_gid     *gid)
 {
-	struct ib_device_private *priv;
-	unsigned int seq;
+	struct ib_gid_cache *cache;
+	int ret = 0;
 
-	priv = device->core;
-
-	if (port < priv->start_port || port > priv->end_port)
+	if (port < start_port(device) || port > end_port(device))
 		return -EINVAL;
 
-	if (index < 0 || index >= priv->port_data[port].properties.gid_tbl_len)
-		return -EINVAL;
+	rcu_read_lock();
 
-	do {
-		seq = read_seqcount_begin(&priv->port_data[port].lock);
-		*gid = priv->port_data[port].gid_table[index];
-	} while (read_seqcount_retry(&priv->port_data[port].lock, seq));
+	cache = rcu_dereference(device->cache.gid_cache[port - start_port(device)]);
 
-	return 0;
+	if (index < 0 || index >= cache->table_len)
+		ret = -EINVAL;
+	else
+		*gid = cache->table[index];
+
+	rcu_read_unlock();
+
+	return ret;
 }
 EXPORT_SYMBOL(ib_cached_gid_get);
 
@@ -82,23 +102,24 @@
 		       int               index,
 		       u16              *pkey)
 {
-	struct ib_device_private *priv;
-	unsigned int              seq;
+	struct ib_pkey_cache *cache;
+	int ret = 0;
 
-	priv = device->core;
-
-	if (port < priv->start_port || port > priv->end_port)
+	if (port < start_port(device) || port > end_port(device))
 		return -EINVAL;
 
-	if (index < 0 || index >= priv->port_data[port].properties.pkey_tbl_len)
-		return -EINVAL;
+	rcu_read_lock();
 
-	do {
-		seq = read_seqcount_begin(&priv->port_data[port].lock);
-		*pkey = priv->port_data[port].pkey_table[index];
-	} while (read_seqcount_retry(&priv->port_data[port].lock, seq));
+	cache = rcu_dereference(device->cache.pkey_cache[port - start_port(device)]);
 
-	return 0;
+	if (index < 0 || index >= cache->table_len)
+		ret = -EINVAL;
+	else
+		*pkey = cache->table[index];
+
+	rcu_read_unlock();
+
+	return ret;
 }
 EXPORT_SYMBOL(ib_cached_pkey_get);
 
@@ -107,207 +128,214 @@
 			u16               pkey,
 			u16              *index)
 {
-	struct ib_device_private *priv;
-	unsigned int              seq;
-	int                       i;
-	int                       found;
+	struct ib_pkey_cache *cache;
+	int i;
+	int ret = -ENOENT;
 
-	priv = device->core;
-
-	if (port < priv->start_port || port > priv->end_port)
+	if (port < start_port(device) || port > end_port(device))
 		return -EINVAL;
 
-	do {
-		seq = read_seqcount_begin(&priv->port_data[port].lock);
-		found = -1;
-		for (i = 0; i < priv->port_data[port].properties.pkey_tbl_len; ++i) {
-			if ((priv->port_data[port].pkey_table[i] & 0x7fff) ==
-			    (pkey & 0x7fff)) {
-				found = i;
-				break;
-			}
+	rcu_read_lock();
+
+	cache = rcu_dereference(device->cache.pkey_cache[port - start_port(device)]);
+
+	*index = -1;
+
+	for (i = 0; i < cache->table_len; ++i)
+		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
+			*index = i;
+			ret = 0;
+			break;
 		}
-	} while (read_seqcount_retry(&priv->port_data[port].lock, seq));
 
-	if (found < 0) {
-		return -ENOENT;
-	} else {
-		*index = found;
-		return 0;
-	}
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(ib_cached_pkey_find);
 
 static void ib_cache_update(struct ib_device *device,
 			    u8                port)
 {
-	struct ib_device_private  *priv = device->core;
-	struct ib_port_data       *info = &priv->port_data[port];
 	struct ib_port_attr       *tprops = NULL;
-	union ib_gid              *tgid = NULL;
-	u16                       *tpkey = NULL;
+	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
+	struct ib_gid_cache       *gid_cache = NULL, *old_gid_cache;
 	int                        i;
 	int                        ret;
 
 	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
 	if (!tprops)
-		goto out;
+		return;
 
-	ret = device->query_port(device, port, tprops);
+	ret = ib_query_port(device, port, tprops);
 	if (ret) {
-		printk(KERN_WARNING "query_port failed (%d) for %s\n",
+		printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
 		       ret, device->name);
-		goto out;
+		goto err;
 	}
 
-	tprops->gid_tbl_len = min(tprops->gid_tbl_len,
-				  info->gid_table_alloc_length);
-	tgid = kmalloc(tprops->gid_tbl_len * sizeof *tgid, GFP_KERNEL);
-	if (!tgid)
-		goto out;
+	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
+			     sizeof *pkey_cache->table, GFP_KERNEL);
+	if (!pkey_cache)
+		goto err;
 
-	for (i = 0; i < tprops->gid_tbl_len; ++i) {
-		ret = device->query_gid(device, port, i, tgid + i);
+	INIT_RCU_HEAD(&pkey_cache->rcu);
+	pkey_cache->table_len = tprops->pkey_tbl_len;
+
+	gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len *
+			    sizeof *gid_cache->table, GFP_KERNEL);
+	if (!gid_cache)
+		goto err;
+
+	INIT_RCU_HEAD(&gid_cache->rcu);
+	gid_cache->table_len = tprops->gid_tbl_len;
+
+	for (i = 0; i < pkey_cache->table_len; ++i) {
+		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
 		if (ret) {
-			printk(KERN_WARNING "query_gid failed (%d) for %s (index %d)\n",
+			printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
 			       ret, device->name, i);
-			goto out;
+			goto err;
 		}
 	}
 
-	tprops->pkey_tbl_len = min(tprops->pkey_tbl_len,
-				   info->pkey_table_alloc_length);
-	tpkey = kmalloc(tprops->pkey_tbl_len * sizeof (u16),
-			GFP_KERNEL);
-	if (!tpkey)
-		goto out;
-
-	for (i = 0; i < tprops->pkey_tbl_len; ++i) {
-		ret = device->query_pkey(device, port, i, &tpkey[i]);
+	for (i = 0; i < gid_cache->table_len; ++i) {
+		ret = ib_query_gid(device, port, i, gid_cache->table + i);
 		if (ret) {
-			printk(KERN_WARNING "query_pkey failed (%d) "
-			       "for %s, port %d, index %d\n",
-			       ret, device->name, port, i);
-			goto out;
+			printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
+			       ret, device->name, i);
+			goto err;
 		}
 	}
 
-	write_seqcount_begin(&info->lock);
+	old_pkey_cache = device->cache.pkey_cache[port - start_port(device)];
+	old_gid_cache  = device->cache.gid_cache [port - start_port(device)];
 
-	info->properties = *tprops;
+#warning Delete definition of rcu_assign_pointer when 2.6.10 is released!
+#ifndef rcu_assign_pointer
+#define rcu_assign_pointer(p, v)	({ \
+						smp_wmb(); \
+						(p) = (v); \
+					})
+#endif
 
-	info->port_lid.lid = info->properties.lid;
-	info->port_lid.lmc = info->properties.lmc;
+	rcu_assign_pointer(device->cache.pkey_cache[port - start_port(device)],
+			   pkey_cache);
+	rcu_assign_pointer(device->cache.gid_cache [port - start_port(device)],
+			   gid_cache);
 
-	memcpy(info->gid_table, tgid,
-	       tprops->gid_tbl_len * sizeof *tgid);
-	memcpy(info->pkey_table, tpkey,
-	       tprops->pkey_tbl_len * sizeof *tpkey);
+	if (old_pkey_cache)
+		call_rcu(&old_pkey_cache->rcu, rcu_free_pkey);
+	if (old_gid_cache)
+		call_rcu(&old_gid_cache->rcu, rcu_free_gid);
 
-	write_seqcount_end(&info->lock);
+	kfree(tprops);
+	return;
 
- out:
+err:
+	kfree(pkey_cache);
+	kfree(gid_cache);
 	kfree(tprops);
-	kfree(tpkey);
-	kfree(tgid);
 }
 
-static void ib_cache_task(void *port_ptr)
+static void ib_cache_task(void *work_ptr)
 {
-	struct ib_port_data *port_data = port_ptr;
+	struct ib_update_work *work = work_ptr;
 
-	ib_cache_update(port_data->device, port_data->port_num);
+	ib_cache_update(work->device, work->port_num);
+	kfree(work);
 }
 
 static void ib_cache_event(struct ib_event_handler *handler,
 			   struct ib_event *event)
 {
+	struct ib_update_work *work;
+
 	if (event->event == IB_EVENT_PORT_ERR    ||
 	    event->event == IB_EVENT_PORT_ACTIVE ||
 	    event->event == IB_EVENT_LID_CHANGE  ||
 	    event->event == IB_EVENT_PKEY_CHANGE ||
 	    event->event == IB_EVENT_SM_CHANGE) {
-		struct ib_device_private *priv = event->device->core;
-		schedule_work(&priv->port_data[event->element.port_num].refresh_task);
+		work = kmalloc(sizeof *work, GFP_ATOMIC);
+		if (work) {
+			INIT_WORK(&work->work, ib_cache_task, work);
+			work->device   = event->device;
+			work->port_num = event->element.port_num;
+			schedule_work(&work->work);
+		}
 	}
 }
 
-int ib_cache_setup(struct ib_device *device)
+void ib_cache_setup_one(struct ib_device *device)
 {
-	struct ib_device_private *priv = device->core;
-	struct ib_port_attr       prop;
-	int                       p;
-	int                       ret;
+	int p;
 
-	for (p = priv->start_port; p <= priv->end_port; ++p) {
-		priv->port_data[p].device = device;
-		priv->port_data[p].port_num = p;
-		INIT_WORK(&priv->port_data[p].refresh_task,
-			  ib_cache_task, &priv->port_data[p]);
-		priv->port_data[p].gid_table  = NULL;
-		priv->port_data[p].pkey_table = NULL;
-		priv->port_data[p].event_handler.device = NULL;
+	device->cache.pkey_cache =
+		kmalloc(sizeof *device->cache.pkey_cache *
+			end_port(device) - start_port(device), GFP_KERNEL);
+	device->cache.gid_cache =
+		kmalloc(sizeof *device->cache.pkey_cache *
+			end_port(device) - start_port(device), GFP_KERNEL);
+
+	if (!device->cache.pkey_cache || !device->cache.gid_cache) {
+		printk(KERN_WARNING "Couldn't allocate cache "
+		       "for %s\n", device->name);
+		goto err;
 	}
 
-	for (p = priv->start_port; p <= priv->end_port; ++p) {
-		seqcount_init(&priv->port_data[p].lock);
-		ret = device->query_port(device, p, &prop);
-		if (ret) {
-			printk(KERN_WARNING "query_port failed for %s\n",
-			       device->name);
-			goto error;
-		}
-		priv->port_data[p].gid_table_alloc_length = prop.gid_tbl_len;
-		priv->port_data[p].gid_table = kmalloc(prop.gid_tbl_len *
-						       sizeof (union ib_gid),
-						       GFP_KERNEL);
-		if (!priv->port_data[p].gid_table) {
-			ret = -ENOMEM;
-			goto error;
-		}
+	for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+		device->cache.pkey_cache[p] = NULL;
+		device->cache.gid_cache [p] = NULL;
+		ib_cache_update(device, p + start_port(device));
+	}
 
-		priv->port_data[p].pkey_table_alloc_length = prop.pkey_tbl_len;
-		priv->port_data[p].pkey_table = kmalloc(prop.pkey_tbl_len * sizeof (u16),
-							GFP_KERNEL);
-		if (!priv->port_data[p].pkey_table) {
-			ret = -ENOMEM;
-			goto error;
-		}
+	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+			      device, ib_cache_event);
+	if (ib_register_event_handler(&device->cache.event_handler))
+		goto err_cache;
 
-		ib_cache_update(device, p);
+	return;
 
-		INIT_IB_EVENT_HANDLER(&priv->port_data[p].event_handler,
-				      device, ib_cache_event);
-		ret = ib_register_event_handler(&priv->port_data[p].event_handler);
-		if (ret) {
-			priv->port_data[p].event_handler.device = NULL;
-			goto error;
-		}
+err_cache:
+	for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+		kfree(device->cache.pkey_cache[p]);
+		kfree(device->cache.gid_cache[p]);
 	}
 
-	return 0;
+err:
+	kfree(device->cache.pkey_cache);
+	kfree(device->cache.gid_cache);
+}
 
- error:
-	for (p = priv->start_port; p <= priv->end_port; ++p) {
-		if (priv->port_data[p].event_handler.device)
-			ib_unregister_event_handler(&priv->port_data[p].event_handler);
-		kfree(priv->port_data[p].gid_table);
-		kfree(priv->port_data[p].pkey_table);
+void ib_cache_cleanup_one(struct ib_device *device)
+{
+	int p;
+
+	ib_unregister_event_handler(&device->cache.event_handler);
+	flush_scheduled_work();
+
+	for (p = 0; p <= end_port(device) - start_port(device); ++p) {
+		kfree(device->cache.pkey_cache[p]);
+		kfree(device->cache.gid_cache[p]);
 	}
 
-	return ret;
+	kfree(device->cache.pkey_cache);
+	kfree(device->cache.gid_cache);
 }
 
-void ib_cache_cleanup(struct ib_device *device)
+struct ib_client cache_client = {
+	.name   = "cache",
+	.add    = ib_cache_setup_one,
+	.remove = ib_cache_cleanup_one
+};
+
+int __init ib_cache_setup(void)
 {
-	struct ib_device_private *priv = device->core;
-	int                       p;
+	return ib_register_client(&cache_client);
+}
 
-	for (p = priv->start_port; p <= priv->end_port; ++p) {
-		ib_unregister_event_handler(&priv->port_data[p].event_handler);
-		kfree(priv->port_data[p].gid_table);
-		kfree(priv->port_data[p].pkey_table);
-	}
+void __exit ib_cache_cleanup(void)
+{
+	ib_unregister_client(&cache_client);
 }
 
 /*
Index: core/core_priv.h
===================================================================
--- core/core_priv.h	(revision 1178)
+++ core/core_priv.h	(working copy)
@@ -29,39 +29,15 @@
 
 #include <ib_verbs.h>
 
-struct ib_device_private {
-	int                     start_port;
-	int                     end_port;
-	u64                     node_guid;
-	struct ib_port_data    *port_data;
-};
-
-struct ib_port_data {
-	struct ib_device          *device;
-
-	struct ib_event_handler    event_handler;
-	struct work_struct         refresh_task;
-
-	seqcount_t                 lock;
-	struct ib_port_attr        properties;
-	struct ib_port_lid         port_lid;
-	int                        gid_table_alloc_length;
-	u16                        pkey_table_alloc_length;
-	union ib_gid              *gid_table;
-	u16                       *pkey_table;
-	u8                         port_num;
-};
-
-int  ib_cache_setup(struct ib_device *device);
-void ib_cache_cleanup(struct ib_device *device);
-void ib_completion_thread(struct list_head *entry, void *device_ptr);
-void ib_async_thread(struct list_head *entry, void *device_ptr);
-
 int  ib_device_register_sysfs(struct ib_device *device);
 void ib_device_unregister_sysfs(struct ib_device *device);
+
 int  ib_sysfs_setup(void);
 void ib_sysfs_cleanup(void);
 
+int  ib_cache_setup(void);
+void ib_cache_cleanup(void);
+
 #endif /* _CORE_PRIV_H */
 
 /*
Index: include/ib_verbs.h
===================================================================
--- include/ib_verbs.h	(revision 1178)
+++ include/ib_verbs.h	(working copy)
@@ -672,6 +672,12 @@
 
 #define IB_DEVICE_NAME_MAX 64
 
+struct ib_cache {
+	struct ib_event_handler event_handler;
+	struct ib_pkey_cache  **pkey_cache;
+	struct ib_gid_cache   **gid_cache;
+};
+
 struct ib_device {
 	struct pci_dev               *dma_device;
 
@@ -684,7 +690,8 @@
 	struct list_head              client_data_list;
 	spinlock_t                    client_data_lock;
 
-	void                         *core;
+	struct ib_cache               cache;
+
 	u32                           flags;
 
 	int		           (*query_device)(struct ib_device *device,
Index: include/ts_ib_core.h
===================================================================
--- include/ts_ib_core.h	(revision 1178)
+++ include/ts_ib_core.h	(working copy)
@@ -24,14 +24,6 @@
 #ifndef _TS_IB_CORE_H
 #define _TS_IB_CORE_H
 
-struct ib_port_lid {
-	u16        lid;
-	u8         lmc;
-};
-
-int ib_cached_lid_get(struct ib_device    *device,
-		      u8                   port,
-		      struct ib_port_lid  *port_lid);
 int ib_cached_gid_get(struct ib_device    *device,
 		      u8                   port,
 		      int                  index,
Index: ulp/ipoib/ipoib_multicast.c
===================================================================
--- ulp/ipoib/ipoib_multicast.c	(revision 1178)
+++ ulp/ipoib/ipoib_multicast.c	(working copy)
@@ -517,10 +517,12 @@
 	}
 
 	{
-		struct ib_port_lid port_lid;
+		struct ib_port_attr attr;
 
-		ib_cached_lid_get(priv->ca, priv->port, &port_lid);
-		priv->local_lid = port_lid.lid;
+		if (!ib_query_port(priv->ca, priv->port, &attr))
+			priv->local_lid = attr.lid;
+		else
+			ipoib_warn(priv, "ib_query_port failed\n");
 	}
 
 	priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -



More information about the general mailing list