[openib-general] Re: [PATCH] [kdapl] Various AT changes

James Lentini jlentini at netapp.com
Tue May 17 14:07:30 PDT 2005


Committed in revision 2371.

On Mon, 16 May 2005, Hal Rosenstock wrote:

halr> Various AT changes including:
halr> Bring the API in line with changes on the shahar-ibat branch
halr> Integrate those changes from the shahar-ibat branch that apply to the
halr> "level of functionality" originally implemented (e.g. no Service Record
halr> support)
halr> Fix a couple of minor bugs found by code inspection
halr> Better debug support
halr> 
halr> Note that this does not fix the slab corruption problem :-(
halr> 
halr> Signed-off-by: Hal Rosenstock <halr at voltaire.com>
halr> 
halr> Index: ib_at.h
halr> ===================================================================
halr> --- ib_at.h	(revision 2331)
halr> +++ ib_at.h	(working copy)
halr> @@ -30,25 +30,28 @@
halr>   * SOFTWARE.
halr>   *
halr>   *
halr> - * $Id:$
halr> + * $Id$
halr>   */
halr>  
halr>  #if !defined( IB_AT_H )
halr>  #define IB_AT_H
halr>  
halr> +#include <ib_verbs.h>
halr> +#include <ib_sa.h>
halr> +
halr>  enum ib_at_multipathing_type {
halr>  	IB_AT_PATH_SAME_PORT	= 0,
halr> -	IB_AT_PATH_SAME_HCA	= 1,		/* but different ports if applicable */
halr> -	IB_AT_PATH_SAME_SYSTEM	= 2,		/* but different ports if applicable */
halr> +	IB_AT_PATH_SAME_HCA	= 1,	/* but different ports if applicable */
halr> +	IB_AT_PATH_SAME_SYSTEM	= 2,	/* but different ports if applicable */
halr>  	IB_AT_PATH_INDEPENDENT_HCA = 3,
halr> -	IB_AT_PATH_SRC_ROUTE	= 4,		/* application controlled multipathing */
halr> +	IB_AT_PATH_SRC_ROUTE	= 4,	/* application controlled multipathing */
halr>  };
halr>  
halr>  enum ib_at_route_flags {
halr> -	IB_AT_ROUTE_USE_DEFAULTS = 0,
halr> -	IB_AT_ROUTE_FORCE_ATS,
halr> -	IB_AT_ROUTE_FORCE_ARP,
halr> -	IB_AT_ROUTE_FORCE_RESOLVE,
halr> +	IB_AT_ROUTE_USE_DEFAULTS	= 0,
halr> +	IB_AT_ROUTE_FORCE_ATS		= 1,
halr> +	IB_AT_ROUTE_FORCE_ARP		= 2,
halr> +	IB_AT_ROUTE_FORCE_RESOLVE	= 4,
halr>  };
halr>  
halr>  struct ib_at_path_attr {
halr> @@ -169,7 +172,7 @@
halr>   * See ib_at_completion structure documentation for asynchronous
halr>   * operation details.
halr>   */
halr> -int ib_at_ips_by_gid(union ib_gid gid, u32 *dst_ips, int nips,
halr> +int ib_at_ips_by_gid(union ib_gid *gid, u32 *dst_ips, int nips,
halr>  		    struct ib_at_completion *async_comp);
halr>  
halr>  /**
halr> @@ -208,7 +211,7 @@
halr>   * @req_id: asynchronous request ID ib_at_op_status
halr>   *
halr>   * Return non-negative ib_at_op_status value, 
halr> - * or -EINVAL if the reqest ID is invalid.
halr> + * or -EINVAL if the request ID is invalid.
halr>   */
halr>  int ib_at_status(u64 req_id);
halr>  
halr> Index: at.c
halr> ===================================================================
halr> --- at.c	(revision 2331)
halr> +++ at.c	(working copy)
halr> @@ -30,7 +30,7 @@
halr>   * SOFTWARE.
halr>   *
halr>   *
halr> - * $Id:$
halr> + * $Id$
halr>   */
halr>  
halr>  #include <linux/module.h>
halr> @@ -118,7 +118,7 @@
halr>  	int sa_id;
halr>  };
halr>  
halr> -static struct async pending_reqs;	/* dummy head for cyclic list */
halr> +struct async pending_reqs;	/* dummy head for cyclic list */
halr>  
halr>  struct ib_at_src {
halr>  	u32 ip;
halr> @@ -158,7 +158,6 @@
halr>  static void path_req_complete(int stat, struct ib_sa_path_rec *ret, void *ctx);
halr>  static int resolve_path(struct path_req *req);
halr>  
halr> -
halr>  static int resolve_ip(struct ib_at_src *src, u32 dst_ip, u32 src_ip,
halr>  			int tos, union ib_gid *dgid)
halr>  {
halr> @@ -254,7 +253,7 @@
halr>  	src->dev = priv->ca;
halr>  	src->port = priv->port;
halr>  	src->pkey = cpu_to_be16(priv->pkey);
halr> -	memcpy(&src->gid, (ipoib_dev->dev_addr + 4), sizeof(src->gid));
halr> +	memcpy(&src->gid, ipoib_dev->dev_addr + 4, sizeof(src->gid));
halr>  
halr>  	if (!dgid)
halr>  		return 0;
halr> @@ -264,7 +263,7 @@
halr>  	 * the IB device which was found.
halr>  	 */
halr>  	if (rt->u.dst.neighbour->dev->flags & IFF_LOOPBACK) {
halr> -		memcpy(dgid, (ipoib_dev->dev_addr + 4),
halr> +		memcpy(dgid, ipoib_dev->dev_addr + 4,
halr>  		       sizeof(union ib_gid));
halr>  
halr>  		return 1;
halr> @@ -272,7 +271,7 @@
halr>  
halr>  	if ((NUD_CONNECTED|NUD_DELAY|NUD_PROBE) &
halr>  	    rt->u.dst.neighbour->nud_state) {
halr> -		memcpy(dgid, (rt->u.dst.neighbour->ha + 4),
halr> +		memcpy(dgid, rt->u.dst.neighbour->ha + 4,
halr>  		       sizeof(union ib_gid));
halr>  
halr>  		return 1;
halr> @@ -285,9 +284,17 @@
halr>  
halr>  static u64 alloc_req_id(void)
halr>  {
halr> -	static u64 req_id = 1;
halr> +	static u64 req_id = 0;
halr> +	u64 new_id;
halr> +	unsigned long flags;
halr>  
halr> -	return ++req_id;
halr> +	spin_lock_irqsave(&pending_reqs.lock, flags);
halr> +	new_id = ++req_id;
halr> +	if (!new_id)
halr> +		new_id = ++req_id;
halr> +	spin_unlock_irqrestore(&pending_reqs.lock, flags);
halr> +
halr> +	return new_id;
halr>  }
halr>  
halr>  static void req_init(struct async *pend, void *data, int nelem, int type,
halr> @@ -361,7 +368,7 @@
halr>  {
halr>  	struct async *pend = v;
halr>  
halr> -	DEBUG("complete req %p\n", pend);
halr> +	DEBUG("complete pend %p", pend);
halr>  
halr>  	pend->comp.fn(pend->comp.req_id, pend->comp.context, pend->nelem);
halr>  
halr> @@ -373,20 +380,23 @@
halr>  	struct async **rr, *waiting;
halr>  	unsigned long flags = 0;
halr>  
halr> -	DEBUG("pend %p nrec %d", pend, nrec);
halr> +	DEBUG("pend %p nrec %d async %p", pend, nrec, q);
halr>  
halr>  	if (pend->status != IB_AT_STATUS_PENDING)
halr> -		WARN("pend %p already completed??", pend);
halr> +		WARN("pend %p already completed? status %d", pend, pend->status);
halr>  
halr>  	pend->status = nrec < 0 ? IB_AT_STATUS_ERROR : IB_AT_STATUS_COMPLETED;
halr>  
halr> -	if (pend->sa_query)
halr> +	if (pend->sa_query) {
halr>  		ib_sa_cancel_query(pend->sa_id, pend->sa_query);
halr> +		pend->sa_query = NULL;
halr> +	}
halr>  
halr>  	if (q)
halr>  		spin_lock_irqsave(&q->lock, flags);
halr>  
halr>  	if (pend->parent) {
halr> +		DEBUG("pend->parent %p", pend->parent);
halr>  		for (rr = &pend->parent->waiting; *rr; rr = &(*rr)->waiting)
halr>  			if (*rr == pend) {
halr>  				*rr = (*rr)->waiting;
halr> @@ -476,11 +486,13 @@
halr>  	unsigned long flags;
halr>  	struct async *a;
halr>  
halr> -	DEBUG("lookup in q %p req %p", q, new);
halr> +	DEBUG("lookup in q %p pending %p", q, new);
halr>  	spin_lock_irqsave(&q->lock, flags);
halr> -	for (a = q->next; a != q; a = a->next)
halr> +	for (a = q->next; a != q; a = a->next) {
halr> +		DEBUG("%d %d", a->type, type);
halr>  		if (a->type == type && same_fn(a, new))
halr>  			break;
halr> +	}
halr>  
halr>  	spin_unlock_irqrestore(&q->lock, flags);
halr>  	return a == q ? NULL : a;
halr> @@ -574,13 +586,14 @@
halr>  	DEBUG("req %p", req);
halr>  
halr>  	if (req->pend.parent) {
halr> -		WARN("path_req_complete for child req %p???", req);
halr> +		WARN("for child req %p???", req);
halr>  		return;
halr>  	}
halr>  
halr>  	if (status) {
halr> -		DEBUG("timed out - check if should retry");
halr> -		if (jiffies - req->pend.start < IB_AT_REQ_TIMEOUT)
halr> +		DEBUG("status %d - check if should retry", status);
halr> +		if (status == -ETIMEDOUT &&
halr> +		    jiffies - req->pend.start < IB_AT_REQ_TIMEOUT)
halr>  			resolve_path(req);
halr>  		else
halr>  			req_end(&req->pend, -ETIMEDOUT, &pending_reqs);
halr> @@ -605,6 +618,7 @@
halr>  {
halr>  	struct async *pend, *next;
halr>  	struct route_req *req;
halr> +	struct path_req *preq;
halr>  	unsigned long flags;
halr>  
halr>  	DEBUG("start sweeping");
halr> @@ -613,18 +627,36 @@
halr>  	for (pend = pending_reqs.next; pend != &pending_reqs; pend = next) {
halr>  		next = pend->next;
halr>  
halr> -		req = container_of(pend, struct route_req, pend);
halr> +		switch (pend->type) {
halr> +		case IBAT_REQ_ARP:
halr> +		case IBAT_REQ_ATS:
halr> +			req = container_of(pend, struct route_req, pend);
halr>  
halr> -		DEBUG("examining route req %p pend %p", req, pend);
halr> -		if (jiffies > (pend->start + IB_AT_REQ_TIMEOUT)) {
halr> -			DEBUG("req delete <%d.%d.%d.%d> <%lu:%lu>",
halr> -			     (req->dst_ip & 0x000000ff),
halr> -			     (req->dst_ip & 0x0000ff00) >> 8,
halr> -			     (req->dst_ip & 0x00ff0000) >> 16,
halr> -			     (req->dst_ip & 0xff000000) >> 24,
halr> -			     jiffies, pend->start);
halr> +			DEBUG("examining route req %p pend %p", req, pend);
halr> +			if (jiffies > pend->start + IB_AT_REQ_TIMEOUT) {
halr> +				DEBUG("req delete <%d.%d.%d.%d> <%lu:%lu>",
halr> +				     (req->dst_ip & 0x000000ff),
halr> +				     (req->dst_ip & 0x0000ff00) >> 8,
halr> +				     (req->dst_ip & 0x00ff0000) >> 16,
halr> +				     (req->dst_ip & 0xff000000) >> 24,
halr> +				     jiffies, pend->start);
halr>  
halr> -			req_end(pend, -ETIMEDOUT, NULL);
halr> +				req_end(pend, -ETIMEDOUT, NULL);
halr> +			}
halr> +			break;
halr> +		case IBAT_REQ_PATHREC:
halr> +			preq = container_of(pend, struct path_req, pend);
halr> +
halr> +			DEBUG("examining path req %p pend %p", preq, pend);
halr> +			if (jiffies > pend->start + IB_AT_REQ_TIMEOUT) {
halr> +				DEBUG("req delete path <%lu:%lu>",
halr> +				      jiffies, pend->start);
halr> +
halr> +				req_end(pend, -ETIMEDOUT, NULL);
halr> +			}
halr> +			break;
halr> +		default:
halr> +			WARN("unknown async req type %d", pend->type);
halr>  		}
halr>  	}
halr>  
halr> @@ -651,7 +683,7 @@
halr>  
halr>  	if (req->pend.type == IBAT_REQ_ATS) {
halr>  		WARN("ATS - not yet");
halr> -		return 0;
halr> +		return -1;	/* 0 when supported */
halr>  	}
halr>  
halr>  	WARN("bad req %p type %d", req, req->pend.type);
halr> @@ -666,32 +698,31 @@
halr>  		.dgid = req->rt.dgid,
halr>  		.sgid = req->rt.sgid,
halr>  	};
halr> -	int r;
halr>  
halr>  	if (req->pend.type != IBAT_REQ_PATHREC) {
halr>  		WARN("bad req %p type %d", req, req->pend.type);
halr>  		return -1;
halr>  	}
halr>  
halr> -	r = ib_sa_path_rec_get(req->rt.out_dev,
halr> -				req->rt.out_port,
halr> -				&rec,
halr> -				(IB_SA_PATH_REC_DGID |
halr> -					IB_SA_PATH_REC_SGID |
halr> -			        	IB_SA_PATH_REC_PKEY |
halr> -					IB_SA_PATH_REC_NUMB_PATH),
halr> -				req->pend.timeout_ms,
halr> -				GFP_KERNEL,
halr> -				path_req_complete,
halr> -				req,
halr> -				&req->pend.sa_query);
halr> +	req->pend.sa_id = ib_sa_path_rec_get(req->rt.out_dev,
halr> +					     req->rt.out_port,
halr> +					    &rec,
halr> +					    (IB_SA_PATH_REC_DGID |
halr> +					     IB_SA_PATH_REC_SGID |
halr> +			        	     IB_SA_PATH_REC_PKEY |
halr> +					     IB_SA_PATH_REC_NUMB_PATH),
halr> +					     req->pend.timeout_ms,
halr> +					     GFP_KERNEL,
halr> +					     path_req_complete,
halr> +					     req,
halr> +					    &req->pend.sa_query);
halr>  
halr> -	if (r < 0)
halr> -		return r;
halr> +	if (req->pend.sa_id < 0) {
halr> +		WARN("ib_sa_path_rec_get %d", req->pend.sa_id);
halr> +		return req->pend.sa_id;
halr> +	}
halr>  
halr>  	req->pend.timeout_ms <<= 1;		/* exponential backoff */
halr> -	req->pend.sa_id = r;
halr> -
halr>  	return 0;
halr>  }
halr>  
halr> @@ -716,10 +747,12 @@
halr>  
halr>  	spin_lock_irqsave(&q->lock, flags);
halr>  	for (a = q->next; a != q; a = a->next) {
halr> +		DEBUG("a %p", a);
halr>  		if (a->type != IBAT_REQ_ARP)
halr>  			continue;
halr>  
halr>  		req = container_of(a, struct route_req, pend);
halr> +		DEBUG("req %p", req);
halr>  
halr>  		if (arp->op == __constant_htons(ARPOP_REPLY)) {
halr>  			if (arp->dst_ip == req->dst_ip)
halr> @@ -751,7 +784,6 @@
halr>  	 * queue IB arp packet onto work queue.
halr>  	 */
halr>  	DEBUG("recv IB ARP - queue work");
halr> -
halr>  	work = kmalloc(sizeof(*work), GFP_ATOMIC);
halr>  	if (!work)
halr>  		goto done;
halr> @@ -763,7 +795,6 @@
halr>  
halr>  done:
halr>  	kfree_skb(skb);
halr> -
halr>  	return 0;
halr>  }
halr>  
halr> @@ -796,17 +827,20 @@
halr>  
halr>  	r = resolve_ip(&rreq->src, dst_ip, src_ip, tos, &rreq->dgid);
halr>  	if (r < 0) {
halr> +		DEBUG("resolve_ip r < 0 free req %p", rreq);
halr>  		kmem_cache_free(route_req_cache, rreq);
halr>  		return r;
halr>  	}
halr>  
halr>  	if (r > 0) {
halr>  		route_req_output(rreq, ib_route);
halr> +		DEBUG("resolve_ip r > 0 free req %p", rreq);
halr>  		kmem_cache_free(route_req_cache, rreq);
halr>  		return 1;
halr>  	}	
halr>  
halr>  	if (!async_comp) {
halr> +		DEBUG("!async_comp free req %p", rreq);
halr>  		kmem_cache_free(route_req_cache, rreq);
halr>  		return -EWOULDBLOCK;
halr>  	}
halr> @@ -855,6 +889,7 @@
halr>  	*/
halr>  
halr>  	if (!async_comp) {
halr> +		DEBUG("!async_comp free req %p", preq);
halr>  		kmem_cache_free(path_req_cache, preq);
halr>  		return -EWOULDBLOCK;
halr>  	}
halr> @@ -871,7 +906,7 @@
halr>  }
halr>  EXPORT_SYMBOL(ib_at_paths_by_route);
halr>  
halr> -int ib_at_ips_by_gid(union ib_gid gid, u32 *dst_ips, int nips,
halr> +int ib_at_ips_by_gid(union ib_gid *gid, u32 *dst_ips, int nips,
halr>  		    struct ib_at_completion *async_comp)
halr>  {
halr>  	return -1;	/* FIXME: not implemented yet */
halr> @@ -910,7 +945,7 @@
halr>  		a->next->prev = child;
halr>  		a->next = child;
halr>  
halr> -		a->waiting = NULL;	/* clear to avoid cancelling childs */
halr> +		a->waiting = NULL;	/* clear to avoid cancelling children */
halr>  	}
halr>  
halr>  	req_end(a, -EINTR, NULL);
halr> @@ -950,7 +985,16 @@
halr>  
halr>  	DEBUG("IB AT services init");
halr>  
halr> -	route_req_cache = kmem_cache_create("ib_at_route_reqs",
halr> +	/*
halr> +	 * init pending lists' dummies.
halr> +	 */
halr> +	pending_reqs.next = pending_reqs.prev = &pending_reqs;
halr> +	spin_lock_init(&pending_reqs.lock);
halr> +
halr> +	/*
halr> +	 * Init memory pools
halr> +	 */
halr> +	route_req_cache = kmem_cache_create("ib_at_routes",
halr>  					sizeof(struct route_req),
halr>  					0, SLAB_HWCACHE_ALIGN,
halr>  					NULL, NULL);
halr> @@ -960,7 +1004,7 @@
halr>  		goto err_route;
halr>  	}
halr>  
halr> -	path_req_cache = kmem_cache_create("ib_at_path_reqs",
halr> +	path_req_cache = kmem_cache_create("ib_at_paths",
halr>  					sizeof(struct path_req),
halr>  					0, SLAB_HWCACHE_ALIGN,
halr>  					NULL, NULL);
halr> @@ -970,6 +1014,9 @@
halr>  		goto err_path;
halr>  	}
halr>  
halr> +	/*
halr> +	 * Init ib at worker thread and queue
halr> +	 */
halr>  	ib_at_wq = create_workqueue("ib_at_wq");
halr>  	if (!ib_at_wq) {
halr>  		WARN("Failed to allocate IB AT wait queue.");
halr> @@ -979,6 +1026,7 @@
halr>  	
halr>  	INIT_WORK(&ib_at_timer, ib_at_sweep, NULL);
halr>  	queue_delayed_work(ib_at_wq, &ib_at_timer, IB_AT_SWEEP_INTERVAL);
halr> +
halr>  	/*
halr>  	 * install device for receiving ARP packets in parallel to the normal
halr>  	 * Linux ARP, this will be the SDP notifier that an ARP request has
halr> 
halr> 
halr> 



More information about the general mailing list