[openib-general] Re: [PATCH] [kdapl] Various AT changes
James Lentini
jlentini at netapp.com
Tue May 17 14:07:30 PDT 2005
Committed in revision 2371.
On Mon, 16 May 2005, Hal Rosenstock wrote:
halr> Various AT changes including:
halr> Get API in line with changes on shahar-ibat branch
halr> Integrate various changes from shahar-ibat branch which apply to the
halr> "level of functionality" originally implemented (e.g. no Service Record
halr> support)
halr> Fix a couple of minor bugs found by code inspection
halr> Better debug support
halr>
halr> Note that this does not fix the slab corruption problem :-(
halr>
halr> Signed-off-by: Hal Rosenstock <halr at voltaire.com>
halr>
halr> Index: ib_at.h
halr> ===================================================================
halr> --- ib_at.h (revision 2331)
halr> +++ ib_at.h (working copy)
halr> @@ -30,25 +30,28 @@
halr> * SOFTWARE.
halr> *
halr> *
halr> - * $Id:$
halr> + * $Id$
halr> */
halr>
halr> #if !defined( IB_AT_H )
halr> #define IB_AT_H
halr>
halr> +#include <ib_verbs.h>
halr> +#include <ib_sa.h>
halr> +
halr> enum ib_at_multipathing_type {
halr> IB_AT_PATH_SAME_PORT = 0,
halr> - IB_AT_PATH_SAME_HCA = 1, /* but different ports if applicable */
halr> - IB_AT_PATH_SAME_SYSTEM = 2, /* but different ports if applicable */
halr> + IB_AT_PATH_SAME_HCA = 1, /* but different ports if applicable */
halr> + IB_AT_PATH_SAME_SYSTEM = 2, /* but different ports if applicable */
halr> IB_AT_PATH_INDEPENDENT_HCA = 3,
halr> - IB_AT_PATH_SRC_ROUTE = 4, /* application controlled multipathing */
halr> + IB_AT_PATH_SRC_ROUTE = 4, /* application controlled multipathing */
halr> };
halr>
halr> enum ib_at_route_flags {
halr> - IB_AT_ROUTE_USE_DEFAULTS = 0,
halr> - IB_AT_ROUTE_FORCE_ATS,
halr> - IB_AT_ROUTE_FORCE_ARP,
halr> - IB_AT_ROUTE_FORCE_RESOLVE,
halr> + IB_AT_ROUTE_USE_DEFAULTS = 0,
halr> + IB_AT_ROUTE_FORCE_ATS = 1,
halr> + IB_AT_ROUTE_FORCE_ARP = 2,
halr> + IB_AT_ROUTE_FORCE_RESOLVE = 4,
halr> };
halr>
halr> struct ib_at_path_attr {
halr> @@ -169,7 +172,7 @@
halr> * See ib_at_completion structure documentation for asynchronous
halr> * operation details.
halr> */
halr> -int ib_at_ips_by_gid(union ib_gid gid, u32 *dst_ips, int nips,
halr> +int ib_at_ips_by_gid(union ib_gid *gid, u32 *dst_ips, int nips,
halr> struct ib_at_completion *async_comp);
halr>
halr> /**
halr> @@ -208,7 +211,7 @@
halr> * @req_id: asynchronous request ID ib_at_op_status
halr> *
halr> * Return non-negative ib_at_op_status value,
halr> - * or -EINVAL if the reqest ID is invalid.
halr> + * or -EINVAL if the request ID is invalid.
halr> */
halr> int ib_at_status(u64 req_id);
halr>
halr> Index: at.c
halr> ===================================================================
halr> --- at.c (revision 2331)
halr> +++ at.c (working copy)
halr> @@ -30,7 +30,7 @@
halr> * SOFTWARE.
halr> *
halr> *
halr> - * $Id:$
halr> + * $Id$
halr> */
halr>
halr> #include <linux/module.h>
halr> @@ -118,7 +118,7 @@
halr> int sa_id;
halr> };
halr>
halr> -static struct async pending_reqs; /* dummy head for cyclic list */
halr> +struct async pending_reqs; /* dummy head for cyclic list */
halr>
halr> struct ib_at_src {
halr> u32 ip;
halr> @@ -158,7 +158,6 @@
halr> static void path_req_complete(int stat, struct ib_sa_path_rec *ret, void *ctx);
halr> static int resolve_path(struct path_req *req);
halr>
halr> -
halr> static int resolve_ip(struct ib_at_src *src, u32 dst_ip, u32 src_ip,
halr> int tos, union ib_gid *dgid)
halr> {
halr> @@ -254,7 +253,7 @@
halr> src->dev = priv->ca;
halr> src->port = priv->port;
halr> src->pkey = cpu_to_be16(priv->pkey);
halr> - memcpy(&src->gid, (ipoib_dev->dev_addr + 4), sizeof(src->gid));
halr> + memcpy(&src->gid, ipoib_dev->dev_addr + 4, sizeof(src->gid));
halr>
halr> if (!dgid)
halr> return 0;
halr> @@ -264,7 +263,7 @@
halr> * the IB device which was found.
halr> */
halr> if (rt->u.dst.neighbour->dev->flags & IFF_LOOPBACK) {
halr> - memcpy(dgid, (ipoib_dev->dev_addr + 4),
halr> + memcpy(dgid, ipoib_dev->dev_addr + 4,
halr> sizeof(union ib_gid));
halr>
halr> return 1;
halr> @@ -272,7 +271,7 @@
halr>
halr> if ((NUD_CONNECTED|NUD_DELAY|NUD_PROBE) &
halr> rt->u.dst.neighbour->nud_state) {
halr> - memcpy(dgid, (rt->u.dst.neighbour->ha + 4),
halr> + memcpy(dgid, rt->u.dst.neighbour->ha + 4,
halr> sizeof(union ib_gid));
halr>
halr> return 1;
halr> @@ -285,9 +284,17 @@
halr>
halr> static u64 alloc_req_id(void)
halr> {
halr> - static u64 req_id = 1;
halr> + static u64 req_id = 0;
halr> + u64 new_id;
halr> + unsigned long flags;
halr>
halr> - return ++req_id;
halr> + spin_lock_irqsave(&pending_reqs.lock, flags);
halr> + new_id = ++req_id;
halr> + if (!new_id)
halr> + new_id = ++req_id;
halr> + spin_unlock_irqrestore(&pending_reqs.lock, flags);
halr> +
halr> + return new_id;
halr> }
halr>
halr> static void req_init(struct async *pend, void *data, int nelem, int type,
halr> @@ -361,7 +368,7 @@
halr> {
halr> struct async *pend = v;
halr>
halr> - DEBUG("complete req %p\n", pend);
halr> + DEBUG("complete pend %p", pend);
halr>
halr> pend->comp.fn(pend->comp.req_id, pend->comp.context, pend->nelem);
halr>
halr> @@ -373,20 +380,23 @@
halr> struct async **rr, *waiting;
halr> unsigned long flags = 0;
halr>
halr> - DEBUG("pend %p nrec %d", pend, nrec);
halr> + DEBUG("pend %p nrec %d async %p", pend, nrec, q);
halr>
halr> if (pend->status != IB_AT_STATUS_PENDING)
halr> - WARN("pend %p already completed??", pend);
halr> + WARN("pend %p already completed? status %d", pend, pend->status);
halr>
halr> pend->status = nrec < 0 ? IB_AT_STATUS_ERROR : IB_AT_STATUS_COMPLETED;
halr>
halr> - if (pend->sa_query)
halr> + if (pend->sa_query) {
halr> ib_sa_cancel_query(pend->sa_id, pend->sa_query);
halr> + pend->sa_query = NULL;
halr> + }
halr>
halr> if (q)
halr> spin_lock_irqsave(&q->lock, flags);
halr>
halr> if (pend->parent) {
halr> + DEBUG("pend->parent %p", pend->parent);
halr> for (rr = &pend->parent->waiting; *rr; rr = &(*rr)->waiting)
halr> if (*rr == pend) {
halr> *rr = (*rr)->waiting;
halr> @@ -476,11 +486,13 @@
halr> unsigned long flags;
halr> struct async *a;
halr>
halr> - DEBUG("lookup in q %p req %p", q, new);
halr> + DEBUG("lookup in q %p pending %p", q, new);
halr> spin_lock_irqsave(&q->lock, flags);
halr> - for (a = q->next; a != q; a = a->next)
halr> + for (a = q->next; a != q; a = a->next) {
halr> + DEBUG("%d %d", a->type, type);
halr> if (a->type == type && same_fn(a, new))
halr> break;
halr> + }
halr>
halr> spin_unlock_irqrestore(&q->lock, flags);
halr> return a == q ? NULL : a;
halr> @@ -574,13 +586,14 @@
halr> DEBUG("req %p", req);
halr>
halr> if (req->pend.parent) {
halr> - WARN("path_req_complete for child req %p???", req);
halr> + WARN("for child req %p???", req);
halr> return;
halr> }
halr>
halr> if (status) {
halr> - DEBUG("timed out - check if should retry");
halr> - if (jiffies - req->pend.start < IB_AT_REQ_TIMEOUT)
halr> + DEBUG("status %d - check if should retry", status);
halr> + if (status == -ETIMEDOUT &&
halr> + jiffies - req->pend.start < IB_AT_REQ_TIMEOUT)
halr> resolve_path(req);
halr> else
halr> req_end(&req->pend, -ETIMEDOUT, &pending_reqs);
halr> @@ -605,6 +618,7 @@
halr> {
halr> struct async *pend, *next;
halr> struct route_req *req;
halr> + struct path_req *preq;
halr> unsigned long flags;
halr>
halr> DEBUG("start sweeping");
halr> @@ -613,18 +627,36 @@
halr> for (pend = pending_reqs.next; pend != &pending_reqs; pend = next) {
halr> next = pend->next;
halr>
halr> - req = container_of(pend, struct route_req, pend);
halr> + switch (pend->type) {
halr> + case IBAT_REQ_ARP:
halr> + case IBAT_REQ_ATS:
halr> + req = container_of(pend, struct route_req, pend);
halr>
halr> - DEBUG("examining route req %p pend %p", req, pend);
halr> - if (jiffies > (pend->start + IB_AT_REQ_TIMEOUT)) {
halr> - DEBUG("req delete <%d.%d.%d.%d> <%lu:%lu>",
halr> - (req->dst_ip & 0x000000ff),
halr> - (req->dst_ip & 0x0000ff00) >> 8,
halr> - (req->dst_ip & 0x00ff0000) >> 16,
halr> - (req->dst_ip & 0xff000000) >> 24,
halr> - jiffies, pend->start);
halr> + DEBUG("examining route req %p pend %p", req, pend);
halr> + if (jiffies > pend->start + IB_AT_REQ_TIMEOUT) {
halr> + DEBUG("req delete <%d.%d.%d.%d> <%lu:%lu>",
halr> + (req->dst_ip & 0x000000ff),
halr> + (req->dst_ip & 0x0000ff00) >> 8,
halr> + (req->dst_ip & 0x00ff0000) >> 16,
halr> + (req->dst_ip & 0xff000000) >> 24,
halr> + jiffies, pend->start);
halr>
halr> - req_end(pend, -ETIMEDOUT, NULL);
halr> + req_end(pend, -ETIMEDOUT, NULL);
halr> + }
halr> + break;
halr> + case IBAT_REQ_PATHREC:
halr> + preq = container_of(pend, struct path_req, pend);
halr> +
halr> + DEBUG("examining path req %p pend %p", preq, pend);
halr> + if (jiffies > pend->start + IB_AT_REQ_TIMEOUT) {
halr> + DEBUG("req delete path <%lu:%lu>",
halr> + jiffies, pend->start);
halr> +
halr> + req_end(pend, -ETIMEDOUT, NULL);
halr> + }
halr> + break;
halr> + default:
halr> + WARN("unknown async req type %d", pend->type);
halr> }
halr> }
halr>
halr> @@ -651,7 +683,7 @@
halr>
halr> if (req->pend.type == IBAT_REQ_ATS) {
halr> WARN("ATS - not yet");
halr> - return 0;
halr> + return -1; /* 0 when supported */
halr> }
halr>
halr> WARN("bad req %p type %d", req, req->pend.type);
halr> @@ -666,32 +698,31 @@
halr> .dgid = req->rt.dgid,
halr> .sgid = req->rt.sgid,
halr> };
halr> - int r;
halr>
halr> if (req->pend.type != IBAT_REQ_PATHREC) {
halr> WARN("bad req %p type %d", req, req->pend.type);
halr> return -1;
halr> }
halr>
halr> - r = ib_sa_path_rec_get(req->rt.out_dev,
halr> - req->rt.out_port,
halr> - &rec,
halr> - (IB_SA_PATH_REC_DGID |
halr> - IB_SA_PATH_REC_SGID |
halr> - IB_SA_PATH_REC_PKEY |
halr> - IB_SA_PATH_REC_NUMB_PATH),
halr> - req->pend.timeout_ms,
halr> - GFP_KERNEL,
halr> - path_req_complete,
halr> - req,
halr> - &req->pend.sa_query);
halr> + req->pend.sa_id = ib_sa_path_rec_get(req->rt.out_dev,
halr> + req->rt.out_port,
halr> + &rec,
halr> + (IB_SA_PATH_REC_DGID |
halr> + IB_SA_PATH_REC_SGID |
halr> + IB_SA_PATH_REC_PKEY |
halr> + IB_SA_PATH_REC_NUMB_PATH),
halr> + req->pend.timeout_ms,
halr> + GFP_KERNEL,
halr> + path_req_complete,
halr> + req,
halr> + &req->pend.sa_query);
halr>
halr> - if (r < 0)
halr> - return r;
halr> + if (req->pend.sa_id < 0) {
halr> + WARN("ib_sa_path_rec_get %d", req->pend.sa_id);
halr> + return req->pend.sa_id;
halr> + }
halr>
halr> req->pend.timeout_ms <<= 1; /* exponential backoff */
halr> - req->pend.sa_id = r;
halr> -
halr> return 0;
halr> }
halr>
halr> @@ -716,10 +747,12 @@
halr>
halr> spin_lock_irqsave(&q->lock, flags);
halr> for (a = q->next; a != q; a = a->next) {
halr> + DEBUG("a %p", a);
halr> if (a->type != IBAT_REQ_ARP)
halr> continue;
halr>
halr> req = container_of(a, struct route_req, pend);
halr> + DEBUG("req %p", req);
halr>
halr> if (arp->op == __constant_htons(ARPOP_REPLY)) {
halr> if (arp->dst_ip == req->dst_ip)
halr> @@ -751,7 +784,6 @@
halr> * queue IB arp packet onto work queue.
halr> */
halr> DEBUG("recv IB ARP - queue work");
halr> -
halr> work = kmalloc(sizeof(*work), GFP_ATOMIC);
halr> if (!work)
halr> goto done;
halr> @@ -763,7 +795,6 @@
halr>
halr> done:
halr> kfree_skb(skb);
halr> -
halr> return 0;
halr> }
halr>
halr> @@ -796,17 +827,20 @@
halr>
halr> r = resolve_ip(&rreq->src, dst_ip, src_ip, tos, &rreq->dgid);
halr> if (r < 0) {
halr> + DEBUG("resolve_ip r < 0 free req %p", rreq);
halr> kmem_cache_free(route_req_cache, rreq);
halr> return r;
halr> }
halr>
halr> if (r > 0) {
halr> route_req_output(rreq, ib_route);
halr> + DEBUG("resolve_ip r > 0 free req %p", rreq);
halr> kmem_cache_free(route_req_cache, rreq);
halr> return 1;
halr> }
halr>
halr> if (!async_comp) {
halr> + DEBUG("!async_comp free req %p", rreq);
halr> kmem_cache_free(route_req_cache, rreq);
halr> return -EWOULDBLOCK;
halr> }
halr> @@ -855,6 +889,7 @@
halr> */
halr>
halr> if (!async_comp) {
halr> + DEBUG("!async_comp free req %p", preq);
halr> kmem_cache_free(path_req_cache, preq);
halr> return -EWOULDBLOCK;
halr> }
halr> @@ -871,7 +906,7 @@
halr> }
halr> EXPORT_SYMBOL(ib_at_paths_by_route);
halr>
halr> -int ib_at_ips_by_gid(union ib_gid gid, u32 *dst_ips, int nips,
halr> +int ib_at_ips_by_gid(union ib_gid *gid, u32 *dst_ips, int nips,
halr> struct ib_at_completion *async_comp)
halr> {
halr> return -1; /* FIXME: not implemented yet */
halr> @@ -910,7 +945,7 @@
halr> a->next->prev = child;
halr> a->next = child;
halr>
halr> - a->waiting = NULL; /* clear to avoid cancelling childs */
halr> + a->waiting = NULL; /* clear to avoid cancelling children */
halr> }
halr>
halr> req_end(a, -EINTR, NULL);
halr> @@ -950,7 +985,16 @@
halr>
halr> DEBUG("IB AT services init");
halr>
halr> - route_req_cache = kmem_cache_create("ib_at_route_reqs",
halr> + /*
halr> + * init pending lists' dummies.
halr> + */
halr> + pending_reqs.next = pending_reqs.prev = &pending_reqs;
halr> + spin_lock_init(&pending_reqs.lock);
halr> +
halr> + /*
halr> + * Init memory pools
halr> + */
halr> + route_req_cache = kmem_cache_create("ib_at_routes",
halr> sizeof(struct route_req),
halr> 0, SLAB_HWCACHE_ALIGN,
halr> NULL, NULL);
halr> @@ -960,7 +1004,7 @@
halr> goto err_route;
halr> }
halr>
halr> - path_req_cache = kmem_cache_create("ib_at_path_reqs",
halr> + path_req_cache = kmem_cache_create("ib_at_paths",
halr> sizeof(struct path_req),
halr> 0, SLAB_HWCACHE_ALIGN,
halr> NULL, NULL);
halr> @@ -970,6 +1014,9 @@
halr> goto err_path;
halr> }
halr>
halr> + /*
halr> + * Init ib at worker thread and queue
halr> + */
halr> ib_at_wq = create_workqueue("ib_at_wq");
halr> if (!ib_at_wq) {
halr> WARN("Failed to allocate IB AT wait queue.");
halr> @@ -979,6 +1026,7 @@
halr>
halr> INIT_WORK(&ib_at_timer, ib_at_sweep, NULL);
halr> queue_delayed_work(ib_at_wq, &ib_at_timer, IB_AT_SWEEP_INTERVAL);
halr> +
halr> /*
halr> * install device for receiving ARP packets in parallel to the normal
halr> * Linux ARP, this will be the SDP notifier that an ARP request has
halr>
halr>
halr>
More information about the general mailing list