[openib-general] [PATCH 10/22] ehca IRQ handling

Roland Dreier rolandd at cisco.com
Fri Feb 17 16:57:27 PST 2006


From: Roland Dreier <rolandd at cisco.com>

Where is the irq_count field of struct ehca_irq_info ever used?
I couldn't find it used anywhere, so it can be deleted.

The logic in ehca_interrupt_eq() is too convoluted for me to
follow; there are two nested while () {} loops inside a 
do {} while () loop, and ehca_poll_eq() is called in three
different places.  Is there any way to untangle this?
---

 drivers/infiniband/hw/ehca/ehca_irq.c |  436 +++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/ehca/ehca_irq.h |   90 +++++++
 2 files changed, 526 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644
index 0000000..1bba58e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -0,0 +1,436 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Functions for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj at de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  $Id: ehca_irq.c,v 1.64 2006/02/15 08:15:25 schickhj Exp $
+ */
+
+#include "ehca_kernel.h"
+#include "ehca_irq.h"
+
+#define DEB_PREFIX "eirq"
+
+#include "ehca_kernel.h"
+#include "ehca_classes.h"
+#include "ehca_tools.h"
+#include "ehca_eq.h"
+#include "ehca_irq.h"
+#include "hcp_if.h"
+
+#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)	/* tested in ehca_interrupt_eq(): set => completion event */
+#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)	/* not referenced in ehca_irq.c */
+#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)	/* event identifier switched on in parse_identifier() */
+#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)	/* not referenced in ehca_irq.c */
+#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)	/* not referenced in ehca_irq.c */
+#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)	/* key for the ehca_qp_idr lookup */
+#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)	/* key for the ehca_cq_idr lookup */
+
+#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)	/* ehca_interrupt_neq() skips entries with this set */
+#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)	/* event code switched on in parse_ec() */
+#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)	/* port the notification refers to */
+#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)	/* non-zero => port active, zero => port inactive */
+
+#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)	/* applied to rblock[0] to get the error dump length */
+
+/*
+ * Run the completion handler registered on @cq, if any.
+ *
+ * The handler is invoked with cq->cb_lock taken through
+ * spin_lock_irqsave().  When no handler is installed the function
+ * returns right after the entry trace, without emitting an exit trace.
+ */
+static inline void comp_event_callback(struct ehca_cq *cq)
+{
+	unsigned long irq_flags = 0;
+
+	EDEB_EN(7, "cq=%p", cq);
+
+	if (cq->ib_cq.comp_handler != NULL) {
+		spin_lock_irqsave(&cq->cb_lock, irq_flags);
+		cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
+		spin_unlock_irqrestore(&cq->cb_lock, irq_flags);
+
+		EDEB_EX(7, "cq=%p", cq);
+	}
+}
+
+/*
+ * Fetch the error data of a resource via hipz_h_error_data() and dump it.
+ *
+ * @shca:      adapter; its ipz_hca_handle is handed to hipz_h_error_data()
+ * @ressource: handle of the resource to query (the callers in this file
+ *             pass QP and CQ handles)
+ *
+ * Returns -ENOMEM when the page-sized result buffer cannot be allocated,
+ * otherwise the status of hipz_h_error_data() narrowed to int.  The
+ * outcome is only logged; on H_Success the buffer is dumped via EDEB_DMP.
+ */
+int ehca_error_data(struct ehca_shca *shca, u64 ressource)
+{
+	unsigned long hret;
+	unsigned long nr_blocks;
+	u64 *rblock;
+
+	EDEB_EN(7, "ressource=%lx", ressource);
+
+	rblock = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!rblock) {
+		EDEB_ERR(4, "Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+	memset(rblock, 0, PAGE_SIZE);
+
+	hret = hipz_h_error_data(shca->ipz_hca_handle, ressource,
+				 rblock, &nr_blocks);
+
+	if (hret == H_Success) {
+		/* The dump length is taken from the first 64-bit word. */
+		int length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
+
+		/* Never dump more than the one page that was allocated. */
+		if (length > PAGE_SIZE)
+			length = PAGE_SIZE;
+
+		EDEB_ERR(4, "Error data is available: %lx.", ressource);
+		EDEB_ERR(4, "EHCA ----- error data begin "
+			 "---------------------------------------------------");
+		EDEB_DMP(4, rblock, length, "ressource=%lx", ressource);
+		EDEB_ERR(4, "EHCA ----- error data end "
+			 "-----------------------------------------------------");
+	} else if (hret == H_R_STATE) {
+		EDEB_ERR(4, "No error data is available: %lx.", ressource);
+	} else {
+		EDEB_ERR(4, "Error data could not be fetched: %lx", ressource);
+	}
+
+	kfree(rblock);
+
+	return hret;
+}
+
+/*
+ * Deliver an affiliated QP event to the event handler of the QP.
+ *
+ * The QP is looked up in ehca_qp_idr by the token carried in @eqe; an
+ * unknown token is silently ignored.  Error data is fetched for the QP
+ * handle, then the QP's event_handler (if any) is called with an
+ * ib_event of type @event_type.
+ *
+ * NOTE(review): ehca_error_data() runs for every event type, not only
+ * for IB_EVENT_QP_FATAL - confirm this is intended.
+ * NOTE(review): the QP pointer is used after ehca_qp_idr_sem has been
+ * released; confirm the QP cannot go away in the meantime.
+ */
+static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+			      enum ib_event_type event_type)
+{
+	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
+	struct ehca_qp *qp;
+	struct ib_event event;
+
+	EDEB_EN(7, "eqe=%lx", eqe);
+
+	down_read(&ehca_qp_idr_sem);
+	qp = idr_find(&ehca_qp_idr, token);
+	up_read(&ehca_qp_idr_sem);
+
+	if (!qp)
+		return;
+
+	if (event_type == IB_EVENT_QP_FATAL) {
+		EDEB_ERR(4, "QP 0x%x (ressource=%lx) has errors.",
+			 qp->ib_qp.qp_num, qp->ipz_qp_handle.handle);
+	}
+
+	ehca_error_data(shca, qp->ipz_qp_handle.handle);
+
+	/* Without a registered handler there is nobody to notify. */
+	if (qp->ib_qp.event_handler != NULL) {
+		event.element.qp = &qp->ib_qp;
+		event.event      = event_type;
+		event.device     = &shca->ib_device;
+
+		qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+
+		EDEB_EX(7, "qp=%p", qp);
+	}
+}
+
+/*
+ * Handle a CQ error event: look the CQ up in ehca_cq_idr by the token
+ * carried in @eqe, log the error and fetch the error data for the CQ
+ * handle.  An unknown token is silently ignored.
+ */
+static void cq_event_callback(struct ehca_shca *shca, u64 eqe)
+{
+	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
+	struct ehca_cq *cq;
+
+	EDEB_EN(7, "eqe=%lx", eqe);
+
+	down_read(&ehca_cq_idr_sem);
+	cq = idr_find(&ehca_cq_idr, token);
+	up_read(&ehca_cq_idr_sem);
+
+	if (cq != NULL) {
+		EDEB_ERR(4, "CQ 0x%x (ressource=%lx) has errors.",
+			 cq->cq_number, cq->ipz_cq_handle.handle);
+
+		ehca_error_data(shca, cq->ipz_cq_handle.handle);
+
+		EDEB_EX(7, "cq=%p", cq);
+	}
+}
+
+/*
+ * Dispatch a non-completion EQ entry according to its event identifier.
+ *
+ * @shca: adapter the entry was polled from
+ * @eqe:  raw 64-bit event queue entry
+ *
+ * QP related identifiers are forwarded to qp_event_callback() with the
+ * matching IB event type, CQ errors go to cq_event_callback().  All other
+ * identifiers - including the ones listed below without a handler - end
+ * up in the default branch and are only logged.
+ */
+static void parse_identifier(struct ehca_shca *shca, u64 eqe)
+{
+	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
+
+	EDEB_EN(7, "shca=%p eqe=%lx", shca, eqe);
+
+	switch (identifier) {
+	case 0x02:		/* path migrated */
+		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
+		break;
+	case 0x03:		/* communication established */
+		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
+		break;
+	case 0x04:		/* send queue drained */
+		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
+		break;
+	case 0x05:		/* QP error */
+	case 0x06:		/* QP error */
+		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
+		break;
+	case 0x07:		/* CQ error */
+	case 0x08:		/* CQ error */
+		cq_event_callback(shca, eqe);
+		break;
+	case 0x09:		/* MRMWPTE error */
+	case 0x0A:		/* port event */
+	case 0x0B:		/* MR access error */
+	case 0x0C:		/* EQ error */
+	case 0x0D:		/* P/Q_Key mismatch */
+	case 0x10:		/* sampling complete */
+	case 0x11:		/* unaffiliated access error */
+	case 0x12:		/* path migrating error */
+	case 0x13:		/* interface trace stopped */
+	case 0x14:		/* first error capture info available */
+	default:
+		/* No handler for these identifiers: they are only logged. */
+		EDEB_ERR(4, "Unknown identifier: %x on %s.",
+			 identifier, shca->ib_device.name);
+		break;
+	}
+
+	/* Exit trace: the original used the entry macro EDEB_EN here. */
+	EDEB_EX(7, "eqe=%lx identifier=%x", eqe, identifier);
+}
+
+/*
+ * Log a port state change, record it in shca->sport[] and dispatch the
+ * matching IB event (IB_EVENT_PORT_ACTIVE or IB_EVENT_PORT_ERR).
+ *
+ * @shca:   adapter the port belongs to
+ * @port:   port number taken from the notification entry
+ * @active: non-zero for "port active", zero for "port inactive"
+ *
+ * sport[] is indexed with port - 1, so port 0 would address the element
+ * in front of the array; such an entry is rejected and logged.
+ * NOTE(review): the size of sport[] is not visible here - confirm
+ * whether an upper bound check on the port number is needed as well.
+ */
+static void report_port_state(struct ehca_shca *shca, u8 port, int active)
+{
+	struct ib_event event;
+
+	if (port == 0) {
+		EDEB_ERR(4, "Invalid port number %x on %s.",
+			 port, shca->ib_device.name);
+		return;
+	}
+
+	if (active) {
+		EDEB(4, "%s: port %x is active.",
+		     shca->ib_device.name, port);
+	} else {
+		EDEB(4, "%s: port %x is inactive.",
+		     shca->ib_device.name, port);
+	}
+
+	event.device = &shca->ib_device;
+	event.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+	event.element.port_num = port;
+	shca->sport[port - 1].port_state =
+		active ? IB_PORT_ACTIVE : IB_PORT_DOWN;
+	ib_dispatch_event(&event);
+}
+
+/*
+ * Evaluate the event code of a notification event queue (NEQ) entry.
+ *
+ * @shca: adapter the entry was polled from
+ * @eqe:  raw 64-bit notification entry
+ *
+ * 0x30 reports the port as active or inactive depending on the
+ * availability bit.  0x31 reports the port as inactive and then as
+ * active again.  Every other event code is only logged.
+ */
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
+	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+
+	EDEB_EN(7, "shca=%p eqe=%lx", shca, eqe);
+
+	switch (ec) {
+	case 0x30:		/* port availability change */
+		report_port_state(shca, port,
+				  EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY,
+						 eqe) != 0);
+		break;
+	case 0x31:
+		/* port configuration change      */
+		/* disruptive change is caused by */
+		/* LID, PKEY or SM change         */
+		EDEB(4, "EHCA disruptive port %x "
+		     "configuration change.", port);
+
+		/* Consumers see the port go down and come back up. */
+		report_port_state(shca, port, 0);
+		report_port_state(shca, port, 1);
+		break;
+	case 0x32:		/* adapter malfunction */
+	case 0x33:		/* trace stopped */
+	default:
+		EDEB_ERR(4, "Unknown event code: %x on %s.",
+			 ec, shca->ib_device.name);
+		break;
+	}
+
+	/* Exit trace: the original used the entry macro EDEB_EN here. */
+	EDEB_EX(7, "eqe=%lx ec=%x", eqe, ec);
+}
+
+/*
+ * Store 0x0 to CQx_EP of @cq through its kernel galpa, then load
+ * CQTEMM_OFFSET(CQx_EP) back and trace the value.
+ *
+ * Presumably this clears the "event pending" state of the CQ (going by
+ * the function name) - confirm against the hardware documentation.
+ */
+static inline void reset_eq_pending(struct ehca_cq *cq)
+{
+	struct h_galpa kernel_galpa = cq->ehca_cq_core.galpas.kernel;
+	/*
+	 * Keep the name of this local: the same CQx_EP token is passed to
+	 * hipz_galpa_store_cq() and CQTEMM_OFFSET() below.
+	 */
+	u64 CQx_EP = 0;
+
+	EDEB_EN(7, "cq=%p", cq);
+
+	hipz_galpa_store_cq(kernel_galpa, CQx_EP, 0x0);
+	CQx_EP = hipz_galpa_load(kernel_galpa, CQTEMM_OFFSET(CQx_EP));
+	EDEB(7, "CQx_EP=%lx", CQx_EP);
+
+	EDEB_EX(7, "cq=%p", cq);
+}
+
+/*
+ * Handle a single event queue entry.
+ *
+ * Completion events carry a CQ token: the CQ is looked up in
+ * ehca_cq_idr, reset_eq_pending() is applied to it and its completion
+ * handler is run.  Everything else is passed to parse_identifier().
+ */
+static void process_eqe(struct ehca_shca *shca, u64 eqe_value)
+{
+	struct ehca_cq *cq;
+	u32 token;
+
+	EDEB(7, "eqe_value=%lx", eqe_value);
+
+	if (!EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+		EDEB(7, "... non completion event");
+		parse_identifier(shca, eqe_value);
+		return;
+	}
+
+	EDEB(7, "... completion event");
+	token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+	down_read(&ehca_cq_idr_sem);
+	cq = idr_find(&ehca_cq_idr, token);
+	up_read(&ehca_cq_idr_sem);
+
+	/*
+	 * idr_find() returns NULL for a token that is not registered.
+	 * reset_eq_pending() dereferences the CQ, so skip such entries
+	 * instead of crashing.
+	 */
+	if (cq == NULL) {
+		EDEB_ERR(4, "Completion event for unknown CQ token %x.",
+			 token);
+		return;
+	}
+
+	reset_eq_pending(cq);
+	comp_event_callback(cq);
+}
+
+/*
+ * Work function for the event queue: drain shca->eq and handle every
+ * entry found.
+ *
+ * @data: the struct ehca_irq_info of the event queue
+ *
+ * For hw_level >= 2 the interrupt state is queried with
+ * hipz_h_query_int_state() after each drain, and polling continues
+ * until that state is 0 and the queue is empty.  For lower hw_level
+ * values the loop ends as soon as the queue is empty.
+ */
+void ehca_interrupt_eq(void *data)
+{
+	/* to_shca() expands to code reading a local named "irq_info". */
+	struct ehca_irq_info *irq_info;
+	struct ehca_shca *shca;
+	struct ehca_eqe *eqe;
+	int int_state;
+
+	EDEB_EN(7, "data=%p", data);
+
+	irq_info = (struct ehca_irq_info *)data;
+	shca = to_shca(eq);
+
+	do {
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+
+		if ((shca->hw_level >= 2) && (eqe != NULL))
+			int_state = 1;
+		else
+			int_state = 0;
+
+		while ((int_state == 1) || (eqe != NULL)) {
+			/* Drain everything that is currently queued. */
+			while (eqe != NULL) {
+				process_eqe(shca, eqe->entry);
+				eqe = (struct ehca_eqe *)
+					ehca_poll_eq(shca, &shca->eq);
+			}
+
+			/* TODO: do we need hw_level  */
+			if (shca->hw_level >= 2)
+				int_state =
+				    hipz_h_query_int_state(shca->ipz_hca_handle,
+							   irq_info);
+
+			/* Entries may have arrived since the drain above. */
+			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+		}
+	} while (int_state != 0);
+
+	EDEB_EX(7, "shca=%p", shca);
+}
+
+/*
+ * Work function for the notification event queue: poll shca->neq until
+ * it is empty, pass every entry that is not a completion event to
+ * parse_ec(), then reset the notification events through
+ * hipz_h_reset_event().
+ *
+ * @data: the struct ehca_irq_info of the notification event queue
+ */
+void ehca_interrupt_neq(void *data)
+{
+	/* to_shca() expands to code reading a local named "irq_info". */
+	struct ehca_irq_info *irq_info = (struct ehca_irq_info *)data;
+	struct ehca_shca *shca;
+	struct ehca_eqe *eqe;
+	u64 hret = H_Success;
+
+	EDEB_EN(7, "data=%p", data);
+
+	shca = to_shca(neq);
+
+	for (eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+	     eqe != NULL;
+	     eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq)) {
+		if (EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
+			continue;
+
+		parse_ec(shca, eqe->entry);
+	}
+
+	hret = hipz_h_reset_event(shca->ipz_hca_handle,
+				  shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFF);
+	if (hret != H_Success) {
+		EDEB_ERR(4, "Can't clear notification events.");
+	}
+
+	EDEB_EX(7, "shca=%p", shca);
+}
+
+/*
+ * Interrupt handler: queue the work item of the struct ehca_irq_info
+ * passed as @dev_id on its workqueue and report the interrupt as handled.
+ * @irq and @regs are not used.
+ */
+irqreturn_t ehca_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct ehca_irq_info *irq_info = dev_id;
+
+	EDEB_EN(7, "dev_id=%p", dev_id);
+
+	queue_work(irq_info->wq, irq_info->work);
+
+	EDEB_EX(7, "");
+
+	return IRQ_HANDLED;
+}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
new file mode 100644
index 0000000..43b2e3e
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -0,0 +1,90 @@
+/*
+ *  IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ *  Function definitions and structs for EQs, NEQs and interrupts
+ *
+ *  Authors: Heiko J Schick <schickhj at de.ibm.com>
+ *
+ *  Copyright (c) 2005 IBM Corporation
+ *
+ *  All rights reserved.
+ *
+ *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ *  BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  $Id: ehca_irq.h,v 1.25 2006/02/06 10:17:34 schickhj Exp $
+ */
+
+#ifndef __EHCA_IRQ_H
+#define __EHCA_IRQ_H
+
+
+struct ehca_shca;
+
+#include <asm/atomic.h>
+#include <asm/types.h>
+
+#ifndef EHCA_USERDRIVER
+#include <linux/interrupt.h>
+#endif
+
+#ifndef __KERNEL__
+#define NO_IRQ (-1)
+#include <linux/version.h>
+#include <errno.h>
+#endif
+
+#ifndef EHCA_USERDRIVER	/* variant used unless EHCA_USERDRIVER is defined */
+#define to_shca(queue) container_of(irq_info->eq,     \
+				    struct ehca_shca, \
+				    queue)	/* NOTE: reads a variable named irq_info from the caller's scope; queue is the member name (eq or neq) */
+#else	/* EHCA_USERDRIVER: the queue argument is ignored, the first entry of ehca_module.shca_list is used */
+extern struct ehca_module ehca_module;
+#define to_shca(queue) list_entry(ehca_module.shca_list.next, \
+				  struct ehca_shca, shca_list)
+#endif	/* EHCA_USERDRIVER */
+
+struct ehca_irq_info {	/* per-queue interrupt bookkeeping; passed as dev_id / work data in ehca_irq.c */
+	__u32 ist;	/* not referenced in ehca_irq.c */
+	__u32 irq;	/* not referenced in ehca_irq.c */
+	void *eq;	/* handed to container_of() by to_shca() to find the owning ehca_shca */
+
+	atomic_t irq_count;	/* NOTE(review): not referenced in ehca_irq.c; check other files before removing */
+	struct workqueue_struct *wq;	/* workqueue that ehca_interrupt() queues on */
+	struct work_struct *work;	/* work item queued by ehca_interrupt() */
+
+	pid_t pid;	/* not referenced in ehca_irq.c */
+};
+
+void ehca_interrupt_eq(void *data);	/* data: struct ehca_irq_info of the EQ */
+void ehca_interrupt_neq(void *data);	/* data: struct ehca_irq_info of the NEQ */
+irqreturn_t ehca_interrupt(int irq, void *dev_id, struct pt_regs *regs);	/* dev_id: struct ehca_irq_info; queues its work item */
+irqreturn_t ehca_interrupt_direct(int irq, void *dev_id, struct pt_regs *regs);	/* NOTE(review): no definition in ehca_irq.c - confirm where it lives */
+int ehca_error_data(struct ehca_shca *shca, u64 ressource);	/* -ENOMEM or the hipz_h_error_data() status */
+
+#endif



More information about the general mailing list