[ofa-general] [OpenSM][RFC] OpenSM Proposed Perf Manager

Ira Weiny weiny2 at llnl.gov
Tue May 8 18:49:38 PDT 2007


I would like to submit to the list a performance manager which I have been
working on for OpenSM.

It is implemented as the first proposed architecture model set forth by Hal (As
an integrated thread to OpenSM.)  As such it works fine on our small test
cluster but there is some concern about its scalability.

I have extended this architecture with an idea of my own.  This idea is to have
a pluggable module for the "event database".  With this interface one could
write their own data reduction, logging, and tracking methods.  Here at LLNL I
propose to use this to add counter and subnet events directly to our management
database which is used to show system status to our operators.  Other
installations might prefer other methods of logging, SNMP for example.  This
patch includes a "reference" implementation of this "event database" which
stores the information internally until the user requests a "dump".

Let the flames begin,
Ira Weiny
weiny2 at llnl.gov



>From 4ce288b6a5a371872cf160f6d4e29e768a065cb9 Mon Sep 17 00:00:00 2001
From: Ira K. Weiny <weiny2 at llnl.gov>
Date: Tue, 24 Apr 2007 23:44:15 -0700
Subject: [PATCH] OpenSM Proposed Perf Manager

   Features include:
      * Create "PerfMgr" thread and sweep all ports on the subnet every
        sweep_time seconds
      * port counter clear on overflow
      * pluggable architecture for the "event" database
      * Output machine and human readable output in the default event database
        dump
      * Control using the "perfmgr" command in the console

   Known Issues
      * Not tested at scale.
      * Event database should record trap events and other "interesting" subnet
        events.
      * port counter log warnings should be configurable, not hard-coded.
      * partitions are not handled yet.
      * Code might not be as pristine as I would like

   Enable using --enable-perf-mgr

Signed-off-by: Ira K. Weiny <weiny2 at llnl.gov>
---
 osm/Makefile.am                   |    3 +-
 osm/config/osmvsel.m4             |   26 ++
 osm/configure.in                  |    5 +-
 osm/eventdb/Makefile.am           |   37 ++
 osm/eventdb/autogen.sh            |   15 +
 osm/eventdb/configure.in          |   70 ++++
 osm/eventdb/libibeventdb.map      |    5 +
 osm/eventdb/libibeventdb.spec.in  |   38 ++
 osm/eventdb/libibeventdb.ver      |    9 +
 osm/eventdb/src/ibeventdb.c       |  622 +++++++++++++++++++++++++++++++++
 osm/include/Makefile.am           |    2 +
 osm/include/iba/ib_types.h        |   74 ++++
 osm/include/opensm/osm_base.h     |   23 ++
 osm/include/opensm/osm_event_db.h |  151 ++++++++
 osm/include/opensm/osm_madw.h     |   40 +++
 osm/include/opensm/osm_msgdef.h   |    1 +
 osm/include/opensm/osm_opensm.h   |    4 +
 osm/include/opensm/osm_perfmgr.h  |  223 ++++++++++++
 osm/include/opensm/osm_subnet.h   |   18 +
 osm/opensm.spec.in                |   11 +-
 osm/opensm/Makefile.am            |    5 +-
 osm/opensm/configure.in           |    3 +
 osm/opensm/main.c                 |   19 +
 osm/opensm/osm_console.c          |   78 +++++
 osm/opensm/osm_event_db.c         |  172 +++++++++
 osm/opensm/osm_opensm.c           |   24 ++
 osm/opensm/osm_perfmgr.c          |  686 +++++++++++++++++++++++++++++++++++++
 osm/opensm/osm_subnet.c           |   51 +++
 osm/opensm/osm_trap_rcv.c         |   15 +
 29 files changed, 2425 insertions(+), 5 deletions(-)

diff --git a/osm/Makefile.am b/osm/Makefile.am
index ec66883..32f5f64 100644
--- a/osm/Makefile.am
+++ b/osm/Makefile.am
@@ -1,6 +1,7 @@
 
 # note that order matters: make the libs first then use them 
-SUBDIRS 		= complib libvendor opensm osmtest include
+SUBDIRS 		= complib libvendor opensm osmtest include $(EVENTDB)
+DIST_SUBDIRS = complib libvendor opensm osmtest include eventdb
 
 # this will control the update of the files in order
 MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure config-h.in 
diff --git a/osm/config/osmvsel.m4 b/osm/config/osmvsel.m4
index 9234f36..ce6039c 100644
--- a/osm/config/osmvsel.m4
+++ b/osm/config/osmvsel.m4
@@ -180,3 +180,29 @@ if test "$disable_libcheck" != "yes"; th
 fi
 # --- END OPENIB_APP_OSMV_CHECK_HEADER ---
 ]) dnl OPENIB_APP_OSMV_CHECK_HEADER
+
+
+AC_DEFUN([OPENIB_OSM_PERF_MGR_SEL], [
+# --- BEGIN OPENIB_OSM_PERF_MGR_SEL ---
+
+dnl enable the perf-mgr
+dnl Sets perf_mgr to yes or no, defines ENABLE_OSM_PERF_MGR when enabled and
+dnl substitutes EVENTDB with the plugin subdirectory to build (or nothing).
+dnl Any value other than yes/no is rejected so that perf_mgr is always set
+dnl before it is tested below.
+AC_ARG_ENABLE(perf-mgr,
+[  --enable-perf-mgr Enable the performance manager (default no)],
+   [case "${enableval}" in
+     yes) perf_mgr=yes ;;
+     no)  perf_mgr=no ;;
+     *)   AC_MSG_ERROR(bad value ${enableval} for --enable-perf-mgr) ;;
+   esac],
+   perf_mgr=no)
+if test "x$perf_mgr" = xyes; then
+  AC_DEFINE(ENABLE_OSM_PERF_MGR,
+	    1,
+	    [Define as 1 if you want to enable the performance manager])
+  EVENTDB=eventdb
+else
+  EVENTDB=
+fi
+AC_SUBST([EVENTDB])
+
+# --- END OPENIB_OSM_PERF_MGR_SEL ---
+]) dnl OPENIB_OSM_PERF_MGR_SEL
+
diff --git a/osm/configure.in b/osm/configure.in
index eb6552f..94d4483 100644
--- a/osm/configure.in
+++ b/osm/configure.in
@@ -27,11 +27,14 @@ AC_ARG_ENABLE(debug,
 esac],[debug=false])
 AM_CONDITIONAL(DEBUG, test x$debug = xtrue)
 
+dnl select performance manager or not
+OPENIB_OSM_PERF_MGR_SEL
+
 dnl Provide user option to select vendor
 OPENIB_APP_OSMV_SEL
 
 dnl Configure the following subdirs
-AC_CONFIG_SUBDIRS(complib libvendor opensm osmtest include)
+AC_CONFIG_SUBDIRS(complib libvendor opensm osmtest include eventdb)
 
 dnl Create the following Makefiles
 AC_OUTPUT(Makefile)
diff --git a/osm/eventdb/Makefile.am b/osm/eventdb/Makefile.am
new file mode 100644
index 0000000..18f2db9
--- /dev/null
+++ b/osm/eventdb/Makefile.am
@@ -0,0 +1,37 @@
+
+INCLUDES = -I$(srcdir)/../include \
+	   -I$(includedir)/infiniband
+
+lib_LTLIBRARIES = libibeventdb.la
+
+if DEBUG
+DBGFLAGS = -ggdb -D_DEBUG_
+else
+DBGFLAGS = -g
+endif
+
+libibeventdb_la_CFLAGS = -Wall $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1 -Wno-deprecated-declarations
+
+if HAVE_LD_VERSION_SCRIPT
+    libibeventdb_version_script = -Wl,--version-script=$(srcdir)/libibeventdb.map
+else
+    libibeventdb_version_script =
+endif
+
+libibeventdb_la_SOURCES = src/ibeventdb.c
+libibeventdb_la_LDFLAGS = -version-info $(ibeventdb_api_version) \
+	 -export-dynamic $(libibeventdb_version_script)
+libibeventdb_la_LIBADD = -L../complib $(OSMV_LDADD) -losmcomp
+libibeventdb_la_DEPENDENCIES = $(srcdir)/libibeventdb.map
+
+libibeventdbincludedir = $(includedir)/infiniband/complib
+
+libibeventdbinclude_HEADERS =
+
+# headers are distributed as part of the include dir
+EXTRA_DIST = $(srcdir)/libibeventdb.spec.in $(srcdir)/libibeventdb.map \
+	$(srcdir)/libibeventdb.ver
+
+dist-hook: libibeventdb.spec
+	cp libibeventdb.spec $(distdir)
+
diff --git a/osm/eventdb/autogen.sh b/osm/eventdb/autogen.sh
new file mode 100755
index 0000000..ec20fc5
--- /dev/null
+++ b/osm/eventdb/autogen.sh
@@ -0,0 +1,15 @@
+#! /bin/sh
+
+# Run from the directory holding this script: the later utilities assume
+# they work in the project dir.  dirname copes with an invocation that has
+# no slash in $0, the quotes cope with spaces in the path, and we stop if
+# the directory cannot be entered rather than running the tools elsewhere.
+cd "$(dirname "$0")" || exit 1
+
+# create config dir if not exist
+test -d config || mkdir config
+
+set -x
+(aclocal -I config -I ../config 2>&1 ) && \
+(libtoolize --force --copy) && \
+(autoheader) && \
+(automake --foreign --add-missing --copy) && \
+autoconf
+
diff --git a/osm/eventdb/configure.in b/osm/eventdb/configure.in
new file mode 100644
index 0000000..f5fa345
--- /dev/null
+++ b/osm/eventdb/configure.in
@@ -0,0 +1,70 @@
+dnl Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.57)
+AC_INIT(libibeventdb, 1.0.0, openib-general at openib.org)
+AC_CONFIG_AUX_DIR(config)
+AM_CONFIG_HEADER(config.h)
+AM_INIT_AUTOMAKE
+
+dnl the library version info is available in the file: libibeventdb.ver
+ibeventdb_api_version=`grep LIBVERSION $srcdir/libibeventdb.ver | sed 's/LIBVERSION=//'`
+if test -z $ibeventdb_api_version; then
+   ibeventdb_api_version=1:0:0
+fi
+AC_SUBST(ibeventdb_api_version)
+
+dnl Checks for programs
+AC_PROG_CC
+AC_PROG_GCC_TRADITIONAL
+AC_PROG_LIBTOOL
+
+dnl Checks for libraries
+AC_CHECK_LIB(pthread, pthread_mutex_init, [],
+	AC_MSG_ERROR([pthread_mutex_init() not found.  libibeventdb requires libpthread.]))
+
+dnl Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS([fcntl.h stdlib.h string.h sys/ioctl.h sys/time.h syslog.h unistd.h])
+
+dnl Checks for library functions
+AC_FUNC_MALLOC
+AC_FUNC_MEMCMP
+AC_CHECK_FUNC([time])
+dnl AC_CHECK_FUNC([cl_plock_excl_acquire], [],
+dnl AC_MSG_ERROR([cl_plock_excl_acquire not found, libibeventdb requires libosmcomp]))
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_C_INLINE
+AC_TYPE_SIZE_T
+AC_HEADER_TIME
+
+dnl We use --version-script with ld if possible
+AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script,
+    if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then
+        ac_cv_version_script=yes
+    else
+        ac_cv_version_script=no
+    fi)
+
+AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes")
+
+dnl Support debug mode build - if enable-debug provided the DEBUG variable is set
+AC_ARG_ENABLE(debug,
+[  --enable-debug Turn on debug mode],
+[case "${enableval}" in
+  yes) debug=true ;;
+  no)  debug=false ;;
+  *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;;
+esac],[debug=false])
+AM_CONDITIONAL(DEBUG, test x$debug = xtrue)
+
+# we have to revive the env CFLAGS as some how they are being overwritten...
+# see http://sources.redhat.com/automake/automake.html#Flag-Variables-Ordering
+# for why they should NEVER be modified by the configure to allow for user
+# overrides.
+CFLAGS=$ac_env_CFLAGS_value
+
+
+AC_CONFIG_FILES([Makefile libibeventdb.spec])
+AC_OUTPUT
diff --git a/osm/eventdb/libibeventdb.map b/osm/eventdb/libibeventdb.map
new file mode 100644
index 0000000..ca4f78c
--- /dev/null
+++ b/osm/eventdb/libibeventdb.map
@@ -0,0 +1,5 @@
+OSMPMDB_1.0 {
+	global:
+      __osm_event_db;
+	local: *;
+};
diff --git a/osm/eventdb/libibeventdb.spec.in b/osm/eventdb/libibeventdb.spec.in
new file mode 100644
index 0000000..ac66545
--- /dev/null
+++ b/osm/eventdb/libibeventdb.spec.in
@@ -0,0 +1,38 @@
+
+%define ver @VERSION@
+%define RELEASE 1
+%define rel %{?CUSTOM_RELEASE} %{!?CUSTOM_RELEASE:%RELEASE}
+
+Summary: OpenIB InfiniBand OpenSM Component Library
+Name: libibeventdb
+Version: %ver
+Release: %rel%{?dist}
+License: GPL/BSD
+Group: System Environment/Libraries
+BuildRoot: %{_tmppath}/%{name}-%{version}-root
+Source: http://openib.org/downloads/%{name}-%{version}.tar.gz
+Url: http://openib.org/
+Requires: opensm
+
+%description
+libibeventdb provides a default plugin for the OpenSM event database
+
+%prep
+%setup -q
+
+%build
+%configure
+make
+
+%install
+make DESTDIR=${RPM_BUILD_ROOT} install
+# remove unpackaged files from the buildroot
+rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%{_libdir}/libibeventdb*.so.*
+%doc ChangeLog
diff --git a/osm/eventdb/libibeventdb.ver b/osm/eventdb/libibeventdb.ver
new file mode 100644
index 0000000..7a703b7
--- /dev/null
+++ b/osm/eventdb/libibeventdb.ver
@@ -0,0 +1,9 @@
+# In this file we track the current API version
+# of the vendor interface (and libraries)
+# The version is built of the following
+# three numbers:
+# API_REV:RUNNING_REV:AGE
+# API_REV - advance on any added API
+# RUNNING_REV - advance any change to the vendor files
+# AGE - number of backward versions the API still supports
+LIBVERSION=1:0:0
diff --git a/osm/eventdb/src/ibeventdb.c b/osm/eventdb/src/ibeventdb.c
new file mode 100644
index 0000000..e98f85c
--- /dev/null
+++ b/osm/eventdb/src/ibeventdb.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2007 The Regents of the University of California.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <dlfcn.h>
+#include <stdint.h>
+#include <opensm/osm_event_db.h>
+#include <complib/cl_qmap.h>
+#include <complib/cl_passivelock.h>
+
+/**
+ * Port counter object.
+ * Store all the port counters for a single port.
+ *
+ * "totals" holds the running sums that db_add_reading() accumulates from
+ * the difference between each new reading and "previous"; "previous" keeps
+ * the most recent raw reading together with the time it was stored.
+ */
+typedef struct _osm_event_pc {
+	struct {
+		uint64_t symbol_err_cnt;
+		uint64_t link_err_recover;
+		uint64_t link_downed;
+		uint64_t rcv_err;
+		uint64_t rcv_rem_phys_err;
+		uint64_t rcv_switch_relay_err;
+		uint64_t xmit_discards;
+		uint64_t xmit_constraint_err;
+		uint64_t rcv_constraint_err;
+		uint64_t link_int_err;
+		uint64_t buffer_overrun_err;
+		uint64_t vl15_dropped;
+		uint64_t xmit_data;
+		uint64_t rcv_data;
+		uint64_t xmit_pkts;
+		uint64_t rcv_pkts;
+		time_t   last_reset; /* set at node creation and whenever the totals are cleared */
+	} totals;
+	osm_pc_reading_t previous;
+} osm_event_pc_t;
+
+/**
+ * group port counters for ports into the nodes
+ *
+ * One of these is stored in the database qmap per node, keyed by node_guid.
+ */
+typedef struct _osm_pc_node {
+	cl_map_item_t  map_item; /* must be first: node pointers are cast to and from cl_map_item_t * */
+	uint64_t       node_guid; /* key used when inserting into the qmap */
+	osm_event_pc_t   *ports; /* array of num_ports entries, indexed by port number */
+	uint8_t        num_ports; /* number of entries in ports */
+} osm_pc_node_t;
+
+/**
+ * all nodes in the system.
+ *
+ * This is the object db_construct() hands out as an opaque pointer.
+ */
+typedef struct _osm_pc_db {
+	cl_qmap_t   pc_data; /* stores type (osm_pc_node_t *) */
+	cl_plock_t  lock; /* taken by the db_* entry points before they touch pc_data */
+	osm_log_t  *osm_log; /* saved by db_construct(); not used elsewhere in this file */
+} osm_pc_db_t;
+
+
+/** =========================================================================
+ * Allocate and initialize an empty port counter database.
+ *
+ * osm_log: log handle saved in the database object.
+ *
+ * Returns the new database as an opaque pointer, or NULL when the
+ * allocation or the lock initialization fails.
+ */
+static void *
+db_construct(osm_log_t *osm_log)
+{
+	osm_pc_db_t *db = malloc(sizeof(*db));
+	if (!db) {
+		return (NULL);
+	}
+	cl_plock_construct(&(db->lock));
+	if (cl_plock_init(&(db->lock)) != CL_SUCCESS) {
+		/* never hand out a database whose lock is unusable */
+		free(db);
+		return (NULL);
+	}
+	cl_qmap_init(&(db->pc_data));
+	db->osm_log = osm_log;
+	return ((void *)db);
+}
+
+/** =========================================================================
+ * Tear down a database created by db_construct.
+ *
+ * Every node still in the map is unlinked and released together with its
+ * port array, then the lock and the database object itself are destroyed.
+ * A NULL database is ignored.
+ */
+static void
+db_destroy(void *_db)
+{
+	osm_pc_db_t *db = (osm_pc_db_t *)_db;
+
+	if (!db)
+		return;
+
+	cl_plock_excl_acquire(&(db->lock));
+	/* remove all the items in the qmap and release their storage */
+	while (!cl_is_qmap_empty(&(db->pc_data))) {
+		/* map_item is the first member, so the item is the node */
+		osm_pc_node_t *node = (osm_pc_node_t *)cl_qmap_head(&(db->pc_data));
+		cl_qmap_remove_item(&(db->pc_data), &(node->map_item));
+		free(node->ports);
+		free(node);
+	}
+	cl_plock_release(&(db->lock));
+	cl_plock_destroy(&(db->lock));
+	free(db);
+}
+
+/** =========================================================================
+ * Allocate a node record with room for num_ports port counter objects.
+ *
+ * The port array is zero filled; each port's last_reset and previous
+ * reading time are stamped with the current time.  The _db argument is
+ * not used.
+ *
+ * Returns the new node, or NULL if either allocation fails.
+ */
+static osm_pc_node_t *
+malloc_node(void *_db, uint64_t guid, uint8_t num_ports)
+{
+	osm_pc_node_t  *node;
+	osm_event_pc_t *port;
+	time_t          now;
+
+	node = malloc(sizeof(*node));
+	if (node == NULL)
+		return (NULL);
+
+	node->ports = calloc(num_ports, sizeof(*node->ports));
+	if (node->ports == NULL) {
+		free(node);
+		return (NULL);
+	}
+	node->node_guid = guid;
+	node->num_ports = num_ports;
+
+	now = time(NULL);
+	for (port = node->ports; port < node->ports + num_ports; port++) {
+		port->totals.last_reset = now;
+		port->previous.time = now;
+	}
+
+	return (node);
+}
+
+/** =========================================================================
+ * Release a node obtained from malloc_node together with its port array.
+ * A NULL node is ignored.
+ */
+static void
+free_node(osm_pc_node_t *node)
+{
+	if (node != NULL) {
+		/* free() accepts a NULL port array */
+		free(node->ports);
+		free(node);
+	}
+}
+
+/* insert nodes to the database
+ *
+ * The node is keyed by its node_guid.  Fails when cl_qmap_insert hands
+ * back an item other than the one passed in (presumably because the GUID
+ * is already present - confirm against complib).
+ */
+static osm_event_db_err_t
+insert(void *_db, osm_pc_node_t *node)
+{
+	osm_pc_db_t   *db = (osm_pc_db_t *)_db;
+	cl_map_item_t *stored;
+
+	stored = cl_qmap_insert(&(db->pc_data), node->node_guid, &(node->map_item));
+	if (stored == &(node->map_item))
+		return (OSM_EVENT_DB_SUCCESS);
+	return (OSM_EVENT_DB_FAIL);
+}
+
+/**********************************************************************
+ * Look up the node stored under guid; NULL when there is none.
+ * Internal call db->lock should be held when calling
+ **********************************************************************/
+static inline osm_pc_node_t *
+get(void *_db, uint64_t guid)
+{
+	osm_pc_db_t   *db = (osm_pc_db_t *)_db;
+	cl_map_item_t *item = cl_qmap_get(&(db->pc_data), guid);
+
+	/* the map's end marker is what a failed lookup returns */
+	if (item != cl_qmap_end(&(db->pc_data)))
+		return ((osm_pc_node_t *)item);
+	return (NULL);
+}
+
+/** =========================================================================
+ * Make sure the database holds an entry for the node "guid".
+ *
+ * When the GUID is already known nothing is changed and success is
+ * returned.  Otherwise a node with num_ports ports is allocated and
+ * inserted under the exclusive lock.
+ *
+ * Returns OSM_EVENT_DB_SUCCESS, OSM_EVENT_DB_NOMEM when the node cannot
+ * be allocated, or OSM_EVENT_DB_FAIL when the insert is refused.
+ */
+static osm_event_db_err_t
+db_create_entry(void *_db, uint64_t guid, uint8_t num_ports)
+{
+	osm_pc_db_t        *db = (osm_pc_db_t *)_db;
+	osm_event_db_err_t  status = OSM_EVENT_DB_SUCCESS;
+	osm_pc_node_t      *new_node = NULL;
+
+	cl_plock_excl_acquire(&(db->lock));
+	if (get(db, guid) == NULL) {
+		new_node = malloc_node(db, guid, num_ports);
+		if (new_node == NULL) {
+			status = OSM_EVENT_DB_NOMEM;
+		} else if (insert(db, new_node)) {
+			/* the map did not take ownership, so release it here */
+			free_node(new_node);
+			status = OSM_EVENT_DB_FAIL;
+		}
+	}
+	cl_plock_release(&(db->lock));
+	return (status);
+}
+
+/**********************************************************************
+ * Copy the previously stored reading of a port into "reading".
+ *
+ * _db:     database returned by db_construct
+ * guid:    GUID of the node owning the port
+ * port:    port index; must be less than the node's num_ports
+ * reading: filled in on success, left untouched otherwise
+ *
+ * Returns OSM_EVENT_DB_SUCCESS, OSM_EVENT_DB_GUIDNOTFOUND when the node
+ * is not in the database, or OSM_EVENT_DB_PORTNOTFOUND when the port is
+ * out of range.  The shared lock is released on every path.
+ **********************************************************************/
+static osm_event_db_err_t
+db_get_prev(void *_db, uint64_t guid,
+		uint8_t port, osm_pc_reading_t *reading)
+{
+	osm_pc_db_t        *db = (osm_pc_db_t *)_db;
+	osm_pc_node_t      *node = NULL;
+	osm_event_db_err_t  rc = OSM_EVENT_DB_SUCCESS;
+
+	cl_plock_acquire(&(db->lock));
+
+	node = get(db, guid);
+	if (!node) {
+		rc = OSM_EVENT_DB_GUIDNOTFOUND;
+		goto Exit;
+	}
+
+	if (port >= node->num_ports) {
+		rc = OSM_EVENT_DB_PORTNOTFOUND;
+		goto Exit;
+	}
+
+	*reading = node->ports[port].previous;
+
+Exit:
+	cl_plock_release(&(db->lock));
+	return (rc);
+}
+
+/**********************************************************************
+ * Output a tab-delimited dump of the port counters
+ *
+ * Writes a header line naming the counters, then one line per port with
+ * the node GUID, the port number and the accumulated totals.
+ * The loop starts at index 1, so ports[0] is never written.
+ * NOTE(review): presumably index 0 is skipped on purpose - confirm.
+ *
+ * node: node whose port totals are written
+ * fp:   stream supplied by the caller
+ **********************************************************************/
+static void
+__dump_node_mr(osm_pc_node_t *node, FILE *fp)
+{
+	int i = 0;
+
+	fprintf(fp, "\nGUID            Port\t%s\t%s\t"
+			"%s\t%s\t%s\t%s\t%s\t%s\t%s\t"
+			"%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+			"symbol_err_cnt",
+			"link_err_recover",
+			"link_downed",
+			"rcv_err",
+			"rcv_rem_phys_err",
+			"rcv_switch_relay_err",
+			"xmit_discards",
+			"xmit_constraint_err",
+			"rcv_constraint_err",
+			"link_int_err",
+			"buf_overrun_err",
+			"vl15_dropped",
+			"xmit_data",
+			"rcv_data",
+			"xmit_pkts",
+			"rcv_pkts");
+	for (i = 1; i < node->num_ports; i++)
+	{
+		fprintf(fp, "0x%" PRIx64 "\t%d\t%"PRIu64"\t%"PRIu64"\t"
+			"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t"
+			"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t"
+			"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t"
+			"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n",
+			node->node_guid,
+			i,
+			node->ports[i].totals.symbol_err_cnt,
+			node->ports[i].totals.link_err_recover,
+			node->ports[i].totals.link_downed,
+			node->ports[i].totals.rcv_err,
+			node->ports[i].totals.rcv_rem_phys_err,
+			node->ports[i].totals.rcv_switch_relay_err,
+			node->ports[i].totals.xmit_discards,
+			node->ports[i].totals.xmit_constraint_err,
+			node->ports[i].totals.rcv_constraint_err,
+			node->ports[i].totals.link_int_err,
+			node->ports[i].totals.buffer_overrun_err,
+			node->ports[i].totals.vl15_dropped,
+			node->ports[i].totals.xmit_data,
+			node->ports[i].totals.rcv_data,
+			node->ports[i].totals.xmit_pkts,
+			node->ports[i].totals.rcv_pkts
+			);
+	}
+}
+
+/**********************************************************************
+ * Output a human readable output of the port counters
+ *
+ * Writes one labelled block per port: the node GUID and port number
+ * followed by each accumulated total on its own line.
+ * The loop starts at index 1, so ports[0] is never written.
+ * NOTE(review): presumably index 0 is skipped on purpose - confirm.
+ *
+ * node: node whose port totals are written
+ * fp:   stream supplied by the caller
+ **********************************************************************/
+static void
+__dump_node_hr(osm_pc_node_t *node, FILE *fp)
+{
+	int i = 0;
+
+	fprintf(fp, "\n");
+	for (i = 1; i < node->num_ports; i++)
+	{
+		fprintf(fp, "GUID 0x%"PRIx64": Port %d:\n"
+			"     symbol_err_cnt: %"PRIu64"\n"
+			"     link_err_recover: %"PRIu64"\n"
+			"     link_downed: %"PRIu64"\n"
+			"     rcv_err: %"PRIu64"\n"
+			"     rcv_rem_phys_err: %"PRIu64"\n"
+			"     rcv_switch_relay_err: %"PRIu64"\n"
+			"     xmit_discards: %"PRIu64"\n"
+			"     xmit_constraint_err: %"PRIu64"\n"
+			"     rcv_constraint_err: %"PRIu64"\n"
+			"     link_int_err: %"PRIu64"\n"
+			"     buf_overrun_err: %"PRIu64"\n"
+			"     vl15_dropped: %"PRIu64"\n"
+			"     xmit_data: %"PRIu64"\n"
+			"     rcv_data: %"PRIu64"\n"
+			"     xmit_pkts: %"PRIu64"\n"
+			"     rcv_pkts: %"PRIu64"\n"
+			,
+			node->node_guid,
+			i,
+			node->ports[i].totals.symbol_err_cnt,
+			node->ports[i].totals.link_err_recover,
+			node->ports[i].totals.link_downed,
+			node->ports[i].totals.rcv_err,
+			node->ports[i].totals.rcv_rem_phys_err,
+			node->ports[i].totals.rcv_switch_relay_err,
+			node->ports[i].totals.xmit_discards,
+			node->ports[i].totals.xmit_constraint_err,
+			node->ports[i].totals.rcv_constraint_err,
+			node->ports[i].totals.link_int_err,
+			node->ports[i].totals.buffer_overrun_err,
+			node->ports[i].totals.vl15_dropped,
+			node->ports[i].totals.xmit_data,
+			node->ports[i].totals.rcv_data,
+			node->ports[i].totals.xmit_pkts,
+			node->ports[i].totals.rcv_pkts
+			);
+	}
+}
+
+/* Define a context for the __db_dump callback
+ *   fp:        open stream the per-node dump functions write to
+ *   dump_type: selects the machine readable or human readable format
+ */
+typedef struct {
+	FILE                *fp;
+	osm_event_db_dump_t  dump_type;
+} dump_context_t;
+
+/**********************************************************************
+ * qmap callback: write one node in the format chosen by the context.
+ *
+ * p_map_item: the node being visited
+ * context:    a dump_context_t carrying the stream and the dump type
+ *
+ * OSM_EVENT_DB_DUMP_MR selects the machine readable form; every other
+ * dump type produces the human readable form.
+ **********************************************************************/
+static void
+__db_dump(cl_map_item_t * const p_map_item, void *context )
+{
+	dump_context_t *ctx = (dump_context_t *)context;
+	osm_pc_node_t  *node = (osm_pc_node_t *)p_map_item;
+
+	if (ctx->dump_type == OSM_EVENT_DB_DUMP_MR)
+		__dump_node_mr(node, ctx->fp);
+	else
+		__dump_node_hr(node, ctx->fp);
+}
+
+/**********************************************************************
+ * dump the data to the file "file"
+ *
+ * The file is opened with mode "w+" and every node is written in the
+ * requested format while the shared lock is held.
+ *
+ * Returns OSM_EVENT_DB_FAIL when the file cannot be opened, otherwise
+ * OSM_EVENT_DB_SUCCESS.
+ **********************************************************************/
+static osm_event_db_err_t
+db_dump(void *_db, char *file, osm_event_db_dump_t dump_type)
+{
+	osm_pc_db_t    *db = (osm_pc_db_t *)_db;
+	dump_context_t  ctx;
+	FILE           *out = fopen(file, "w+");
+
+	if (out == NULL)
+		return (OSM_EVENT_DB_FAIL);
+
+	ctx.fp = out;
+	ctx.dump_type = dump_type;
+
+	cl_plock_acquire(&(db->lock));
+	cl_qmap_apply_func(&(db->pc_data), __db_dump, (void *)&ctx);
+	cl_plock_release(&(db->lock));
+
+	fclose(out);
+	return (OSM_EVENT_DB_SUCCESS);
+}
+
+/**********************************************************************
+ * qmap callback used by db_clear_port_counters: zero the totals of
+ * every port of the visited node and stamp last_reset with the current
+ * time.  The previous reading is left alone; context is not used.
+ **********************************************************************/
+static void
+__clear_counters(cl_map_item_t * const p_map_item, void *context )
+{
+	osm_pc_node_t  *node = (osm_pc_node_t *)p_map_item;
+	osm_event_pc_t *port = node->ports;
+	osm_event_pc_t *end = node->ports + node->num_ports;
+
+	for (; port != end; port++) {
+		/* totals holds only counters plus last_reset, which is set next */
+		memset(&(port->totals), 0, sizeof(port->totals));
+		port->totals.last_reset = time(NULL);
+	}
+}
+
+/**********************************************************************
+ * Clear the counters from the db
+ *
+ * Runs __clear_counters over every node while holding the exclusive
+ * lock.
+ **********************************************************************/
+static void
+db_clear_port_counters(void *_db)
+{
+	osm_pc_db_t *db = _db;
+	cl_qmap_t   *nodes = &(db->pc_data);
+
+	cl_plock_excl_acquire(&(db->lock));
+	cl_qmap_apply_func(nodes, __clear_counters, (void *)db);
+	cl_plock_release(&(db->lock));
+}
+
+#if 0
+/**********************************************************************
+ * Dump a reading vs the previous reading to stdout
+ *
+ * Debug helper, compiled out.  Each line shows the current value, the
+ * previously stored value and the running total of one counter.
+ * NOTE(review): the "li" and "bo" lines both print the raw
+ * link_int_buffer_overrun field rather than its two halves.
+ * NOTE(review): cl_ntoh16 is applied here to link_err_recover,
+ * link_downed, xmit_constraint_err and rcv_constraint_err, which
+ * db_add_reading uses without byte swapping - check before re-enabling.
+ **********************************************************************/
+static void
+dump_reading(osm_event_pc_t *port, ib_port_counters_t *cur)
+{
+	printf("sym %u - %u (%" PRIx64 ")\n", cl_ntoh16(cur->symbol_err_cnt),
+			cl_ntoh16(port->previous.reading.symbol_err_cnt), port->totals.symbol_err_cnt);
+	printf("ler %u - %u (%" PRIx64 ")\n", cl_ntoh16(cur->link_err_recover),
+		cl_ntoh16(port->previous.reading.link_err_recover), port->totals.link_err_recover);
+	printf("ld %u - %u (%" PRIx64 ")\n", cl_ntoh16(cur->link_downed),
+		cl_ntoh16(port->previous.reading.link_downed), port->totals.link_downed);
+	printf("re %u - %u (%" PRIx64 ")\n", cl_ntoh16(cur->rcv_err),
+		cl_ntoh16(port->previous.reading.rcv_err), port->totals.rcv_err);
+	printf("rrp %u - %u (%" PRIx64 ")\n", cl_ntoh16(cur->rcv_rem_phys_err),
+		cl_ntoh16(port->previous.reading.rcv_rem_phys_err), port->totals.rcv_rem_phys_err);
+	printf("rsr %u - %u (%" PRIx64 ")\n",
+		cl_ntoh16(cur->rcv_switch_relay_err),
+		cl_ntoh16(port->previous.reading.rcv_switch_relay_err), port->totals.rcv_switch_relay_err);
+	printf("xd %u - %u (%" PRIx64 ")\n", cl_ntoh16(cur->xmit_discards),
+		cl_ntoh16(port->previous.reading.xmit_discards), port->totals.xmit_discards);
+	printf("xce %u - %u (%" PRIx64 ")\n",
+		cl_ntoh16(cur->xmit_constraint_err),
+		cl_ntoh16(port->previous.reading.xmit_constraint_err), port->totals.xmit_constraint_err);
+	printf("rce %u - %u (%" PRIx64 ")\n",
+		cl_ntoh16(cur->rcv_constraint_err),
+		cl_ntoh16(port->previous.reading.rcv_constraint_err), port->totals.rcv_constraint_err);
+	printf("li %x - %x (%" PRIx64 ")\n",
+		cl_ntoh16(cur->link_int_buffer_overrun),
+		cl_ntoh16(port->previous.reading.link_int_buffer_overrun), port->totals.link_int_err);
+	printf("bo %x - %x (%" PRIx64 ")\n",
+		cl_ntoh16(cur->link_int_buffer_overrun),
+		cl_ntoh16(port->previous.reading.link_int_buffer_overrun), port->totals.buffer_overrun_err);
+	printf("vld %u - %u (%" PRIx64 ")\n", cl_ntoh16(cur->vl15_dropped),
+		cl_ntoh16(port->previous.reading.vl15_dropped), port->totals.vl15_dropped);
+	
+	printf("xd %u - %u (%" PRIx64 ")\n", cl_ntoh32(cur->xmit_data),
+		cl_ntoh32(port->previous.reading.xmit_data), port->totals.xmit_data);
+	printf("rd %u - %u (%" PRIx64 ")\n", cl_ntoh32(cur->rcv_data),
+		cl_ntoh32(port->previous.reading.rcv_data), port->totals.rcv_data);
+	printf("xp %u - %u (%" PRIx64 ")\n", cl_ntoh32(cur->xmit_pkts),
+		cl_ntoh32(port->previous.reading.xmit_pkts), port->totals.xmit_pkts);
+	printf("rp %u - %u (%" PRIx64 ")\n", cl_ntoh32(cur->rcv_pkts),
+		cl_ntoh32(port->previous.reading.rcv_pkts), port->totals.rcv_pkts);
+}
+#endif
+
+/**********************************************************************
+ * Forget the previously stored reading of a port.
+ *
+ * The stored reading is zeroed and its timestamp set to the current
+ * time, so the next db_add_reading() measures its differences from zero.
+ * The accumulated totals are not touched.
+ *
+ * _db:  database returned by db_construct
+ * guid: GUID of the node owning the port
+ * port: port index; must be less than the node's num_ports
+ *
+ * Returns OSM_EVENT_DB_SUCCESS, OSM_EVENT_DB_GUIDNOTFOUND when the node
+ * is not in the database, or OSM_EVENT_DB_PORTNOTFOUND when the port is
+ * out of range.  The exclusive lock is released on every path.
+ **********************************************************************/
+static osm_event_db_err_t
+db_clear_prev_pc(void *_db, uint64_t guid, uint8_t port)
+{
+	osm_pc_db_t        *db = (osm_pc_db_t *)_db;
+	osm_event_pc_t     *p_port = NULL;
+	osm_pc_node_t      *p_node = NULL;
+	osm_event_db_err_t  rc = OSM_EVENT_DB_SUCCESS;
+
+	cl_plock_excl_acquire(&(db->lock));
+	p_node = get(db, guid);
+
+	if (!p_node) {
+		rc = OSM_EVENT_DB_GUIDNOTFOUND;
+		goto Exit;
+	}
+
+	if (port >= p_node->num_ports) {
+		rc = OSM_EVENT_DB_PORTNOTFOUND;
+		goto Exit;
+	}
+
+	p_port = &(p_node->ports[port]);
+
+	memset(&(p_port->previous.reading), 0, sizeof(p_port->previous.reading));
+	p_port->previous.time = time(NULL);
+
+Exit:
+	cl_plock_release(&(db->lock));
+	return (rc);
+}
+
+/**********************************************************************
+ * Add the reading to the osm_pc_node_t
+ *
+ * For every counter the difference between "reading" and the previously
+ * stored reading is added to the port's totals; "reading" then becomes
+ * the stored previous reading and its timestamp is refreshed.
+ *
+ * _db:     database returned by db_construct
+ * guid:    GUID of the node owning the port
+ * port:    port index; must be less than the node's num_ports
+ * reading: new counters; the 16 and 32 bit fields are converted with
+ *          cl_ntoh16/cl_ntoh32 here, single byte fields are used as is
+ *
+ * Returns OSM_EVENT_DB_SUCCESS, OSM_EVENT_DB_GUIDNOTFOUND when the node
+ * is not in the database, or OSM_EVENT_DB_PORTNOTFOUND when the port is
+ * out of range.  The exclusive lock is released on every path.
+ **********************************************************************/
+static osm_event_db_err_t
+db_add_reading(void *_db, uint64_t guid,
+                   uint8_t port, ib_port_counters_t *reading)
+{
+	osm_pc_db_t        *db = (osm_pc_db_t *)_db;
+	osm_event_pc_t     *p_port = NULL;
+	osm_pc_node_t      *p_node = NULL;
+	ib_port_counters_t *previous = NULL;
+	osm_event_db_err_t  rc = OSM_EVENT_DB_SUCCESS;
+
+	cl_plock_excl_acquire(&(db->lock));
+	p_node = get(db, guid);
+
+	if (!p_node) {
+		rc = OSM_EVENT_DB_GUIDNOTFOUND;
+		goto Exit;
+	}
+
+	if (port >= p_node->num_ports) {
+		rc = OSM_EVENT_DB_PORTNOTFOUND;
+		goto Exit;
+	}
+
+	p_port = &(p_node->ports[port]);
+	previous = &(p_node->ports[port].previous.reading);
+
+#if 0
+	dump_reading(p_port, reading);
+#endif
+
+	/* calculate changes from previous reading */
+	p_port->totals.symbol_err_cnt
+		+= (cl_ntoh16(reading->symbol_err_cnt)
+				- cl_ntoh16(previous->symbol_err_cnt));
+	p_port->totals.link_err_recover
+		+= (reading->link_err_recover - previous->link_err_recover);
+	p_port->totals.link_downed
+		+= (reading->link_downed - previous->link_downed);
+	p_port->totals.rcv_err
+		+= (cl_ntoh16(reading->rcv_err)
+				- cl_ntoh16(previous->rcv_err));
+	p_port->totals.rcv_rem_phys_err
+		+= (cl_ntoh16(reading->rcv_rem_phys_err)
+				- cl_ntoh16(previous->rcv_rem_phys_err));
+	p_port->totals.rcv_switch_relay_err
+		+= (cl_ntoh16(reading->rcv_switch_relay_err)
+				- cl_ntoh16(previous->rcv_switch_relay_err));
+	p_port->totals.xmit_discards
+		+= (cl_ntoh16(reading->xmit_discards)
+				- cl_ntoh16(previous->xmit_discards));
+	p_port->totals.xmit_constraint_err
+		+= (reading->xmit_constraint_err - previous->xmit_constraint_err);
+	p_port->totals.rcv_constraint_err
+		+= (reading->rcv_constraint_err - previous->rcv_constraint_err);
+	/* link_int and buffer_overrun share one field; split it per counter */
+	p_port->totals.link_int_err
+		+= PC_LINK_INT(reading->link_int_buffer_overrun)
+			- PC_LINK_INT(previous->link_int_buffer_overrun);
+	p_port->totals.buffer_overrun_err
+		+= PC_BUF_OVERRUN(reading->link_int_buffer_overrun)
+			- PC_BUF_OVERRUN(previous->link_int_buffer_overrun);
+	p_port->totals.vl15_dropped
+		+= (cl_ntoh16(reading->vl15_dropped)
+				- cl_ntoh16(previous->vl15_dropped));
+
+	p_port->totals.xmit_data
+		+= (cl_ntoh32(reading->xmit_data)
+				- cl_ntoh32(previous->xmit_data));
+	p_port->totals.rcv_data
+		+= (cl_ntoh32(reading->rcv_data)
+				- cl_ntoh32(previous->rcv_data));
+	p_port->totals.xmit_pkts
+		+= (cl_ntoh32(reading->xmit_pkts)
+				- cl_ntoh32(previous->xmit_pkts));
+	p_port->totals.rcv_pkts
+		+= (cl_ntoh32(reading->rcv_pkts)
+				- cl_ntoh32(previous->rcv_pkts));
+
+	/* the new reading is the baseline for the next call */
+	p_port->previous.reading = *reading;
+	p_port->previous.time = time(NULL);
+
+Exit:
+	cl_plock_release(&(db->lock));
+	return (rc);
+}
+
+/** =========================================================================
+ * Define the object symbol for loading
+ *
+ * This is the only symbol libibeventdb.map exports; it carries the
+ * interface version and the entry points implemented in this file.
+ * Written with C99 designated initializers rather than the obsolete
+ * GNU "field: value" form.
+ */
+__osm_event_db_t __osm_event_db =
+{
+	.interface_version = OSM_EVENT_DB_INTERFACE_VER,
+	.construct = db_construct,
+	.destroy = db_destroy,
+	.create_entry = db_create_entry,
+	.get_prev_pc = db_get_prev,
+	.dump = db_dump,
+	.clear_port_counters = db_clear_port_counters,
+	.add_pc_reading = db_add_reading,
+	.clear_prev_pc = db_clear_prev_pc
+};
+
diff --git a/osm/include/Makefile.am b/osm/include/Makefile.am
index 8499d3b..fd874c8 100644
--- a/osm/include/Makefile.am
+++ b/osm/include/Makefile.am
@@ -87,6 +87,8 @@ EXTRA_DIST = \
 	$(srcdir)/opensm/osm_drop_mgr.h \
 	$(srcdir)/opensm/osm_port_info_rcv.h \
 	$(srcdir)/opensm/osm_state_mgr_ctrl.h \
+	$(srcdir)/opensm/osm_perfmgr.h \
+	$(srcdir)/opensm/osm_event_db.h \
 	$(srcdir)/complib/cl_thread_osd.h \
 	$(srcdir)/complib/cl_packon.h \
 	$(srcdir)/complib/cl_atomic_osd.h \
diff --git a/osm/include/iba/ib_types.h b/osm/include/iba/ib_types.h
index b3937cb..2a4057b 100644
--- a/osm/include/iba/ib_types.h
+++ b/osm/include/iba/ib_types.h
@@ -7353,6 +7353,80 @@ typedef struct _ib_inform_info_record
 }	PACK_SUFFIX ib_inform_info_record_t;
 #include <complib/cl_packoff.h>
 
+/****s* IBA Base: Types/ib_perfmgr_mad_t
+* NAME
+*	ib_perfmgr_mad_t
+*
+* DESCRIPTION
+*	IBA defined Perf Management MAD (16.3.1)
+*
+* SYNOPSIS
+*/
+#include <complib/cl_packon.h>
+typedef struct _ib_perfmgr_mad
+{
+	ib_mad_t		header;
+	uint8_t			resv[40];
+
+#define	IB_PM_DATA_SIZE		192
+	uint8_t			data[IB_PM_DATA_SIZE];
+
+}	PACK_SUFFIX ib_perfmgr_mad_t;
+#include <complib/cl_packoff.h>
+/*
+* FIELDS
+*	header
+*		Common MAD header.
+*
+*	resv
+*		Reserved.
+*
+*	data
+*		Performance Management payload.  The structure and content of this field
+*		depend upon the method, attr_id, and attr_mod fields in the header.
+*
+* SEE ALSO
+* ib_mad_t
+*********/
+
+/****s* IBA Base: Types/ib_port_counters
+* NAME
+*	ib_port_counters_t
+*
+* DESCRIPTION
+*	IBA defined PortCounters Attribute. (16.1.3.5)
+*
+* SYNOPSIS
+*/
+#include <complib/cl_packon.h>
+typedef struct _ib_port_counters
+{
+	uint8_t 			reserved;
+	uint8_t                         port_select;
+	ib_net16_t                      counter_select;
+	ib_net16_t                      symbol_err_cnt;
+	uint8_t                         link_err_recover;
+	uint8_t                         link_downed;
+	ib_net16_t                      rcv_err;
+	ib_net16_t                      rcv_rem_phys_err;
+	ib_net16_t                      rcv_switch_relay_err;
+	ib_net16_t                      xmit_discards;
+	uint8_t                         xmit_constraint_err;
+	uint8_t                         rcv_constraint_err;
+	uint8_t                         res1;
+	uint8_t                         link_int_buffer_overrun;
+	ib_net16_t                      res2;
+	ib_net16_t                      vl15_dropped;
+	ib_net32_t                      xmit_data;
+	ib_net32_t                      rcv_data;
+	ib_net32_t                      xmit_pkts;
+	ib_net32_t                      rcv_pkts;
+}	PACK_SUFFIX ib_port_counters_t;
+#include <complib/cl_packoff.h>
+
+#define PC_LINK_INT(integ_buf_over) (((integ_buf_over) & 0xF0) >> 4) /* upper nibble */
+#define PC_BUF_OVERRUN(integ_buf_over) ((integ_buf_over) & 0x0F) /* lower nibble */
+
 /****d* IBA Base: Types/DM_SVC_NAME
 * NAME
 *	DM_SVC_NAME
diff --git a/osm/include/opensm/osm_base.h b/osm/include/opensm/osm_base.h
index b38b511..51cef49 100644
--- a/osm/include/opensm/osm_base.h
+++ b/osm/include/opensm/osm_base.h
@@ -448,6 +448,29 @@ BEGIN_C_DECLS
 */
 #define OSM_SM_DEFAULT_QP1_SEND_SIZE 256
 
+/****d* OpenSM: Base/OSM_PM_DEFAULT_QP1_RCV_SIZE
+* NAME
+*   OSM_PM_DEFAULT_QP1_RCV_SIZE
+*
+* DESCRIPTION
+*   Specifies the default size (in MADs) of the QP1 receive queue
+*
+* SYNOPSIS
+*/
+#define OSM_PM_DEFAULT_QP1_RCV_SIZE 256
+/***********/
+
+/****d* OpenSM: Base/OSM_PM_DEFAULT_QP1_SEND_SIZE
+* NAME
+*   OSM_PM_DEFAULT_QP1_SEND_SIZE
+*
+* DESCRIPTION
+*   Specifies the default size (in MADs) of the QP1 send queue
+*
+* SYNOPSIS
+*/
+#define OSM_PM_DEFAULT_QP1_SEND_SIZE 256
+/***********/
 
 /****d* OpenSM: Base/OSM_SM_DEFAULT_POLLING_TIMEOUT_MILLISECS
 * NAME
diff --git a/osm/include/opensm/osm_event_db.h b/osm/include/opensm/osm_event_db.h
new file mode 100644
index 0000000..17effaf
--- /dev/null
+++ b/osm/include/opensm/osm_event_db.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2007 The Regents of the University of California.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _OSM_EVENT_DB_H_
+#define _OSM_EVENT_DB_H_
+
+#include <time.h>
+#include <opensm/osm_log.h>
+#include <iba/ib_types.h>
+
+#ifdef __cplusplus
+#  define BEGIN_C_DECLS extern "C" {
+#  define END_C_DECLS   }
+#else /* !__cplusplus */
+#  define BEGIN_C_DECLS
+#  define END_C_DECLS
+#endif /* __cplusplus */
+
+BEGIN_C_DECLS
+
+/****h* OpenSM/Event Database
+* DESCRIPTION
+*       Database interface to record subnet events
+*
+*       Implementations of this object _MUST_ be thread safe.
+*
+* AUTHOR
+*	Ira Weiny, LLNL
+*
+*********/
+
+typedef enum {
+	OSM_EVENT_DB_SUCCESS = 0,
+	OSM_EVENT_DB_FAIL,
+	OSM_EVENT_DB_NOMEM,
+	OSM_EVENT_DB_GUIDNOTFOUND,
+	OSM_EVENT_DB_PORTNOTFOUND
+} osm_event_db_err_t;
+
+/** =========================================================================
+ * Port counter reading
+ */
+typedef struct {
+	ib_port_counters_t reading;
+	time_t             time;
+} osm_pc_reading_t;
+
+/** =========================================================================
+ * Dump output options
+ */
+typedef enum {
+	OSM_EVENT_DB_DUMP_HR = 0, /* Human readable */
+	OSM_EVENT_DB_DUMP_MR      /* Machine readable */
+} osm_event_db_dump_t;
+
+/** =========================================================================
+ * Plugin creators should define an exported object of this type
+ *    (symbol name __osm_event_db; the type is __osm_event_db_t)
+ * The interface_version field should be set to OSM_EVENT_DB_INTERFACE_VER
+ */
+#define OSM_EVENT_DB_INTERFACE_VER (1)
+typedef struct
+{
+	int                 interface_version;
+	void               *(*construct)(osm_log_t *osm_log);
+	void                (*destroy)(void *db);
+	osm_event_db_err_t  (*create_entry)(void *db, uint64_t guid, uint8_t num_ports);
+	osm_event_db_err_t  (*get_prev_pc)(void *db, uint64_t guid,
+				uint8_t port, osm_pc_reading_t *reading);
+	osm_event_db_err_t  (*dump)(void *db, char *file, osm_event_db_dump_t dump_type);
+	void                (*clear_port_counters)(void *db);
+	osm_event_db_err_t  (*add_pc_reading)(void *db, uint64_t guid,
+				uint8_t port, ib_port_counters_t *reading);
+	osm_event_db_err_t  (*clear_prev_pc)(void *db, uint64_t guid, uint8_t port);
+} __osm_event_db_t;
+
+/** =========================================================================
+ * The database structure which should be considered opaque
+ */
+typedef struct {
+	void             *handle;
+	__osm_event_db_t *db_impl;
+	void             *db_data;
+	osm_log_t        *p_log;
+} osm_event_db_t;
+
+
+/**
+ * functions
+ */
+osm_event_db_t     *osm_event_db_construct(osm_log_t *p_log, char *type);
+void                osm_event_db_destroy(osm_event_db_t *db);
+
+osm_event_db_err_t  osm_event_db_create_entry(osm_event_db_t *db, uint64_t guid,
+					uint8_t num_ports);
+osm_event_db_err_t  osm_event_db_get_prev_pc(osm_event_db_t *db,
+					uint64_t guid, uint8_t port,
+					osm_pc_reading_t *reading);
+osm_event_db_err_t  osm_event_db_dump(osm_event_db_t *db, char *file,
+					osm_event_db_dump_t dump_type);
+osm_event_db_err_t  osm_event_db_add_pc_reading(osm_event_db_t *db, uint64_t guid,
+					uint8_t port, ib_port_counters_t *reading);
+void                osm_event_db_clear_port_counters(osm_event_db_t *db);
+osm_event_db_err_t  osm_event_db_clear_prev_pc(osm_event_db_t *db, uint64_t guid,
+					uint8_t port);
+
+#if 0
+/* work out the tracking of notice (trap) events. */
+
+typedef struct {
+	ib_mad_notice_attr_t reading;
+	time_t               time;
+} osm_notice_reading_t;
+osm_event_db_err_t  osm_event_db_add_notice_reading(osm_event_db_t *db, uint64_t guid,
+					uint8_t port, ib_mad_notice_attr_t *reading);
+#endif
+
+END_C_DECLS
+
+#endif		/* _OSM_EVENT_DB_H_ */
+
diff --git a/osm/include/opensm/osm_madw.h b/osm/include/opensm/osm_madw.h
index 95be0f4..80258f4 100644
--- a/osm/include/opensm/osm_madw.h
+++ b/osm/include/opensm/osm_madw.h
@@ -315,6 +315,19 @@ typedef struct _osm_vla_context
 } osm_vla_context_t;
 /*********/
 
+/****s* OpenSM: MAD Wrapper/osm_perfmgr_context_t
+* DESCRIPTION
+*	Context for Performance manager queries
+*/
+typedef struct _osm_perfmgr_context {
+  uint64_t node_guid;
+  uint16_t port;
+  uint8_t num_ports;
+  uint8_t mad_method; /* was this a get or a set */
+  struct timeval query_start;
+} osm_perfmgr_context_t;
+/*********/
+
 #ifndef OSM_VENDOR_INTF_OPENIB
 /****s* OpenSM: MAD Wrapper/osm_arbitrary_context_t
 * NAME
@@ -354,6 +367,7 @@ typedef union _osm_madw_context
 	osm_slvl_context_t	slvl_context;
 	osm_pkey_context_t	pkey_context;
 	osm_vla_context_t	vla_context;
+	osm_perfmgr_context_t	perfmgr_context;
 #ifndef OSM_VENDOR_INTF_OPENIB
 	osm_arbitrary_context_t arb_context;
 #endif
@@ -639,6 +653,32 @@ osm_madw_get_sa_mad_ptr(
 *	MAD Wrapper object, osm_madw_construct, osm_madw_destroy
 *********/
 
+/****f* OpenSM: MAD Wrapper/osm_madw_get_perfmgr_mad_ptr
+* DESCRIPTION
+*	Gets a pointer to the PerfMgr MAD in this MAD wrapper.
+*
+* SYNOPSIS
+*/
+static inline ib_perfmgr_mad_t*
+osm_madw_get_perfmgr_mad_ptr(
+	IN const osm_madw_t* const p_madw )
+{
+	return((ib_perfmgr_mad_t*)p_madw->p_mad);
+}
+/*
+* PARAMETERS
+*	p_madw
+*		[in] Pointer to an osm_madw_t object.
+*
+* RETURN VALUES
+*	Pointer to the start of the PM MAD.
+*
+* NOTES
+*
+* SEE ALSO
+*	MAD Wrapper object, osm_madw_construct, osm_madw_destroy
+*********/
+
 /****f* OpenSM: MAD Wrapper/osm_madw_get_ni_context_ptr
 * NAME
 *	osm_madw_get_ni_context_ptr
diff --git a/osm/include/opensm/osm_msgdef.h b/osm/include/opensm/osm_msgdef.h
index a90e3b9..6732992 100644
--- a/osm/include/opensm/osm_msgdef.h
+++ b/osm/include/opensm/osm_msgdef.h
@@ -186,6 +186,7 @@ enum
 #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP)
 	OSM_MSG_MAD_MULTIPATH_RECORD,
 #endif
+	OSM_MSG_MAD_PORT_COUNTERS,
 	OSM_MSG_MAX
 };
 
diff --git a/osm/include/opensm/osm_opensm.h b/osm/include/opensm/osm_opensm.h
index 482de28..bdaa8f3 100644
--- a/osm/include/opensm/osm_opensm.h
+++ b/osm/include/opensm/osm_opensm.h
@@ -57,6 +57,7 @@
 #include <opensm/osm_log.h>
 #include <opensm/osm_sm.h>
 #include <opensm/osm_sa.h>
+#include <opensm/osm_perfmgr.h>
 #include <opensm/osm_db.h>
 #include <opensm/osm_subnet.h>
 #include <opensm/osm_mad_pool.h>
@@ -157,6 +158,9 @@ typedef struct _osm_opensm_t
   osm_subn_t		subn;
   osm_sm_t		sm;
   osm_sa_t		sa;
+#ifdef ENABLE_OSM_PERF_MGR
+  osm_perfmgr_t         perfmgr;
+#endif /* ENABLE_OSM_PERF_MGR */
   osm_db_t		db;
   osm_mad_pool_t	mad_pool;
   osm_vendor_t		*p_vendor;
diff --git a/osm/include/opensm/osm_perfmgr.h b/osm/include/opensm/osm_perfmgr.h
new file mode 100644
index 0000000..6138ec3
--- /dev/null
+++ b/osm/include/opensm/osm_perfmgr.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2007 The Regents of the University of California.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _OSM_PERFMGR_H_
+#define _OSM_PERFMGR_H_
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef ENABLE_OSM_PERF_MGR
+
+#include <iba/ib_types.h>
+#include <complib/cl_passivelock.h>
+#include <complib/cl_event.h>
+#include <complib/cl_thread.h>
+#include <opensm/osm_subnet.h>
+#include <opensm/osm_req.h>
+#include <opensm/osm_log.h>
+#include <opensm/osm_event_db.h>
+#include <opensm/osm_sm.h>
+#include <opensm/osm_base.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/****h* OpenSM/PERFMGR
+* NAME
+*	PERFMGR
+*
+* DESCRIPTION
+*       Performance manager thread which takes care of polling the fabric for
+*       Port counters values.
+*
+*	The PERFMGR object is thread safe.
+*
+* AUTHOR
+*	Ira Weiny, LLNL
+*
+*********/
+
+#define OSM_PERFMGR_DEFAULT_SWEEP_TIME_S 180
+#define OSM_PERFMGR_DEFAULT_DUMP_FILE OSM_DEFAULT_TMP_DIR "/osm_port_counters.log"
+#define OSM_DEFAULT_EVENT_PLUGIN "ibeventdb"
+
+/****s* OpenSM: PERFMGR/osm_perfmgr_state_t */
+typedef enum
+{
+  PERFMGR_STATE_DISABLE,
+  PERFMGR_STATE_ENABLED,
+  PERFMGR_STATE_NO_DB
+} osm_perfmgr_state_t;
+
+/****s* OpenSM: PERFMGR/osm_perfmgr_t
+*  This object should be treated as opaque and should
+*  be manipulated only through the provided functions.
+*/
+typedef struct _osm_perfmgr
+{
+  osm_thread_state_t    thread_state;
+  cl_event_t            sig_sweep;
+  cl_thread_t           sweeper;
+  osm_subn_t           *subn;
+  osm_sm_t             *sm;
+  cl_plock_t           *lock;
+  osm_log_t            *log;
+  osm_mad_pool_t       *mad_pool;
+  atomic32_t            trans_id;
+  osm_vendor_t         *vendor;
+  osm_bind_handle_t     bind_handle;
+  cl_disp_reg_handle_t  pc_disp_h;
+  osm_perfmgr_state_t   state;
+  uint16_t              sweep_time_s;
+  char                 *db_file;
+  char                 *event_db_dump_file;
+  char                 *event_db_plugin;
+  osm_event_db_t       *db;
+} osm_perfmgr_t;
+/*
+* FIELDS
+*	subn
+*	      Subnet object for this subnet.
+*
+*	log
+*	      Pointer to the log object.
+*
+*	mad_pool
+*		Pointer to the MAD pool.
+*
+*       event_db_dump_file
+*               File to be used to dump the Port Counters
+*
+*	sweep_time_s
+*		Period between PerfMgr sweeps, in seconds.
+*********/
+
+/****f* OpenSM: Creation Functions */
+void osm_perfmgr_shutdown(osm_perfmgr_t *const p_perfmgr );
+void osm_perfmgr_destroy(osm_perfmgr_t * const p_perfmgr );
+
+/****f* OpenSM: Inline accessor functions */
+inline static void osm_perfmgr_set_state(osm_perfmgr_t *p_perfmgr,
+		osm_perfmgr_state_t state)
+{
+	p_perfmgr->state = state;
+}
+inline static osm_perfmgr_state_t osm_perfmgr_get_state(osm_perfmgr_t
+		*p_perfmgr) { return (p_perfmgr->state); }
+/* Printable name of the current state; "UNKNOWN" for any unlisted value. */
+inline static char *osm_perfmgr_get_state_str(osm_perfmgr_t *p_perfmgr)
+{
+	const osm_perfmgr_state_t cur = p_perfmgr->state;
+
+	return (cur == PERFMGR_STATE_DISABLE ? "Disabled" :
+		cur == PERFMGR_STATE_ENABLED ? "Enabled" :
+		cur == PERFMGR_STATE_NO_DB ? "No Database" :
+		"UNKNOWN");
+}
+inline static void osm_perfmgr_set_sweep_time_s(osm_perfmgr_t *p_perfmgr, uint16_t time_s)
+{
+	p_perfmgr->sweep_time_s = time_s;
+   cl_event_signal(&p_perfmgr->sig_sweep);
+}
+inline static uint16_t osm_perfmgr_get_sweep_time_s(osm_perfmgr_t *p_perfmgr)
+{
+	return (p_perfmgr->sweep_time_s);
+}
+void osm_perfmgr_clear_counters(osm_perfmgr_t *p_perfmgr);
+void osm_perfmgr_dump_counters(osm_perfmgr_t *p_perfmgr,
+		osm_event_db_dump_t dump_type);
+
+ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * const p_perfmgr, const ib_net64_t port_guid);
+
+#if 0
+/* Work out the tracking of notice events */
+ib_api_status_t osm_report_notice_to_perfmgr(osm_log_t *const p_log, osm_subn_t *p_subn,
+  					ib_mad_notice_attr_t *p_ntc )
+#endif
+
+/****f* OpenSM: PERFMGR/osm_perfmgr_init */
+ib_api_status_t
+osm_perfmgr_init(
+	osm_perfmgr_t* const perfmgr,
+	osm_subn_t* const subn,
+        osm_sm_t * const sm,
+	osm_log_t* const log,
+	osm_mad_pool_t * const mad_pool,
+	osm_vendor_t * const vendor,
+        cl_dispatcher_t* const disp,
+   	cl_plock_t* const lock,
+	const osm_subn_opt_t * const p_opt );
+/*
+* PARAMETERS
+*	perfmgr
+*		[in] Pointer to an osm_perfmgr_t object to initialize.
+*
+*	subn
+*		[in] Pointer to the Subnet object for this subnet.
+*
+*	sm
+*		[in] Pointer to the Subnet Manager (osm_sm_t) object.
+*
+*	log
+*		[in] Pointer to the log object.
+*
+*	mad_pool
+*		[in] Pointer to the MAD pool.
+*
+*	vendor
+*		[in] Pointer to the vendor specific interfaces object.
+*
+*	disp
+*		[in] Pointer to the OpenSM central Dispatcher.
+*
+*	lock
+*		[in] Pointer to the OpenSM serializing lock.
+*
+*	p_opt
+*		[in] Starting options
+*
+* RETURN VALUES
+*	IB_SUCCESS if the PERFMGR object was initialized successfully.
+*********/
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* ENABLE_OSM_PERF_MGR */
+
+#endif		/* _OSM_PERFMGR_H_ */
+
diff --git a/osm/include/opensm/osm_subnet.h b/osm/include/opensm/osm_subnet.h
index fc52b5e..0fdc18b 100644
--- a/osm/include/opensm/osm_subnet.h
+++ b/osm/include/opensm/osm_subnet.h
@@ -291,6 +291,12 @@ typedef struct _osm_subn_opt
   osm_qos_options_t        qos_rtr_options;
   boolean_t                enable_quirks;
   boolean_t                no_clients_rereg;
+#ifdef ENABLE_OSM_PERF_MGR
+  boolean_t                perfmgr;
+  uint16_t                 perfmgr_sweep_time_s;
+  char *                   event_db_dump_file;
+  char *                   event_db_plugin;
+#endif /* ENABLE_OSM_PERF_MGR */
 } osm_subn_opt_t;
 /*
 * FIELDS
@@ -468,6 +474,18 @@ typedef struct _osm_subn_opt
 *	sm_inactive
 *		OpenSM will start with SM in not active state.
 *	
+*	perfmgr
+*		Enable or disable the performance manager
+*
+*	perfmgr_sweep_time_s
+*		Define the period of PM sweep (in seconds).
+*
+*       event_db_dump_file
+*               File to dump the event database to
+*
+*       event_db_plugin
+*               specify the name of the event plugin
+*
 *	qos_options
 *		Default set of QoS options
 *
diff --git a/osm/opensm.spec.in b/osm/opensm.spec.in
index c4e1798..8857a7b 100644
--- a/osm/opensm.spec.in
+++ b/osm/opensm.spec.in
@@ -38,10 +38,19 @@ Static libraries and header files for Op
 %define _disable_console_socket --disable-console-socket
 %endif
 
+%if %{?_with_perf_mgr:1}%{!?_with_perf_mgr:0}
+%define _enable_perf_mgr --enable-perf-mgr
+%endif
+%if %{?_without_perf_mgr:1}%{!?_without_perf_mgr:0}
+%define _disable_perf_mgr --disable-perf-mgr
+%endif
+
 %build
 %configure \
         %{?_enable_console_socket} \
-        %{?_disable_console_socket}
+        %{?_disable_console_socket} \
+        %{?_enable_perf_mgr} \
+        %{?_disable_perf_mgr}
 make %{?_smp_mflags}
 
 %install
diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am
index e2520b8..9a1f6f4 100644
--- a/osm/opensm/Makefile.am
+++ b/osm/opensm/Makefile.am
@@ -55,7 +55,8 @@ opensm_SOURCES = main.c osm_console.c os
 		 osm_trap_rcv.c osm_ucast_mgr.c osm_ucast_updn.c \
 		 osm_ucast_lash.c osm_ucast_file.c osm_ucast_ftree.c \
 		 osm_vl15intf.c osm_vl_arb_rcv.c \
-		 st.c
+		 st.c \
+		 osm_perfmgr.c osm_event_db.c
 if OSMV_OPENIB
 opensm_CFLAGS = -Wall $(OSMV_CFLAGS) -fno-strict-aliasing -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1
 opensm_CXXFLAGS = -Wall $(OSMV_CFLAGS) -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) -D_XOPEN_SOURCE=600 -D_BSD_SOURCE=1
@@ -78,7 +79,7 @@ endif
 # we always give precedence to local tree libs and then use the pre-installed ones.
 opensm_LDADD = -L../complib -L../libvendor -L. $(OSMV_LDADD) -lopensm -losmcomp -losmvendor
 
-opensm_LDFLAGS = -Wl,--rpath -Wl,$(libdir) -lpthread
+opensm_LDFLAGS = -Wl,--rpath -Wl,$(libdir) -lpthread -ldl
 
 opensmincludedir = $(includedir)/infiniband/opensm
 
diff --git a/osm/opensm/configure.in b/osm/opensm/configure.in
index ad3333a..9e23719 100644
--- a/osm/opensm/configure.in
+++ b/osm/opensm/configure.in
@@ -78,6 +78,9 @@ if test $console_socket = yes; then
 	    [Define as 1 if you want to enable a console on a socket connection])
 fi
 
+dnl select performance manager or not
+OPENIB_OSM_PERF_MGR_SEL
+
 dnl Provide user option to select vendor
 OPENIB_APP_OSMV_SEL
 
diff --git a/osm/opensm/main.c b/osm/opensm/main.c
index 153e44d..4fa3563 100644
--- a/osm/opensm/main.c
+++ b/osm/opensm/main.c
@@ -59,6 +59,7 @@
 #include <opensm/osm_version.h>
 #include <opensm/osm_opensm.h>
 #include <opensm/osm_console.h>
+#include <opensm/osm_perfmgr.h>
 
 volatile unsigned int osm_exit_flag = 0;
 
@@ -273,6 +274,13 @@ show_usage(void)
   printf("-I\n"
          "--inactive\n"
          "           Start SM in inactive rather than normal init SM state.\n\n");
+#ifdef ENABLE_OSM_PERF_MGR
+  printf( "--pm\n"
+          "          Activate the performance manager.\n\n");
+  printf( "--pm_sweep_time_s\n"
+          "          Define the period for PerfMgr sweeps (in seconds) default %ds.\n\n",
+	  OSM_PERFMGR_DEFAULT_SWEEP_TIME_S);
+#endif /* ENABLE_OSM_PERF_MGR */
   printf( "-v\n"
           "--verbose\n"
           "          This option increases the log verbosity level.\n"
@@ -630,6 +638,8 @@ main(
 #endif
       {  "daemon",        0, NULL, 'B'},
       {  "inactive",      0, NULL, 'I'},
+      {  "pm",            0, NULL, 1}, /* no short options for PM stuff */
+      {  "pm_sweep_time_s", 1, NULL, 2},
       {  NULL,            0, NULL,  0 }  /* Required at the end of the array */
     };
 
@@ -907,6 +917,15 @@ main(
       printf(" SM started in inactive state\n");
       break;
 
+#ifdef ENABLE_OSM_PERF_MGR
+    case 1:
+      opt.perfmgr = TRUE;
+      break;
+    case 2:
+      opt.perfmgr_sweep_time_s = atoi(optarg);
+      break;
+#endif /* ENABLE_OSM_PERF_MGR */
+
     case 'h':
     case '?':
     case ':':
diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c
index 38b978a..d6c30d8 100644
--- a/osm/opensm/osm_console.c
+++ b/osm/opensm/osm_console.c
@@ -52,6 +52,7 @@
 #include <ctype.h>
 #include <opensm/osm_console.h>
 #include <opensm/osm_version.h>
+#include <opensm/osm_perfmgr.h>
 
 struct command {
 	char *name;
@@ -136,6 +137,20 @@ static void help_logflush(FILE *out, int
 	fprintf(out, "logflush -- flush the osm.log file\n");
 }
 
+#ifdef ENABLE_OSM_PERF_MGR
+static void help_perfmgr(FILE *out, int detail)
+{
+	fprintf(out, "perfmgr [enable|disable|clear_counters|dump_counters|sweep_time][seconds]\n");
+	if (detail) {
+		fprintf(out, "perfmgr -- print the performance manager state\n");
+		fprintf(out, "   [enable|disable] -- change the perfmgr state\n");
+		fprintf(out, "   [sweep_time] -- change the perfmgr sweep time (requires [seconds] option)\n");
+		fprintf(out, "   [clear_counters] -- clear the counters stored\n");
+		fprintf(out, "   [dump_counters [mach]] -- dump the counters\n");
+	}
+}
+#endif /* ENABLE_OSM_PERF_MGR */
+
 /* more help routines go here */
 
 static void help_parse(char **p_last, osm_opensm_t *p_osm, FILE *out)
@@ -427,6 +442,66 @@ static void logflush_parse(char **p_last
 	fflush(p_osm->log.out_port);
 }
 
+#ifdef ENABLE_OSM_PERF_MGR
+/* Console "perfmgr" command.  With no argument print the manager status;
+ * otherwise apply the sub-command named by the next token on the line. */
+static void perfmgr_parse(char **p_last, osm_opensm_t *p_osm, FILE *out)
+{
+	osm_perfmgr_t *p_pm = &(p_osm->perfmgr);
+	char *p_cmd = next_token(p_last);
+
+	if (!p_cmd) {
+		fprintf(out, "Performance Manager status:\n"
+			     "state      : %s\n"
+		             "sweep time : %us\n"
+		        ,
+			osm_perfmgr_get_state_str(p_pm),
+			osm_perfmgr_get_sweep_time_s(p_pm)
+			);
+		return;
+	}
+
+	if (strcmp(p_cmd, "enable") == 0) {
+		osm_perfmgr_set_state(p_pm, PERFMGR_STATE_ENABLED);
+	} else if (strcmp(p_cmd, "disable") == 0) {
+		osm_perfmgr_set_state(p_pm, PERFMGR_STATE_DISABLE);
+	} else if (strcmp(p_cmd, "clear_counters") == 0) {
+		osm_perfmgr_clear_counters(p_pm);
+	} else if (strcmp(p_cmd, "dump_counters") == 0) {
+		/* an optional "mach" token selects machine readable output */
+		p_cmd = next_token(p_last);
+		if (p_cmd && (strcmp(p_cmd, "mach") == 0))
+			osm_perfmgr_dump_counters(p_pm, OSM_EVENT_DB_DUMP_MR);
+		else
+			osm_perfmgr_dump_counters(p_pm, OSM_EVENT_DB_DUMP_HR);
+	} else if (strcmp(p_cmd, "sweep_time") == 0) {
+		char *p_end = NULL;
+		long secs;
+
+		p_cmd = next_token(p_last);
+		if (!p_cmd) {
+			fprintf(out, "sweep_time requires a time specified\n");
+			return;
+		}
+		/*
+		 * atoi() turned non-numeric input into 0 and let negative or
+		 * >65535 values wrap when narrowed to uint16_t.  Parse strictly
+		 * and refuse anything sweep_time_s cannot represent.
+		 */
+		secs = strtol(p_cmd, &p_end, 10);
+		if (p_end == p_cmd || *p_end != '\0' ||
+		    secs < 0 || secs > 65535) {
+			fprintf(out, "invalid sweep_time \"%s\" (expected 0-65535)\n",
+				p_cmd);
+			return;
+		}
+		osm_perfmgr_set_sweep_time_s(p_pm, (uint16_t)secs);
+	} else {
+		fprintf(out, "\"%s\" option not found\n", p_cmd);
+	}
+}
+#endif /* ENABLE_OSM_PERF_MGR */
+
 /* This is public to be able to close it on exit */
 void osm_console_close_socket(osm_opensm_t *p_osm)
 {
@@ -456,6 +531,9 @@ static const struct command console_cmds
 	{ "resweep",	&help_resweep,		&resweep_parse},
 	{ "status",	&help_status,		&status_parse},
 	{ "logflush",	&help_logflush,		&logflush_parse},
+#ifdef ENABLE_OSM_PERF_MGR
+	{ "perfmgr",	&help_perfmgr,		&perfmgr_parse},
+#endif /* ENABLE_OSM_PERF_MGR */
 	{ NULL,		NULL,			NULL}	/* end of array */
 };
 
diff --git a/osm/opensm/osm_event_db.c b/osm/opensm/osm_event_db.c
new file mode 100644
index 0000000..90ca8da
--- /dev/null
+++ b/osm/opensm/osm_event_db.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2007 The Regents of the University of California.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <dlfcn.h>
+#include <sys/stat.h>
+
+#include <opensm/osm_event_db.h>
+
+/** Load the event database plugin "lib<type>.so" and construct its state.
+ *  Returns a new handle, or NULL if type is NULL or any step fails. */
+osm_event_db_t *
+osm_event_db_construct(osm_log_t *p_log, char *type)
+{
+	char            lib_name[PATH_MAX];
+	osm_event_db_t *rc = NULL;
+
+	if (!type)
+		return (NULL);
+
+	/* build the bare plugin file name; no directory is prepended here */
+	snprintf(lib_name, PATH_MAX, "lib%s.so", type);
+
+	rc = malloc(sizeof(*rc));
+	if (!rc)
+		return (NULL);
+
+	rc->handle = dlopen(lib_name, RTLD_LAZY);
+	if (!rc->handle)
+	{
+		osm_log(p_log, OSM_LOG_ERROR,
+			"Failed to open PM Database \"%s\" : \"%s\"\n",
+			lib_name, dlerror());
+		goto DLOPENFAIL;
+	}
+
+	rc->db_impl = (__osm_event_db_t *)dlsym(rc->handle, "__osm_event_db");
+	if (!rc->db_impl)
+	{
+		osm_log(p_log, OSM_LOG_ERROR,
+			"Failed to find __osm_event_db symbol in \"%s\" : \"%s\"\n",
+			lib_name, dlerror());
+		goto Exit;
+	}
+
+	/* Check the version to make sure this module will work with us */
+	if (rc->db_impl->interface_version != OSM_EVENT_DB_INTERFACE_VER)
+	{
+		osm_log(p_log, OSM_LOG_ERROR,
+			"__osm_event_db symbol is the wrong version %d != %d\n",
+			rc->db_impl->interface_version,
+			OSM_EVENT_DB_INTERFACE_VER);
+		goto Exit;
+	}
+
+	rc->db_data = rc->db_impl->construct(p_log);
+
+	if (!rc->db_data)
+		goto Exit;
+
+	rc->p_log = p_log;
+	return (rc);
+
+Exit:	/* the library was loaded: unload it before freeing the wrapper */
+	dlclose(rc->handle);
+DLOPENFAIL:	/* nothing was loaded: only the wrapper needs freeing */
+	free(rc);
+	return (NULL);
+}
+
+/** Destroy the plugin's state, unload the plugin library (it was dlopen()ed
+ *  by osm_event_db_construct) and free the wrapper.  A NULL db is ignored. */
+void
+osm_event_db_destroy(osm_event_db_t *db)
+{
+	if (!db)
+		return;
+	db->db_impl->destroy(db->db_data);
+	dlclose(db->handle);	/* db_impl points into the library: unload last */
+	free(db);
+}
+
+/** =========================================================================
+ */
+osm_event_db_err_t
+osm_event_db_create_entry(osm_event_db_t *db, uint64_t guid, uint8_t num_ports)
+{
+	return(db->db_impl->create_entry(db->db_data, guid, num_ports));
+}
+
+/**********************************************************************
+ **********************************************************************/
+osm_event_db_err_t osm_event_db_get_prev_pc(osm_event_db_t *db, uint64_t guid,
+		uint8_t port, osm_pc_reading_t *reading)
+{
+	return (db->db_impl->get_prev_pc(db->db_data, guid, port, reading));
+}
+
+/**********************************************************************
+ * dump the data to the file "file"
+ **********************************************************************/
+osm_event_db_err_t
+osm_event_db_dump(osm_event_db_t *db, char *file, osm_event_db_dump_t dump_type)
+{
+	return (db->db_impl->dump(db->db_data, file, dump_type));
+}
+
+/**********************************************************************
+ * Clear the port counters from the db
+ **********************************************************************/
+void osm_event_db_clear_port_counters(osm_event_db_t *db)
+{
+	db->db_impl->clear_port_counters(db->db_data);
+}
+
+/**********************************************************************
+ * Add the reading to the osm_pm_node_t
+ **********************************************************************/
+osm_event_db_err_t
+osm_event_db_add_pc_reading(osm_event_db_t *db, uint64_t guid,
+                   uint8_t port, ib_port_counters_t *reading)
+{
+	return (db->db_impl->add_pc_reading(db->db_data, guid,
+				port, reading));
+}
+
+/**********************************************************************
+ * Clear the stored previous port counter reading for guid/port
+ **********************************************************************/
+osm_event_db_err_t
+osm_event_db_clear_prev_pc(osm_event_db_t *db, uint64_t guid, uint8_t port)
+{
+	return (db->db_impl->clear_prev_pc(db->db_data, guid, port));
+}
+
diff --git a/osm/opensm/osm_opensm.c b/osm/opensm/osm_opensm.c
index 8430605..fa572c5 100644
--- a/osm/opensm/osm_opensm.c
+++ b/osm/opensm/osm_opensm.c
@@ -172,6 +172,9 @@ osm_opensm_destroy(
      p_osm->routing_engine.delete(p_osm->routing_engine.context);
    osm_sa_destroy( &p_osm->sa );
    osm_sm_destroy( &p_osm->sm );
+#ifdef ENABLE_OSM_PERF_MGR
+   osm_perfmgr_destroy( &p_osm->perfmgr );
+#endif /* ENABLE_OSM_PERF_MGR */
    osm_db_destroy( &p_osm->db );
    osm_vl15_destroy( &p_osm->vl15, &p_osm->mad_pool );
    osm_mad_pool_destroy( &p_osm->mad_pool );
@@ -286,6 +289,21 @@ osm_opensm_init(
    if( status != IB_SUCCESS )
       goto Exit;
 
+#ifdef ENABLE_OSM_PERF_MGR
+   status = osm_perfmgr_init( &p_osm->perfmgr,
+                         &p_osm->subn,
+			 &p_osm->sm,
+                         &p_osm->log,
+			 &p_osm->mad_pool,
+			 p_osm->p_vendor,
+			 &p_osm->disp,
+			 &p_osm->lock,
+			 p_opt);
+
+   if( status != IB_SUCCESS )
+      goto Exit;
+#endif /* ENABLE_OSM_PERF_MGR */
+
    if( p_opt->routing_engine_name &&
        setup_routing_engine(p_osm, p_opt->routing_engine_name)) {
       osm_log( &p_osm->log, OSM_LOG_VERBOSE,
@@ -319,6 +337,12 @@ osm_opensm_bind(
    if( status != IB_SUCCESS )
       goto Exit;
 
+#ifdef ENABLE_OSM_PERF_MGR
+   status = osm_perfmgr_bind( &p_osm->perfmgr, guid );
+   if( status != IB_SUCCESS )
+      goto Exit;
+#endif /* ENABLE_OSM_PERF_MGR */
+
  Exit:
    OSM_LOG_EXIT( &p_osm->log );
    return ( status );
diff --git a/osm/opensm/osm_perfmgr.c b/osm/opensm/osm_perfmgr.c
new file mode 100644
index 0000000..297a0e2
--- /dev/null
+++ b/osm/opensm/osm_perfmgr.c
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) 2007 The Regents of the University of California.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+/*
+ * Abstract:
+ *    Implementation of osm_perfmgr_t.
+ *
+ * Author:
+ *    Ira Weiny, LLNL
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef ENABLE_OSM_PERF_MGR
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <poll.h>
+#include <netinet/in.h>
+#include <complib/cl_debug.h>
+#include <iba/ib_types.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <opensm/osm_perfmgr.h>
+#include <opensm/osm_log.h>
+#include <opensm/osm_node.h>
+#include <complib/cl_thread.h>
+#include <vendor/osm_vendor_api.h>
+
+#define  OSM_PERFMGR_INITIAL_TID_VALUE 0xcafe
+
+/**********************************************************************
+ * Receive the MAD from the vendor layer and post it for processing by the
+ * dispatcher.
+ *
+ * p_madw       - the received MAD wrapper
+ * bind_context - the osm_perfmgr_t that was passed to osm_vendor_bind()
+ * p_req_madw   - the request MAD wrapper; its context is copied onto
+ *                p_madw and it is then returned to the MAD pool
+ *
+ * If the dispatcher refuses the message, p_madw is returned to the MAD
+ * pool here: no handler will ever run to release it otherwise.
+ **********************************************************************/
+static void
+osm_perfmgr_mad_recv_callback(osm_madw_t *p_madw, void* bind_context,
+				osm_madw_t *p_req_madw )
+{
+	osm_perfmgr_t      *pm = (osm_perfmgr_t *)bind_context;
+	cl_status_t         cl_status = CL_SUCCESS;
+
+	OSM_LOG_ENTER( pm->log, osm_pm_mad_recv_callback );
+
+	osm_madw_copy_context( p_madw, p_req_madw );
+	osm_mad_pool_put( pm->mad_pool, p_req_madw );
+
+	/* post this message for later processing. */
+	cl_status = cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS,
+				(void *)p_madw, NULL, NULL);
+	if (cl_status != CL_SUCCESS) {
+		/* nobody will consume the message: release the MAD ourselves */
+		osm_log(pm->log, OSM_LOG_ERROR,
+			"osm_pm_mad_recv_callback: "
+			"Dispatcher post message failed (status %d)\n",
+			(int)cl_status);
+		osm_mad_pool_put( pm->mad_pool, p_madw );
+		goto Exit;
+	}
+#if 0
+	do {
+		struct timeval      rcv_time;
+		gettimeofday(&rcv_time, NULL);
+		osm_log(pm->log, OSM_LOG_INFO,
+			"perfmgr rcv time %ld\n",
+			rcv_time.tv_usec -
+			p_madw->context.perfmgr_context.query_start.tv_usec);
+	} while (0);
+#endif
+Exit:
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Process errors from the MAD send.
+ *
+ * Logs the node GUID and port recorded in the failed MAD's perfmgr
+ * context, then returns the MAD wrapper to the pool.  bind_context is
+ * the osm_perfmgr_t that was passed to osm_vendor_bind().
+ **********************************************************************/
+static void
+osm_perfmgr_mad_send_err_callback(void* bind_context, osm_madw_t *p_madw)
+{
+	osm_perfmgr_t *pm = (osm_perfmgr_t *)bind_context;
+
+	OSM_LOG_ENTER( pm->log, osm_pm_mad_send_err_callback );
+
+	osm_log( pm->log, OSM_LOG_ERROR,
+	         "osm_pm_mad_send_err_callback: 0x%" PRIx64 " port %d\n",
+	         p_madw->context.perfmgr_context.node_guid,
+	         p_madw->context.perfmgr_context.port );
+
+	osm_mad_pool_put( pm->mad_pool, p_madw );
+
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Bind the PM to the vendor layer for MAD sends/receives.
+ *
+ * Registers for the performance management class on port_guid, with
+ * osm_perfmgr_mad_recv_callback / osm_perfmgr_mad_send_err_callback as
+ * the receive and send-error handlers.  Only one bind may be active:
+ * a call made while pm->bind_handle is already valid is rejected.
+ *
+ * Returns IB_SUCCESS on success, IB_ERROR if already bound or if the
+ * vendor bind fails.
+ **********************************************************************/
+ib_api_status_t
+osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid)
+{
+	osm_bind_info_t bind_info;
+	ib_api_status_t status = IB_SUCCESS;
+
+	OSM_LOG_ENTER( pm->log, osm_pm_bind );
+
+	if( pm->bind_handle == OSM_BIND_INVALID_HANDLE ) {
+		/* describe the class we want to talk */
+		bind_info.port_guid = port_guid;
+		bind_info.mad_class = IB_MCLASS_PERF;
+		bind_info.class_version = 1;
+		bind_info.is_responder = FALSE;
+		bind_info.is_report_processor = FALSE;
+		bind_info.is_trap_processor = FALSE;
+		bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE;
+		bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE;
+
+		osm_log( pm->log, OSM_LOG_VERBOSE,
+		         "osm_pm_mad_bind: "
+		         "Binding to port GUID 0x%" PRIx64 "\n",
+		         cl_ntoh64( port_guid ) );
+
+		pm->bind_handle = osm_vendor_bind( pm->vendor, &bind_info,
+		                                   pm->mad_pool,
+		                                   osm_perfmgr_mad_recv_callback,
+		                                   osm_perfmgr_mad_send_err_callback,
+		                                   pm );
+
+		if( pm->bind_handle == OSM_BIND_INVALID_HANDLE ) {
+			status = IB_ERROR;
+			osm_log( pm->log, OSM_LOG_ERROR,
+			         "osm_pm_mad_bind: Vendor specific bind failed (%s)\n",
+			         ib_get_err_str(status) );
+		}
+	} else {
+		osm_log( pm->log, OSM_LOG_ERROR,
+		         "osm_pm_mad_ctrl_bind: Multiple binds not allowed\n" );
+		status = IB_ERROR;
+	}
+
+ 	OSM_LOG_EXIT( pm->log );
+	return( status );
+}
+
+/**********************************************************************
+ * Unbind the PM from the vendor layer for MAD sends/receives.
+ *
+ * Logs an error and does nothing if there is no active bind.  After a
+ * successful unbind the stored handle is reset to
+ * OSM_BIND_INVALID_HANDLE, so that a repeated unbind is caught by the
+ * check above instead of releasing a stale handle, and so that
+ * osm_perfmgr_bind() accepts a later re-bind.
+ **********************************************************************/
+void
+osm_perfmgr_mad_unbind(osm_perfmgr_t * const pm)
+{
+	OSM_LOG_ENTER( pm->log, osm_perfmgr_mad_unbind );
+	if( pm->bind_handle == OSM_BIND_INVALID_HANDLE ) {
+		osm_log( pm->log, OSM_LOG_ERROR,
+		         "osm_pm_mad_unbind: No previous bind\n" );
+		goto Exit;
+	}
+	osm_vendor_unbind( pm->bind_handle );
+	/* the handle is dead now; do not leave it looking valid */
+	pm->bind_handle = OSM_BIND_INVALID_HANDLE;
+Exit:
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Given a node and a port return the appropriate lid to query that port.
+ *
+ * Switches are addressed through the base lid of port 0; CAs and
+ * routers through the base lid of the port itself.  Any other node type
+ * yields 0.
+ **********************************************************************/
+static ib_net16_t
+get_lid(osm_node_t *p_node, uint8_t port)
+{
+	const int type = p_node->node_info.node_type;
+
+	if (type == IB_NODE_TYPE_SWITCH)
+		return (osm_node_get_base_lid(p_node, 0));
+
+	if (type == IB_NODE_TYPE_CA || type == IB_NODE_TYPE_ROUTER)
+		return (osm_node_get_base_lid(p_node, port));
+
+	/* unknown node type: no lid to report */
+	return (0);
+}
+
+/**********************************************************************
+ * Form the Port Counter MAD and send the MAD for a single port.
+ *
+ * perfmgr    - PerfMgr object supplying the MAD pool, bind handle and
+ *              transaction id counter
+ * dest_lid   - destination lid, stored into the MAD address as given
+ * port       - value placed in PortSelect
+ * mad_method - MAD method to place in the header (callers in this file
+ *              use IB_MAD_METHOD_GET and IB_MAD_METHOD_SET)
+ * p_context  - optional context copied into the MAD wrapper
+ *
+ * Returns IB_INSUFFICIENT_MEMORY if no MAD could be taken from the pool,
+ * otherwise the result of osm_vendor_send().  Every path leaves through
+ * the Exit label so the function-exit log entry is always written.
+ **********************************************************************/
+static ib_api_status_t
+osm_perfmgr_send_pc_mad(osm_perfmgr_t *perfmgr, ib_net16_t dest_lid, uint8_t port,
+			uint8_t mad_method, osm_madw_context_t* const p_context )
+{
+	ib_api_status_t     status = IB_SUCCESS;
+	ib_port_counters_t *port_counter = NULL;
+	ib_perfmgr_mad_t   *pm_mad = NULL;
+	osm_madw_t         *p_madw = NULL;
+
+	OSM_LOG_ENTER(perfmgr->log, osm_perfmgr_send_pc_mad);
+
+	p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, MAD_BLOCK_SIZE, NULL);
+	if (p_madw == NULL) {
+		osm_log(perfmgr->log, OSM_LOG_ERROR,
+			"osm_perfmgr_send_pc_mad: "
+			"Unable to get MAD from pool for port %d\n", port);
+		status = IB_INSUFFICIENT_MEMORY;
+		goto Exit;
+	}
+
+	pm_mad = osm_madw_get_perfmgr_mad_ptr(p_madw);
+
+	/* build the mad */
+	pm_mad->header.base_ver = 1;
+	pm_mad->header.mgmt_class = IB_MCLASS_PERF;
+	pm_mad->header.class_ver = 1;
+	pm_mad->header.method = mad_method;
+	pm_mad->header.status = 0;
+	pm_mad->header.class_spec = 0;
+	pm_mad->header.trans_id = cl_hton64((uint64_t)cl_atomic_inc(&(perfmgr->trans_id)));
+	pm_mad->header.attr_id = IB_MAD_ATTR_PORT_CNTRS;
+	pm_mad->header.resv = 0;
+	pm_mad->header.attr_mod = 0;
+
+	/* select every counter of the requested port */
+	port_counter = (ib_port_counters_t *)&(pm_mad->data);
+	memset(port_counter, 0, sizeof(*port_counter));
+	port_counter->port_select = port;
+	port_counter->counter_select = 0xFFFF;
+
+	p_madw->mad_addr.dest_lid = dest_lid;
+	p_madw->mad_addr.addr_type.gsi.remote_qp = cl_hton32(1);
+	p_madw->mad_addr.addr_type.gsi.remote_qkey = cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
+	/* FIXME what about other partitions */
+	p_madw->mad_addr.addr_type.gsi.pkey = cl_hton16(0xFFFF);
+	p_madw->mad_addr.addr_type.gsi.service_level = 0;
+	p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
+	p_madw->resp_expected = TRUE;
+
+	if( p_context )
+		p_madw->context = *p_context;
+
+	status = osm_vendor_send(perfmgr->bind_handle, p_madw, TRUE);
+
+Exit:
+	OSM_LOG_EXIT(perfmgr->log);
+	return( status );
+}
+
+/**********************************************************************
+ * Query the Port Counters of one node of the subnet.
+ *
+ * Called once per entry of the subnet's node_guid_tbl (see the
+ * cl_qmap_apply_func() call in __osm_perfmgr_sweeper).
+ *
+ * p_map_item - the map item, which is the osm_node_t being visited
+ * context    - the osm_perfmgr_t
+ *
+ * Makes sure the event database has an entry for the node, then sends a
+ * PortCounters Get for each port from 1 up to (but excluding) the value
+ * returned by osm_node_get_num_physp().  Ports for which no lid can be
+ * determined are skipped.  All log messages print the node GUID in host
+ * byte order.
+ **********************************************************************/
+static void
+__osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context )
+{
+	ib_api_status_t     status = IB_SUCCESS;
+	uint8_t             port = 0;
+	osm_perfmgr_t      *pm = (osm_perfmgr_t *)context;
+	osm_node_t         *p_node = (osm_node_t *)p_map_item;
+	uint8_t             node_desc[IB_NODE_DESCRIPTION_SIZE];
+	osm_madw_context_t  mad_context;
+	uint8_t             num_ports = 0;
+	uint64_t            node_guid = 0;
+
+	OSM_LOG_ENTER( pm->log, __osm_pm_query_counters );
+
+	/* private, NUL terminated copy of the description for logging */
+	memcpy(node_desc, p_node->node_desc.description,
+			IB_NODE_DESCRIPTION_SIZE);
+	node_desc[IB_NODE_DESCRIPTION_SIZE-1] = '\0';
+
+	num_ports = osm_node_get_num_physp(p_node);
+	node_guid = cl_ntoh64(p_node->node_info.node_guid);
+
+	/* make sure we have a database object ready to store this information */
+	if (osm_event_db_create_entry(pm->db, node_guid, num_ports) !=
+		  OSM_EVENT_DB_SUCCESS)
+	{
+		osm_log(pm->log, OSM_LOG_ERROR,
+			"PerfMgr DB create entry failed for 0x%" PRIx64 " : %s\n",
+			node_guid, strerror(errno));
+		goto Exit;
+	}
+
+	/* issue the queries for each port */
+	for (port = 1; port < num_ports; port++)
+	{
+		ib_net16_t lid = get_lid(p_node, port);
+		if (lid == 0)
+		{
+			osm_log(pm->log, OSM_LOG_DEBUG,
+				"WARN: node 0x%" PRIx64 " port %d (%s): port out of range, skipping\n",
+				node_guid, port, node_desc);
+			continue;
+		}
+
+		mad_context.perfmgr_context.node_guid = node_guid;
+		mad_context.perfmgr_context.port = port;
+		mad_context.perfmgr_context.num_ports = num_ports;
+		mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
+#if 0
+		gettimeofday(&(mad_context.perfmgr_context.query_start), NULL);
+#endif
+		osm_log(pm->log, OSM_LOG_VERBOSE,
+				"   Getting stats for node 0x%" PRIx64 " port %d (lid %X) (%s)\n",
+				node_guid, port, cl_ntoh16(lid), node_desc);
+		status = osm_perfmgr_send_pc_mad(pm, lid, port, IB_MAD_METHOD_GET, &mad_context);
+		if (status != IB_SUCCESS)
+		{
+			/* host-order GUID, consistent with the messages above */
+			osm_log(pm->log, OSM_LOG_ERROR,
+				"Failed to issue port counter query for node 0x%" PRIx64 " port %d (%s)\n",
+				node_guid, port, node_desc);
+		}
+	}
+Exit:
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Main PerfMgr Thread.
+ * Loop continuously and query the performance counters.
+ *
+ * p_ptr is the osm_perfmgr_t.  While thread_state is
+ * OSM_THREAD_STATE_RUN, each iteration walks the subnet's node table
+ * (under pm->lock) when this SM is MASTER and the PerfMgr is enabled,
+ * then waits on sig_sweep for the configured sweep period.
+ *
+ * The wait time is computed in 64 bits: sweep_time_s * 1000000 does not
+ * fit in an int for periods above roughly 2147 seconds.  The result is
+ * capped just below UINT32_MAX, which is assumed to be complib's
+ * "no timeout" value (EVENT_NO_TIMEOUT) -- confirm against cl_event.h.
+ **********************************************************************/
+void
+__osm_perfmgr_sweeper(void *p_ptr)
+{
+	osm_perfmgr_t *const pm = ( osm_perfmgr_t * ) p_ptr;
+	uint64_t wait_us;
+
+	OSM_LOG_ENTER( pm->log, __osm_pm_sweeper );
+
+	if( pm->thread_state == OSM_THREAD_STATE_INIT )
+		pm->thread_state = OSM_THREAD_STATE_RUN;
+
+	while( pm->thread_state == OSM_THREAD_STATE_RUN ) {
+		/*  do the sweep only if we are in MASTER state
+		 *  AND we have been activated.
+		 *  FIXME put something in here to try and reduce the load on the system
+		 *  when it is not IDLE.
+		if (pm->sm->state_mgr.state != OSM_SM_STATE_IDLE)
+		 */
+		if( pm->subn->sm_state == IB_SMINFO_STATE_MASTER
+		    && pm->state == PERFMGR_STATE_ENABLED) {
+#if 0
+			struct timeval before, after;
+			gettimeofday(&before, NULL);
+#endif
+			/* for each node query their counters */
+			cl_plock_acquire(pm->lock);
+			osm_log(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n");
+			cl_qmap_apply_func(&(pm->subn->node_guid_tbl),
+				  __osm_perfmgr_query_counters, (void *)pm);
+			cl_plock_release(pm->lock);
+#if 0
+			gettimeofday(&after, NULL);
+			osm_log(pm->log, OSM_LOG_INFO,
+				"total sweep time : %ld us\n", after.tv_usec - before.tv_usec);
+#endif
+		}
+
+		/* Wait for a forced sweep or period timeout. */
+		wait_us = (uint64_t)pm->sweep_time_s * 1000000;
+		if (wait_us >= UINT32_MAX)
+			wait_us = UINT32_MAX - 1;
+		/* timeout and signal are handled the same way: loop again */
+		(void)cl_event_wait_on( &pm->sig_sweep, (uint32_t)wait_us, TRUE );
+	}
+
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Shut down the PerfMgr.
+ *
+ * Currently this only releases the vendor-layer bind through
+ * osm_perfmgr_mad_unbind().
+ * NOTE(review): the sweeper thread started by osm_perfmgr_init() is not
+ * signalled or stopped here -- confirm whether that is intended.
+ **********************************************************************/
+void
+osm_perfmgr_shutdown(osm_perfmgr_t * const pm)
+{
+	OSM_LOG_ENTER( pm->log, osm_perfmgr_shutdown );
+	osm_perfmgr_mad_unbind(pm);
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Release the resources owned by the PerfMgr object.
+ *
+ * Frees the two strings duplicated in osm_perfmgr_init()
+ * (event_db_dump_file, event_db_plugin) and destroys the event database.
+ * NOTE(review): pm->db may be NULL when osm_event_db_construct() failed
+ * in osm_perfmgr_init(); whether osm_event_db_destroy() tolerates that
+ * is not visible here -- confirm.
+ * NOTE(review): the sweeper thread and the sig_sweep event created in
+ * osm_perfmgr_init() are not torn down here -- confirm.
+ **********************************************************************/
+void
+osm_perfmgr_destroy(osm_perfmgr_t * const pm)
+{
+	OSM_LOG_ENTER( pm->log, osm_perfmgr_destroy );
+	free(pm->event_db_dump_file);
+	free(pm->event_db_plugin);
+	osm_event_db_destroy(pm->db);
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Return 1 if the value has overflowed, 0 otherwise.
+ *
+ * "Overflowed" means the counter has reached a threshold close to its
+ * maximum: 10 for the 4 bit counters, and MAX - MAX/4 for the 8, 16 and
+ * 32 bit ones.  The 16 and 32 bit values are converted with
+ * cl_ntoh16()/cl_ntoh32() before being compared.
+ **********************************************************************/
+int counter_overflow_4(uint8_t val)
+{
+	const uint8_t limit = 10;
+
+	return ((val >= limit) ? 1 : 0);
+}
+int counter_overflow_8(uint8_t val)
+{
+	const uint8_t limit = UINT8_MAX - (UINT8_MAX/4);
+
+	return ((val >= limit) ? 1 : 0);
+}
+int counter_overflow_16(uint16_t val)
+{
+	const uint16_t limit = UINT16_MAX - (UINT16_MAX/4);
+
+	return ((cl_ntoh16(val) >= limit) ? 1 : 0);
+}
+int counter_overflow_32(uint32_t val)
+{
+	const uint32_t limit = UINT32_MAX - (UINT32_MAX/4);
+
+	return ((cl_ntoh32(val) >= limit) ? 1 : 0);
+}
+
+/**********************************************************************
+ * Check if the port counters have overflowed and if so issue a clear MAD to
+ * the port.
+ *
+ * pm        - PerfMgr object
+ * node_guid - node GUID in host byte order (converted with cl_hton64()
+ *             for the node table lookup)
+ * port      - port the reading belongs to
+ * num_ports - carried along in the context of the clear MAD
+ * cr        - the counter reading to examine
+ *
+ * The node is looked up again under pm->lock.  cl_qmap_get() returns the
+ * map's end item when the GUID is not in the table, so that case is
+ * checked before the item is used as a node; it is then reported through
+ * the same "failed to get lid" message as an unresolvable lid.
+ **********************************************************************/
+static void
+osm_perfmgr_check_clear(osm_perfmgr_t *pm, uint64_t node_guid,
+	     uint8_t port, int num_ports, ib_port_counters_t *cr)
+{
+	osm_madw_context_t  mad_context;
+	osm_node_t         *p_node = NULL;
+	ib_net16_t          lid = 0;
+	ib_api_status_t     status = IB_SUCCESS;
+
+	OSM_LOG_ENTER( pm->log, osm_pm_check_clear );
+
+	if (!(counter_overflow_16(cr->symbol_err_cnt)
+		|| counter_overflow_8(cr->link_err_recover)
+		|| counter_overflow_8(cr->link_downed)
+		|| counter_overflow_16(cr->rcv_err)
+		|| counter_overflow_16(cr->rcv_rem_phys_err)
+		|| counter_overflow_16(cr->rcv_switch_relay_err)
+		|| counter_overflow_16(cr->xmit_discards)
+		|| counter_overflow_8(cr->xmit_constraint_err)
+		|| counter_overflow_8(cr->rcv_constraint_err)
+		|| counter_overflow_4(PC_LINK_INT(cr->link_int_buffer_overrun))
+		|| counter_overflow_4(PC_BUF_OVERRUN(cr->link_int_buffer_overrun))
+		|| counter_overflow_16(cr->vl15_dropped)
+		|| counter_overflow_32(cr->xmit_data)
+		|| counter_overflow_32(cr->rcv_data)
+		|| counter_overflow_32(cr->xmit_pkts)
+		|| counter_overflow_32(cr->rcv_pkts)))
+		goto Exit;	/* nothing close to its limit */
+
+	osm_log(pm->log, OSM_LOG_INFO,
+		"Counter overflow: 0x%" PRIx64 " port %d; clearing counters\n",
+		node_guid, port);
+
+	cl_plock_acquire(pm->lock);
+	p_node = (osm_node_t *)cl_qmap_get(&(pm->subn->node_guid_tbl),
+					cl_hton64(node_guid));
+	/* only touch the item if it is a real node, not the end marker */
+	if (p_node != (osm_node_t *)cl_qmap_end(&(pm->subn->node_guid_tbl)))
+		lid = get_lid(p_node, port);
+	cl_plock_release(pm->lock);
+
+	if (lid == 0)
+	{
+		osm_log(pm->log, OSM_LOG_INFO,
+			"Failed to clear counters for node 0x%" PRIx64 " port %d; failed to get lid\n",
+			node_guid, port);
+		goto Exit;
+	}
+
+	mad_context.perfmgr_context.node_guid = node_guid;
+	mad_context.perfmgr_context.port = port;
+	mad_context.perfmgr_context.num_ports = num_ports;
+	mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
+	/* clear port counter */
+	status = osm_perfmgr_send_pc_mad(pm, lid, port, IB_MAD_METHOD_SET, &mad_context);
+	if (status != IB_SUCCESS)
+	{
+		osm_log(pm->log, OSM_LOG_ERROR,
+			"Failed to issue port counter clear for node 0x%" PRIx64 " port %d\n",
+			node_guid, port);
+	}
+Exit:
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Check values for logging of errors.
+ *
+ * Compares the new reading with the reading the event database returns
+ * for (node_guid, port) and logs an error for each of symbol errors,
+ * receive errors and xmit discards that has grown, together with the
+ * number of seconds since the stored reading was taken.
+ *
+ * The counters are converted with cl_ntoh16() BEFORE they are compared:
+ * the subtraction in the messages already works on converted values, and
+ * comparing the unconverted ones gives a different ordering on hosts
+ * where cl_ntoh16() swaps bytes.
+ *
+ * Returns silently (verbose log only) when the database has no reading
+ * to compare against.
+ **********************************************************************/
+static void
+osm_perfmgr_log_events(osm_perfmgr_t *pm, uint64_t node_guid, uint8_t port,
+			ib_port_counters_t *reading)
+{
+	osm_pc_reading_t    prev_read;
+	ib_port_counters_t *prev;
+	unsigned long       time_diff = 0;
+	uint16_t            now_val = 0;
+	uint16_t            old_val = 0;
+	osm_event_db_err_t  err = osm_event_db_get_prev_pc(pm->db, node_guid, port, &prev_read);
+
+	if (err != OSM_EVENT_DB_SUCCESS)
+	{
+		osm_log(pm->log, OSM_LOG_VERBOSE,
+			"failed to find previous reading for 0x%" PRIx64 " port %u\n",
+			node_guid, port);
+		return;
+	}
+	/* cast so the value matches the %lu conversions below */
+	time_diff = (unsigned long)(time(NULL) - prev_read.time);
+	prev = &(prev_read.reading);
+
+	/* FIXME these events should be defineable by the user in a config
+	 * file somewhere. */
+	now_val = cl_ntoh16(reading->symbol_err_cnt);
+	old_val = cl_ntoh16(prev->symbol_err_cnt);
+	if (now_val > old_val) {
+		osm_log(pm->log, OSM_LOG_ERROR,
+			"Found %u Symbol errors in %lu sec on node 0x%" PRIx64 " port %u\n",
+			(unsigned)(now_val - old_val),
+			time_diff,
+			node_guid,
+			port);
+	}
+
+	now_val = cl_ntoh16(reading->rcv_err);
+	old_val = cl_ntoh16(prev->rcv_err);
+	if (now_val > old_val) {
+		osm_log(pm->log, OSM_LOG_ERROR,
+			"Found %u Recieve errors in %lu sec on node 0x%" PRIx64 " port %u\n",
+			(unsigned)(now_val - old_val),
+			time_diff,
+			node_guid,
+			port);
+	}
+
+	now_val = cl_ntoh16(reading->xmit_discards);
+	old_val = cl_ntoh16(prev->xmit_discards);
+	if (now_val > old_val) {
+		osm_log(pm->log, OSM_LOG_ERROR,
+			"Found %u XMIT Discards in %lu sec on node 0x%" PRIx64 " port %u\n",
+			(unsigned)(now_val - old_val),
+			time_diff,
+			node_guid,
+			port);
+	}
+}
+
+
+/**********************************************************************
+ * Dispatcher handler for OSM_MSG_MAD_PORT_COUNTERS messages: called from
+ * the dispatcher when a thread is available to process a MAD received
+ * from the wire.
+ *
+ * context - the osm_perfmgr_t given to cl_disp_register()
+ * data    - the osm_madw_t posted by osm_perfmgr_mad_recv_callback()
+ *
+ * Logs noteworthy counter changes, then either stores the reading (for
+ * a Get) or clears the stored previous reading (otherwise), checks
+ * whether the port's counters need clearing and finally returns the MAD
+ * to the pool.
+ **********************************************************************/
+static void
+osm_pc_rcv_process(void *context, void *data)
+{
+	osm_perfmgr_t      *const pm = (osm_perfmgr_t *)context;
+	osm_madw_t         *p_madw = (osm_madw_t *)data;
+	osm_madw_context_t *ctx = &(p_madw->context);
+	ib_perfmgr_mad_t   *pm_mad = osm_madw_get_perfmgr_mad_ptr(p_madw);
+	ib_port_counters_t *pc = (ib_port_counters_t *)&(pm_mad->data);
+	uint64_t            guid = ctx->perfmgr_context.node_guid;
+	uint8_t             portnum = ctx->perfmgr_context.port;
+	int                 nports = ctx->perfmgr_context.num_ports;
+
+	OSM_LOG_ENTER( pm->log, osm_pc_rcv_process );
+
+	osm_log(pm->log, OSM_LOG_VERBOSE,
+		"Processing recieved MAD context 0x%" PRIx64 " port %u/%d\n",
+		guid, portnum, nports);
+
+	/* log any critical events from this reading */
+	osm_perfmgr_log_events(pm, guid, portnum, pc);
+
+	if (ctx->perfmgr_context.mad_method != IB_MAD_METHOD_GET) {
+		/* anything but a Get: drop the stored previous reading */
+		osm_event_db_clear_prev_pc(pm->db, guid, portnum);
+	} else {
+		osm_event_db_add_pc_reading(pm->db, guid, portnum, pc);
+	}
+	osm_perfmgr_check_clear(pm, guid, portnum, nports, pc);
+
+#if 0
+	do {
+		struct timeval      proc_time;
+		gettimeofday(&proc_time, NULL);
+		osm_log(pm->log, OSM_LOG_INFO,
+			"perfmgr done processing time %ld\n",
+			proc_time.tv_usec -
+			p_madw->context.perfmgr_context.query_start.tv_usec);
+	} while (0);
+#endif
+
+	osm_mad_pool_put( pm->mad_pool, p_madw );
+
+	OSM_LOG_EXIT( pm->log );
+}
+
+/**********************************************************************
+ * Initialize the PERFMGR object.
+ *
+ * Zeroes *pm, stores the supplied subnet/SM/log/pool/vendor/lock
+ * pointers, takes the enable flag, sweep time, dump file and plugin name
+ * from p_opt (the two strings are duplicated), constructs the event
+ * database, registers osm_pc_rcv_process with the dispatcher for
+ * OSM_MSG_MAD_PORT_COUNTERS and starts the sweeper thread.
+ *
+ * Returns:
+ *   IB_SUCCESS                 - everything was set up; also returned when
+ *                                the event database could not be
+ *                                constructed, in which case pm->state is
+ *                                PERFMGR_STATE_NO_DB and neither the
+ *                                dispatcher handler nor the thread exist
+ *                                (NOTE(review): kept as found -- confirm
+ *                                this is the intended "run without
+ *                                PerfMgr" path)
+ *   IB_INSUFFICIENT_RESOURCES  - dispatcher registration failed
+ *   IB_ERROR                   - the sweeper thread could not be started
+ **********************************************************************/
+ib_api_status_t
+osm_perfmgr_init(
+	osm_perfmgr_t * const pm,
+	osm_subn_t * const subn,
+	osm_sm_t * const sm,
+	osm_log_t * const log,
+	osm_mad_pool_t * const mad_pool,
+	osm_vendor_t * const vendor,
+	cl_dispatcher_t* const disp,
+	cl_plock_t* const lock,
+	const osm_subn_opt_t * const p_opt )
+{
+	ib_api_status_t    status = IB_SUCCESS;
+	cl_status_t        cl_status = CL_SUCCESS;
+
+	OSM_LOG_ENTER( log, osm_pm_init );
+
+	osm_log(log, OSM_LOG_VERBOSE, "initing PM\n");
+
+	memset( pm, 0, sizeof( *pm ) );
+
+	cl_event_construct(&pm->sig_sweep);
+	cl_event_init(&pm->sig_sweep, FALSE);
+	pm->subn = subn;
+	pm->sm = sm;
+	pm->log = log;
+	pm->mad_pool = mad_pool;
+	pm->vendor = vendor;
+	pm->trans_id = OSM_PERFMGR_INITIAL_TID_VALUE;
+	pm->lock = lock;
+	pm->state = p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE;
+	pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
+	pm->event_db_dump_file = strdup(p_opt->event_db_dump_file);
+	pm->event_db_plugin = strdup(p_opt->event_db_plugin);
+
+	pm->db = osm_event_db_construct(pm->log, pm->event_db_plugin);
+	if (!pm->db)
+	{
+		pm->state = PERFMGR_STATE_NO_DB;
+		goto Exit;
+	}
+
+	pm->pc_disp_h = cl_disp_register(disp, OSM_MSG_MAD_PORT_COUNTERS,
+	                              osm_pc_rcv_process, pm);
+	if( pm->pc_disp_h == CL_DISP_INVALID_HANDLE )
+	{
+		/* do not report success when no handler could be registered */
+		osm_log(log, OSM_LOG_ERROR,
+			"osm_pm_init: Dispatcher registration failed\n");
+		status = IB_INSUFFICIENT_RESOURCES;
+		goto Exit;
+	}
+
+	pm->thread_state = OSM_THREAD_STATE_INIT;
+	cl_status = cl_thread_init( &pm->sweeper, __osm_perfmgr_sweeper, pm,
+	                       "PerfMgr sweeper" );
+	if( cl_status != CL_SUCCESS )
+	{
+		osm_log(log, OSM_LOG_ERROR,
+			"osm_pm_init: Failed to start PerfMgr sweeper thread (status %d)\n",
+			(int)cl_status);
+		status = IB_ERROR;
+	}
+
+Exit:
+	OSM_LOG_EXIT( log );
+	return ( status );
+}
+
+/**********************************************************************
+ * Clear the counters from the db.
+ *
+ * Only the counters held in the event database (pm->db) are cleared; no
+ * clear request is sent to the ports on the fabric (see the FIXME
+ * below).  An informational log line records the operation.
+ **********************************************************************/
+void
+osm_perfmgr_clear_counters(osm_perfmgr_t *pm)
+{
+	/**
+	 * FIXME todo issue clear on the fabric?
+	 */
+	osm_event_db_clear_port_counters(pm->db);
+  	osm_log( pm->log, OSM_LOG_INFO, "PerfMgr counters cleared\n");
+}
+
+/*******************************************************************
+ * Have the DB dump its information to the file specified.
+ *
+ * The target is pm->event_db_dump_file; dump_type is passed through to
+ * the event database.  A failure is logged (newline terminated, like
+ * the other messages in this file) together with strerror(errno);
+ * nothing is returned to the caller.
+ *******************************************************************/
+void
+osm_perfmgr_dump_counters(osm_perfmgr_t *pm, osm_event_db_dump_t dump_type)
+{
+	osm_event_db_err_t err;
+
+	err = osm_event_db_dump(pm->db, pm->event_db_dump_file, dump_type);
+	if (err != OSM_EVENT_DB_SUCCESS)
+	{
+		osm_log( pm->log, OSM_LOG_ERROR,
+			"PerfMgr dump port counters: Failed to dump to file %s : %s\n",
+			pm->event_db_dump_file, strerror(errno));
+	}
+}
+
+#if 0
+/*******************************************************************
+ * Use this later to track events on the fabric.
+ *
+ * Currently compiled out (enclosed in #if 0).  As written it only logs
+ * an informational message when the notice is generic (bit 0x80 of
+ * generic_type set) and its trap number is 128, and it always returns
+ * IB_SUCCESS.  The subn argument is not used.
+ **********************************************************************/
+ib_api_status_t
+osm_report_notice_to_perfmgr(osm_log_t* const log, osm_subn_t*  subn,
+  			ib_mad_notice_attr_t *p_ntc )
+{
+  OSM_LOG_ENTER( log, osm_report_trap_to_pm );
+  if ((p_ntc->generic_type & 0x80)
+	  && (cl_ntoh16(p_ntc->g_or_v.generic.trap_num) == 128)) {
+	  osm_log( log, OSM_LOG_INFO, "PerfMgr notified of trap 128\n");
+  }
+  OSM_LOG_EXIT( log );
+  return (IB_SUCCESS);
+}
+#endif
+
+#endif /* ENABLE_OSM_PERF_MGR */
+
diff --git a/osm/opensm/osm_subnet.c b/osm/opensm/osm_subnet.c
index c8c3ddc..77c19a5 100644
--- a/osm/opensm/osm_subnet.c
+++ b/osm/opensm/osm_subnet.c
@@ -66,6 +66,7 @@
 #include <opensm/osm_multicast.h>
 #include <opensm/osm_inform.h>
 #include <opensm/osm_console.h>
+#include <opensm/osm_perfmgr.h>
 
 #if defined(PATH_MAX)
 #define OSM_PATH_MAX	(PATH_MAX + 1)
@@ -471,6 +472,12 @@ osm_subn_set_default_opt(
   p_opt->honor_guid2lid_file = FALSE;
   p_opt->daemon = FALSE;
   p_opt->sm_inactive = FALSE;
+#ifdef ENABLE_OSM_PERF_MGR
+  p_opt->perfmgr = FALSE;
+  p_opt->perfmgr_sweep_time_s = OSM_PERFMGR_DEFAULT_SWEEP_TIME_S;
+  p_opt->event_db_dump_file = OSM_PERFMGR_DEFAULT_DUMP_FILE;
+  p_opt->event_db_plugin = OSM_DEFAULT_EVENT_PLUGIN;
+#endif /* ENABLE_OSM_PERF_MGR */
 
   p_opt->dump_files_dir = getenv("OSM_TMP_DIR");
   if (!p_opt->dump_files_dir || !(*p_opt->dump_files_dir))
@@ -1076,6 +1083,24 @@ osm_subn_parse_conf_file(
         "sm_inactive",
         p_key, p_val, &p_opts->sm_inactive);
 
+#ifdef ENABLE_OSM_PERF_MGR
+      __osm_subn_opts_unpack_boolean(
+        "perfmgr",
+        p_key, p_val, &p_opts->perfmgr);
+
+      __osm_subn_opts_unpack_uint16(
+        "perfmgr_sweep_time_s",
+        p_key, p_val, &p_opts->perfmgr_sweep_time_s);
+
+      __osm_subn_opts_unpack_charp(
+        "event_db_dump_file",
+        p_key, p_val, &p_opts->event_db_dump_file);
+
+      __osm_subn_opts_unpack_charp(
+        "event_db_plugin",
+        p_key, p_val, &p_opts->event_db_plugin);
+#endif /* ENABLE_OSM_PERF_MGR */
+
       subn_parse_qos_options("qos",
         p_key, p_val, &p_opts->qos_options);
 
@@ -1321,6 +1346,32 @@ osm_subn_write_conf_file(
     p_opts->sm_inactive ? "TRUE" : "FALSE"
     );
 
+#ifdef ENABLE_OSM_PERF_MGR
+  fprintf(
+    opts_file,
+    "#\n# Performance Manager Options\n#\n"
+    "# perfmgr enable\n"
+    "perfmgr %s\n\n"
+    "# sweep time in seconds\n"
+    "perfmgr_sweep_time_s %d\n\n"
+    ,
+    p_opts->perfmgr ? "TRUE" : "FALSE",
+    p_opts->perfmgr_sweep_time_s
+    );
+
+  fprintf(
+    opts_file,
+    "#\n# Event DB Options\n#\n"
+    "# Dump file to dump the events to\n"
+    "event_db_dump_file %s\n\n"
+    "# Event db plugin\n"
+    "event_db_plugin %s\n\n"
+    ,
+    p_opts->event_db_dump_file,
+    p_opts->event_db_plugin
+    );
+#endif /* ENABLE_OSM_PERF_MGR */
+
   fprintf( 
     opts_file,
     "#\n# DEBUG FEATURES\n#\n"
diff --git a/osm/opensm/osm_trap_rcv.c b/osm/opensm/osm_trap_rcv.c
index 0858968..19be781 100644
--- a/osm/opensm/osm_trap_rcv.c
+++ b/osm/opensm/osm_trap_rcv.c
@@ -698,6 +698,21 @@ __osm_trap_rcv_process_request(
     goto Exit;
   }
 
+#ifdef ENABLE_OSM_PERF_MGR
+#if 0
+  /* we still need to work out how this will work */
+  status = osm_report_notice_to_perfmgr(p_rcv->p_log, p_rcv->p_subn, p_ntci);
+  if( status != IB_SUCCESS )
+  {
+    osm_log( p_rcv->p_log, OSM_LOG_ERROR,
+             "__osm_trap_rcv_process_request: ERR 3803: "
+             "Error sending trap reports (%s)\n",
+             ib_get_err_str( status ) );
+    goto Exit;
+  }
+#endif
+#endif /* ENABLE_OSM_PERF_MGR */
+
  Exit:
   OSM_LOG_EXIT( p_rcv->p_log );
 }
-- 
1.4.4



More information about the general mailing list