From jackm at dev.mellanox.co.il Thu Nov 1 00:37:44 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 1 Nov 2007 09:37:44 +0200 Subject: [ofa-general] [PATCH] mlx4: Add bad flow check when freeing in mlx4_buf_free (potential Oops) Message-ID: <200711010937.44339.jackm@dev.mellanox.co.il> mlx4: Bad flow check is missing from mlx4_buf_free(). This could result in a kernel Oops (NULL dereference). Signed-off-by: Ali Ayoub Signed-off-by: Jack Morgenstein --- Roland, Please try and get this one into 2.6.24 -- it is a bug fix. Jack diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index f8d63d3..b226e01 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -171,9 +171,10 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) buf->u.direct.map); else { for (i = 0; i < buf->nbufs; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->u.page_list[i].buf, - buf->u.page_list[i].map); + if (buf->u.page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->u.page_list[i].buf, + buf->u.page_list[i].map); kfree(buf->u.page_list); } } From sashak at voltaire.com Thu Nov 1 01:10:11 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 10:10:11 +0200 Subject: [ofa-general] [PATCH RFC] libibumad: store pkeys in umad_port structure Message-ID: <20071101081011.GN20136@sashak.voltaire.com> This fetches pkey values from sysfs and stores it in umad_port structure, so an user can find which proper pkey index to use with now working umad_set_pkey(). 
Signed-off-by: Sasha Khapyorsky --- libibumad/include/infiniband/umad.h | 2 + libibumad/src/umad.c | 52 +++++++++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/libibumad/include/infiniband/umad.h b/libibumad/include/infiniband/umad.h index 21cf729..681b440 100644 --- a/libibumad/include/infiniband/umad.h +++ b/libibumad/include/infiniband/umad.h @@ -132,6 +132,8 @@ typedef struct umad_port { uint64_t capmask; uint64_t gid_prefix; uint64_t port_guid; + unsigned pkeys_size; + uint16_t *pkeys; } umad_port_t; typedef struct umad_ca { diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c index 9d9f9c3..b63f220 100644 --- a/libibumad/src/umad.c +++ b/libibumad/src/umad.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "umad.h" @@ -106,20 +107,33 @@ put_ca(umad_ca_t *ca) static int release_port(umad_port_t *port) { - return 0; /* nothing yet */ + free(port->pkeys); + port->pkeys = NULL; + port->pkeys_size = 0; + return 0; +} + +static int check_for_digit_name(const struct dirent *dent) +{ + const char *p = dent->d_name; + while (*p && isdigit(*p)) + p++; + return *p ? 
0 : 1; } static int -get_port(char *ca_name, char *dir_name, int portnum, umad_port_t *port) +get_port(char *ca_name, char *dir, int portnum, umad_port_t *port) { char port_dir[256]; uint8_t gid[16]; + struct dirent **namelist = NULL; + int i, len, ret; strncpy(port->ca_name, ca_name, sizeof port->ca_name - 1); port->portnum = portnum; + port->pkeys = NULL; - snprintf(port_dir, sizeof port_dir - 1, "%s/%d", dir_name, portnum); - port_dir[sizeof port_dir - 1] = 0; + len = snprintf(port_dir, sizeof port_dir - 1, "%s/%d", dir, portnum); if (sys_read_uint(port_dir, SYS_PORT_LMC, &port->lmc) < 0) goto clean; @@ -146,10 +160,38 @@ get_port(char *ca_name, char *dir_name, int portnum, umad_port_t *port) memcpy(&port->gid_prefix, gid, sizeof port->gid_prefix); memcpy(&port->port_guid, gid + 8, sizeof port->port_guid); - /* FIXME: handle pkeys and gids */ + snprintf(port_dir + len, sizeof(port_dir) - len, "/pkeys"); + ret = scandir(port_dir, &namelist, check_for_digit_name, NULL); + if (ret <= 0) { + IBWARN("no pkeys found for %s:%u (at dir %s)...", + port->ca_name, port->portnum, port_dir); + goto clean; + } + port->pkeys = calloc(ret, sizeof(port->pkeys[0])); + if (!port->pkeys) { + IBWARN("get_port: calloc failed: %s", strerror(errno)); + goto clean; + } + for (i = 0; i < ret ; i++) { + unsigned idx, val; + idx = strtoul(namelist[i]->d_name, NULL, 0); + sys_read_uint(port_dir, namelist[i]->d_name, &val); + port->pkeys[idx] = val; + } + port->pkeys_size = ret; + free(namelist); + namelist = NULL; + port_dir[len] = '\0'; + + /* FIXME: handle gids */ + return 0; clean: + if (namelist) + free(namelist); + if (port->pkeys) + free(port->pkeys); return -EIO; } -- 1.5.3.rc2.29.gc4640f From eli at mellanox.co.il Thu Nov 1 01:51:46 2007 From: eli at mellanox.co.il (Eli Cohen) Date: Thu, 01 Nov 2007 10:51:46 +0200 Subject: [ofa-general] Re: opensm partitions In-Reply-To: <20071101015738.GJ20136@sashak.voltaire.com> References: <1193581081.25235.91.camel@mtls03> 
<20071028145029.GV6945@sashak.voltaire.com> <1193643780.25235.117.camel@mtls03> <20071101015738.GJ20136@sashak.voltaire.com> Message-ID: <1193907106.6053.73.camel@mtls03> I used 1.2.5 and I wonder if that makes the difference. On Thu, 2007-11-01 at 03:57 +0200, Sasha Khapyorsky wrote: > On 09:43 Mon 29 Oct , Eli Cohen wrote: > > Here's the file I used (attached). I used this with ofa 1.2.5 so I will > > try now with ofa 1.3 just to be sure. > > I cannot get any errors with ofed_1_2 branch too. > > Sasha From kliteyn at mellanox.co.il Thu Nov 1 02:19:54 2007 From: kliteyn at mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 01 Nov 2007 11:19:54 +0200 Subject: [ofa-general] [PATCH] libibumad: support for new pkey enabled user_mad API In-Reply-To: <20071027161841.GH22317@sashak.voltaire.com> References: <20071027161841.GH22317@sashak.voltaire.com> Message-ID: <47299A3A.2040603@mellanox.co.il> Hi Sasha, See below. Sasha Khapyorsky wrote: > This adds support for new pkey enabled user_mad API. When ABI version > is 5 this tries to use IB_USER_MAD_ENABLE_PKEY ioctl(). 
> > Signed-off-by: Sasha Khapyorsky > --- > libibumad/include/infiniband/umad.h | 4 ++- > libibumad/src/umad.c | 65 +++++++++++++++++++++++------------ > 2 files changed, 46 insertions(+), 23 deletions(-) > > diff --git a/libibumad/include/infiniband/umad.h b/libibumad/include/infiniband/umad.h > index 2ec8b37..21cf729 100644 > --- a/libibumad/include/infiniband/umad.h > +++ b/libibumad/include/infiniband/umad.h > @@ -60,6 +60,8 @@ typedef struct ib_mad_addr { > uint8_t traffic_class; > uint8_t gid[16]; > uint32_t flow_label; > + uint16_t pkey_index; > + uint8_t reserved[6]; > } ib_mad_addr_t; > > typedef struct ib_user_mad { > @@ -80,8 +82,8 @@ typedef struct ib_user_mad { > > #define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ > struct ib_user_mad_reg_req) > - > #define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, uint32_t) > +#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) > > #define UMAD_CA_NAME_LEN 20 > #define UMAD_CA_MAX_PORTS 10 /* 0 - 9 */ > diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c > index 41373e7..9d9f9c3 100644 > --- a/libibumad/src/umad.c > +++ b/libibumad/src/umad.c > @@ -85,6 +85,9 @@ int umaddebug = 0; > static char *def_ca_name = "mthca0"; > static int def_ca_port = 1; > > +static unsigned abi_version; > +static unsigned new_user_mad_api; > + > /************************************* > * Port > */ > @@ -428,16 +431,14 @@ dev_to_umad_id(char *dev, unsigned port) > int > umad_init(void) > { > - unsigned abi_version; > - > TRACE("umad_init"); > if (sys_read_uint(IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, &abi_version) < 0) { > IBWARN("can't read ABI version from %s/%s (%m): is ib_umad module loaded?", > IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE); > return -1; > } > - if (abi_version != IB_UMAD_ABI_VERSION) { > - IBWARN("wrong ABI version: %s/%s is %d but library ABI is %d", > + if (abi_version < IB_UMAD_ABI_VERSION) { > + IBWARN("wrong ABI version: %s/%s is %d but library minimal ABI is %d", > IB_UMAD_ABI_DIR, IB_UMAD_ABI_FILE, 
abi_version, IB_UMAD_ABI_VERSION); > return -1; > } > @@ -554,6 +555,21 @@ umad_open_port(char *ca_name, int portnum) > return -EIO; > } > > + if (abi_version > 5) > + new_user_mad_api = 1; > + else { > + int ret = ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL); > I got (ret = -1) and (errno = 515) > + if (ret == 0) > + new_user_mad_api = 1; > + else if (ret < 0 && errno == EINVAL) > + new_user_mad_api = 0; > > + else { > So eventually I got here. I have kernel 2.6.9-42. -- Yevgeny > + close(fd); > + IBWARN("cannot detect is user_mad P_Key enabled API supported."); > + return ret; > + } > + } > + > DEBUG("opened %s fd %d portid %d", dev_file, fd, umad_id); > return fd; > } > @@ -636,13 +652,15 @@ umad_close_port(int fd) > void * > umad_get_mad(void *umad) > { > - return ((struct ib_user_mad *)umad)->data; > + return new_user_mad_api ? ((struct ib_user_mad *)umad)->data : > + (void *)&((struct ib_user_mad *)umad)->addr.pkey_index; > } > > size_t > umad_size(void) > { > - return sizeof (struct ib_user_mad); > + return new_user_mad_api ? 
sizeof (struct ib_user_mad) : > + sizeof(struct ib_user_mad) - 8; > } > > int > @@ -663,11 +681,13 @@ umad_set_grh(void *umad, void *mad_addr) > } > > int > -umad_set_pkey(void *umad, int pkey) > +umad_set_pkey(void *umad, int pkey_index) > { > -#if 0 > - mad->addr.pkey = 0; /* FIXME - PKEY support */ > -#endif > + struct ib_user_mad *mad = umad; > + > + if (new_user_mad_api) > + mad->addr.pkey_index = htons(pkey_index); > + > return 0; > } > > @@ -719,12 +739,12 @@ umad_send(int fd, int agentid, void *umad, int length, > if (umaddebug > 1) > umad_dump(mad); > > - n = write(fd, mad, length + sizeof *mad); > - if (n == length + sizeof *mad) > + n = write(fd, mad, length + umad_size()); > + if (n == length + umad_size()) > return 0; > > DEBUG("write returned %d != sizeof umad %zu + length %d (%m)", > - n, sizeof *mad, length); > + n, umad_size(), length); > if (!errno) > errno = EIO; > return -EIO; > @@ -768,14 +788,14 @@ umad_recv(int fd, void *umad, int *length, int timeout_ms) > return n; > } > > - n = read(fd, umad, sizeof *mad + *length); > + n = read(fd, umad, umad_size() + *length); > > - VALGRIND_MAKE_MEM_DEFINED(umad, sizeof *mad + *length); > + VALGRIND_MAKE_MEM_DEFINED(umad, umad_size() + *length); > > - if ((n >= 0) && (n <= sizeof *mad + *length)) { > + if ((n >= 0) && (n <= umad_size() + *length)) { > DEBUG("mad received by agent %d length %d", mad->agent_id, n); > - if (n > sizeof *mad) > - *length = n - sizeof *mad; > + if (n > umad_size()) > + *length = n - umad_size(); > else > *length = 0; > return mad->agent_id; > @@ -788,9 +808,9 @@ umad_recv(int fd, void *umad, int *length, int timeout_ms) > } > > DEBUG("read returned %zu > sizeof umad %zu + length %d (%m)", > - mad->length - sizeof *mad, sizeof *mad, *length); > + mad->length - umad_size(), umad_size(), *length); > > - *length = mad->length - sizeof *mad; > + *length = mad->length - umad_size(); > if (!errno) > errno = EIO; > return -errno; > @@ -929,11 +949,12 @@ umad_addr_dump(ib_mad_addr_t 
*addr) > } > gid_str[i*2] = 0; > IBWARN("qpn %d qkey 0x%x lid 0x%x sl %d\n" > - "grh_present %d gid_index %d hop_limit %d traffic_class %d flow_label 0x%x\n" > + "grh_present %d gid_index %d hop_limit %d traffic_class %d flow_label 0x%x pkey_index 0x%x\n" > "Gid 0x%s", > ntohl(addr->qpn), ntohl(addr->qkey), ntohs(addr->lid), addr->sl, > addr->grh_present, (int)addr->gid_index, (int)addr->hop_limit, > - (int)addr->traffic_class, addr->flow_label, gid_str); > + (int)addr->traffic_class, addr->flow_label, addr->pkey_index, > + gid_str); > } > > void > From vlad at lists.openfabrics.org Thu Nov 1 02:59:18 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 1 Nov 2007 02:59:18 -0700 (PDT) Subject: [ofa-general] ofa_1_3_kernel 20071101-0200 daily build status Message-ID: <20071101095918.11173E603A5@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on ia64 with 
linux-2.6.17 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.13 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.15 Passed on x86_64 with linux-2.6.20 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.16 Passed on x86_64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-22.ELsmp Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.9-34.ELsmp Failed: From sashak at voltaire.com Thu Nov 1 03:38:32 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 12:38:32 +0200 Subject: [ofa-general] [PATCH] libibumad: support for new pkey enabled user_mad API In-Reply-To: <47299A3A.2040603@mellanox.co.il> References: <20071027161841.GH22317@sashak.voltaire.com> <47299A3A.2040603@mellanox.co.il> Message-ID: <20071101103832.GO20136@sashak.voltaire.com> On 11:19 Thu 01 Nov , Yevgeny Kliteynik wrote: > > + if (abi_version > 5) > > + new_user_mad_api = 1; > > + else { > > + int ret = ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL); > > > I got (ret = -1) and (errno = 515) It is ENOIOCTLCMD, and it is correct value when unlocked_ioctl() method is not defined by driver. So I think we need to handle this one too. 
> > + if (ret == 0) > > + new_user_mad_api = 1; > > + else if (ret < 0 && errno == EINVAL) > > + new_user_mad_api = 0; > > + else { > > > So eventually I got here. > I have kernel 2.6.9-42. I will send update soon. Sasha From klanism at dream-seeker.com Thu Nov 1 04:29:10 2007 From: klanism at dream-seeker.com (Ping Carlson) Date: Thu, 01 Nov 2007 06:29:10 -0500 Subject: [ofa-general] Ado6e Photoshop CS3 & Acro6at 8 Pro, New for Vista/XP 79$ Save 1999.95$ 0ff Retai| Message-ID: <000001c81c71$7fbc7b00$0100007f@localhost> newadobedeals . com From sashak at voltaire.com Thu Nov 1 03:56:24 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 12:56:24 +0200 Subject: [ofa-general] [PATCH] libibumad: support for new pkey enabled user_mad API In-Reply-To: <20071101103832.GO20136@sashak.voltaire.com> References: <20071027161841.GH22317@sashak.voltaire.com> <47299A3A.2040603@mellanox.co.il> <20071101103832.GO20136@sashak.voltaire.com> Message-ID: <20071101105624.GP20136@sashak.voltaire.com> On 12:38 Thu 01 Nov , Sasha Khapyorsky wrote: > On 11:19 Thu 01 Nov , Yevgeny Kliteynik wrote: > > > + if (abi_version > 5) > > > + new_user_mad_api = 1; > > > + else { > > > + int ret = ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL); > > > > > I got (ret = -1) and (errno = 515) > > It is ENOIOCTLCMD, OTOH it is defined in include/linux/errno.h under #ifdef __KERNEL__ and with caution: /* Should never be seen by user programs */ Roland! Any idea? Sasha > and it is correct value when unlocked_ioctl() method > is not defined by driver. So I think we need to handle this one too. > > > > + if (ret == 0) > > > + new_user_mad_api = 1; > > > + else if (ret < 0 && errno == EINVAL) > > > + new_user_mad_api = 0; > > > + else { > > > > > So eventually I got here. > > I have kernel 2.6.9-42. > > I will send update soon. 
> > Sasha From sashak at voltaire.com Thu Nov 1 04:31:25 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 13:31:25 +0200 Subject: [ofa-general] [PATCH] libibumad: support for new pkey enabled user_mad API In-Reply-To: <20071101103832.GO20136@sashak.voltaire.com> References: <20071027161841.GH22317@sashak.voltaire.com> <47299A3A.2040603@mellanox.co.il> <20071101103832.GO20136@sashak.voltaire.com> Message-ID: <20071101113125.GQ20136@sashak.voltaire.com> On 12:38 Thu 01 Nov , Sasha Khapyorsky wrote: > On 11:19 Thu 01 Nov , Yevgeny Kliteynik wrote: > > > + if (abi_version > 5) > > > + new_user_mad_api = 1; > > > + else { > > > + int ret = ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL); > > > > > I got (ret = -1) and (errno = 515) > > It is ENOIOCTLCMD, and it is correct value when unlocked_ioctl() method > is not defined by driver. So I think we need to handle this one too. > > > > + if (ret == 0) > > > + new_user_mad_api = 1; > > > + else if (ret < 0 && errno == EINVAL) > > > + new_user_mad_api = 0; > > > + else { > > > > > So eventually I got here. > > I have kernel 2.6.9-42. > > I will send update soon. For me it looks that best we can do is to just remove any errno checks - as below. Sasha >From fe395b0c3de4dbbe69c31b9d97818c81ac76a99c Mon Sep 17 00:00:00 2001 From: Sasha Khapyorsky Date: Thu, 1 Nov 2007 13:52:06 +0200 Subject: [PATCH] libibumad: don't check errno when pkey enabling API ioctl() fails With some old kernel ioctl() returns 515 (ENOIOCTLCMD), which is not defined in userspace at all. OTOH the usage of this ioctl() is hidden inside libibumad library where failure scenarios are limited - it is likely enough to just refer return status and don't analyze errno value at all. 
Signed-off-by: Sasha Khapyorsky --- libibumad/src/umad.c | 16 +++------------- 1 files changed, 3 insertions(+), 13 deletions(-) diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c index b63f220..1012695 100644 --- a/libibumad/src/umad.c +++ b/libibumad/src/umad.c @@ -597,20 +597,10 @@ umad_open_port(char *ca_name, int portnum) return -EIO; } - if (abi_version > 5) + if (abi_version > 5 || !ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL)) new_user_mad_api = 1; - else { - int ret = ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL); - if (ret == 0) - new_user_mad_api = 1; - else if (ret < 0 && errno == EINVAL) - new_user_mad_api = 0; - else { - close(fd); - IBWARN("cannot detect is user_mad P_Key enabled API supported."); - return ret; - } - } + else + new_user_mad_api = 0; DEBUG("opened %s fd %d portid %d", dev_file, fd, umad_id); return fd; -- 1.5.3.rc2.29.gc4640f From kliteyn at dev.mellanox.co.il Thu Nov 1 04:30:20 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 01 Nov 2007 13:30:20 +0200 Subject: [ofa-general] [PATCH] libibumad: support for new pkey enabled user_mad API In-Reply-To: <20071101113125.GQ20136@sashak.voltaire.com> References: <20071027161841.GH22317@sashak.voltaire.com> <47299A3A.2040603@mellanox.co.il> <20071101103832.GO20136@sashak.voltaire.com> <20071101113125.GQ20136@sashak.voltaire.com> Message-ID: <4729B8CC.60300@dev.mellanox.co.il> Sasha Khapyorsky wrote: > On 12:38 Thu 01 Nov , Sasha Khapyorsky wrote: > >> On 11:19 Thu 01 Nov , Yevgeny Kliteynik wrote: >> >>>> + if (abi_version > 5) >>>> + new_user_mad_api = 1; >>>> + else { >>>> + int ret = ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL); >>>> >>>> >>> I got (ret = -1) and (errno = 515) >>> >> It is ENOIOCTLCMD, and it is correct value when unlocked_ioctl() method >> is not defined by driver. So I think we need to handle this one too. 
>> >> >>>> + if (ret == 0) >>>> + new_user_mad_api = 1; >>>> + else if (ret < 0 && errno == EINVAL) >>>> + new_user_mad_api = 0; >>>> + else { >>>> >>>> >>> So eventually I got here. >>> I have kernel 2.6.9-42. >>> >> I will send update soon. >> > > For me it looks that best we can do is to just remove any errno checks - > as below. > I'm all for it. -- Yevgeny > Sasha > > > >From fe395b0c3de4dbbe69c31b9d97818c81ac76a99c Mon Sep 17 00:00:00 2001 > From: Sasha Khapyorsky > Date: Thu, 1 Nov 2007 13:52:06 +0200 > Subject: [PATCH] libibumad: don't check errno when pkey enabling API ioctl() fails > > With some old kernel ioctl() returns 515 (ENOIOCTLCMD), which is not > defined in userspace at all. OTOH the usage of this ioctl() is hidden > inside libibumad library where failure scenarios are limited - it is > likely enough to just refer return status and don't analyze errno value > at all. > > Signed-off-by: Sasha Khapyorsky > --- > libibumad/src/umad.c | 16 +++------------- > 1 files changed, 3 insertions(+), 13 deletions(-) > > diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c > index b63f220..1012695 100644 > --- a/libibumad/src/umad.c > +++ b/libibumad/src/umad.c > @@ -597,20 +597,10 @@ umad_open_port(char *ca_name, int portnum) > return -EIO; > } > > - if (abi_version > 5) > + if (abi_version > 5 || !ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL)) > new_user_mad_api = 1; > - else { > - int ret = ioctl(fd, IB_USER_MAD_ENABLE_PKEY, NULL); > - if (ret == 0) > - new_user_mad_api = 1; > - else if (ret < 0 && errno == EINVAL) > - new_user_mad_api = 0; > - else { > - close(fd); > - IBWARN("cannot detect is user_mad P_Key enabled API supported."); > - return ret; > - } > - } > + else > + new_user_mad_api = 0; > > DEBUG("opened %s fd %d portid %d", dev_file, fd, umad_id); > return fd; > From hrosenstock at xsigo.com Thu Nov 1 04:50:02 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 04:50:02 -0700 Subject: [ofa-general] opensm: Unsupported 
attribute = 0xFF02 In-Reply-To: <20071101002410.GD20136@sashak.voltaire.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> Message-ID: <1193917802.26246.537.camel@hrosenstock-ws.xsigo.com> Hi Sasha, On Thu, 2007-11-01 at 02:24 +0200, Sasha Khapyorsky wrote: > Hi Hal, > > On 14:01 Tue 30 Oct , Hal Rosenstock wrote: > > > status..................0x0 > > > hop_ptr.................0x0 > > > hop_count...............0x0 > > > trans_id................0x377df6ce > > > attr_id.................0xFF02 (UNKNOWN) > > > > This is a proprietary SM attribute used by Cisco SM. Also, I believe the > > Cisco SM supports replication to standby's and that would be via > > proprietary means. > > > > > resv....................0x0 > > > attr_mod................0x1 > > > m_key...................0x0000000000000000 > > > MAD IS LID ROUTED > > > > > > I'm not sure what this ERR 3107 means, is there something I could do about > > > it? Is there a way to use OpenSM as a standby SM with a managed switch? > > > > No; SM flavors should not be mixed on a subnet. There are numerous > > reasons for this. > > What are the reasons? I think complaint SMs should be able to > inter-operate, of course not in part of proprietary extensions. Aside from value adds (proprietary extensions) which is an important and large issue as all SMs vendors claim advantage from this and they are not being opened up, there are the issues of routing algorithms (both unicast and multicast), management, and consistency of data. These are all different to varying degrees for each flavor. While I am aware of some customers requesting this, the IBTA does not sanction this configuration. The MgtWG has produced a white paper on the topic which is available on their web site. In fact, some areas of the above are beyond the IBTA charter. 
If you feel strongly about this, I suggest you get like-minded vendors and get as much of this standardized as possible. IMO the place for this is the IBTA MgtWG. > At least > I am able to run OpenSM with Voltaire SM on one subnet. Define what you mean by run here ? What "experiments" have you performed ? Does Voltaire stand behind this as a supported configuration ? If not, are there any plans to do so ? -- Hal > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Thu Nov 1 04:51:21 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 04:51:21 -0700 Subject: [ofa-general] Re: opensm partitions In-Reply-To: <1193907106.6053.73.camel@mtls03> References: <1193581081.25235.91.camel@mtls03> <20071028145029.GV6945@sashak.voltaire.com> <1193643780.25235.117.camel@mtls03> <20071101015738.GJ20136@sashak.voltaire.com> <1193907106.6053.73.camel@mtls03> Message-ID: <1193917881.26246.539.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-01 at 10:51 +0200, Eli Cohen wrote: > I used 1.2.5 and I wonder if that makes the difference. AFAIK there should be no difference here (although I made several failed attempts at there being some difference in OpenSM). -- Hal > > On Thu, 2007-11-01 at 03:57 +0200, Sasha Khapyorsky wrote: > > On 09:43 Mon 29 Oct , Eli Cohen wrote: > > > Here's the file I used (attached). I used this with ofa 1.2.5 so I will > > > try now with ofa 1.3 just to be sure. > > > > I cannot get any errors with ofed_1_2 branch too. 
> > > > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Thu Nov 1 04:56:03 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 04:56:03 -0700 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <20071101024131.GM2037@obsidianresearch.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> Message-ID: <1193918163.26246.546.camel@hrosenstock-ws.xsigo.com> On Wed, 2007-10-31 at 20:41 -0600, Jason Gunthorpe wrote: > On Thu, Nov 01, 2007 at 02:24:10AM +0200, Sasha Khapyorsky wrote: > > > What are the reasons? I think compliant SMs should be able to > > inter-operate, of course not in part of proprietary extensions. At least > > I am able to run OpenSM with Voltaire SM on one subnet. > > At a minimum how hand off is supposed to work is very vaguely > specified in the IBA. SM handover/failover is tested for interop by the IBTA but that's it. There are known proprietary extensions as well as this being the nose of the camel being in the tent as in order for this to work well, there is data replication needed (and no, please don't bring up client reregistration as the best solution for this). > Besides, even if hand off wasn't a problem the two SMs would have to > have very similar ideas on routing, multicast, QOS, services, etc or > the fabric will be badly disrupted after hand off.. Exactly. > Without extensions > to transfer this live data over before hand off it is unlikely to > be non-disruptive except in very constrained situations. Indeed. 
> It seems to me the main benefit of the whole standardized mechanism > (in an interoperability context) is just to help make it so that a new > sm starting up doesn't just trash the fabric accidentally, and provide > at least some sensible behavior when two separate subnets are combined > into one. > > If you want to test hand over interop joining two operating networks > is a good way to do it - that is really hard to get right in all of > the cases :) This was the area where I felt the spec was weakest since > it really didn't say exactly when during the hand over exchanges each > SM was in control of the nodes, and exactly what should happen when > things go wrong was not specified.. Yes, I too believe more work could be and should be done here. -- Hal > Jason From hrosenstock at xsigo.com Thu Nov 1 05:00:29 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 05:00:29 -0700 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <20071101035648.GK20136@sashak.voltaire.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> <20071101035648.GK20136@sashak.voltaire.com> Message-ID: <1193918429.26246.551.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-01 at 05:56 +0200, Sasha Khapyorsky wrote: > On 20:41 Wed 31 Oct , Jason Gunthorpe wrote: > > > > On Thu, Nov 01, 2007 at 02:24:10AM +0200, Sasha Khapyorsky wrote: > > > > > What are the reasons? I think compliant SMs should be able to > > > inter-operate, of course not in part of proprietary extensions. At least > > > I am able to run OpenSM with Voltaire SM on one subnet. > > > > At a minimum how hand off is supposed to work is very vaguely > > specified in the IBA. > > It is at least basically described in the IBA - with exchanging SMInfo. 
> > > Besides, even if hand off wasn't a problem the two SMs would have to > > have very similar ideas on routing, multicast, QOS, services, etc > > In worst case the routing tables and QoS setups could be reconfigured > from scratch (just as if it could be first SM run), and all SA related > things could be rerequested with ClientReregistration bit. Routing tables are usually driven by algorithms (all beyond the spec) rather than table loading. Don't trivialize management data in a large subnet. It is potentially a large amount of configuration which people try hard to avoid until they no longer have a choice. I view client reregistration as a workaround for this very issue. I am regretting pushing that into the spec for that purpose. > And sure, some configurations (partitions, QoS, routing, etc.) can be > not synchronized for SMs, but then the differences in a fabric setups > should be expected results. Is that really acceptable for a real customer ? -- Hal > And I'm not about "how fast and efficient it is" and even not about > "interoperability" bugs in various implementations. > > > or > > the fabric will be badly disrupted after hand off.. Without extensions > > to transfer this live data over before hand off it is unlikely to > > be non-disruptive except in very constrained situations. > > > > It seems to me the main benifit of the whole standardized mechanism > > (in an interoperability context) is just to help make it so that a new > > sm starting up doesn't just trash the fabric accidentally, and provide > > at least some sensible behavior when two seperate subnets are combined > > into one. 
> > > > If you want to test hand over interop joining two operating networks > > is a good way to do it - that is really hard to get right in all of > > the cases :) This was the area where I felt the spec was weakest since > > it really didn't say exactly when during the hand over exchanges each > > SM was in control of the nodes, and exactly what should happen when > > things go wrong was not specified.. > > Ok, so we are not about "impossibility" to do this... Just current lack > of standardization makes it hard to do handover properly? > > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From vlad at dev.mellanox.co.il Thu Nov 1 05:02:46 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 01 Nov 2007 14:02:46 +0200 Subject: [ofa-general] Re: [ewg] [Patch 0/3]ehca: Patchset to backport 2.6.24-rc1 kernel base In-Reply-To: <200710311707.04974.ossrosch@linux.vnet.ibm.com> References: <200710311707.04974.ossrosch@linux.vnet.ibm.com> Message-ID: <4729C066.10504@dev.mellanox.co.il> Stefan Roscher wrote: > These three patches are the backports against the new 2.6.24-rc1 kernel base. > > [patch 1/3] - In kernel version 2.6.17 and lower the interface for > register/unregister_hotcpu_notifier() is missing. This patch includes the > backport for linux/cpu.h to the concerning kernel versions. > > [patch 2/3] - Starting with kernel version 2.6.24 ehca is using sg_page() > interface, which does not exists in older kernels. So this patch adds a > backport for sg_page() in linux/scatterlist.h for all kernel versions > lower 2.6.24. > > [patch 3/3] - ibmebus changes the location code interface in 2.6.24-rc1. > Because those changes are not available in older kernel versions we > have to backport all older kernels to use the old version of ibmebus. 
> > kind regards Stefan Roscher > > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg > Patches 1, 2 and 3 are applied to the ofed_1_3/linux-2.6.git ofed_kernel_2_6_24_rc1 branch. Regards, Vladimir From sashak at voltaire.com Thu Nov 1 05:32:01 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 14:32:01 +0200 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <1193917802.26246.537.camel@hrosenstock-ws.xsigo.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <1193917802.26246.537.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071101123201.GU20136@sashak.voltaire.com> On 04:50 Thu 01 Nov , Hal Rosenstock wrote: > > Aside from value adds (proprietary extensions) which is an important and > large issue as all SMs vendors claim advantage from this and they are > not being opened up, there are the issues of routing algorithms (both > unicast and multicast), management, and consistency of data. These are > all different to varying degrees for each flavor. > > While I am aware of some customers requesting this, the IBTA does not > sanction this configuration. The MgtWG has produced a white paper on the > topic which is available on their web site. Do you mean management interoperability white paper? > In fact, some areas of the > above are beyond the IBTA charter. If you feel strongly about this, I > suggest you get a like minded vendors and get as much of this > standardized as possible. IMO the place for this is the IBTA MgtWG. I see. Thanks for suggestion :) > > At least > > I am able to run OpenSM with Voltaire SM on one subnet. > > Define what you mean by run here ? What "experiments" have you > performed ? Nothing really special. 
I'm just running OpenSM on the subnet with Voltaire managed switches (where VoltaireSM is on). And I don't remember any big problems there (including handover, etc.). Never investigated interoperability issue in depth however. > Does Voltaire stand behind this as a supported configuration ? Not sure. > If not, > are there any plans to do so ? No idea, sorry :( Sasha From vlad at dev.mellanox.co.il Thu Nov 1 05:23:03 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 01 Nov 2007 14:23:03 +0200 Subject: [ofa-general] Re: [ewg] [PATCH] ofed_scripts: Add location code fix for older ppc64 kernels In-Reply-To: <200710291322.20235.fenkes@de.ibm.com> References: <200710291322.20235.fenkes@de.ibm.com> Message-ID: <4729C527.8050106@dev.mellanox.co.il> Joachim Fenkes wrote: > Kernels prior to 2.6.24 have problems with multiple devices sharing the same > location code on ppc64 systems -- only one of these devices would be usable > by ibmebus. This will be a problem on systems with multiple eHCA chips on a > single hardware location. > > For older kernels, this problem can be circumvented by, prior to loading the > eHCA driver, changing the location codes of the offending devices so that > they're not the same anymore. > > This patch adds an openibd patch file which, if applied, will make openibd > change the location codes of eHCA adapters with the same location code. > ofed_patch.sh is changed so that it applies that patch if, and only if, it > is run on a ppc64 architecture and the kernel version implies that the > kernel has the ibmebus bug.
> > Signed-off-by: Joachim Fenkes > --- > ofed_scripts/ofed_patch.sh | 49 +++++++++++++++++++++++++++++++++++ > ofed_scripts/openibd-loc_code.patch | 43 ++++++++++++++++++++++++++++++ > 2 files changed, 92 insertions(+), 0 deletions(-) > create mode 100644 ofed_scripts/openibd-loc_code.patch > > diff --git a/ofed_scripts/ofed_patch.sh b/ofed_scripts/ofed_patch.sh > index e1f039d..b254000 100755 > --- a/ofed_scripts/ofed_patch.sh > +++ b/ofed_scripts/ofed_patch.sh > @@ -200,6 +200,44 @@ get_backport_dir() > > } > > +need_openibd_loc_code_patch() > +{ > + local sub > + > + if [ "$ARCH" != "ppc64" ]; then > + return 1; > + fi > + > + case $KVERSION in > + 2.6.9-*.EL*) > + sub=$(echo $KVERSION | cut -d"-" -f2 | cut -d"." -f1) > + if [ $sub -lt 62 ]; then > + return 0; > + fi > + ;; > + 2.6.16.*-*-*) > + sub=$(echo $KVERSION | cut -d"." -f4 | cut -d"-" -f1) > + if [ $sub -lt 53 ]; then > + return 0; > + fi > + ;; > + 2.6.18-*.el5*) > + sub=$(echo $KVERSION | cut -d"-" -f2 | cut -d"." -f1) > + if [ $sub -lt 52 ]; then > + return 0; > + fi > + ;; > + 2.6.*) > + sub=$(echo $KVERSION | cut -d"." -f3 | cut -d"-" -f1 | tr -d [:alpha:][:punct:]) > + if [ $sub -lt 24 ]; then > + return 0; > + fi > + ;; > + esac > + > + return 1; > +} > + > # Apply patch > apply_patch() > { > @@ -253,6 +291,13 @@ apply_backport_patches() > fi > } > > +apply_openibd_patches() > +{ > + if need_openibd_loc_code_patch; then > + apply_patch ${CWD}/ofed_scripts/openibd-loc_code.patch > + fi > +} > + > # Apply patches > patches_handle() > { > @@ -288,6 +333,9 @@ EOF > fi > BACKPORT_INCLUDES='-I${CWD}/kernel_addons/backport/'${BACKPORT_DIR}/include/ > fi > + > + # Apply openibd patches > + apply_openibd_patches $KVERSION > > > #FIXME: why are these applied here? Move them to before backports? 
> @@ -399,6 +447,7 @@ main() > > #Set default values > KVERSION=${KVERSION:-$(uname -r)} > +ARCH=${ARCH:-$(uname -m)} > WITH_QUILT=${WITH_QUILT:-"yes"} > WITH_PATCH=${WITH_PATCH:-"yes"} > WITH_KERNEL_FIXES=${WITH_KERNEL_FIXES:-"yes"} > diff --git a/ofed_scripts/openibd-loc_code.patch b/ofed_scripts/openibd-loc_code.patch > new file mode 100644 > index 0000000..43d70b4 > --- /dev/null > +++ b/ofed_scripts/openibd-loc_code.patch > @@ -0,0 +1,43 @@ > +--- a/ofed_scripts/openibd 2007-10-25 08:01:51.000000000 -0500 > ++++ b/ofed_scripts/openibd 2007-10-27 09:58:56.000000000 -0500 > +@@ -538,6 +538,32 @@ if test -x /sbin/lspci && test -x /sbin/ > + fi > + } > + > ++fix_location_codes() > ++{ > ++ # ppc64 only: > ++ # Fix duplicate location codes on kernels where ibmebus can't handle them > ++ if [ -d /proc/device-tree -a -f /proc/ppc64/ofdt ]; then > ++ local i=1 phandle lcode len > ++ # output all duplicate location codes and their devices > ++ for attr in $(find /proc/device-tree -wholename "*lhca\@*/ibm,loc-code"); do > ++ echo -e $(dirname $attr)"\t"$(cat $attr) > ++ done | sort -k2 | uniq -f1 --all-repeated=separate | cut -f1 | while read dev; do > ++ if [ -n "$dev" ]; then > ++ # append an instance counter to the location code > ++ phandle=$(hexdump -e '8 "%u"' $dev/ibm,phandle) > ++ lcode=$(cat $dev/ibm,loc-code)-I$i > ++ len=$(echo -n "$lcode" | wc -c) > ++ # echo "$dev -> $lcode" > ++ echo -n "update_property $phandle ibm,loc-code $len $lcode" > /proc/ppc64/ofdt > ++ i=$(($i + 1)) > ++ else > ++ # empty line means new group -- reset i > ++ i=1 > ++ fi > ++ done > ++ fi > ++} > ++ > + rotate_log() > + { > + local log=$1 > +@@ -694,6 +720,7 @@ start() > + > + # Load eHCA driver > + if [ "X${EHCA_LOAD}" == "Xyes" ]; then > ++ fix_location_codes > + /sbin/modprobe ib_ehca > /dev/null 2>&1 > + my_rc=$? 
> + if [ $my_rc -ne 0 ]; then Hi Joachim, I think that it will be easier to maintain the openibd script if you will change it directly (not via patch applied by ofed_patch.sh). You can add 'need_openibd_loc_code_patch' function to the openibd with: KVERSION=`uname -r` if need_openibd_loc_code_patch; then fix_location_codes fi Regards, Vladimir From hrosenstock at xsigo.com Thu Nov 1 05:25:14 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 05:25:14 -0700 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <20071101123201.GU20136@sashak.voltaire.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <1193917802.26246.537.camel@hrosenstock-ws.xsigo.com> <20071101123201.GU20136@sashak.voltaire.com> Message-ID: <1193919914.26246.578.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-01 at 14:32 +0200, Sasha Khapyorsky wrote: > On 04:50 Thu 01 Nov , Hal Rosenstock wrote: > > > > Aside from value adds (proprietary extensions) which is an important and > > large issue as all SMs vendors claim advantage from this and they are > > not being opened up, there are the issues of routing algorithms (both > > unicast and multicast), management, and consistency of data. These are > > all different to varying degrees for each flavor. > > > > While I am aware of some customers requesting this, the IBTA does not > > sanction this configuration. The MgtWG has produced a white paper on the > > topic which is available on their web site. > > Do you mean management interoperability white paper? Yes. > > In fact, some areas of the > > above are beyond the IBTA charter. If you feel strongly about this, I > > suggest you get a like minded vendors and get as much of this > > standardized as possible. IMO the place for this is the IBTA MgtWG. > > I see. 
Thanks for suggestion :) > > > > At least > > > I am able to run OpenSM with Voltaire SM on one subnet. > > > > Define what you mean by run here ? What "experiments" have you > > performed ? > > Nothing really special. I'm just running OpenSM on the subnet with > Voltaire managed switches (where VoltaireSM is on). And I don't remember > any big problems there (including handover, etc.). Never investigated > interoperability issue in deep however. What ULPs if any have you run across failovers and failbacks ? > > Does Voltaire stand behind this as a supported configuration ? > > Not sure. Let us know when you find out. I, for one, and I think there are others would be very interested in this. -- Hal > > If not, > > are there any plans to do so ? > > No idea, sorry :( > > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sashak at voltaire.com Thu Nov 1 05:48:55 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 14:48:55 +0200 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <1193918429.26246.551.camel@hrosenstock-ws.xsigo.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> <20071101035648.GK20136@sashak.voltaire.com> <1193918429.26246.551.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071101124855.GW20136@sashak.voltaire.com> On 05:00 Thu 01 Nov , Hal Rosenstock wrote: > On Thu, 2007-11-01 at 05:56 +0200, Sasha Khapyorsky wrote: > > On 20:41 Wed 31 Oct , Jason Gunthorpe wrote: > > > > > > On Thu, Nov 01, 2007 at 02:24:10AM +0200, Sasha Khapyorsky wrote: > > > > > > > What are the reasons? 
I think complaint SMs should be able to > > > > inter-operate, of course not in part of proprietary extensions. At least > > > > I am able to run OpenSM with Voltaire SM on one subnet. > > > > > > At a minimum how hand off is supposed to work is very vaugely > > > specified in the IBA. > > > > It is at least basically described in the IBA - with exchanging SMInfo. > > > > > Besides, even if hand off wasn't a problem the two SMs would have to > > > have very similar ideas on routing, multicast, QOS, services, etc > > > > In worst case the routing tables and QoS setups could be reconfigured > > from scratch (just as if it could be first SM run), and all SA related > > things could be rerequested with ClientReregistration bit. > > Routing tables are usually driven by algorithms (all beyond the spec) > rather than table loading. > > Don't trivialize management data in a large subnet. It is potentially a > large amount of configuration which people try hard to avoid until they > no longer have a choice. > > I view client reregistration as a workaround for this very issue. I am > regretting pushing that into the spec for that purpose. > > > And sure, some configurations (partitions, QoS, routing, etc.) can be > > not synchronized for SMs, but then the differences in a fabric setups > > should be expected results. > > Is that really acceptable for a real customer ? This was not a question - "acceptable" and "impossible" are not the same.
Sasha From sashak at voltaire.com Thu Nov 1 05:52:51 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 14:52:51 +0200 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <1193918163.26246.546.camel@hrosenstock-ws.xsigo.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> <1193918163.26246.546.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071101125251.GX20136@sashak.voltaire.com> On 04:56 Thu 01 Nov , Hal Rosenstock wrote: > > SM handover/failover is tested for interop by the IBTA but that's it. BTW was it officially done with SMs from different vendors? Sasha From sashak at voltaire.com Thu Nov 1 05:54:57 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 14:54:57 +0200 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <1193919914.26246.578.camel@hrosenstock-ws.xsigo.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <1193917802.26246.537.camel@hrosenstock-ws.xsigo.com> <20071101123201.GU20136@sashak.voltaire.com> <1193919914.26246.578.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071101125457.GY20136@sashak.voltaire.com> On 05:25 Thu 01 Nov , Hal Rosenstock wrote: > > > > Nothing really special. I'm just running OpenSM on the subnet with > > Voltaire managed switches (where VoltaireSM is on). And I don't remember > > any big problems there (including handover, etc.). Never investigated > > interoperability issue in deep however. > > What ULPs if any have you run across failovers and failbacks ? At least IPoIB is always running... 
Sasha From hrosenstock at xsigo.com Thu Nov 1 06:07:40 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 06:07:40 -0700 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <20071101125251.GX20136@sashak.voltaire.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> <1193918163.26246.546.camel@hrosenstock-ws.xsigo.com> <20071101125251.GX20136@sashak.voltaire.com> Message-ID: <1193922460.26246.586.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-01 at 14:52 +0200, Sasha Khapyorsky wrote: > On 04:56 Thu 01 Nov , Hal Rosenstock wrote: > > > > SM handover/failover is tested for interop by the IBTA but that's it. > > BTW was it officially done with SMs from different vendors? Just SMInfo testing and the fact that the other became master in various scenarios. -- Hal > > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Thu Nov 1 06:24:00 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 06:24:00 -0700 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <20071101035648.GK20136@sashak.voltaire.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> <20071101035648.GK20136@sashak.voltaire.com> Message-ID: <1193923440.26246.603.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-01 at 05:56 +0200, Sasha Khapyorsky wrote: > On 20:41 Wed 31 Oct , Jason Gunthorpe wrote: > > > > On Thu, Nov 01, 2007 at 02:24:10AM +0200, Sasha Khapyorsky wrote: > > > > > What are 
the reasons? I think complaint SMs should be able to > > > inter-operate, of course not in part of proprietary extensions. At least > > > I am able to run OpenSM with Voltaire SM on one subnet. > > > > At a minimum how hand off is supposed to work is very vaugely > > specified in the IBA. > > It is at least basically described in the IBA - with exchanging SMInfo. > > > Besides, even if hand off wasn't a problem the two SMs would have to > > have very similar ideas on routing, multicast, QOS, services, etc > > In worst case the routing tables and QoS setups could be reconfigured > from scratch (just as if it could be first SM run), and all SA related > things could be rerequested with ClientReregistration bit. As mentioned in the past, client reregistration is a rather large hammer. There have been discussions on utilizing this mechanism in more scenarios (which FWIW is not a good thing IMO). This approach (and it is optional) pushes the burden back on the end nodes rather than the SM. Scalability is certainly an issue with it. It was begrudgingly put into the spec. It was intended only as a stopgap measure. There was informative text put into the spec alluding to the "appropriate" use of this option: "A reason for the SM doing this might be that the SM suffered a failure and as a result lost its own records of such subscriptions." This is referring to a single SM (although that is not the recommended deployment topology) crashing and being restarted. IMO a civil SM would not rely on this mechanism. -- Hal > And sure, some configurations (partitions, QoS, routing, etc.) can be > not synchronized for SMs, but then the differences in a fabric setups > should be expected results. > > And I'm not about "how fast and efficient it is" and even not about > "interoperability" bugs in various implementations. > > > or > > the fabric will be badly disrupted after hand off.. 
Without extensions > > to transfer this live data over before hand off it is unlikely to > > be non-disruptive except in very constrained situations. > > > > It seems to me the main benifit of the whole standardized mechanism > > (in an interoperability context) is just to help make it so that a new > > sm starting up doesn't just trash the fabric accidentally, and provide > > at least some sensible behavior when two seperate subnets are combined > > into one. > > > > If you want to test hand over interop joining two operating networks > > is a good way to do it - that is really hard to get right in all of > > the cases :) This was the area where I felt the spec was weakest since > > it really didn't say exactly when during the hand over exchanges each > > SM was in control of the nodes, and exactly what should happen when > > things go wrong was not specified.. > > Ok, so we are not about "impossibility" to do this... Just current lack > of standardization makes it hard to do handover properly? > > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From Arkady.Kanevsky at netapp.com Thu Nov 1 06:34:51 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Thu, 1 Nov 2007 09:34:51 -0400 Subject: [ofa-general] iWARP issues Message-ID: Bill, you had asked what are the iWARP open issues that need discussion at SC'07 OFA dev. workshop. I recall 4: - iWARP Support for Peer-to-Peer Applications, this is CM interoperability issue - iWARP + TCP host stack port space sharing (required by IETF iSER spec.) - missing verbs (IB-only, iWARP-only, and iWARP-nonstandard), for example FMR, send with invalidate. (I recall that the complete list was fleshed out a year ago) (ULP changes to take advantage of these verbs: e.g. NFS-RDMA, iSER).
- RDMA connection timeout; expand RDMA_CM API to support timeout parameter (not iWARP specific) Cheers, Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 From moshek at voltaire.com Thu Nov 1 06:46:35 2007 From: moshek at voltaire.com (Moshe Kazir) Date: Thu, 1 Nov 2007 15:46:35 +0200 Subject: [ofa-general] OFED-1.3 compile problems on PPC64 SLES10 SP1 Message-ID: <39C75744D164D948A170E9792AF8E7CA4D2BEB@exil.voltaire.com> 3 packages included in OFED-1.3 ( openmpi, mvapich2, ibutils) have compile problems on JS21 PPC64 SLES10 SP1 . I suspect that the compile problem is caused as a result of a change in the SLES10 SP1 g++ 64 bits shared library location that was not followed by the right change in the utility autoconf automatically generated files. These packages pass compilation with no errors on SLES 10. Can you help ? Detailed technical description of the problems : https://bugs.openfabrics.org/show_bug.cgi?id=753 https://bugs.openfabrics.org/show_bug.cgi?id=754 https://bugs.openfabrics.org/show_bug.cgi?id=755 Moshe ____________________________________________________________ Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) Voltaire - The Grid Backbone www.voltaire.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From krause at cup.hp.com Thu Nov 1 09:21:44 2007 From: krause at cup.hp.com (Michael Krause) Date: Thu, 01 Nov 2007 09:21:44 -0700 Subject: [ofa-general] Fwd: [rddp] RFCs 5040-5045 and the RDDP WG Message-ID: <6.2.0.14.2.20071101092122.07abd258@esmail.cup.hp.com> For those wondering about the status of the iWARP specifications.
Mike >From: Black_David at emc.com >Date: Thu, 1 Nov 2007 08:44:22 -0400 >X-MS-Has-Attach: >X-MS-TNEF-Correlator: >Thread-Topic: RFCs 5040-5045 and the RDDP WG >Thread-Index: AcgchN9Cfit3Vms/QnO5eI5esujgpg== >Priority: Urgent >To: >X-OriginalArrivalTime: 01 Nov 2007 12:44:23.0089 (UTC) > FILETIME=[EDCC0210:01C81C84] >X-PMX-Version: 4.7.1.128075, Antispam-Engine: 2.5.1.298604, > Antispam-Data: 2007.8.30.51425 >X-PerlMx-Spam: Gauge=, SPAM=0%, Reason='EMC_BODY_1+ -3, EMC_FROM_0+ -3, > PRIORITY_NO_NAME 0.716, NO_REAL_NAME 0, __C230066_P5 0, __CT 0, > __CTE 0, __CT_TEXT_PLAIN 0, __HAS_MSGID 0, __HAS_X_PRIORITY 0, > __IMS_MSGID 0, __MIME_TEXT_ONLY 0, __MIME_VERSION 0, > __SANE_MSGID 0' >X-Spam-Score: -4.0 (----) >X-Scan-Signature: 0bc60ec82efc80c84b8d02f4b0e4de22 >Cc: Black_David at emc.com >Subject: [rddp] RFCs 5040-5045 and the RDDP WG >X-BeenThere: rddp at ietf.org >X-Mailman-Version: 2.1.5 >List-Id: "IETF Remote Direct Data Placement \(rddp\) WG" >List-Unsubscribe: , > >List-Post: >List-Help: >List-Subscribe: , > >X-MIME-Autoconverted: from quoted-printable to 8bit by esmail.cup.hp.com >id FAA05698 > >Everyone, > >With the publication of RFCs 5040-5045, the program of >work of the RDDP Working Group has been completed. > >I would like to thank all the authors and WG members for >their hard work and support in producing the RFCs for >the RDDP protocols. This has been an adventure, and it's >been my privilege to serve as the chair of this WG. > >There will be an announcement shortly that the RDDP Working >Group is being closed because it has finished its program >of work. The rddp at ietf.org mailing list will remain open >and active for the time being - there has been very little >activity on the list recently, and the list is like to be >closed down around the end of this year if there is >little to no activity. > >Congratulations to all and many thanks, >--David >---------------------------------------------------- >David L. 
Black, Distinguished Engineer >EMC Corporation, 176 South St., Hopkinton, MA 01748 >+1 (508) 293-7953 FAX: +1 (508) 293-7786 >black_david at emc.com Mobile: +1 (978) 394-7754 >---------------------------------------------------- > > > > >_______________________________________________ >rddp mailing list >rddp at ietf.org >https://www1.ietf.org/mailman/listinfo/rddp -------------- next part -------------- An HTML attachment was scrubbed... URL: From mshefty at ichips.intel.com Thu Nov 1 09:34:23 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 01 Nov 2007 09:34:23 -0700 Subject: [ofa-general] [PATCH] management: changed method_mask type in user_mad interface In-Reply-To: <20071101062006.GL20136@sashak.voltaire.com> References: <20071101062006.GL20136@sashak.voltaire.com> Message-ID: <472A000F.1040205@ichips.intel.com> > The method_mask is array of longs now in all libibumad interfaces. Doesn't this break binary compatibility and force a new major release? From tziporet at dev.mellanox.co.il Thu Nov 1 09:37:26 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Thu, 01 Nov 2007 18:37:26 +0200 Subject: [ofa-general] Re: Input on the new OFED package In-Reply-To: <1193352611.10336.215.camel@firewall.xsintricity.com> References: <4720C40E.1030708@mellanox.co.il> <1193352611.10336.215.camel@firewall.xsintricity.com> Message-ID: <472A00C6.1070106@mellanox.co.il> Doug Ledford wrote: > It's definitely better being all split up, but I'm still going to have > to replace at least some of the spec files wholesale (well, currently > all of them). I'll pick on the ibutils spec file as my example and I'm > cc:ing this to the list so I don't need to do this over and over again. > > We will work with Oren to improve ibutils package spec files > > But, I still really need a download link to put in the > spec files or else I get yelled at. 
> > I will try to push all maintainers to put their staff in the OFA download page (it will happened eventually) Thanks for teh input, Tziporet From mshefty at ichips.intel.com Thu Nov 1 09:48:07 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 01 Nov 2007 09:48:07 -0700 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <1193923440.26246.603.camel@hrosenstock-ws.xsigo.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> <20071101035648.GK20136@sashak.voltaire.com> <1193923440.26246.603.camel@hrosenstock-ws.xsigo.com> Message-ID: <472A0347.3060807@ichips.intel.com> > As mentioned in the past, client reregistration is a rather large > hammer. There have been discussions on utilizing this mechanism in more > scenarios (which FWIW is not a good thing IMO). This approach (and it is > optional) pushes the burden back on the end nodes rather than the SM. > Scalability is certainly an issue with it. It was begrudgingly put into > the spec. It was intended only as a stopgap measure. > > There was informative text put into the spec alluding to the > "appropriate" use of this option: > > "A reason for the SM doing this might be that the SM suffered a failure > and as a result lost its own records of such subscriptions." > This is referring to a single SM (although that is not the recommended > deployment topology) crashing and being restarted. > > IMO a civil SM would not rely on this mechanism. There's still the problem that the ULPs on the end-node do not know when or if the data is lost. IMO, making client reregistration mandatory would have been a better solution, allowing ULPs to only re-register on that event. As it stands now, ULPs automatically reregister on SM LID changes, port events, etc.. In order to avoid ULP re-registration, SM failover has to bring along the LID. 
An alternate solution could have let an SM learn what it needed from the end nodes through queries... - Sean From tziporet at mellanox.co.il Thu Nov 1 09:50:20 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Thu, 1 Nov 2007 18:50:20 +0200 Subject: [ofa-general] OFED 1.3 beta status Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E13B@mtlexch01.mtl.com> As promised this is the status of OFED 1.3 beta and schedule: The rebase of the kernel code on 2.6.24 rc1 has not been completed yet. So far we have backport patches for mlx4, mthca and core. IPoIB is still in progress (it passes compilation on 2.6.23 only). We plan to complete all backports by next Monday morning. Thus the beta release is planned for Wednesday 7 November Tziporet Koren From Thomas.Talpey at netapp.com Thu Nov 1 09:55:19 2007 From: Thomas.Talpey at netapp.com (Talpey, Thomas) Date: Thu, 01 Nov 2007 12:55:19 -0400 Subject: [ofa-general] Fwd: [rddp] RFCs 5040-5045 and the RDDP WG In-Reply-To: <6.2.0.14.2.20071101092122.07abd258@esmail.cup.hp.com> References: <6.2.0.14.2.20071101092122.07abd258@esmail.cup.hp.com> Message-ID: At 12:21 PM 11/1/2007, Michael Krause wrote: >For those wondering about the status of the iWARP specifications. Also released yesterday were the iSER specification and iSCSI Datamover Architecture: http://www.rfc-editor.org/rfc/rfc5046.txt (iSER) http://www.rfc-editor.org/rfc/rfc5047.txt (iSCSI DA) Tom.
>Mike > > >>From: Black_David at emc.com >>Date: Thu, 1 Nov 2007 08:44:22 -0400 >>X-MS-Has-Attach: >>X-MS-TNEF-Correlator: >>Thread-Topic: RFCs 5040-5045 and the RDDP WG >>Thread-Index: AcgchN9Cfit3Vms/QnO5eI5esujgpg== >>Priority: Urgent >>To: >>X-OriginalArrivalTime: 01 Nov 2007 12:44:23.0089 (UTC) >> FILETIME=[EDCC0210:01C81C84] >>X-PMX-Version: 4.7.1.128075, Antispam-Engine: 2.5.1.298604, >> Antispam-Data: 2007.8.30.51425 >>X-PerlMx-Spam: Gauge=, SPAM=0%, Reason='EMC_BODY_1+ -3, EMC_FROM_0+ -3, >> PRIORITY_NO_NAME 0.716, NO_REAL_NAME 0, __C230066_P5 0, __CT 0, >> __CTE 0, __CT_TEXT_PLAIN 0, __HAS_MSGID 0, __HAS_X_PRIORITY 0, >> __IMS_MSGID 0, __MIME_TEXT_ONLY 0, __MIME_VERSION 0, >> __SANE_MSGID 0' >>X-Spam-Score: -4.0 (----) >>X-Scan-Signature: 0bc60ec82efc80c84b8d02f4b0e4de22 >>Cc: Black_David at emc.com >>Subject: [rddp] RFCs 5040-5045 and the RDDP WG >>X-BeenThere: rddp at ietf.org >>X-Mailman-Version: 2.1.5 >>List-Id: "IETF Remote Direct Data Placement \(rddp\) WG" >>List-Unsubscribe: < https://www1.ietf.org/mailman/listinfo/rddp>, >> < mailto:rddp-request at ietf.org?subject=unsubscribe> >>List-Post: < mailto:rddp at ietf.org> >>List-Help: < mailto:rddp-request at ietf.org?subject=help> >>List-Subscribe: < https://www1.ietf.org/mailman/listinfo/rddp>, >> < mailto:rddp-request at ietf.org?subject=subscribe> >>X-MIME-Autoconverted: from quoted-printable to 8bit by esmail.cup.hp.com id FAA05698 >> >>Everyone, >> >>With the publication of RFCs 5040-5045, the program of >>work of the RDDP Working Group has been completed. >> >>I would like to thank all the authors and WG members for >>their hard work and support in producing the RFCs for >>the RDDP protocols. This has been an adventure, and it's >>been my privilege to serve as the chair of this WG. >> >>There will be an announcement shortly that the RDDP Working >>Group is being closed because it has finished its program >>of work. 
The rddp at ietf.org mailing list will remain open >>and active for the time being - there has been very little >>activity on the list recently, and the list is like to be >>closed down around the end of this year if there is >>little to no activity. >> >>Congratulations to all and many thanks, >>--David >>---------------------------------------------------- >>David L. Black, Distinguished Engineer >>EMC Corporation, 176 South St., Hopkinton, MA 01748 >>+1 (508) 293-7953 FAX: +1 (508) 293-7786 >>black_david at emc.com Mobile: +1 (978) 394-7754 >>---------------------------------------------------- >> >> >> >> >>_______________________________________________ >>rddp mailing list >>rddp at ietf.org >>https://www1.ietf.org/mailman/listinfo/rddp >_______________________________________________ >general mailing list >general at lists.openfabrics.org >http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Thu Nov 1 10:04:56 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 01 Nov 2007 10:04:56 -0700 Subject: [ofa-general] opensm: Unsupported attribute = 0xFF02 In-Reply-To: <472A0347.3060807@ichips.intel.com> References: <200710301356.40137.kilian@stanford.edu> <1193778117.26246.325.camel@hrosenstock-ws.xsigo.com> <20071101002410.GD20136@sashak.voltaire.com> <20071101024131.GM2037@obsidianresearch.com> <20071101035648.GK20136@sashak.voltaire.com> <1193923440.26246.603.camel@hrosenstock-ws.xsigo.com> <472A0347.3060807@ichips.intel.com> Message-ID: <1193936696.26246.652.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-01 at 09:48 -0700, Sean Hefty wrote: > > As mentioned in the past, client reregistration is a rather large > > hammer. There have been discussions on utilizing this mechanism in more > > scenarios (which FWIW is not a good thing IMO). 
This approach (and it is > > optional) pushes the burden back on the end nodes rather than the SM. > > Scalability is certainly an issue with it. It was begrudgingly put into > > the spec. It was intended only as a stopgap measure. > > > > There was informative text put into the spec alluding to the > > "appropriate" use of this option: > > > > "A reason for the SM doing this might be that the SM suffered a failure > > and as a result lost its own records of such subscriptions." > > This is referring to a single SM (although that is not the recommended > > deployment topology) crashing and being restarted. > > > > IMO a civil SM would not rely on this mechanism. > > There's still the problem that the ULPs on the end-node do not know when > or if the data is lost. Such data is not supposed to be lost although this is left as an exercise to the reader. That is all beyond the spec value add currently. > IMO, making client reregistration mandatory Couldn't be done due to backwards compatibility guarantee of IBA. > would have been a better solution, allowing ULPs to only re-register on > that event. As it stands now, ULPs automatically reregister on SM LID > changes, port events, etc.. In order to avoid ULP re-registration, SM > failover has to bring along the LID. In general, it does. Most SMs do not change LIDs unless they absolutely have to. Using reregister for this (and some other point cases) would be fine but not in the general case of failover for all ports. > An alternate solution could have let an SM learn what it needed from the > end nodes through queries... Currently, SA queries are from the client (end node) to the SA; not the other way around. The only thing from the SA to the end node are reports. That could be changed if it really is needed. 
-- Hal > - Sean From rdreier at cisco.com Thu Nov 1 10:24:50 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 01 Nov 2007 10:24:50 -0700 Subject: [ofa-general] Re: [PATCH] mlx4: Add bad flow check when freeing in mlx4_buf_free (potential Oops) In-Reply-To: <200711010937.44339.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Thu, 1 Nov 2007 09:37:44 +0200") References: <200711010937.44339.jackm@dev.mellanox.co.il> Message-ID: Thanks, applied for 2.6.24. I assume based on the sign-off that Ali really wrote this patch. In general please preserve author information by adding a "From:" line at the beginning of the patch when forwarding a patch. From rdreier at cisco.com Thu Nov 1 10:31:25 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 01 Nov 2007 10:31:25 -0700 Subject: [ofa-general] [PATCH 2/2] IB/ipath - fix race with ACK retry timeout list management In-Reply-To: <20071031220829.22603.49034.stgit@eng-46.internal.keyresearch.com> (Arthur Jones's message of "Wed, 31 Oct 2007 15:08:29 -0700") References: <20071031220819.22603.19575.stgit@eng-46.internal.keyresearch.com> <20071031220829.22603.49034.stgit@eng-46.internal.keyresearch.com> Message-ID: thanks, applied 1-2 for 2.6.24. From rdreier at cisco.com Thu Nov 1 10:33:35 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 01 Nov 2007 10:33:35 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <4727C18F.3010509@linux.vnet.ibm.com> (Pradeep Satyanarayana's message of "Tue, 30 Oct 2007 16:43:11 -0700") References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> Message-ID: FWIW, I left netpipe-tcp running in a loop overnight over a connected mode IPoIB interface on a system running my for-2.6.25 tree (plus a hack to use the non-SRQ code on mlx4 by forcing create SRQ to fail). 
It ran with no problems (and transferred nearly a billion packets and 10 TB of data). - R. From meier3 at llnl.gov Thu Nov 1 10:53:22 2007 From: meier3 at llnl.gov (Timothy A. Meier) Date: Thu, 01 Nov 2007 10:53:22 -0700 Subject: [ofa-general] Re: [PATCH] opensm & osm_console: modified console framework to support multiple connections In-Reply-To: <20071101010044.GG20136@sashak.voltaire.com> References: <4713FD51.4010506@llnl.gov> <20071028010226.GN22317@sashak.voltaire.com> <47261CFF.1060206@llnl.gov> <20071101010044.GG20136@sashak.voltaire.com> Message-ID: <472A1292.6060500@llnl.gov> Sasha, I have some in-line comments, but the main "threading issue" discussion is near the end. Please advise. Sasha Khapyorsky wrote: > On 10:48 Mon 29 Oct , Timothy A. Meier wrote: > >> I apologize for the style and submission issues - still adjusting... >> > > No need to apologize :) > > >> I was troubled with breaking this into pieces. The patch is really about >> providing an abstract OSM Server that supports local/remote connections. >> >> I can break them up, but in my mind, they were tightly coupled. >> > > I think it could be broken at least to multiconnection support and the > rest abstractions. No need to split it now only for "split", just try > to make it in smaller patches in the next version of this. > > >>>> +/* TODO move along with other IO abstraction code */ >>>> +int cio_printf( CIO_t *cio, const char *format, ...); >>>> +int cio_flush( CIO_t *cio); >>>> +int cio_getline( char **lineptr, size_t *n, CIO_t *cio); >>>> +int cio_open( CIO_t *cio); >>>> +int cio_close( CIO_t *cio); >>>> +int cio_poll(CIO_t *cio, int timeout); >>>> >>>> >>> Later I see that all cio_* and CIO_* stuff is used only in >>> osm_console.c, then I think this all should be moved to this file, >>> local function should be static, etc.. >>> >>> >>> >> The intent of the CIO abstraction is to support connections to the OSM >> server. 
Currently, the only thing "planned" to use this connection is >> the interactive Console. That might not always be the case. >> > > Now it is the case. And if there are no concrete plans to use this APIs > externally I prefer to keep it local. > > Okay, well I quoted the "planned" because I/we (LLNL) have some ideas (not really plans) we would like to try that will use this abstraction. Keeping it local, until needed elsewhere is fine. >>>> +typedef struct _osm_console_thread_t >>>> +{ >>>> + int used; >>>> + unsigned short int port; >>>> + int authorized; >>>> + int state; >>>> + char name[CIO_INFO_SIZE]; >>>> + char in_buff[CIO_BUFSIZE]; >>>> + char out_buff[CIO_BUFSIZE]; >>>> + char client_type[CIO_NOTE_SIZE]; // maps to option->console >>>> (off|local|socket) >>>> + char client_ip[CIO_NOTE_SIZE]; >>>> + char client_hn[CIO_INFO_SIZE]; >>>> + unsigned int thread_num; // a unique ever increasing number + >>>> osm_opensm_t *p_osm; // the global opensm singleton (protect with >>>> lock) >>>> + CIO_t io; // the io streams for the connection >>>> + LoopCmd loop_command; >>>> + cl_thread_t consoleThread; // a specific thread each console >>>> connection >>>> + struct timeval connect_time; >>>> +} osm_console_thread_t; >>>> >>>> >>> I think this introduces CIO_MAX_CONNECTS new threads + for loop commands. >>> What about to do all in one thread - to use select() or poll() with >>> timeout on multiple file descriptors? This will "reserve" another CPUs >>> for running another OpenSM things. Another potential problem is multi >>> thread synchronizations - we had (and still have) a lot of issues in this >>> area. >>> >>> >>> >> I wasn't aware of thread synchronization issues.... >> >> You are correct, this potentially introduces 2*CIO_MAX_CONNECTS new threads. >> (Worst case, all connections are used, all running a loop command.) 
>> >> Currently, the only loop command is for printing status, but the software >> was designed to support any command you may want to put in a >> loop. If no additional commands will be "looped", then I agree its overkill >> to put this in its own thread. >> >> I think each connection/session should be in its own thread. >> > > Wouldn't poll() on multiple file descriptors (connected and listened > sockets) be simpler and more robust approach here? Why? > > See the thread/poll discussion below.. >> Currently those wrapper functions only provide a single implementation, but >> I intend to extend them with additional functionality when I add SSL/TSL. >> > > This is why I thought it would be clearer to see in a patch series.. > > Understood. Abstractions are kind of.... abstract. Its hard to see the justification for an abstraction layer without having at least two different implementations. I provide one. The second one will be SSL/TSL. I'd like to provide that after the new framework/abstractions are in place and working just as before. >> The new protocol will depend on new libraries/headers. We (LLNL) >> discussed this, and thought conditionally compiling this feature in would >> satisfy those folks who did not want to add this dependency if they did >> not want the feature. >> > > That should be fine. > > >> Thanks for reviewing all of this. How would you like me to move forward? >> Would you rather me (re)submit this Patch as a series of 2? >> > > I think we need to close threading issue first. Then patch series of 2 > looks fine for me. > > I really think the "thread-per-session" would be a more flexible and powerful design. Setting up and maintaining threads might seem more complex at first, but it makes servicing requests/commands much more simple because everything is in its own context. The previous Console used a polling mechanism, and I found an edge case condition which allowed one connection to block the other. 
Thread-per-session (or thread per connection) makes it difficult for one session to influence another. The number of threads/connections would be limited. Other than the normal multi-threading issues, are there other thread hazards in OFED/OpenSM that I need to be aware of? Your thoughts? >> I want to >> establish this as a working baseline (no new functionality, just more >> extensible) before adding the SSL/TLS code. >> > > Understood. Thanks for doing this! > > Sasha > > -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov From sashak at voltaire.com Thu Nov 1 11:11:38 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 20:11:38 +0200 Subject: [ofa-general] [PATCH] management: changed method_mask type in user_mad interface In-Reply-To: <472A000F.1040205@ichips.intel.com> References: <20071101062006.GL20136@sashak.voltaire.com> <472A000F.1040205@ichips.intel.com> Message-ID: <20071101181138.GD20136@sashak.voltaire.com> On 09:34 Thu 01 Nov , Sean Hefty wrote: > > The method_mask is array of longs now in all libibumad interfaces. > > Doesn't this break binary compatibility and force a new major release? Not really, the array still has the same size (16 bytes). And anyway I'm planning to bump libibumad version soon (due to this and other changes).
Sasha From sean.hefty at intel.com Thu Nov 1 11:30:38 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 1 Nov 2007 11:30:38 -0700 Subject: [ofa-general] [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com><4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com><47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> Message-ID: <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> Fix a couple of errors in the man page documentation and add infiniband specific text about QP configuration settings. This is in response to user questions about various settings based on feedback from Or. Signed-off-by: Sean Hefty --- I've tried to address all comments regarding missing or unclear documentation. I also added IB specific areas to the man pages in a few areas to clarify how calls are operating over IB. If anyone knows of any iWarp specific notes to add to the documentation, please let me know. I've added a comment to rdma_connect() indicating that the active side must send the first message. man/rdma_accept.3 | 25 +++++++++++++++---------- man/rdma_connect.3 | 23 +++++++++++++++++------ man/rdma_get_cm_event.3 | 4 ++-- man/rdma_resolve_addr.3 | 6 +++++- man/rdma_resolve_route.3 | 4 +++- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/man/rdma_accept.3 b/man/rdma_accept.3 index c0c12d8..f604c0e 100644 --- a/man/rdma_accept.3 +++ b/man/rdma_accept.3 @@ -1,4 +1,4 @@ -.TH "RDMA_ACCEPT" 3 "2007-05-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.TH "RDMA_ACCEPT" 3 "2007-10-31" "librdmacm" "Librdmacm Programmer's Manual" librdmacm .SH NAME rdma_accept \- Called to accept a connection request. 
.SH SYNOPSIS @@ -53,17 +53,16 @@ This value must be less than or equal to the local RDMA device attribute max_qp_init_rd_atom and the initiator_depth value reported in the connect request event. .IP flow_control -Specifies if hardware flow control should be used. Applies only to RDMA_PS_TCP. +Specifies if hardware flow control is available. This value is exchanged +with the remote peer and is not used to configure the QP. Applies only to +RDMA_PS_TCP. .IP retry_count -The maximum number of times that a data transfer operation should be retried -on the connection when an error occurs. This setting controls the number of -times to retry send, RDMA, and atomic operations when timeouts occur. -Applies only to RDMA_PS_TCP. +This value is ignored. .IP rnr_retry_count -The maximum number of times that a send operation should be retried on a -connection after receiving a receiver not ready (RNR) error. RNR errors are -generated when a send request arrives before a buffer has been posted to -receive the incoming data. Applies only to RDMA_PS_TCP. +The maximum number of times that a send operation from the remote peer +should be retried on a connection after receiving a receiver not ready (RNR) +error. RNR errors are generated when a send request arrives before a buffer +has been posted to receive the incoming data. Applies only to RDMA_PS_TCP. .IP srq Specifies if the QP associated with the connection is using a shared receive queue. This field is ignored by the library if a QP has been created on the @@ -71,5 +70,11 @@ rdma_cm_id. Applies only to RDMA_PS_TCP. .IP qp_num Specifies the QP number associated with the connection. This field is ignored by the library if a QP has been created on the rdma_cm_id. +.SH "INFINIBAND SPECIFIC" +In addition to the connection properties defined above, InfiniBand QPs are +configured with minimum RNR NAK timer and local ACK timeout values. The +minimum RNR NAK timer value is set to 0, for the maximum delay of 655 ms. 
+The local ACK timeout is calculated based on the packet lifetime and local +HCA ACK delay. .SH "SEE ALSO" rdma_listen(3), rdma_reject(3), rdma_get_cm_event(3) diff --git a/man/rdma_connect.3 b/man/rdma_connect.3 index 71d5594..c27a3f4 100644 --- a/man/rdma_connect.3 +++ b/man/rdma_connect.3 @@ -1,4 +1,4 @@ -.TH "RDMA_CONNECT" 3 "2007-05-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.TH "RDMA_CONNECT" 3 "2007-10-31" "librdmacm" "Librdmacm Programmer's Manual" librdmacm .SH NAME rdma_connect \- Initiate an active connection request. .SH SYNOPSIS @@ -44,17 +44,19 @@ This value must be less than or equal to the local RDMA device attribute max_qp_init_rd_atom and remote RDMA device attribute max_qp_rd_atom. The remote endpoint can adjust this value when accepting the connection. .IP flow_control -Specifies if hardware flow control should be used. Applies only to RDMA_PS_TCP. +Specifies if hardware flow control is available. This value is exchanged +with the remote peer and is not used to configure the QP. Applies only to +RDMA_PS_TCP. .IP retry_count The maximum number of times that a data transfer operation should be retried on the connection when an error occurs. This setting controls the number of times to retry send, RDMA, and atomic operations when timeouts occur. Applies only to RDMA_PS_TCP. .IP rnr_retry_count -The maximum number of times that a send operation should be retried on a -connection after receiving a receiver not ready (RNR) error. RNR errors are -generated when a send request arrives before a buffer has been posted to -receive the incoming data. Applies only to RDMA_PS_TCP. +The maximum number of times that a send operation from the remote peer +should be retried on a connection after receiving a receiver not ready (RNR) +error. RNR errors are generated when a send request arrives before a buffer +has been posted to receive the incoming data. Applies only to RDMA_PS_TCP. 
.IP srq Specifies if the QP associated with the connection is using a shared receive queue. This field is ignored by the library if a QP has been created on the @@ -63,6 +65,15 @@ rdma_cm_id. Applies only to RDMA_PS_TCP. Specifies the QP number associated with the connection. This field is ignored by the library if a QP has been created on the rdma_cm_id. Applies only to RDMA_PS_TCP. +.SH "INFINIBAND SPECIFIC" +In addition to the connection properties defined above, InfiniBand QPs are +configured with minimum RNR NAK timer and local ACK timeout values. The +minimum RNR NAK timer value is set to 0, for the maximum delay of 655 ms. +The local ACK timeout is calculated based on the packet lifetime and local +HCA ACK delay. +.SH "IWARP SPECIFIC" +Connections established over iWarp RDMA devices currently require that the +active side of the connection send the first message. .SH "SEE ALSO" rdma_cm(7), rdma_create_id(3), rdma_resolve_route(3), rdma_disconnect(3), rdma_listen(3), rdma_get_cm_event(3) diff --git a/man/rdma_get_cm_event.3 b/man/rdma_get_cm_event.3 index 987ead5..1ba8797 100644 --- a/man/rdma_get_cm_event.3 +++ b/man/rdma_get_cm_event.3 @@ -1,4 +1,4 @@ -.TH "RDMA_GET_CM_EVENT" 3 "2007-05-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.TH "RDMA_GET_CM_EVENT" 3 "2007-10-31" "librdmacm" "Librdmacm Programmer's Manual" librdmacm .SH NAME rdma_get_cm_event \- Retrieves the next pending communication event. .SH SYNOPSIS @@ -91,7 +91,7 @@ that the recipient may have outstanding. This field matches the responder resources specified by the remote node when calling rdma_connect and rdma_accept. .IP "flow_control" 12 -Indicates if hardware level flow control is provided. +Indicates if hardware level flow control is provided by the sender. .IP "retry_count" 12 For RDMA_CM_EVENT_CONNECT_REQUEST events only, indicates the number of times that the recipient should retry send operations. 
diff --git a/man/rdma_resolve_addr.3 b/man/rdma_resolve_addr.3 index 11d737d..bbba230 100644 --- a/man/rdma_resolve_addr.3 +++ b/man/rdma_resolve_addr.3 @@ -1,4 +1,4 @@ -.TH "RDMA_RESOLVE_ADDR" 3 "2007-05-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.TH "RDMA_RESOLVE_ADDR" 3 "2007-10-31" "librdmacm" "Librdmacm Programmer's Manual" librdmacm .SH NAME rdma_resolve_addr \- Resolve destination and optional source addresses. .SH SYNOPSIS @@ -31,6 +31,10 @@ then the rdma_cm_id will be bound to a source address based on the local routing tables. After this call, the rdma_cm_id will be bound to an RDMA device. This call is typically made from the active side of a connection before calling rdma_resolve_route and rdma_connect. +.SH "INFINIBAND SPECIFIC" +This call maps the destination and, if given, source IP addresses to GIDs. +In order to perform the mapping, IPoIB must be running on both the local +and remote nodes. .SH "SEE ALSO" rdma_create_id(3), rdma_resolve_route(3), rdma_connect(3), rdma_create_qp(3), rdma_get_cm_event(3), rdma_bind_addr(3), rdma_get_src_port(3), diff --git a/man/rdma_resolve_route.3 b/man/rdma_resolve_route.3 index 8fb1826..ac1b3bc 100644 --- a/man/rdma_resolve_route.3 +++ b/man/rdma_resolve_route.3 @@ -1,4 +1,4 @@ -.TH "RDMA_RESOLVE_ROUTE" 3 "2007-05-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.TH "RDMA_RESOLVE_ROUTE" 3 "2007-10-31" "librdmacm" "Librdmacm Programmer's Manual" librdmacm .SH NAME rdma_resolve_route \- Resolve the route information needed to establish a connection. .SH SYNOPSIS @@ -19,5 +19,7 @@ by calling rdma_resolve_addr. .SH "NOTES" This is called on the client side of a connection after calling rdma_resolve_addr, but before calling rdma_connect. +.SH "INFINIBAND SPECIFIC" +This call obtains a path record that is used by the connection. 
.SH "SEE ALSO" rdma_resolve_addr(3), rdma_connect(3), rdma_get_cm_event(3) From sashak at voltaire.com Thu Nov 1 12:30:37 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 1 Nov 2007 21:30:37 +0200 Subject: [ofa-general] Re: [PATCH] opensm & osm_console: modified console framework to support multiple connections In-Reply-To: <472A1292.6060500@llnl.gov> References: <4713FD51.4010506@llnl.gov> <20071028010226.GN22317@sashak.voltaire.com> <47261CFF.1060206@llnl.gov> <20071101010044.GG20136@sashak.voltaire.com> <472A1292.6060500@llnl.gov> Message-ID: <20071101193037.GG20136@sashak.voltaire.com> Hi Tim, On 10:53 Thu 01 Nov , Timothy A. Meier wrote: > > > > I think we need to close threading issue first. Then patch series of 2 > > looks fine for me. > > > > > I really think the "thread-per-session" would be a more flexible and > powerful > design. Setting up and maintaining threads might seem more complex at first, > but it makes servicing requests/commands much more simple because everything > is > in its own context. And require proper locking, thread termination handling, etc.. Which is not always easy even with full featured pthread library, and especially hard with limited cl_thread*() primitives... I didn't analyze submitted code in this aspect - just tried to save the time... :) > The previous Console used a polling mechanism, and I found an edge case > condition > which allowed one connection to block the other. How? There is no "blocking" commands? Right? > Thread-per-session (or > thread > per connection) makes it difficult for one session to influence another. > > The number of threads/connections would be limited. Other than the normal > multi-threading issues, are there other thread hazards in OFED/OpenSM that I > need to be aware of? Another reason is to not get too much cpus from another OpenSM threads (which mainly are responsible for IB MADs processing). 
Sasha From sean.hefty at intel.com Thu Nov 1 14:57:47 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 1 Nov 2007 14:57:47 -0700 Subject: [ofa-general] iWARP issues In-Reply-To: References: Message-ID: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> >- iWARP + TCP host stack port space sharing (required by IETF iSER >spec.) I don't think we can accomplish much without participation from the network maintainers, which I'm guessing will go something like this: "No - end of discussion". (This is the polite version.) >- RDMA connection timeout; expand RDMA_CM API to support timeout >parameter (not iWARP specific) I'm working on adding a timeout to rdma_connect() now. I'm trying to decide if there should be a single timeout parameter, or timeout and retry values. Does anyone know the details regarding the TCP connection retry algorithm in Linux? (time between retries, number of retries, etc.) - Sean From meier3 at llnl.gov Thu Nov 1 15:06:54 2007 From: meier3 at llnl.gov (Timothy A. Meier) Date: Thu, 01 Nov 2007 15:06:54 -0700 Subject: [ofa-general] Re: [PATCH] opensm & osm_console: modified console framework to support multiple connections In-Reply-To: <20071101193037.GG20136@sashak.voltaire.com> References: <4713FD51.4010506@llnl.gov> <20071028010226.GN22317@sashak.voltaire.com> <47261CFF.1060206@llnl.gov> <20071101010044.GG20136@sashak.voltaire.com> <472A1292.6060500@llnl.gov> <20071101193037.GG20136@sashak.voltaire.com> Message-ID: <472A4DFE.3040906@llnl.gov> Sasha Khapyorsky wrote: > Hi Tim, > > On 10:53 Thu 01 Nov , Timothy A. Meier wrote: > >>> I think we need to close threading issue first. Then patch series of 2 >>> looks fine for me. >>> >>> >>> >> I really think the "thread-per-session" would be a more flexible and >> powerful >> design. Setting up and maintaining threads might seem more complex at first, >> but it makes servicing requests/commands much more simple because everything >> is >> in its own context.
>> > > And require proper locking, thread termination handling, etc.. Which is > not always easy even with full featured pthread library, and especially > hard with limited cl_thread*() primitives... I didn't analyze submitted > code in this aspect - just tried to save the time... :) > > Understood. I will be careful, I promise. ;^) [Fortunately I have other source code to examine.] >> The previous Console used a polling mechanism, and I found an edge case >> condition >> which allowed one connection to block the other. >> > > How? There is no "blocking" commands? Right? > > You are right, no blocking "commands" (this is a requirement). As I said, it is sort of an edge case, but it illustrates the vulnerability of connections in a single threaded model interfering with each other (not intentional of course). osm_console.c: handle_osm_connection() method when a second connection is attempted (and successfully made within the same thread) a blocking io call is used (getline()) to query the user about killing the other connection. If this goes unanswered, the original connection is blocked. Fundamentally, using a thread-per-session design formalizes the need to keep session specific data/resources separate. In a single threaded design, it would be more difficult to implement and enforce this policy. >> Thread-per-session (or >> thread >> per connection) makes it difficult for one session to influence another. >> >> The number of threads/connections would be limited. Other than the normal >> multi-threading issues, are there other thread hazards in OFED/OpenSM that I >> need to be aware of? >> > > Another reason is to not get too much cpus from another OpenSM threads > (which mainly are responsible for IB MADs processing). > > I agree. The maximum # of connections is a #define and currently set to 5 (4 useful, and 1 just to provide feedback that no more connections are available). 
There will be a couple of connection management commands (something like "usage" and "kill", etc..) to make sure sessions don't get out of control. In general, I don't expect much of a CPU load, or any additional burden on OpenSM. In any case, a single-threaded, multi-connection design would also need to be sensitive to the CPU load it places on the system. > Sasha > > Finally, a multi-threaded design would remove the non-blocking io restriction/requirement. Individual threads would be free to block (if they choose) and would only block themselves. This would allow for some more rich (multi-input) commands. Specifically, I need something like this to accept ACC/PW info when authenticating/authorizing over SSL/TSL. Other concerns? Convinced yet? -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov From rdreier at cisco.com Thu Nov 1 15:26:56 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 01 Nov 2007 15:26:56 -0700 Subject: [ofa-general] iWARP issues In-Reply-To: (Arkady Kanevsky's message of "Thu, 1 Nov 2007 09:34:51 -0400") References: Message-ID: > - iWARP Support for Peer-to-Peer Applications, this is CM > interoperability issue I guess the relevant people will be there from the RNIC vendors, but this seems more like an IETF/rdma consortium issue to me. > - iWARP + TCP host stack port space sharing (required by IETF iSER > spec.) Maybe we can try to brainstorm for alternate solutions, but it seems we are missing all the key stakeholders from the Linux networking side to really resolve this. > - missing verbs (IB-only, iWARP-only, and iWARP-nonstandard), > for example FMR, send with invalidate. (I recall that complete list was > flash out > a year ago) > (ULP changes to take advantage of these verbs: e.g. NFS-RDMA, iSER). I haven't seen any discussion of this yet. Is it really worth taking up face-to-face time on this? 
It seems like it should all be pretty straightforward, and even if it isn't, we haven't spent the time to figure out what the not straightforward parts are yet. > - RDMA connection timeout; expand RDMA_CM API to support timeout > parameter (not iWARP specific) Again, I haven't seen any discussion yet, so it doesn't seem worth taking up face-to-face-time until we know what the sticking points are. - R. From pradeeps at linux.vnet.ibm.com Thu Nov 1 15:29:37 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Thu, 01 Nov 2007 15:29:37 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> Message-ID: <472A5351.5020106@linux.vnet.ibm.com> Roland Dreier wrote: > FWIW, I left netpipe-tcp running in a loop overnight over a connected > mode IPoIB interface on a system running my for-2.6.25 tree (plus a > hack to use the non-SRQ code on mlx4 by forcing create SRQ to fail). > It ran with no problems (and transferred nearly a billion packets and > 10 TB of data). > Yes, it definitely seems much better with the for-2.6.25 tree and it all seems to go off well. Except for one crash in cache_alloc_refill() all of the other test runs have completed. BTW, I have been using SLAB thus far. I will switch to SLUB and see if that makes any difference. And thanks for testing it out on mlx4. Pradeep From quackiest at guiasantander.com Thu Nov 1 16:14:43 2007 From: quackiest at guiasantander.com (Dannie Lane) Date: Thu, 01 Nov 2007 18:14:43 -0500 Subject: [ofa-general] Ado6e Photoshop CS3 & Acro6at 8 Pro, New for Vista/XP 79$ Save 1999.95$ 0ff Retai| Message-ID: <000001c81cdc$5edd3f80$0100007f@localhost> newadobedeals . com
com From sashak at voltaire.com Thu Nov 1 16:49:56 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 2 Nov 2007 01:49:56 +0200 Subject: [ofa-general] Re: [PATCH] opensm & osm_console: modified console framework to support multiple connections In-Reply-To: <472A4DFE.3040906@llnl.gov> References: <4713FD51.4010506@llnl.gov> <20071028010226.GN22317@sashak.voltaire.com> <47261CFF.1060206@llnl.gov> <20071101010044.GG20136@sashak.voltaire.com> <472A1292.6060500@llnl.gov> <20071101193037.GG20136@sashak.voltaire.com> <472A4DFE.3040906@llnl.gov> Message-ID: <20071101234956.GI20136@sashak.voltaire.com> On 15:06 Thu 01 Nov , Timothy A. Meier wrote: > Sasha Khapyorsky wrote: > > Hi Tim, > > > > On 10:53 Thu 01 Nov , Timothy A. Meier wrote: > > > >>> I think we need to close threading issue first. Then patch series of 2 > >>> looks fine for me. > >>> > >>> > >> I really think the "thread-per-session" would be a more flexible and > >> powerful > >> design. Setting up and maintaining threads might seem more complex at > >> first, > >> but it makes servicing requests/commands much more simple because > >> everything is > >> in its own context. > >> > > > > And require proper locking, thread termination handling, etc.. Which is > > not always easy even with full featured pthread library, and especially > > hard with limited cl_thread*() primitives... I didn't analyze submitted > > code in this aspect - just tried to save the time... :) > > > > > Understood. I will be careful, I promise. ;^) > > [Fortunately I have other source code to examine.] > >> The previous Console used a polling mechanism, and I found an edge case > >> condition > >> which allowed one connection to block the other. > >> > > > > How? There is no "blocking" commands? Right? > > > > > You are right, no blocking "commands" (this is a requirement). 
> > As I said, it is sort of an edge case, but it illustrates the vulnerability > of connections in a single threaded model interfering with each other > (not intentional of course). > > osm_console.c: handle_osm_connection() method > > when a second connection is attempted (and successfully made within the > same thread) a blocking io call is used (getline()) to query the user > about killing the other connection. If this goes unanswered, the original > connection is blocked. Seems like usage bug. This should not be used without select() or poll(). > Fundamentally, using a thread-per-session design formalizes the need > to keep session specific data/resources separate. In a single threaded > design, it would be more difficult to implement and enforce this policy. It is just matter of design style - at least personally I don't need such enforcements. > >> Thread-per-session (or thread > >> per connection) makes it difficult for one session to influence another. > >> > >> The number of threads/connections would be limited. Other than the normal > >> multi-threading issues, are there other thread hazards in OFED/OpenSM > >> that I > >> need to be aware of? > >> > > > > Another reason is to not get too much cpus from another OpenSM threads > > (which mainly are responsible for IB MADs processing). > > > > > I agree. The maximum # of connections is a #define and currently set to 5 > (4 useful, and 1 just to provide feedback that no more connections are > available). > > There will be a couple of connection management commands (something like > "usage" > and "kill", etc..) to make sure sessions don't get out of control. In > general, > I don't expect much of a CPU load, or any additional burden on OpenSM. > > In any case, a single-threaded, multi-connection design would also need to > be > sensitive to the CPU load it places on the system. Single thread cannot load more than 1 CPU on typically multiprocessors machine. 
> > Sasha > > > > > Finally, a multi-threaded design would remove the non-blocking io > restriction/requirement. Individual threads would be free to block > (if they choose) and would only block themselves. This would allow > for some more rich (multi-input) commands. Specifically, I need > something like this to accept ACC/PW info when authenticating/authorizing > over SSL/TLS. > > Other concerns? Convinced yet? I am not really convinced, and I feel you are not either. I think it is better to start from something now - it will be possible to rework things later. Sasha From weiny2 at llnl.gov Thu Nov 1 20:14:48 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:14:48 -0700 Subject: [ofa-general] [PATCH 0/7] Add Node Name Map support to opensm Message-ID: <20071101201448.7317825b.weiny2@llnl.gov> Sasha and I discussed the switch map support patch series and I have changed the implementation. Here is another series which adds "node name" map support to both the diags as well as opensm. This new functionality allows for any node descriptor to be renamed based on a "node name map" file. The final implementation uses the qmap data structure which should be quick enough for large map files. The patches are as follows: 0001 - use lookup_switch_name for all node types. 0002 - Change switch map function names to reflect the new functionality of a simple node name map. 0003 - Move nodenamemap out of infiniband-diags into libosmcomp 0004 - Change node name map implementation to use qmap in memory storage 0005 - infiniband-diags/src/smpquery.c : special case situation where nodename len is >= 32 0006 - Add node-name-map support to OpenSM; using the "default" map. 0007 - Add node_name_map_name to opts file. Patch number 5 can and should be applied on its own. It fixes a bug found during testing. 
Patches to follow, Ira From weiny2 at llnl.gov Thu Nov 1 20:14:56 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:14:56 -0700 Subject: [ofa-general] [PATCH 1/7] use lookup_switch_name for all node types. Message-ID: <20071101201456.410c7d65.weiny2@llnl.gov> >From f451c7b352b44747d1eb6f4627109a3f12b757e6 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 31 Oct 2007 16:22:12 -0700 Subject: [PATCH] use lookup_switch_name for all node types. Signed-off-by: Ira K. Weiny --- infiniband-diags/src/ibnetdiscover.c | 39 +++++++++++---------------------- infiniband-diags/src/ibtracert.c | 24 +++++--------------- infiniband-diags/src/saquery.c | 10 +++----- infiniband-diags/src/smpquery.c | 6 +--- 4 files changed, 25 insertions(+), 54 deletions(-) diff --git a/infiniband-diags/src/ibnetdiscover.c b/infiniband-diags/src/ibnetdiscover.c index e627e84..fd824c0 100644 --- a/infiniband-diags/src/ibnetdiscover.c +++ b/infiniband-diags/src/ibnetdiscover.c @@ -459,13 +459,9 @@ void list_node(Node *node) { char *node_type; - char *nodename = NULL; - - if (node->type == SWITCH_NODE) - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, + char *nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); - else - nodename = clean_nodedesc(node->nodedesc); + switch(node->type) { case SWITCH_NODE: node_type = "Switch"; @@ -485,8 +481,7 @@ list_node(Node *node) node->nodeguid, node->numports, node->devid, node->vendid, nodename); - if (nodename && (node->type == SWITCH_NODE)) - free(nodename); + free(nodename); } void @@ -541,18 +536,16 @@ out_switch(Node *node, int group, char *chname) fprintf(f, "%d Chip %d", node->chrecord->slotnum, node->chrecord->anafanum); } - if (node->type == SWITCH_NODE) - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, + nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); - else - nodename = clean_nodedesc(node->nodedesc); + fprintf(f, "\nSwitch\t%d %s\t\t# \"%s\" %s port 0 
lid %d lmc %d\n", node->numports, node_name(node), nodename, node->smaenhsp0 ? "enhanced" : "base", node->smalid, node->smalmc); - if (nodename && (node->type == SWITCH_NODE)) - free(nodename); + + free(nodename); } void @@ -612,12 +605,9 @@ out_switch_port(Port *port, int group) if (ext_port_str) fprintf(f, "%s", ext_port_str); - if (port->remoteport->node->type == SWITCH_NODE) - rem_nodename = lookup_switch_name(switch_map_fp, + rem_nodename = lookup_switch_name(switch_map_fp, port->remoteport->node->nodeguid, port->remoteport->node->nodedesc); - else - rem_nodename = clean_nodedesc(port->remoteport->node->nodedesc); ext_port_str = out_ext_port(port->remoteport, group); fprintf(f, "\t%s[%d]%s", @@ -638,8 +628,7 @@ out_switch_port(Port *port, int group) fprintf(f, " (scp)"); fprintf(f, "\n"); - if (rem_nodename && (port->remoteport->node->type == SWITCH_NODE)) - free(rem_nodename); + free(rem_nodename); } void @@ -660,19 +649,17 @@ out_ca_port(Port *port, int group) if (port->remoteport->node->type != SWITCH_NODE) fprintf(f, " (%" PRIx64 ") ", port->remoteport->portguid); - if (port->remoteport->node->type == SWITCH_NODE) - rem_nodename = lookup_switch_name(switch_map_fp, + rem_nodename = lookup_switch_name(switch_map_fp, port->remoteport->node->nodeguid, port->remoteport->node->nodedesc); - else - rem_nodename = clean_nodedesc(port->remoteport->node->nodedesc); + fprintf(f, "\t\t# lid %d lmc %d \"%s\" lid %d %s%s\n", port->lid, port->lmc, rem_nodename, port->remoteport->node->type == SWITCH_NODE ? 
port->remoteport->node->smalid : port->remoteport->lid, get_linkwidth_str(port->linkwidth), get_linkspeed_str(port->linkspeed)); - if (rem_nodename && (port->remoteport->node->type == SWITCH_NODE)) - free(rem_nodename); + + free(rem_nodename); } int diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c index e553f4f..5cf8043 100644 --- a/infiniband-diags/src/ibtracert.c +++ b/infiniband-diags/src/ibtracert.c @@ -204,10 +204,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) return; } - if (node->type == IB_NODE_SWITCH) - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); - else - nodename = clean_nodedesc(node->nodedesc); + nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", prompt, @@ -216,8 +213,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) port->lid, port->lid + (1 << port->lmc) - 1, nodename); - if (nodename && (node->type == IB_NODE_SWITCH)) - free(nodename); + free(nodename); } static void @@ -228,10 +224,7 @@ dump_route(int dump, Node *node, int outport, Port *port) if (!dump && !verbose) return; - if (node->type == IB_NODE_SWITCH) - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); - else - nodename = clean_nodedesc(node->nodedesc); + nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); if (dump == 1) fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", @@ -244,8 +237,7 @@ dump_route(int dump, Node *node, int outport, Port *port) port->lid, port->lid + (1 << port->lmc) - 1, nodename); - if (nodename && (node->type == IB_NODE_SWITCH)) - free(nodename); + free(nodename); } static int @@ -644,10 +636,7 @@ dump_mcpath(Node *node, int dumplevel) if (node->upnode) dump_mcpath(node->upnode, dumplevel); - if (node->type == IB_NODE_SWITCH) - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); - else - nodename 
= clean_nodedesc(node->nodedesc); + nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); if (!node->dist) { printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", @@ -681,8 +670,7 @@ dump_mcpath(Node *node, int dumplevel) nodename); free_name: - if (nodename && (node->type == IB_NODE_SWITCH)) - free(nodename); + free(nodename); } static void diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c index e17ec5a..dfcc61a 100644 --- a/infiniband-diags/src/saquery.c +++ b/infiniband-diags/src/saquery.c @@ -136,13 +136,11 @@ print_node_record(ib_node_record_t *node_record) return; case NAME_OF_LID: case NAME_OF_GUID: - if (p_ni->node_type == IB_NODE_TYPE_SWITCH) - name = lookup_switch_name(switch_map_fp, - cl_ntoh64(p_ni->node_guid), - (char *)p_nd->description); - else - name = clean_nodedesc((char *)p_nd->description); + name = lookup_switch_name(switch_map_fp, + cl_ntoh64(p_ni->node_guid), + (char *)p_nd->description); printf("%s\n", name); + free(name); return; case ALL: default: diff --git a/infiniband-diags/src/smpquery.c b/infiniband-diags/src/smpquery.c index 73e880b..7d09e35 100644 --- a/infiniband-diags/src/smpquery.c +++ b/infiniband-diags/src/smpquery.c @@ -107,10 +107,7 @@ node_desc(ib_portid_t *dest, char **argv, int argc) if (!smp_query(nd, dest, IB_ATTR_NODE_DESC, 0, 0)) return "node desc query failed"; - if (node_type == IB_NODE_SWITCH) - nodename = lookup_switch_name(switch_map_fp, node_guid, nd); - else - nodename = clean_nodedesc(nd); + nodename = lookup_switch_name(switch_map_fp, node_guid, nd); l = strlen(nodename); if (l < 32) { @@ -119,6 +116,7 @@ node_desc(ib_portid_t *dest, char **argv, int argc) } printf("Node Description:%s%s\n", dots, nodename); + free(nodename); return 0; } -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0001-use-lookup_switch_name-for-all-node-types.patch Type: application/octet-stream Size: 7415 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 1 20:15:02 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:15:02 -0700 Subject: [ofa-general] [PATCH 2/7] Change switch map function names to reflect the new functionality of a simple node name map. Message-ID: <20071101201502.6d393646.weiny2@llnl.gov> >From b4f6bbea815aaa91837d464f882d30405ffe9d98 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 31 Oct 2007 16:34:44 -0700 Subject: [PATCH] Change switch map function names to reflect the new functionality of a simple node name map. Signed-off-by: Ira K. Weiny --- infiniband-diags/configure.in | 30 ++++++++++++++-------------- infiniband-diags/include/ibdiag_common.h | 10 ++++---- infiniband-diags/infiniband-diags.spec.in | 9 +++++-- infiniband-diags/man/ibnetdiscover.8 | 12 +++++----- infiniband-diags/man/ibtracert.8 | 10 +++++--- infiniband-diags/man/saquery.8 | 8 +++--- infiniband-diags/man/smpquery.8 | 9 +++++++- infiniband-diags/src/ibdiag_common.c | 24 +++++++++++----------- infiniband-diags/src/ibnetdiscover.c | 24 +++++++++++----------- infiniband-diags/src/ibtracert.c | 20 +++++++++--------- infiniband-diags/src/saquery.c | 16 +++++++------- infiniband-diags/src/smpquery.c | 16 +++++++------- 12 files changed, 100 insertions(+), 88 deletions(-) diff --git a/infiniband-diags/configure.in b/infiniband-diags/configure.in index 95c7b34..0a5f3c8 100644 --- a/infiniband-diags/configure.in +++ b/infiniband-diags/configure.in @@ -72,30 +72,30 @@ AC_CHECK_FUNCS([strchr strrchr strtol strtoul memset]) dnl Checks for typedefs, structures, and compiler characteristics. 
AC_C_CONST -dnl Check for the specification of a default switch map file -AC_MSG_CHECKING(for --with-switch-map ) -AC_ARG_WITH(switch-map, - AC_HELP_STRING([--with-switch-map=file], - [define a default switch map file]), +dnl Check for the specification of a default node name map file +AC_MSG_CHECKING(for --with-node-name-map ) +AC_ARG_WITH(node-name-map, + AC_HELP_STRING([--with-node-name-map=file], + [define a default node name map file]), [ case "$withval" in no) ;; *) - withswitchmap=yes - SWITCHMAPFILE=$withval + withnodenamemap=yes + NODENAMEMAPFILE=$withval ;; esac ] ) -AC_MSG_RESULT(${withswitchmap=no}) +AC_MSG_RESULT(${withnodenamemap=no}) -if test $withswitchmap = "yes"; then - SWITCHMAP_TMP1="`eval echo ${sysconfdir}/$SWITCHMAPFILE`" - SWITCHMAP_TMP2="`echo $SWITCHMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" - SWITCHMAP="`eval echo $SWITCHMAP_TMP2`" +if test $withnodenamemap = "yes"; then + NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" + NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" + NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" - AC_DEFINE_UNQUOTED(HAVE_DEFAULT_SWITCH_MAP, - ["$SWITCHMAP"], - [Define a default switch map file]) + AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, + ["$NODENAMEMAP"], + [Define a default node name map file]) fi dnl Check for perl and perl install location diff --git a/infiniband-diags/include/ibdiag_common.h b/infiniband-diags/include/ibdiag_common.h index 159e929..55df3fe 100644 --- a/infiniband-diags/include/ibdiag_common.h +++ b/infiniband-diags/include/ibdiag_common.h @@ -46,12 +46,12 @@ extern int ibdebug; /*========================================================*/ /** - * Switch map interface. - * It is OK to pass NULL for the switch_map[_fp] parameters. + * Node name map interface. + * It is OK to pass NULL for the node_name_map[_fp] parameters. 
*/ -FILE *open_switch_map(char *switch_map); -void close_switch_map(FILE *switch_map_fp); -char *lookup_switch_name(FILE *switch_map_fp, uint64_t target_guid, +FILE *open_node_name_map(char *node_name_map); +void close_node_name_map(FILE *node_name_map_fp); +char *remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, char *nodedesc); /* NOTE: parameter "nodedesc" may be modified here. */ diff --git a/infiniband-diags/infiniband-diags.spec.in b/infiniband-diags/infiniband-diags.spec.in index f4a08ab..75880c0 100644 --- a/infiniband-diags/infiniband-diags.spec.in +++ b/infiniband-diags/infiniband-diags.spec.in @@ -21,12 +21,12 @@ diagnose an IB subnet. %prep %setup -q -%if %{?_with_switch_map:1}%{!?_with_switch_map:0} -%define _enable_switch_map --with-switch-map%{?_with_switch_map} +%if %{?_with_node_name_map:1}%{!?_with_node_name_map:0} +%define _enable_node_name_map --with-node-name-map%{?_with_node_name_map} %endif %build -%configure %{?_enable_switch_map} +%configure %{?_enable_node_name_map} make %install @@ -55,6 +55,9 @@ rm -rf $RPM_BUILD_ROOT %doc README ChangeLog %changelog +* Wed Oct 31 2007 Ira Weiny - 1.3.2 +- Change switch-map option to node-name-map + * Thu Aug 9 2007 Ira Weiny - 1.3.1 - Change set_mthca_nodedesc.sh to set_nodedesc.sh diff --git a/infiniband-diags/man/ibnetdiscover.8 b/infiniband-diags/man/ibnetdiscover.8 index 03303be..9099cf3 100644 --- a/infiniband-diags/man/ibnetdiscover.8 +++ b/infiniband-diags/man/ibnetdiscover.8 @@ -5,7 +5,7 @@ ibnetdiscover \- discover InfiniBand topology .SH SYNOPSIS .B ibnetdiscover -[\-d(ebug)] [\-e(rr_show)] [\-v(erbose)] [\-s(how)] [\-l(ist)] [\-g(rouping)] [\-H(ca_list)] [\-S(witch_list)] [\-R(outer_list)] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] [\--switch-map ] [\-h(elp)] [] +[\-d(ebug)] [\-e(rr_show)] [\-v(erbose)] [\-s(how)] [\-l(ist)] [\-g(rouping)] [\-H(ca_list)] [\-S(witch_list)] [\-R(outer_list)] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] 
[\--node-name-map ] [\-h(elp)] [] .SH DESCRIPTION .PP @@ -39,8 +39,8 @@ List of connected routers \fB\-s\fR, \fB\-\-show\fR Show more information .TP -\fB\-\-switch\-map\fR -Specify a switch map. The switch map file maps GUIDs to more user friendly +\fB\-\-node\-name\-map\fR +Specify a node name map. The node name map file maps GUIDs to more user friendly names. See file format below. .SH COMMON OPTIONS @@ -180,8 +180,8 @@ displayed as "Non-Chassis Nodes". External ports are also shown on the connectivity lines. -.SH SWITCH MAP FILE FORMAT -The switch map is used to specify a user friendly name for switches in the +.SH NODE NAME MAP FILE FORMAT +The node name map is used to specify user friendly names for nodes in the output. GUIDs are used to perform the lookup. .TP @@ -222,7 +222,7 @@ output. GUIDs are used to perform the lookup. 0x0008f10400400e32 "IB1 (Rack 11 spine 2 ) ISR9288 Voltaire sFB-12D" .br .br -# GUID Switch Name +# GUID Node Name .br 0x0008f10400411a08 "SW1 (Rack 3) ISR9024 Voltaire 9024D" .br diff --git a/infiniband-diags/man/ibtracert.8 b/infiniband-diags/man/ibtracert.8 index 214b09b..32109fe 100644 --- a/infiniband-diags/man/ibtracert.8 +++ b/infiniband-diags/man/ibtracert.8 @@ -5,7 +5,9 @@ ibtracert\- trace InfiniBand path .SH SYNOPSIS .B ibtracert -[\-d(ebug)] [-v(erbose)] [\-D(irect)] [\-G(uids)] [-n(o_info)] [-m mlid] [-s smlid] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] [\-\-switch\-map ] [\-h(elp)] [ [ []]] +[\-d(ebug)] [-v(erbose)] [\-D(irect)] [\-G(uids)] [-n(o_info)] [-m mlid] [-s +smlid] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] +[\-\-node\-name\-\-map ] [\-h(elp)] [ [ []]] .SH DESCRIPTION .PP @@ -24,11 +26,11 @@ simple format; don't show additional information \fB\-m\fR show the multicast trace of the specified mlid .TP -\fB\-\-switch\-map\fR -Specify a switch map. The switch map file maps GUIDs to more user friendly +\fB\-\-node\-name\-map\fR +Specify a node name map. 
The node name map file maps GUIDs to more user friendly names. See .B ibnetdiscover(8) -for switch map file format. +for node name map file format. .SH COMMON OPTIONS diff --git a/infiniband-diags/man/saquery.8 b/infiniband-diags/man/saquery.8 index 5558cc9..516ae85 100644 --- a/infiniband-diags/man/saquery.8 +++ b/infiniband-diags/man/saquery.8 @@ -7,7 +7,7 @@ saquery \- query InfiniBand subnet administration attributes .B saquery [\-h] [\-d] [\-p] [\-N] [\-\-list | \-D] [\-S] [\-I] [\-L] [\-l] [\-G] [\-O] [\-U] [\-c] [\-s] [\-g] [\-m] [--src-to-dst ] [\-C ca_name] -[\-P ca_port] [\-t(imeout) ] [\-\-switch\-map ] +[\-P ca_port] [\-t(imeout) ] [\-\-node\-name\-map ] [ | | ] .SH DESCRIPTION @@ -77,11 +77,11 @@ Specify SA query response timeout in milliseconds. Default is 100 milliseconds. You may want to use this option if IB_TIMEOUT is indicated. .TP -\fB\-\-switch\-map\fR -Specify a switch map. The switch map file maps GUIDs to more user friendly +\fB\-\-node\-name\-map\fR +Specify a node name map. The node name map file maps GUIDs to more user friendly names. See .B ibnetdiscover(8) -for switch map file format. Only used with the \fB\-O\fR and \fB\-U\fR options. +for node name map file format. Only used with the \fB\-O\fR and \fB\-U\fR options. 
.TP \fB\-d\fR enable debugging diff --git a/infiniband-diags/man/smpquery.8 b/infiniband-diags/man/smpquery.8 index b81dc81..be619e9 100644 --- a/infiniband-diags/man/smpquery.8 +++ b/infiniband-diags/man/smpquery.8 @@ -5,7 +5,7 @@ smpquery \- query InfiniBand subnet management attributes .SH SYNOPSIS .B smpquery -[\-d(ebug)] [\-e(rr_show)] [\-v(erbose)] [\-D(irect)] [\-G(uid)] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms] [\-V(ersion)] [\-h(elp)] [op params] +[\-d(ebug)] [\-e(rr_show)] [\-v(erbose)] [\-D(irect)] [\-G(uid)] [\-C ca_name] [\-P ca_port] [\-t(imeout) timeout_ms --node-name-map node-name-map] [\-V(ersion)] [\-h(elp)] [op params] .SH DESCRIPTION .PP @@ -27,6 +27,13 @@ Current supported operations and their parameters: vlarb [] guids +.TP +\fB\-\-node\-name\-map\fR +Specify a node name map. The node name map file maps GUIDs to more user friendly +names. See +.B ibnetdiscover(8) +for node name map file format. + .SH COMMON OPTIONS Most OpenIB diagnostics take the following common flags. 
The exact list of diff --git a/infiniband-diags/src/ibdiag_common.c b/infiniband-diags/src/ibdiag_common.c index bfddfd7..c152853 100644 --- a/infiniband-diags/src/ibdiag_common.c +++ b/infiniband-diags/src/ibdiag_common.c @@ -52,34 +52,34 @@ int ibdebug; FILE * -open_switch_map(char *switch_map) +open_node_name_map(char *node_name_map) { FILE *rc = NULL; - if (switch_map != NULL) { - rc = fopen(switch_map, "r"); + if (node_name_map != NULL) { + rc = fopen(node_name_map, "r"); if (rc == NULL) { fprintf(stderr, "WARNING failed to open switch map \"%s\" (%s)\n", - switch_map, strerror(errno)); + node_name_map, strerror(errno)); } -#ifdef HAVE_DEFAULT_SWITCH_MAP +#ifdef HAVE_DEFAULT_NODENAME_MAP } else { - rc = fopen(HAVE_DEFAULT_SWITCH_MAP, "r"); -#endif /* HAVE_DEFAULT_SWITCH_MAP */ + rc = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); +#endif /* HAVE_DEFAULT_NODENAME_MAP */ } return (rc); } void -close_switch_map(FILE *fp) +close_node_name_map(FILE *fp) { if (fp) fclose(fp); } char * -lookup_switch_name(FILE *switch_map_fp, uint64_t target_guid, char *nodedesc) +remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, char *nodedesc) { #define NAME_LEN (256) char *line = NULL; @@ -88,12 +88,12 @@ lookup_switch_name(FILE *switch_map_fp, uint64_t target_guid, char *nodedesc) char *rc = NULL; int line_count = 0; - if (switch_map_fp == NULL) + if (node_name_map_fp == NULL) goto done; - rewind(switch_map_fp); + rewind(node_name_map_fp); for (line_count = 1; - getline(&line, &len, switch_map_fp) != -1; + getline(&line, &len, node_name_map_fp) != -1; line_count++) { line[len-1] = '\0'; if (line[0] == '#') diff --git a/infiniband-diags/src/ibnetdiscover.c b/infiniband-diags/src/ibnetdiscover.c index fd824c0..5594b1c 100644 --- a/infiniband-diags/src/ibnetdiscover.c +++ b/infiniband-diags/src/ibnetdiscover.c @@ -91,8 +91,8 @@ static FILE *f; char *argv0 = "ibnetdiscover"; -static char *switch_map = NULL; -static FILE *switch_map_fp = NULL; +static char *node_name_map = NULL; 
+static FILE *node_name_map_fp = NULL; Node *nodesdist[MAXHOPS+1]; /* last is Ca list */ Node *mynode; @@ -459,7 +459,7 @@ void list_node(Node *node) { char *node_type; - char *nodename = lookup_switch_name(switch_map_fp, node->nodeguid, + char *nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); switch(node->type) { @@ -536,7 +536,7 @@ out_switch(Node *node, int group, char *chname) fprintf(f, "%d Chip %d", node->chrecord->slotnum, node->chrecord->anafanum); } - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, + nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); fprintf(f, "\nSwitch\t%d %s\t\t# \"%s\" %s port 0 lid %d lmc %d\n", @@ -605,7 +605,7 @@ out_switch_port(Port *port, int group) if (ext_port_str) fprintf(f, "%s", ext_port_str); - rem_nodename = lookup_switch_name(switch_map_fp, + rem_nodename = remap_node_name(node_name_map_fp, port->remoteport->node->nodeguid, port->remoteport->node->nodedesc); @@ -649,7 +649,7 @@ out_ca_port(Port *port, int group) if (port->remoteport->node->type != SWITCH_NODE) fprintf(f, " (%" PRIx64 ") ", port->remoteport->portguid); - rem_nodename = lookup_switch_name(switch_map_fp, + rem_nodename = remap_node_name(node_name_map_fp, port->remoteport->node->nodeguid, port->remoteport->node->nodedesc); @@ -842,9 +842,9 @@ void usage(void) { fprintf(stderr, "Usage: %s [-d(ebug)] -e(rr_show) -v(erbose) -s(how) -l(ist) -g(rouping) -H(ca_list) -S(witch_list) -R(outer_list) -V(ersion) -C ca_name -P ca_port " - "-t(imeout) timeout_ms --switch-map switch-map] []\n", + "-t(imeout) timeout_ms --node-name-map node-name-map] []\n", argv0); - fprintf(stderr, " --switch-map specify a switch-map file\n"); + fprintf(stderr, " --node-name-map specify a node name map file\n"); exit(-1); } @@ -872,7 +872,7 @@ main(int argc, char **argv) { "Switch_list", 0, 0, 'S'}, { "Router_list", 0, 0, 'R'}, { "timeout", 1, 0, 't'}, - { "switch-map", 1, 0, 1}, + { "node-name-map", 1, 0, 1}, { "Version", 
0, 0, 'V'}, { "help", 0, 0, 'h'}, { "usage", 0, 0, 'u'}, @@ -889,7 +889,7 @@ main(int argc, char **argv) break; switch(ch) { case 1: - switch_map = strdup(optarg); + node_name_map = strdup(optarg); break; case 'C': ca = optarg; @@ -946,7 +946,7 @@ main(int argc, char **argv) IBERROR("can't open file %s for writing", argv[0]); madrpc_init(ca, ca_port, mgmt_classes, 2); - switch_map_fp = open_switch_map(switch_map); + node_name_map_fp = open_node_name_map(node_name_map); if (discover(&my_portid) < 0) IBERROR("discover"); @@ -956,6 +956,6 @@ main(int argc, char **argv) dump_topology(list, group); - close_switch_map(switch_map_fp); + close_node_name_map(node_name_map_fp); exit(0); } diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c index 5cf8043..2e04f2f 100644 --- a/infiniband-diags/src/ibtracert.c +++ b/infiniband-diags/src/ibtracert.c @@ -70,8 +70,8 @@ static FILE *f; char *argv0 = "ibtracert"; -static char *switch_map = NULL; -static FILE *switch_map_fp = NULL; +static char *node_name_map = NULL; +static FILE *node_name_map_fp = NULL; typedef struct Port Port; typedef struct Switch Switch; @@ -204,7 +204,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) return; } - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); + nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", prompt, @@ -224,7 +224,7 @@ dump_route(int dump, Node *node, int outport, Port *port) if (!dump && !verbose) return; - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); + nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); if (dump == 1) fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", @@ -636,7 +636,7 @@ dump_mcpath(Node *node, int dumplevel) if (node->upnode) dump_mcpath(node->upnode, dumplevel); - nodename = lookup_switch_name(switch_map_fp, node->nodeguid, node->nodedesc); + 
nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); if (!node->dist) { printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", @@ -684,7 +684,7 @@ usage(void) basename++; fprintf(stderr, "Usage: %s [-d(ebug) -v(erbose) -D(irect) -G(uids) -n(o_info) -C ca_name -P ca_port " - "-s smlid -t(imeout) timeout_ms -m mlid --switch-map switch-map ] \n", + "-s smlid -t(imeout) timeout_ms -m mlid --node-name-map node-name-map ] \n", basename); fprintf(stderr, "\n\tUnicast examples:\n"); fprintf(stderr, "\t\t%s 4 16\t\t\t# show path between lids 4 and 16\n", basename); @@ -726,7 +726,7 @@ main(int argc, char **argv) { "Version", 0, 0, 'V'}, { "help", 0, 0, 'h'}, { "usage", 0, 0, 'u'}, - { "switch-map", 1, 0, 1}, + { "node-name-map", 1, 0, 1}, { } }; @@ -740,7 +740,7 @@ main(int argc, char **argv) break; switch(ch) { case 1: - switch_map = strdup(optarg); + node_name_map = strdup(optarg); break; case 'C': ca = optarg; @@ -798,7 +798,7 @@ main(int argc, char **argv) usage(); madrpc_init(ca, ca_port, mgmt_classes, 3); - switch_map_fp = open_switch_map(switch_map); + node_name_map_fp = open_node_name_map(node_name_map); if (ib_resolve_portid_str(&src_portid, argv[0], dest_type, sm_id) < 0) IBERROR("can't resolve source port %s", argv[0]); @@ -837,6 +837,6 @@ main(int argc, char **argv) /* dump multicast path */ dump_mcpath(endnode, dumplevel); - close_switch_map(switch_map_fp); + close_node_name_map(node_name_map_fp); exit(0); } diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c index dfcc61a..7dcd5fb 100644 --- a/infiniband-diags/src/saquery.c +++ b/infiniband-diags/src/saquery.c @@ -59,8 +59,8 @@ char *argv0 = "saquery"; -static char *switch_map = NULL; -static FILE *switch_map_fp = NULL; +static char *node_name_map = NULL; +static FILE *node_name_map_fp = NULL; /** * Declare some globals because I don't want this to be too complex. 
@@ -136,7 +136,7 @@ print_node_record(ib_node_record_t *node_record) return; case NAME_OF_LID: case NAME_OF_GUID: - name = lookup_switch_name(switch_map_fp, + name = remap_node_name(node_name_map_fp, cl_ntoh64(p_ni->node_guid), (char *)p_nd->description); printf("%s\n", name); @@ -1076,7 +1076,7 @@ usage(void) fprintf(stderr, " -t | --timeout specify the SA query " "response timeout (default %u msec)\n", DEFAULT_SA_TIMEOUT_MS); - fprintf(stderr, " --switch-map specify a switch map\n"); + fprintf(stderr, " --node-name-map specify a node name map\n"); exit(-1); } @@ -1115,7 +1115,7 @@ main(int argc, char **argv) {"list", 0, 0, 'D'}, {"src-to-dst", 1, 0, 1}, {"timeout", 1, 0, 't'}, - {"switch-map", 1, 0, 2}, + {"node-name-map", 1, 0, 2}, { } }; @@ -1142,7 +1142,7 @@ main(int argc, char **argv) break; } case 2: - switch_map = strdup(optarg); + node_name_map = strdup(optarg); break; case 'p': query_type = IB_MAD_ATTR_PATH_RECORD; @@ -1247,7 +1247,7 @@ main(int argc, char **argv) } bind_handle = get_bind_handle(); - switch_map_fp = open_switch_map(switch_map); + node_name_map_fp = open_node_name_map(node_name_map); switch (query_type) { case IB_MAD_ATTR_NODE_RECORD: @@ -1293,6 +1293,6 @@ main(int argc, char **argv) if (dst) free(dst); clean_up(); - close_switch_map(switch_map_fp); + close_node_name_map(node_name_map_fp); return (status); } diff --git a/infiniband-diags/src/smpquery.c b/infiniband-diags/src/smpquery.c index 7d09e35..60212f5 100644 --- a/infiniband-diags/src/smpquery.c +++ b/infiniband-diags/src/smpquery.c @@ -84,8 +84,8 @@ static const match_rec_t match_tbl[] = { }; char *argv0 = "smpquery"; -static char *switch_map = NULL; -static FILE *switch_map_fp = NULL; +static char *node_name_map = NULL; +static FILE *node_name_map_fp = NULL; /*******************************************/ static char * @@ -107,7 +107,7 @@ node_desc(ib_portid_t *dest, char **argv, int argc) if (!smp_query(nd, dest, IB_ATTR_NODE_DESC, 0, 0)) return "node desc query failed"; - nodename 
= lookup_switch_name(switch_map_fp, node_guid, nd); + nodename = remap_node_name(node_name_map_fp, node_guid, nd); l = strlen(nodename); if (l < 32) { @@ -401,7 +401,7 @@ usage(void) basename++; fprintf(stderr, "Usage: %s [-d(ebug) -e(rr_show) -v(erbose) -D(irect) -G(uid) -s smlid -V(ersion) -C ca_name -P ca_port " - "-t(imeout) timeout_ms --switch-map switch-map] [op params]\n", + "-t(imeout) timeout_ms --node-name-map node-name-map] [op params]\n", basename); fprintf(stderr, "\tsupported ops:\n"); for (r = match_tbl ; r->name ; r++) { @@ -441,7 +441,7 @@ main(int argc, char **argv) { "Guid", 0, 0, 'G'}, { "smlid", 1, 0, 's'}, { "timeout", 1, 0, 't'}, - { "switch-map", 1, 0, 1}, + { "node-name-map", 1, 0, 1}, { "Version", 0, 0, 'V'}, { "help", 0, 0, 'h'}, { "usage", 0, 0, 'u'}, @@ -456,7 +456,7 @@ main(int argc, char **argv) break; switch(ch) { case 1: - switch_map = strdup(optarg); + node_name_map = strdup(optarg); break; case 'd': ibdebug++; @@ -512,7 +512,7 @@ main(int argc, char **argv) IBERROR("operation '%s' not supported", argv[0]); madrpc_init(ca, ca_port, mgmt_classes, 3); - switch_map_fp = open_switch_map(switch_map); + node_name_map_fp = open_node_name_map(node_name_map); if (dest_type != IB_DEST_DRSLID) { if (ib_resolve_portid_str(&portid, argv[1], dest_type, sm_id) < 0) @@ -529,6 +529,6 @@ main(int argc, char **argv) if ((err = fn(&portid, argv+3, argc-3))) IBERROR("operation %s: %s", argv[0], err); } - close_switch_map(switch_map_fp); + close_node_name_map(node_name_map_fp); exit(0); } -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0002-Change-switch-map-function-names-to-reflect-the-new.patch Type: application/octet-stream Size: 22688 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 1 20:15:08 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:15:08 -0700 Subject: [ofa-general] [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp Message-ID: <20071101201508.51b5e363.weiny2@llnl.gov> >From fe2756789ffbc69466eefea3cdffe200a0718561 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 1 Nov 2007 15:00:37 -0700 Subject: [PATCH] Move nodenamemap out of infiniband-diags into libosmcomp Signed-off-by: Ira K. Weiny --- infiniband-diags/Makefile.am | 3 + infiniband-diags/configure.in | 26 ------ infiniband-diags/include/ibdiag_common.h | 13 --- infiniband-diags/src/ibdiag_common.c | 81 ------------------- infiniband-diags/src/ibnetdiscover.c | 1 + infiniband-diags/src/ibtracert.c | 1 + infiniband-diags/src/saquery.c | 1 + infiniband-diags/src/smpquery.c | 1 + opensm/complib/Makefile.am | 7 +- opensm/complib/cl_nodenamemap.c | 128 ++++++++++++++++++++++++++++++ opensm/complib/libosmcomp.map | 4 + opensm/configure.in | 26 ++++++ opensm/include/Makefile.am | 1 + opensm/include/complib/cl_nodenamemap.h | 54 +++++++++++++ 14 files changed, 225 insertions(+), 122 deletions(-) create mode 100644 opensm/complib/cl_nodenamemap.c create mode 100644 opensm/include/complib/cl_nodenamemap.h diff --git a/infiniband-diags/Makefile.am b/infiniband-diags/Makefile.am index 7dcfa5a..edff06c 100644 --- a/infiniband-diags/Makefile.am +++ b/infiniband-diags/Makefile.am @@ -32,6 +32,7 @@ src_ibaddr_CFLAGS = -Wall $(DBGFLAGS) src_ibnetdiscover_SOURCES = src/ibnetdiscover.c src/grouping.c src/ibdiag_common.c src_ibnetdiscover_CFLAGS = -Wall $(DBGFLAGS) +src_ibnetdiscover_LDFLAGS = -Wl,--rpath -Wl,$(libdir) src_ibping_SOURCES = src/ibping.c src/ibdiag_common.c src_ibping_CFLAGS = -Wall $(DBGFLAGS) @@ -50,6 +51,7 @@ src_ibsysstat_CFLAGS = -Wall $(DBGFLAGS) 
src_ibtracert_SOURCES = src/ibtracert.c src/ibdiag_common.c src_ibtracert_CFLAGS = -Wall $(DBGFLAGS) +src_ibtracert_LDFLAGS = -Wl,--rpath -Wl,$(libdir) src_perfquery_SOURCES = src/perfquery.c src/ibdiag_common.c src_perfquery_CFLAGS = -Wall $(DBGFLAGS) @@ -62,6 +64,7 @@ src_smpdump_CFLAGS = -Wall $(DBGFLAGS) src_smpquery_SOURCES = src/smpquery.c src/ibdiag_common.c src_smpquery_CFLAGS = -Wall $(DBGFLAGS) +src_smpquery_LDFLAGS = -Wl,--rpath -Wl,$(libdir) src_saquery_SOURCES = src/saquery.c src/ibdiag_common.c src_saquery_CFLAGS = -Wall -DOSM_VENDOR_INTF_OPENIB -DVENDOR_RMPP_SUPPORT -DDUAL_SIDED_RMPP $(DBGFLAGS) diff --git a/infiniband-diags/configure.in b/infiniband-diags/configure.in index 0a5f3c8..a24d478 100644 --- a/infiniband-diags/configure.in +++ b/infiniband-diags/configure.in @@ -72,32 +72,6 @@ AC_CHECK_FUNCS([strchr strrchr strtol strtoul memset]) dnl Checks for typedefs, structures, and compiler characteristics. AC_C_CONST -dnl Check for the specification of a default node name map file -AC_MSG_CHECKING(for --with-node-name-map ) -AC_ARG_WITH(node-name-map, - AC_HELP_STRING([--with-node-name-map=file], - [define a default node name map file]), - [ case "$withval" in - no) - ;; - *) - withnodenamemap=yes - NODENAMEMAPFILE=$withval - ;; - esac ] -) -AC_MSG_RESULT(${withnodenamemap=no}) - -if test $withnodenamemap = "yes"; then - NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" - NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" - NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" - - AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, - ["$NODENAMEMAP"], - [Define a default node name map file]) -fi - dnl Check for perl and perl install location AC_MSG_CHECKING(for --with-perl-path ) AC_ARG_WITH(perl-path, diff --git a/infiniband-diags/include/ibdiag_common.h b/infiniband-diags/include/ibdiag_common.h index 55df3fe..029d80e 100644 --- a/infiniband-diags/include/ibdiag_common.h +++ b/infiniband-diags/include/ibdiag_common.h @@ 
-45,16 +45,6 @@ extern int ibdebug; /* External interface */ /*========================================================*/ -/** - * Node name map interface. - * It is OK to pass NULL for the node_name_map[_fp] parameters. - */ -FILE *open_node_name_map(char *node_name_map); -void close_node_name_map(FILE *node_name_map_fp); -char *remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, - char *nodedesc); - /* NOTE: parameter "nodedesc" may be modified here. */ - #undef DEBUG #define DEBUG if (ibdebug || verbose) IBWARN #define VERBOSE if (ibdebug || verbose > 1) IBWARN @@ -62,9 +52,6 @@ char *remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, void iberror(const char *fn, char *msg, ...); -/* NOTE: this modifies the parameter "nodedesc". */ -char *clean_nodedesc(char *nodedesc); - #ifdef __BUILD_VERSION_TAG__ #define stringify(s) to_string(s) diff --git a/infiniband-diags/src/ibdiag_common.c b/infiniband-diags/src/ibdiag_common.c index c152853..2d573b9 100644 --- a/infiniband-diags/src/ibdiag_common.c +++ b/infiniband-diags/src/ibdiag_common.c @@ -51,73 +51,6 @@ int ibdebug; -FILE * -open_node_name_map(char *node_name_map) -{ - FILE *rc = NULL; - - if (node_name_map != NULL) { - rc = fopen(node_name_map, "r"); - if (rc == NULL) { - fprintf(stderr, - "WARNING failed to open switch map \"%s\" (%s)\n", - node_name_map, strerror(errno)); - } -#ifdef HAVE_DEFAULT_NODENAME_MAP - } else { - rc = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); -#endif /* HAVE_DEFAULT_NODENAME_MAP */ - } - return (rc); -} - -void -close_node_name_map(FILE *fp) -{ - if (fp) - fclose(fp); -} - -char * -remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, char *nodedesc) -{ -#define NAME_LEN (256) - char *line = NULL; - size_t len = 0; - uint64_t guid = 0; - char *rc = NULL; - int line_count = 0; - - if (node_name_map_fp == NULL) - goto done; - - rewind(node_name_map_fp); - for (line_count = 1; - getline(&line, &len, node_name_map_fp) != -1; - line_count++) { - line[len-1] = 
'\0'; - if (line[0] == '#') - goto next_one; - char *guid_str = strtok(line, "\"#"); - char *name = strtok(NULL, "\"#"); - if (!guid_str || !name) - goto next_one; - guid = strtoull(guid_str, NULL, 0); - if (target_guid == guid) { - rc = strdup(name); - free (line); - goto done; - } -next_one: - free (line); - line = NULL; - } -done: - if (rc == NULL) - rc = strdup(clean_nodedesc(nodedesc)); - return (rc); -} - void iberror(const char *fn, char *msg, ...) { @@ -141,17 +74,3 @@ iberror(const char *fn, char *msg, ...) exit(-1); } -char * -clean_nodedesc(char *nodedesc) -{ - int i = 0; - - nodedesc[63] = '\0'; - while (nodedesc[i]) { - if (!isprint(nodedesc[i])) - nodedesc[i] = ' '; - i++; - } - - return (nodedesc); -} diff --git a/infiniband-diags/src/ibnetdiscover.c b/infiniband-diags/src/ibnetdiscover.c index 5594b1c..03ef6f9 100644 --- a/infiniband-diags/src/ibnetdiscover.c +++ b/infiniband-diags/src/ibnetdiscover.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "ibnetdiscover.h" #include "grouping.h" diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c index 2e04f2f..c8a7b19 100644 --- a/infiniband-diags/src/ibtracert.c +++ b/infiniband-diags/src/ibtracert.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "ibdiag_common.h" diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c index 7dcd5fb..a8d810f 100644 --- a/infiniband-diags/src/saquery.c +++ b/infiniband-diags/src/saquery.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "ibdiag_common.h" diff --git a/infiniband-diags/src/smpquery.c b/infiniband-diags/src/smpquery.c index 60212f5..7c2c129 100644 --- a/infiniband-diags/src/smpquery.c +++ b/infiniband-diags/src/smpquery.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "ibdiag_common.h" diff --git a/opensm/complib/Makefile.am b/opensm/complib/Makefile.am index 2967c87..3ef5357 100644 --- a/opensm/complib/Makefile.am +++ 
b/opensm/complib/Makefile.am @@ -26,7 +26,9 @@ libosmcomp_la_SOURCES = cl_complib.c cl_dispatcher.c \ cl_spinlock.c cl_statustext.c \ cl_thread.c cl_threadpool.c \ cl_timer.c cl_vector.c \ - ib_statustext.c + ib_statustext.c \ + cl_nodenamemap.c + libosmcomp_la_LDFLAGS = -version-info $(complib_api_version) \ -export-dynamic $(libosmcomp_version_script) libosmcomp_la_DEPENDENCIES = $(srcdir)/libosmcomp.map @@ -73,7 +75,8 @@ libosmcompinclude_HEADERS = $(srcdir)/../include/complib/cl_atomic.h \ $(srcdir)/../include/complib/cl_timer_osd.h \ $(srcdir)/../include/complib/cl_types.h \ $(srcdir)/../include/complib/cl_types_osd.h \ - $(srcdir)/../include/complib/cl_vector.h + $(srcdir)/../include/complib/cl_vector.h \ + $(srcdir)/../include/complib/cl_nodenamemap.h # headers are distributed as part of the include dir EXTRA_DIST = $(srcdir)/libosmcomp.map $(srcdir)/libosmcomp.ver diff --git a/opensm/complib/cl_nodenamemap.c b/opensm/complib/cl_nodenamemap.c new file mode 100644 index 0000000..144a7e4 --- /dev/null +++ b/opensm/complib/cl_nodenamemap.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2007 Lawrence Livermore National Lab + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +FILE * +open_node_name_map(char *node_name_map) +{ + FILE *rc = NULL; + + if (node_name_map != NULL) { + rc = fopen(node_name_map, "r"); + if (rc == NULL) { + fprintf(stderr, + "WARNING failed to open switch map \"%s\" (%s)\n", + node_name_map, strerror(errno)); + } +#ifdef HAVE_DEFAULT_NODENAME_MAP + } else { + rc = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); +#endif /* HAVE_DEFAULT_NODENAME_MAP */ + } + return (rc); +} + +void +close_node_name_map(FILE *fp) +{ + if (fp) + fclose(fp); +} + +char * +remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, char *nodedesc) +{ +#define NAME_LEN (256) + char *line = NULL; + size_t len = 0; + uint64_t guid = 0; + char *rc = NULL; + int line_count = 0; + + if (node_name_map_fp == NULL) + goto done; + + rewind(node_name_map_fp); + for (line_count = 1; + getline(&line, &len, node_name_map_fp) != -1; + line_count++) { + line[len-1] = '\0'; + if (line[0] == '#') + goto next_one; + char *guid_str = strtok(line, "\"#"); + char *name = strtok(NULL, "\"#"); + if (!guid_str || !name) + goto next_one; + guid = strtoull(guid_str, NULL, 0); + if (target_guid == guid) { + rc = strdup(name); + free (line); + goto done; + } +next_one: + free (line); + line = NULL; + } +done: + if (rc == NULL) + rc = strdup(clean_nodedesc(nodedesc)); + return (rc); +} + +char * +clean_nodedesc(char *nodedesc) +{ + int i = 0; + + 
nodedesc[63] = '\0'; + while (nodedesc[i]) { + if (!isprint(nodedesc[i])) + nodedesc[i] = ' '; + i++; + } + + return (nodedesc); +} + diff --git a/opensm/complib/libosmcomp.map b/opensm/complib/libosmcomp.map index cb2505b..7ee845d 100644 --- a/opensm/complib/libosmcomp.map +++ b/opensm/complib/libosmcomp.map @@ -151,5 +151,9 @@ OSMCOMP_2.3 { ib_error_str; ib_async_event_str; ib_wc_status_str; + open_node_name_map; + close_node_name_map; + remap_node_name; + clean_nodedesc; local: *; }; diff --git a/opensm/configure.in b/opensm/configure.in index d120c05..b596004 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -70,6 +70,32 @@ OPENIB_OSM_CONSOLE_SOCKET_SEL dnl select performance manager or not OPENIB_OSM_PERF_MGR_SEL +dnl Check for the specification of a default node name map file +AC_MSG_CHECKING(for --with-node-name-map ) +AC_ARG_WITH(node-name-map, + AC_HELP_STRING([--with-node-name-map=file], + [define a default node name map file]), + [ case "$withval" in + no) + ;; + *) + withnodenamemap=yes + NODENAMEMAPFILE=$withval + ;; + esac ] +) +AC_MSG_RESULT(${withnodenamemap=no}) + +if test $withnodenamemap = "yes"; then + NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" + NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" + NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" + + AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, + ["$NODENAMEMAP"], + [Define a default node name map file]) +fi + dnl select example event plugin or not OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL diff --git a/opensm/include/Makefile.am b/opensm/include/Makefile.am index b2d01fa..34f82a1 100644 --- a/opensm/include/Makefile.am +++ b/opensm/include/Makefile.am @@ -126,6 +126,7 @@ EXTRA_DIST = \ $(srcdir)/complib/cl_types.h \ $(srcdir)/complib/cl_fleximap.h \ $(srcdir)/complib/cl_qcomppool.h \ + $(srcdir)/complib/cl_nodenamemap.h \ $(srcdir)/iba/ib_types.h \ $(srcdir)/iba/ib_cm_types.h \ $(srcdir)/vendor/osm_vendor_mlx_transport_anafa.h \ diff --git 
a/opensm/include/complib/cl_nodenamemap.h b/opensm/include/complib/cl_nodenamemap.h new file mode 100644 index 0000000..a4a09f7 --- /dev/null +++ b/opensm/include/complib/cl_nodenamemap.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2007 Lawrence Livermore National Lab + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef _CL_NODE_NAME_MAP_H_ +#define _CL_NODE_NAME_MAP_H_ + +#include +#include + +/* NOTE: this modifies the parameter "nodedesc". */ +char *clean_nodedesc(char *nodedesc); + +/** + * Node name map interface. + * It is OK to pass NULL for the node_name_map[_fp] parameters. 
+ */ +FILE *open_node_name_map(char *node_name_map); +void close_node_name_map(FILE *node_name_map_fp); +char *remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, + char *nodedesc); + /* NOTE: parameter "nodedesc" may be modified here. */ + +#endif /* _CL_NODE_NAME_MAP_H_ */ + -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0003-Move-nodenamemap-out-of-infiniband-diags-into-libosm.patch Type: application/octet-stream Size: 17098 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 1 20:15:14 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:15:14 -0700 Subject: [ofa-general] [PATCH 4/7] Change node name map implementation to use qmap in memory storage Message-ID: <20071101201514.62bd5ce8.weiny2@llnl.gov> >From 2dacfc928856351820fadc416da787350254419e Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 1 Nov 2007 19:29:02 -0700 Subject: [PATCH] Change node name map implementation to use qmap in memory storage Signed-off-by: Ira K. 
Weiny --- infiniband-diags/src/ibnetdiscover.c | 18 +++--- infiniband-diags/src/ibroute.c | 1 + infiniband-diags/src/ibtracert.c | 16 ++-- infiniband-diags/src/saquery.c | 12 ++-- infiniband-diags/src/smpquery.c | 12 ++-- opensm/complib/cl_nodenamemap.c | 116 ++++++++++++++++++++---------- opensm/include/complib/cl_nodenamemap.h | 19 ++++- 7 files changed, 122 insertions(+), 72 deletions(-) diff --git a/infiniband-diags/src/ibnetdiscover.c b/infiniband-diags/src/ibnetdiscover.c index 03ef6f9..8b229c1 100644 --- a/infiniband-diags/src/ibnetdiscover.c +++ b/infiniband-diags/src/ibnetdiscover.c @@ -92,8 +92,8 @@ static FILE *f; char *argv0 = "ibnetdiscover"; -static char *node_name_map = NULL; -static FILE *node_name_map_fp = NULL; +static char *node_name_map_file = NULL; +static nn_map_t *node_name_map = NULL; Node *nodesdist[MAXHOPS+1]; /* last is Ca list */ Node *mynode; @@ -460,7 +460,7 @@ void list_node(Node *node) { char *node_type; - char *nodename = remap_node_name(node_name_map_fp, node->nodeguid, + char *nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); switch(node->type) { @@ -537,7 +537,7 @@ out_switch(Node *node, int group, char *chname) fprintf(f, "%d Chip %d", node->chrecord->slotnum, node->chrecord->anafanum); } - nodename = remap_node_name(node_name_map_fp, node->nodeguid, + nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); fprintf(f, "\nSwitch\t%d %s\t\t# \"%s\" %s port 0 lid %d lmc %d\n", @@ -606,7 +606,7 @@ out_switch_port(Port *port, int group) if (ext_port_str) fprintf(f, "%s", ext_port_str); - rem_nodename = remap_node_name(node_name_map_fp, + rem_nodename = remap_node_name(node_name_map, port->remoteport->node->nodeguid, port->remoteport->node->nodedesc); @@ -650,7 +650,7 @@ out_ca_port(Port *port, int group) if (port->remoteport->node->type != SWITCH_NODE) fprintf(f, " (%" PRIx64 ") ", port->remoteport->portguid); - rem_nodename = remap_node_name(node_name_map_fp, + rem_nodename = 
remap_node_name(node_name_map, port->remoteport->node->nodeguid, port->remoteport->node->nodedesc); @@ -890,7 +890,7 @@ main(int argc, char **argv) break; switch(ch) { case 1: - node_name_map = strdup(optarg); + node_name_map_file = strdup(optarg); break; case 'C': ca = optarg; @@ -947,7 +947,7 @@ main(int argc, char **argv) IBERROR("can't open file %s for writing", argv[0]); madrpc_init(ca, ca_port, mgmt_classes, 2); - node_name_map_fp = open_node_name_map(node_name_map); + node_name_map = open_node_name_map(node_name_map_file); if (discover(&my_portid) < 0) IBERROR("discover"); @@ -957,6 +957,6 @@ main(int argc, char **argv) dump_topology(list, group); - close_node_name_map(node_name_map_fp); + close_node_name_map(node_name_map); exit(0); } diff --git a/infiniband-diags/src/ibroute.c b/infiniband-diags/src/ibroute.c index 44d2fc8..664f7f5 100644 --- a/infiniband-diags/src/ibroute.c +++ b/infiniband-diags/src/ibroute.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "ibdiag_common.h" diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c index c8a7b19..010f45f 100644 --- a/infiniband-diags/src/ibtracert.c +++ b/infiniband-diags/src/ibtracert.c @@ -71,8 +71,8 @@ static FILE *f; char *argv0 = "ibtracert"; -static char *node_name_map = NULL; -static FILE *node_name_map_fp = NULL; +static char *node_name_map_file = NULL; +static nn_map_t *node_name_map = NULL; typedef struct Port Port; typedef struct Switch Switch; @@ -205,7 +205,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) return; } - nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); + nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", prompt, @@ -225,7 +225,7 @@ dump_route(int dump, Node *node, int outport, Port *port) if (!dump && !verbose) return; - nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); + 
nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); if (dump == 1) fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", @@ -637,7 +637,7 @@ dump_mcpath(Node *node, int dumplevel) if (node->upnode) dump_mcpath(node->upnode, dumplevel); - nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); + nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); if (!node->dist) { printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", @@ -741,7 +741,7 @@ main(int argc, char **argv) break; switch(ch) { case 1: - node_name_map = strdup(optarg); + node_name_map_file = strdup(optarg); break; case 'C': ca = optarg; @@ -799,7 +799,7 @@ main(int argc, char **argv) usage(); madrpc_init(ca, ca_port, mgmt_classes, 3); - node_name_map_fp = open_node_name_map(node_name_map); + node_name_map = open_node_name_map(node_name_map_file); if (ib_resolve_portid_str(&src_portid, argv[0], dest_type, sm_id) < 0) IBERROR("can't resolve source port %s", argv[0]); @@ -838,6 +838,6 @@ main(int argc, char **argv) /* dump multicast path */ dump_mcpath(endnode, dumplevel); - close_node_name_map(node_name_map_fp); + close_node_name_map(node_name_map); exit(0); } diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c index a8d810f..c6cc0a2 100644 --- a/infiniband-diags/src/saquery.c +++ b/infiniband-diags/src/saquery.c @@ -60,8 +60,8 @@ char *argv0 = "saquery"; -static char *node_name_map = NULL; -static FILE *node_name_map_fp = NULL; +static char *node_name_map_file = NULL; +static nn_map_t *node_name_map = NULL; /** * Declare some globals because I don't want this to be too complex. 
@@ -137,7 +137,7 @@ print_node_record(ib_node_record_t *node_record) return; case NAME_OF_LID: case NAME_OF_GUID: - name = remap_node_name(node_name_map_fp, + name = remap_node_name(node_name_map, cl_ntoh64(p_ni->node_guid), (char *)p_nd->description); printf("%s\n", name); @@ -1143,7 +1143,7 @@ main(int argc, char **argv) break; } case 2: - node_name_map = strdup(optarg); + node_name_map_file = strdup(optarg); break; case 'p': query_type = IB_MAD_ATTR_PATH_RECORD; @@ -1248,7 +1248,7 @@ main(int argc, char **argv) } bind_handle = get_bind_handle(); - node_name_map_fp = open_node_name_map(node_name_map); + node_name_map = open_node_name_map(node_name_map_file); switch (query_type) { case IB_MAD_ATTR_NODE_RECORD: @@ -1294,6 +1294,6 @@ main(int argc, char **argv) if (dst) free(dst); clean_up(); - close_node_name_map(node_name_map_fp); + close_node_name_map(node_name_map); return (status); } diff --git a/infiniband-diags/src/smpquery.c b/infiniband-diags/src/smpquery.c index 7c2c129..89b48f3 100644 --- a/infiniband-diags/src/smpquery.c +++ b/infiniband-diags/src/smpquery.c @@ -85,8 +85,8 @@ static const match_rec_t match_tbl[] = { }; char *argv0 = "smpquery"; -static char *node_name_map = NULL; -static FILE *node_name_map_fp = NULL; +static char *node_name_map_file = NULL; +static nn_map_t *node_name_map = NULL; /*******************************************/ static char * @@ -108,7 +108,7 @@ node_desc(ib_portid_t *dest, char **argv, int argc) if (!smp_query(nd, dest, IB_ATTR_NODE_DESC, 0, 0)) return "node desc query failed"; - nodename = remap_node_name(node_name_map_fp, node_guid, nd); + nodename = remap_node_name(node_name_map, node_guid, nd); l = strlen(nodename); if (l < 32) { @@ -457,7 +457,7 @@ main(int argc, char **argv) break; switch(ch) { case 1: - node_name_map = strdup(optarg); + node_name_map_file = strdup(optarg); break; case 'd': ibdebug++; @@ -513,7 +513,7 @@ main(int argc, char **argv) IBERROR("operation '%s' not supported", argv[0]); madrpc_init(ca, 
ca_port, mgmt_classes, 3); - node_name_map_fp = open_node_name_map(node_name_map); + node_name_map = open_node_name_map(node_name_map_file); if (dest_type != IB_DEST_DRSLID) { if (ib_resolve_portid_str(&portid, argv[1], dest_type, sm_id) < 0) @@ -530,6 +530,6 @@ main(int argc, char **argv) if ((err = fn(&portid, argv+3, argc-3))) IBERROR("operation %s: %s", argv[0], err); } - close_node_name_map(node_name_map_fp); + close_node_name_map(node_name_map); exit(0); } diff --git a/opensm/complib/cl_nodenamemap.c b/opensm/complib/cl_nodenamemap.c index 144a7e4..584c78c 100644 --- a/opensm/complib/cl_nodenamemap.c +++ b/opensm/complib/cl_nodenamemap.c @@ -44,67 +44,105 @@ #include -FILE * +static nn_map_t * +read_names(nn_map_t *map) +{ + char *line = NULL; + size_t len = 0; + name_map_item_t *item; + + rewind(map->fp); + while (getline(&line, &len, map->fp) != -1) { + char *guid_str = NULL; + char *name = NULL; + line[len-1] = '\0'; + if (line[0] == '#') + goto next_one; + + guid_str = strtok(line, "\"#"); + name = strtok(NULL, "\"#"); + if (!guid_str || !name) + goto next_one; + + item = malloc(sizeof(*item)); + if (!item) { + goto error; + } + item->guid = strtoull(guid_str, NULL, 0); + item->name = strdup(name); + cl_qmap_insert(&(map->map), item->guid, (cl_map_item_t *)item); + +next_one: + free (line); + line = NULL; + } + +error: + return (map); +} + +nn_map_t * open_node_name_map(char *node_name_map) { - FILE *rc = NULL; + FILE *tmp_fp = NULL; + nn_map_t *rc = NULL; if (node_name_map != NULL) { - rc = fopen(node_name_map, "r"); - if (rc == NULL) { + tmp_fp = fopen(node_name_map, "r"); + if (tmp_fp == NULL) { fprintf(stderr, "WARNING failed to open switch map \"%s\" (%s)\n", node_name_map, strerror(errno)); } #ifdef HAVE_DEFAULT_NODENAME_MAP } else { - rc = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); + tmp_fp = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); #endif /* HAVE_DEFAULT_NODENAME_MAP */ } - return (rc); + if (!tmp_fp) + return (NULL); + + rc = malloc(sizeof(*rc)); + if 
(!rc) + return (NULL); + rc->fp = tmp_fp; + cl_qmap_init(&(rc->map)); + return (read_names(rc)); } void -close_node_name_map(FILE *fp) +close_node_name_map(nn_map_t *map) { - if (fp) - fclose(fp); + name_map_item_t *item = NULL; + + if (!map) + return; + + item = (name_map_item_t *)cl_qmap_head(&(map->map)); + while (item != cl_qmap_end(&(map->map))) { + item = (name_map_item_t *)cl_qmap_remove(&(map->map), item->guid); + free(item->name); + free(item); + item = (name_map_item_t *)cl_qmap_head(&(map->map)); + } + if (map->fp) + fclose(map->fp); + free(map); } char * -remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, char *nodedesc) +remap_node_name(nn_map_t *map, uint64_t target_guid, char *nodedesc) { -#define NAME_LEN (256) - char *line = NULL; - size_t len = 0; - uint64_t guid = 0; - char *rc = NULL; - int line_count = 0; - - if (node_name_map_fp == NULL) + char *rc = NULL; + name_map_item_t *item = NULL; + + if (!map) goto done; - rewind(node_name_map_fp); - for (line_count = 1; - getline(&line, &len, node_name_map_fp) != -1; - line_count++) { - line[len-1] = '\0'; - if (line[0] == '#') - goto next_one; - char *guid_str = strtok(line, "\"#"); - char *name = strtok(NULL, "\"#"); - if (!guid_str || !name) - goto next_one; - guid = strtoull(guid_str, NULL, 0); - if (target_guid == guid) { - rc = strdup(name); - free (line); - goto done; - } -next_one: - free (line); - line = NULL; - } + item = (name_map_item_t *)cl_qmap_get(&(map->map), target_guid); + if (item != cl_qmap_end(&(map->map))) + rc = strdup(item->name); + done: if (rc == NULL) rc = strdup(clean_nodedesc(nodedesc)); diff --git a/opensm/include/complib/cl_nodenamemap.h b/opensm/include/complib/cl_nodenamemap.h index a4a09f7..9d0b7d4 100644 --- a/opensm/include/complib/cl_nodenamemap.h +++ b/opensm/include/complib/cl_nodenamemap.h @@ -36,17 +36,28 @@ #include #include +#include -/* NOTE: this modifies the parameter "nodedesc". */ +/* NOTE: this may modify the parameter "nodedesc". 
*/ char *clean_nodedesc(char *nodedesc); +typedef struct _name_map_item { + cl_map_item_t item; + uint64_t guid; + char *name; +} name_map_item_t; +typedef struct _node_name_map { + FILE *fp; + cl_qmap_t map; +} nn_map_t; + /** * Node name map interface. * It is OK to pass NULL for the node_name_map[_fp] parameters. */ -FILE *open_node_name_map(char *node_name_map); -void close_node_name_map(FILE *node_name_map_fp); -char *remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, +nn_map_t *open_node_name_map(char *node_name_map); +void close_node_name_map(nn_map_t *map); +char *remap_node_name(nn_map_t *map, uint64_t target_guid, char *nodedesc); /* NOTE: parameter "nodedesc" may be modified here. */ -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0004-Change-node-name-map-implementation-to-use-qmap-in-m.patch Type: application/octet-stream Size: 13407 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 1 20:15:18 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:15:18 -0700 Subject: [ofa-general] [PATCH 5/7] infiniband-diags/src/smpquery.c : special case situation where nodename len is >= 32 Message-ID: <20071101201518.4d70e189.weiny2@llnl.gov> >From 04c67433708cb14ab384a2acfc19755998f7a8b2 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 1 Nov 2007 19:31:30 -0700 Subject: [PATCH] infiniband-diags/src/smpquery.c : special case situation where nodename len is >= 32 Signed-off-by: Ira K. 
Weiny --- infiniband-diags/src/smpquery.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/infiniband-diags/src/smpquery.c b/infiniband-diags/src/smpquery.c index 89b48f3..0a91de1 100644 --- a/infiniband-diags/src/smpquery.c +++ b/infiniband-diags/src/smpquery.c @@ -114,6 +114,9 @@ node_desc(ib_portid_t *dest, char **argv, int argc) if (l < 32) { memset(dots, '.', 32 - l); dots[32 - l] = '\0'; + } else { + dots[0] = '.'; + dots[1] = '\0'; } printf("Node Description:%s%s\n", dots, nodename); -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0005-infiniband-diags-src-smpquery.c-special-case-situa.patch Type: application/octet-stream Size: 838 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 1 20:15:24 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:15:24 -0700 Subject: [ofa-general] [PATCH 6/7] Add node-name-map support to OpenSM; using the "default" map. Message-ID: <20071101201524.3f95a33a.weiny2@llnl.gov> >From 35280cfd5229ccc8d91b6fd98e0f4b58193d0d03 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 1 Nov 2007 19:41:37 -0700 Subject: [PATCH] Add node-name-map support to OpenSM; using the "default" map. Signed-off-by: Ira K. 
Weiny --- opensm/include/opensm/osm_node.h | 2 +- opensm/include/opensm/osm_opensm.h | 2 ++ opensm/include/opensm/osm_subnet.h | 1 + opensm/opensm/osm_node.c | 6 ++++++ opensm/opensm/osm_node_desc_rcv.c | 14 ++++++++++++-- opensm/opensm/osm_opensm.c | 4 ++++ 6 files changed, 26 insertions(+), 3 deletions(-) diff --git a/opensm/include/opensm/osm_node.h b/opensm/include/opensm/osm_node.h index f87e81d..8af5418 100644 --- a/opensm/include/opensm/osm_node.h +++ b/opensm/include/opensm/osm_node.h @@ -106,7 +106,7 @@ typedef struct _osm_node { ib_node_desc_t node_desc; uint32_t discovery_count; uint32_t physp_tbl_size; - char print_desc[IB_NODE_DESCRIPTION_SIZE + 1]; + char *print_desc; osm_physp_t physp_table[1]; } osm_node_t; /* diff --git a/opensm/include/opensm/osm_opensm.h b/opensm/include/opensm/osm_opensm.h index 1ea1ec2..1b5edb8 100644 --- a/opensm/include/opensm/osm_opensm.h +++ b/opensm/include/opensm/osm_opensm.h @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -168,6 +169,7 @@ typedef struct _osm_opensm_t { struct osm_routing_engine routing_engine; osm_stats_t stats; osm_console_t console; + nn_map_t *node_name_map; } osm_opensm_t; /* * FIELDS diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h index dada8bf..452098b 100644 --- a/opensm/include/opensm/osm_subnet.h +++ b/opensm/include/opensm/osm_subnet.h @@ -297,6 +297,7 @@ typedef struct _osm_subn_opt { char *event_db_dump_file; #endif /* ENABLE_OSM_PERF_MGR */ char *event_plugin_name; + char *node_name_map_name; } osm_subn_opt_t; /* * FIELDS diff --git a/opensm/opensm/osm_node.c b/opensm/opensm/osm_node.c index 645daa9..f34da1f 100644 --- a/opensm/opensm/osm_node.c +++ b/opensm/opensm/osm_node.c @@ -131,6 +131,7 @@ osm_node_t *osm_node_new(IN const osm_madw_t * const p_madw) osm_node_init_physp(p_node, p_madw); } + p_node->print_desc = ""; return (p_node); } @@ -146,6 +147,11 @@ static void osm_node_destroy(IN osm_node_t * p_node) */ for 
(i = 0; i < p_node->physp_tbl_size; i++) osm_physp_destroy(&p_node->physp_table[i]); + + /* cleanup printable node_desc field */ + if (p_node->print_desc) { + free(p_node->print_desc); + } } /********************************************************************** diff --git a/opensm/opensm/osm_node_desc_rcv.c b/opensm/opensm/osm_node_desc_rcv.c index d50883c..f758d5a 100644 --- a/opensm/opensm/osm_node_desc_rcv.c +++ b/opensm/opensm/osm_node_desc_rcv.c @@ -58,6 +58,7 @@ #include #include #include +#include #include /********************************************************************** @@ -67,13 +68,22 @@ __osm_nd_rcv_process_nd(IN const osm_nd_rcv_t * const p_rcv, IN osm_node_t * const p_node, IN const ib_node_desc_t * const p_nd) { + char *tmp_desc; + char print_desc[IB_NODE_DESCRIPTION_SIZE + 1]; + OSM_LOG_ENTER(p_rcv->p_log, __osm_nd_rcv_process_nd); memcpy(&p_node->node_desc.description, p_nd, sizeof(*p_nd)); /* also set up a printable version */ - memcpy(&p_node->print_desc, p_nd, sizeof(*p_nd)); - p_node->print_desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; + memcpy(print_desc, p_nd, sizeof(*p_nd)); + print_desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; + tmp_desc = remap_node_name(p_rcv->p_subn->p_osm->node_name_map, + cl_ntoh64(osm_node_get_node_guid(p_node)), + print_desc); + + /* make a copy for this node to "own" */ + p_node->print_desc = strdup(tmp_desc); if (osm_log_is_active(p_rcv->p_log, OSM_LOG_VERBOSE)) { osm_log(p_rcv->p_log, OSM_LOG_VERBOSE, diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c index 5b45401..9841c75 100644 --- a/opensm/opensm/osm_opensm.c +++ b/opensm/opensm/osm_opensm.c @@ -183,6 +183,8 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm) osm_subn_destroy(&p_osm->subn); cl_disp_destroy(&p_osm->disp); + close_node_name_map(p_osm->node_name_map); + cl_plock_destroy(&p_osm->lock); osm_log_destroy(&p_osm->log); @@ -310,6 +312,8 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, goto Exit; } + p_osm->node_name_map = 
open_node_name_map(NULL); + Exit: osm_log(&p_osm->log, OSM_LOG_FUNCS, "osm_opensm_init: ]\n"); /* Format Waived */ return (status); -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0006-Add-node-name-map-support-to-OpenSM-using-the-defa.patch Type: application/octet-stream Size: 4839 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 1 20:15:31 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 1 Nov 2007 20:15:31 -0700 Subject: [ofa-general] [PATCH 7/7] Add node_name_map_name to opts file. Message-ID: <20071101201531.281fbb7c.weiny2@llnl.gov> >From 2f88e7db6e2553cac310209e0679e099e1a97576 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Thu, 1 Nov 2007 19:48:40 -0700 Subject: [PATCH] Add node_name_map_name to opts file. Signed-off-by: Ira K. Weiny --- opensm/opensm/osm_opensm.c | 2 +- opensm/opensm/osm_subnet.c | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletions(-) diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c index 9841c75..d442340 100644 --- a/opensm/opensm/osm_opensm.c +++ b/opensm/opensm/osm_opensm.c @@ -312,7 +312,7 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, goto Exit; } - p_osm->node_name_map = open_node_name_map(NULL); + p_osm->node_name_map = open_node_name_map(p_opt->node_name_map_name); Exit: osm_log(&p_osm->log, OSM_LOG_FUNCS, "osm_opensm_init: ]\n"); /* Format Waived */ diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index 7fb21fd..7114305 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -445,6 +445,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const p_opt) #endif /* ENABLE_OSM_PERF_MGR */ p_opt->event_plugin_name = OSM_DEFAULT_EVENT_PLUGIN_NAME; + p_opt->node_name_map_name = NULL; p_opt->dump_files_dir = getenv("OSM_TMP_DIR"); if (!p_opt->dump_files_dir || !(*p_opt->dump_files_dir)) @@ -1250,6 +1251,9 @@ ib_api_status_t osm_subn_parse_conf_file(IN osm_subn_opt_t * const p_opts) 
opts_unpack_charp("event_plugin_name", p_key, p_val, &p_opts->event_plugin_name); + opts_unpack_charp("node_name_map_name", + p_key, p_val, &p_opts->node_name_map_name); + subn_parse_qos_options("qos", p_key, p_val, &p_opts->qos_options); @@ -1509,6 +1513,12 @@ ib_api_status_t osm_subn_write_conf_file(IN osm_subn_opt_t * const p_opts) "event_plugin_name %s\n\n", p_opts->event_plugin_name); fprintf(opts_file, + "#\n# Node name map for mapping node's to more descirptive node descriptors\n" + "# (man ibnetdiscover for more information)\n#\n" + "node_name_map_name %s\n\n", + p_opts->node_name_map_name ? p_opts->node_name_map_name : "(null)"); + + fprintf(opts_file, "#\n# DEBUG FEATURES\n#\n" "# The log flags used\n" "log_flags 0x%02x\n\n" -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0007-Add-node_name_map_name-to-opts-file.patch Type: application/octet-stream Size: 2260 bytes Desc: not available URL: From jssceat at blockislandproperty.com Fri Nov 2 02:25:32 2007 From: jssceat at blockislandproperty.com (Merle Law) Date: Fri, 2 Nov 2007 09:25:32 +0000 Subject: [ofa-general] Get life-like better than real pussy masturbator! Message-ID: <01c81d32$50e14510$da426c58@jssceat> Enjoy lifelike sensations with a specially designed to feel like a real pussy hand held masturbator. The Personal Pussy can be fucked any day and any time. Made of best modern materials it is reported by some men to be better than the real pussy. Enjoy lifelike sensations with a specially designed to feel like a real pussy hand held masturbator. The Personal Pussy can be fucked any day and any time. Made of best modern materials it is reported by some men to be better than the real pussy. http://quintub.com Discover the Personal Puss! for extra pleasure! 
From encephalomalaxis at rallyzone.com Fri Nov 2 03:17:19 2007 From: encephalomalaxis at rallyzone.com (Bobbie Taylor) Date: Fri, 02 Nov 2007 15:17:19 +0500 Subject: [ofa-general] Mlcrosoft W|ndows Sof+ware for $2O Message-ID: <000001c81d34$6c261880$0100007f@localhost> V!sit realnewsoft . com From vlad at lists.openfabrics.org Fri Nov 2 02:59:10 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 2 Nov 2007 02:59:10 -0700 (PDT) Subject: [ofa-general] ofa_1_3_kernel 20071102-0200 daily build status Message-ID: <20071102095911.04CBBE608E6@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.23 Passed on ppc64 with linux-2.6.18 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.19 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.14 
Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.12 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.9-22.ELsmp Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.9-34.ELsmp Failed: From Thomas.Talpey at netapp.com Fri Nov 2 05:02:58 2007 From: Thomas.Talpey at netapp.com (Talpey, Thomas) Date: Fri, 02 Nov 2007 08:02:58 -0400 Subject: [ofa-general] iWARP issues In-Reply-To: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> Message-ID: At 05:57 PM 11/1/2007, Sean Hefty wrote: >Does anyone know the details regarding the TCP connection retry algorithm in >Linux? (time between retries, number of retries, etc.) Sure. The time between retries is variable, it starts out as a few seconds (think, three), and backs off exponentially for a variable number of tries (/proc/sys/net/ipv4/*retries*). It comes out to a pretty large number, a minute or two typically. You really don't want to depend on any of this however. TCP will use all sorts of information from other connections, routing table entries and congestion algorithms, etc to be as adaptive as it feels it needs to be. Constants are NEVER a good idea in networking. 
Why wouldn't you just leave the timeout to TCP, and make CM's infinite? Tom. From fenkes at de.ibm.com Fri Nov 2 05:59:12 2007 From: fenkes at de.ibm.com (Joachim Fenkes) Date: Fri, 2 Nov 2007 14:59:12 +0200 Subject: [ofa-general] [PATCH] [REPOST] ofed_scripts: Add location code fix for older ppc64 kernels Message-ID: <200711021359.12949.fenkes@de.ibm.com> Kernels prior to 2.6.24 have problems with multiple devices sharing the same location code on ppc64 systems -- only one of these devices would be usable by ibmebus. This will be a problem on systems with multiple eHCA chips on a single hardware location. For older kernels, this problem can be circumvented by, prior to loading the eHCA driver, changing the location codes of the offending devices so that they're not the same anymore. This patch adds that circumvention to openibd, with an additional check to make sure we're on the right architecture and kernel version. Signed-off-by: Joachim Fenkes --- This is a repost of my previous patch, with the changes suggested by Vladimir included. ofed_scripts/openibd | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 69 insertions(+), 0 deletions(-) diff --git a/ofed_scripts/openibd b/ofed_scripts/openibd index cf4aee1..2cca88a 100755 --- a/ofed_scripts/openibd +++ b/ofed_scripts/openibd @@ -538,6 +538,74 @@ if test -x /sbin/lspci && test -x /sbin/setpci; then fi } +need_location_code_fix() +{ + local sub ARCH KVERSION + ARCH=$(uname -m) + KVERSION=$(uname -r) + + if [ "$ARCH" != "ppc64" ]; then + return 1; + fi + + case $KVERSION in + 2.6.9-*.EL*) + sub=$(echo $KVERSION | cut -d"-" -f2 | cut -d"." -f1) + if [ $sub -lt 62 ]; then + return 0; + fi + ;; + 2.6.16.*-*-*) + sub=$(echo $KVERSION | cut -d"." -f4 | cut -d"-" -f1) + if [ $sub -lt 53 ]; then + return 0; + fi + ;; + 2.6.18-*.el5*) + sub=$(echo $KVERSION | cut -d"-" -f2 | cut -d"." -f1) + if [ $sub -lt 54 ]; then + return 0; + fi + ;; + 2.6.*) + sub=$(echo $KVERSION | cut -d"." 
-f3 | cut -d"-" -f1 | tr -d [:alpha:][:punct:]) + if [ $sub -lt 24 ]; then + return 0; + fi + ;; + esac + + return 1; +} + +fix_location_codes() +{ + # ppc64 only: + # Fix duplicate location codes on kernels where ibmebus can't handle them + + if ! need_location_code_fix; then return 0; fi + if ! [ -d /proc/device-tree -a -f /proc/ppc64/ofdt ]; then return 0; fi + + local i=1 phandle lcode len + # output all duplicate location codes and their devices + for attr in $(find /proc/device-tree -wholename "*lhca\@*/ibm,loc-code"); do + echo -e $(dirname $attr)"\t"$(cat $attr) + done | sort -k2 | uniq -f1 --all-repeated=separate | cut -f1 | while read dev; do + if [ -n "$dev" ]; then + # append an instance counter to the location code + phandle=$(hexdump -e '8 "%u"' $dev/ibm,phandle) + lcode=$(cat $dev/ibm,loc-code)-I$i + len=$(echo -n "$lcode" | wc -c) + # echo "$dev -> $lcode" + echo -n "update_property $phandle ibm,loc-code $len $lcode" > /proc/ppc64/ofdt + i=$(($i + 1)) + else + # empty line means new group -- reset i + i=1 + fi + done +} + rotate_log() { local log=$1 @@ -694,6 +762,7 @@ start() # Load eHCA driver if [ "X${EHCA_LOAD}" == "Xyes" ]; then + fix_location_codes /sbin/modprobe ib_ehca > /dev/null 2>&1 my_rc=$? if [ $my_rc -ne 0 ]; then -- 1.5.2 From fenkes at de.ibm.com Fri Nov 2 06:32:49 2007 From: fenkes at de.ibm.com (Joachim Fenkes) Date: Fri, 2 Nov 2007 15:32:49 +0200 Subject: [ofa-general] [PATCH 0/2] IB/ehca: Return physical link information, fix static rate calculation Message-ID: <200711021432.50203.fenkes@de.ibm.com> This patchset will fix static rate calculation for the new link speeds supported by eHCA2. Also, it enables query_port() to return physical link information instead of constant values, which is needed for the static rate fix. [1/2] makes query_port() return actual physical link info where supported [2/2] fixes static rate calculation based on that info The patches will apply, in order, on top of Roland's for-2.6.24 branch. 
Please review them and apply for 2.6.24-rc2 if you think they're okay. Thanks and regards, Joachim -- Joachim Fenkes -- eHCA Linux Driver Developer and Hardware Tamer IBM Deutschland Entwicklung GmbH -- Dept. 3627 (I/O Firmware Dev. 2) Schoenaicher Strasse 220 -- 71032 Boeblingen -- Germany eMail: fenkes at de.ibm.com From fenkes at de.ibm.com Fri Nov 2 06:33:51 2007 From: fenkes at de.ibm.com (Joachim Fenkes) Date: Fri, 2 Nov 2007 15:33:51 +0200 Subject: [ofa-general] [PATCH 1/2] IB/ehca: Return physical link information in query_port() In-Reply-To: <200711021432.50203.fenkes@de.ibm.com> References: <200711021432.50203.fenkes@de.ibm.com> Message-ID: <200711021433.51565.fenkes@de.ibm.com> Newer firmware versions return physical port information to the partition, so hand that information to the consumer if it's present. Signed-off-by: Joachim Fenkes --- drivers/infiniband/hw/ehca/ehca_hca.c | 20 ++++++++++++++------ drivers/infiniband/hw/ehca/hipz_hw.h | 6 +++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 15806d1..5bd7b59 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -151,7 +151,6 @@ int ehca_query_port(struct ib_device *ibdev, } memset(props, 0, sizeof(struct ib_port_attr)); - props->state = rblock->state; switch (rblock->max_mtu) { case 0x1: @@ -188,11 +187,20 @@ int ehca_query_port(struct ib_device *ibdev, props->subnet_timeout = rblock->subnet_timeout; props->init_type_reply = rblock->init_type_reply; - props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; - - /* at the moment (logical) link state is always LINK_UP */ - props->phys_state = 0x5; + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't 
report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = 0x1; + } query_port1: ehca_free_fw_ctrlblock(rblock); diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index d9739e5..485b840 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -402,7 +402,11 @@ struct hipz_query_port { u64 max_msg_sz; u32 max_mtu; u32 vl_cap; - u8 reserved2[1900]; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; u64 guid_entries[255]; } __attribute__ ((packed)); -- 1.5.2 From fenkes at de.ibm.com Fri Nov 2 06:41:49 2007 From: fenkes at de.ibm.com (Joachim Fenkes) Date: Fri, 2 Nov 2007 15:41:49 +0200 Subject: [ofa-general] [PATCH 2/2] IB/ehca: Fix static rate calculation In-Reply-To: <200711021432.50203.fenkes@de.ibm.com> References: <200711021432.50203.fenkes@de.ibm.com> Message-ID: <200711021441.50158.fenkes@de.ibm.com> The IPD formula was a little off and assumed a fixed physical link rate; fix the formula and query the actual physical link rate, now that we can get it. Also, refactor the calculation into a common function ehca_calc_ipd() and use that instead of duplicating code. 
Signed-off-by: Joachim Fenkes --- drivers/infiniband/hw/ehca/ehca_av.c | 48 +++++++++++++++++++++++----- drivers/infiniband/hw/ehca/ehca_classes.h | 1 - drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 ++ drivers/infiniband/hw/ehca/ehca_main.c | 3 -- drivers/infiniband/hw/ehca/ehca_qp.c | 29 +++++++---------- 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 97d1086..453eb99 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -50,6 +50,38 @@ static struct kmem_cache *av_cache; +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { int ret; @@ -69,15 +101,13 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.slid_path_bits = ah_attr->src_path_bits; if (ehca_static_rate < 0) { - int ah_mult = ib_rate_to_mult(ah_attr->static_rate); - int ehca_mult = - ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); - - if (ah_mult >= ehca_mult) - av->av.ipd = 0; - else - av->av.ipd = (ah_mult > 0) ? 
- ((ehca_mult - 1) / ah_mult) : 0; + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; } else av->av.ipd = ehca_static_rate; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 2d660ae..87f12d4 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -95,7 +95,6 @@ struct ehca_sma_attr { struct ehca_sport { struct ib_cq *ibcq_aqp1; struct ib_qp *ibqp_aqp1; - enum ib_rate rate; enum ib_port_state port_state; struct ehca_sma_attr saved_attr; }; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index dce503b..5485799 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -189,6 +189,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void ehca_poll_eqs(unsigned long data); +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c6cd38c..90d4334 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -327,9 +327,6 @@ static int ehca_sense_attributes(struct ehca_shca *shca) shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - shca->sport[0].rate = IB_RATE_30_GBPS; - shca->sport[1].rate = IB_RATE_30_GBPS; - shca->hca_cap = rblock->hca_cap_indicators; ehca_gen_dbg(" ... 
HCA capabilities:"); for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index de18264..2e3e654 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1196,10 +1196,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); } if (attr_mask & IB_QP_AV) { - int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> - init_attr.port_num].rate); - mqpcb->dlid = attr->ah_attr.dlid; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); mqpcb->source_path_bits = attr->ah_attr.src_path_bits; @@ -1207,11 +1203,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ah_mult < ehca_mult) - mqpcb->max_static_rate = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; - else - mqpcb->max_static_rate = 0; + if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); /* @@ -1280,10 +1277,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); } if (attr_mask & IB_QP_ALT_PATH) { - int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult( - shca->sport[my_qp->init_attr.port_num].rate); - if (attr->alt_port_num < 1 || attr->alt_port_num > shca->num_ports) { ret = -EINVAL; @@ -1309,10 +1302,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; mqpcb->service_level_al = attr->alt_ah_attr.sl; - if (ah_mult > 0 && ah_mult < ehca_mult) - mqpcb->max_static_rate_al = (ehca_mult - 1) / ah_mult; - else - mqpcb->max_static_rate_al = 0; + if (ehca_calc_ipd(shca, 
my_qp->init_attr.port_num, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } /* OpenIB doesn't support alternate retry counts - copy them */ mqpcb->retry_count_al = mqpcb->retry_count; -- 1.5.2 From Arkady.Kanevsky at netapp.com Fri Nov 2 07:33:07 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Fri, 2 Nov 2007 10:33:07 -0400 Subject: [ofa-general] iWARP issues In-Reply-To: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> Message-ID: inline. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Sean Hefty [mailto:sean.hefty at intel.com] > Sent: Thursday, November 01, 2007 5:58 PM > To: Kanevsky, Arkady; bboas at systemfabricworks.com > Cc: OpenFabrics General > Subject: RE: [ofa-general] iWARP issues > > >- iWARP + TCP host stack port space sharing (required by IETF iSER > >spec.) > > I don't think we can accomplish much without participation > from the network maintainers, which I'm guessing will go > something like this: "No - end of discussion". (This is the > polite version.) That is why I think we need plenary time. We are between 2 rocks. IETF spec defines iSER protocol which requires features which Linux net guys do not want to see. Do we want to propose a change to IETF now? Last thing we want to get into is interop issues between different OSes. > > >- RDMA connection timeout; expand RDMA_CM API to support timeout > >parameter (not iWARP specific) > > I'm working on adding a timeout to rdma_connect() now. I'm > trying to decide if there should be a single timeout > parameter, or timeout and retry values. Thank you. > > Does anyone know the details regarding the TCP connection > retry algorithm in Linux? (time between retries, number of > retries, etc.)
> > - Sean > From Arkady.Kanevsky at netapp.com Fri Nov 2 07:39:11 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Fri, 2 Nov 2007 10:39:11 -0400 Subject: [ofa-general] iWARP issues In-Reply-To: References: Message-ID: comments inline. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Roland Dreier [mailto:rdreier at cisco.com] > Sent: Thursday, November 01, 2007 6:27 PM > To: Kanevsky, Arkady > Cc: bboas at systemfabricworks.com; OpenFabrics General > Subject: Re: [ofa-general] iWARP issues > > > - iWARP Support for Peer-to-Peer Applications, this is CM > > interoperability issue > > I guess the relevant people will be there from the RNIC > vendors, but this seems more like an IETF/rdma consortium issue to me. yes, I expect RNIC vendors will be there. The hope is that we can come up with changes in iwcm to ensure interop. This is why is planery item. It is true if IB vendors do not require to participate. > > > - iWARP + TCP host stack port space sharing (required by > IETF iSER > spec.) > > Maybe we can try to brainstorm for alternate solutions, but > it seems we are missing all the key stakeholders from the > Linux networking side to really resolve this. That is the goal. > > > - missing verbs (IB-only, iWARP-only, and > iWARP-nonstandard), > for example FMR, send with > invalidate. (I recall that complete list was > flash out > > a year ago) > (ULP changes to take advantage of these verbs: > e.g. NFS-RDMA, iSER). > > I haven't seen any discussion of this yet. Is it really > worth taking up face-to-face time on this? It seems like it > should all be pretty straightforward, and even if it isn't, > we haven't spent the time to figure out what the not > straightforward parts are yet. There was a discussion about it almost a year ago. 
I think if RNIC vendors will fund a person to move this forward. This is why it needs plenary time. IB vendors are also needed there because many of the same verbs are transport independent and defined in IBTA spec v1.2. > > > - RDMA connection timeout; expand RDMA_CM API to support > timeout > parameter (not iWARP specific) > > Again, I haven't seen any discussion yet, so it doesn't seem > worth taking up face-to-face-time until we know what the > sticking points are. If Sean is doing it already we may not need plenary time for it. > > - R. > From rdreier at cisco.com Fri Nov 2 08:57:44 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 02 Nov 2007 08:57:44 -0700 Subject: [ofa-general] iWARP issues In-Reply-To: (Arkady Kanevsky's message of "Fri, 2 Nov 2007 10:39:11 -0400") References: Message-ID: > > > - missing verbs (IB-only, iWARP-only, and iWARP-nonstandard), > > > for example FMR, send with invalidate. (I recall that complete > > > list was fleshed out a year ago) (ULP changes to take advantage > > > of these verbs: e.g. NFS-RDMA, iSER). > > I haven't seen any discussion of this yet. Is it really > > worth taking up face-to-face time on this? It seems like it > > should all be pretty straightforward, and even if it isn't, > > we haven't spent the time to figure out what the not > > straightforward parts are yet. > There was a discussion about it almost a year ago. > I think if RNIC vendors will fund a person to move this forward. > This is why it needs plenary time. > IB vendors are also needed there because many of the same verbs > are transport independent and defined in IBTA spec v1.2. I still don't understand why we have to waste face-to-face time on this. Is there anything that can't be accomplished via email or just by sitting down and implementing things? The fact that there was a discussion about it a year ago and no further progress really says to me that the problem is not lack of discussion.
In general I think we should only use face-to-face time on things that we have tried to resolve through normal online channels and gotten stuck on. It's a complete waste of time to get together and talk about a big general topic where we don't know what the real issues are; the discussion just ends up being, "Yep, yep, I agree, yep, that makes sense, yep, we should look at that, OK, I'm not sure, I need to look that up, OK, we should do this via email," and we burn a big chunk of the day in a completely unproductive way. > > > - RDMA connection timeout; expand RDMA_CM API to support > > > timeout parameter (not iWARP specific) > > Again, I haven't seen any discussion yet, so it doesn't seem > > worth taking up face-to-face-time until we know what the > > sticking points are. > If Sean is doing it already we may not need plenary time for it. Even if Sean isn't doing it why do we need to spend time on it? The fact that you think one person just going off and working on it eliminates the need for the session says to me that there is no need for a session in the first place -- whoever cares about this issue should just work on it, rather than taking a lot of valuable face-to-face time before there's anything to talk about (and probably not ending up doing anything in the end). - R. From mshefty at ichips.intel.com Fri Nov 2 08:54:25 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 02 Nov 2007 08:54:25 -0700 Subject: [ofa-general] iWARP issues In-Reply-To: References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> Message-ID: <472B4831.7030303@ichips.intel.com> > Why wouldn't you just leave the timeout to TCP, and make CM's infinite? I'm more concerned about IB, but would like the rdma cm interface usable by iWarp as well. The last request I had was to make the timeout smaller, not larger. (With the MRA patch that recently went in, I don't think we'll need to support longer timeouts in IB.)
At least on linux, it appears that a user can control the number of SYN retries using a socket option. I don't see anything that allows them to control the timeout however. - Sean From pradeeps at linux.vnet.ibm.com Fri Nov 2 09:50:47 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Fri, 02 Nov 2007 09:50:47 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <472A5351.5020106@linux.vnet.ibm.com> References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> Message-ID: <472B5567.1020301@linux.vnet.ibm.com> Pradeep Satyanarayana wrote: > Roland Dreier wrote: >> FWIW, I left netpipe-tcp running in a loop overnight over a connected >> mode IPoIB interface on a system running my for-2.6.25 tree (plus a >> hack to use the non-SRQ code on mlx4 by forcing create SRQ to fail). >> It ran with no problems (and transferred nearly a billion packets and >> 10 TB of data). >> > Yes, it definitely seems much better with the for-2.6.25 tree and it all > seems to go off well. Except for one crash in cache_alloc_refill() > all of the other test runs have completed. BTW, I have been using SLAB thus > far. I will switch to SLUB and see if that makes any difference. > > And thanks for testing it out on mlx4. Ran into some crashes with Slub too. Maybe it is PPC64 specific. What machine did you run the mlx4 tests on? However, here is a stack trace with e1000 (I was running netperf on IB when I saw this crash)that indicates that this is unlikely to be an IB issue. 
0:mon> t [c00000000ffff7f0] c000000000361340 .skb_release_data+0xf0/0x120 [c00000000ffff880] c000000000360ee0 .kfree_skbmem+0x20/0x130 [c00000000ffff900] c0000000003cc118 .__udp4_lib_rcv+0x408/0x950 [c00000000ffffa10] c00000000039efc0 .ip_local_deliver_finish+0x170/0x340 [c00000000ffffab0] c00000000039eb28 .ip_rcv_finish+0x198/0x4c0 [c00000000ffffb70] c00000000036a268 .netif_receive_skb+0x3a8/0x6e0 [c00000000ffffc40] d0000000003b34b0 .e1000_clean_rx_irq+0x250/0x6c0 [e1000] [c00000000ffffd50] d0000000003b08a0 .e1000_clean+0x2e0/0x390 [e1000] [c00000000ffffe10] c00000000036daa0 .net_rx_action+0x1f0/0x2a0 [c00000000ffffed0] c000000000064d48 .__do_softirq+0xe8/0x1e0 [c00000000fffff90] c00000000002ad88 .call_do_softirq+0x14/0x24 [c0000000006678a0] c00000000000c2b8 .do_softirq+0x88/0xe0 [c000000000667930] c000000000064f04 .irq_exit+0x74/0x90 [c0000000006679b0] c00000000000cccc .do_IRQ+0xec/0x1e0 [c000000000667a40] c000000000004780 hardware_interrupt_entry+0x18/0x98 --- Exception: 501 (Hardware Interrupt) at c00000000003d94c .pseries_dedicated_idle_sleep+0xdc/0x1c0 [c000000000667d30] 00000000021464d8 (unreliable) [c000000000667dd0] c00000000001207c .cpu_idle+0x13c/0x250 [c000000000667e60] c0000000004349b8 .rest_init+0x78/0x90 [c000000000667ee0] c000000000510a24 .start_kernel+0x354/0x400 [c000000000667f90] c000000000434930 .start_here_common+0x54/0x64 0:mon> e cpu 0x0: Vector: 300 (Data Access) at [c00000000ffff4d0] pc: c0000000000a842c: .put_page+0x2c/0x1a0 lr: c000000000361340: .skb_release_data+0xf0/0x120 sp: c00000000ffff750 msr: 8000000000009032 dar: 6e65747065726600 dsisr: 40000000 current = 0xc00000000058e450 paca = 0xc00000000058ed00 pid = 0, comm = swapper 0:mon> I would like to pursue this a bit further since I think this is an issue that needs to be addressed. I pulled from your for-2.6.25 git tree. Is this the same as Linus' 2.6.24-rc1? 
Pradeep From Thomas.Talpey at netapp.com Fri Nov 2 09:57:36 2007 From: Thomas.Talpey at netapp.com (Talpey, Thomas) Date: Fri, 02 Nov 2007 12:57:36 -0400 Subject: [ofa-general] iWARP issues In-Reply-To: <472B4831.7030303@ichips.intel.com> References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> <472B4831.7030303@ichips.intel.com> Message-ID: At 11:54 AM 11/2/2007, Sean Hefty wrote: >> Why wouldn't you just leave the timeout to TCP, and make CM's infinite? > >I'm more concerned about IB, but would like the rdma cm interface usable >by iWarp as well. The last request I had was to make the timeout >smaller, not larger. (With the MRA patch that recently went in, I don't >think we'll need to support longer timeouts in IB.) > >At least on linux, it appears that a user can control the number of SYN >retries using a socket option. I don't see anything that allows them to >control the timeout however. I still don't understand why you would want to do this. TCP already implements the best timer you could hope for. But, if all you want to do is abort an in-progress connection attempt, can't you just run a timer to signal you and thereby interrupt the connect(2) in progress? Tom. From sashak at voltaire.com Fri Nov 2 10:34:27 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 2 Nov 2007 19:34:27 +0200 Subject: [ofa-general] Re: [PATCH 5/7] infiniband-diags/src/smpquery.c : special case situation where nodename len is >= 32 In-Reply-To: <20071101201518.4d70e189.weiny2@llnl.gov> References: <20071101201518.4d70e189.weiny2@llnl.gov> Message-ID: <20071102173427.GK20136@sashak.voltaire.com> On 20:15 Thu 01 Nov , Ira Weiny wrote: > From 04c67433708cb14ab384a2acfc19755998f7a8b2 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 1 Nov 2007 19:31:30 -0700 > Subject: [PATCH] infiniband-diags/src/smpquery.c : special case situation where nodename len is >= 32 > > Signed-off-by: Ira K. Weiny Applied. Thanks. 
Sasha From rdreier at cisco.com Fri Nov 2 10:24:02 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 02 Nov 2007 10:24:02 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <472B5567.1020301@linux.vnet.ibm.com> (Pradeep Satyanarayana's message of "Fri, 02 Nov 2007 09:50:47 -0700") References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> Message-ID: > Ran into some crashes with Slub too. Maybe it is PPC64 specific. What machine > did you run the mlx4 tests on? Intel 2 * quad core Xeon. I don't have any ppc64 systems any more. > However, here is a stack trace with e1000 (I was running netperf on IB when I > saw this crash)that indicates that this is unlikely to be an IB issue. What do you see that implicates IB here? > I would like to pursue this a bit further since I think this is an issue that > needs to be addressed. I pulled from your for-2.6.25 git tree. Is this the same > as Linus' 2.6.24-rc1? My branch is based off of Linus's commit 5307cc1aa, which is a bit after 2.6.24-rc1. - R. From rdreier at cisco.com Fri Nov 2 10:25:37 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 02 Nov 2007 10:25:37 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <472B5567.1020301@linux.vnet.ibm.com> (Pradeep Satyanarayana's message of "Fri, 02 Nov 2007 09:50:47 -0700") References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> Message-ID: BTW, I'm finishing up an update of the non-SRQ patches to take advantage of being able to use wc->qp even in the non-SRQ case (and get rid of the static table of rx contexts). - R. 
From yangdong at ncic.ac.cn Fri Nov 2 10:25:37 2007 From: yangdong at ncic.ac.cn (yangdong) Date: Sat, 03 Nov 2007 01:25:37 +0800 Subject: [ofa-general] librdmacm use problem -- multi qps per cq or a qp per cq? Message-ID: <472B5D91.4030802@ncic.ac.cn> I want to use multi qps referred to a cq, but in librdmacm. First, i create a cm-event-channel using rdma_create_event_channel, which is referred to a context. Then i create a pd, which is also referred to the same context, then i create comp-channel and cq using ibv_create_comp_channel and ibv_create_cq. i create a cm_id for every connection, which is used as the first param for rdma_create_qp. i create qp per connection using rdma_create_qp, int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr). But i cannot identify the param pd, when i put the context-pd here, i would get error-can't create qp, because rdma_create_qp needs to check the cm_id->pd and context-pd, they are different, when i create a new pd for qp, i can create qp, but can't transfer data through qp. Alternatively, i can create a qp using ibv_create_qp and do sth., but i can't know how to contact qp with cm_id which has created? And i can't use mechanism provided by librdmacm? Anyone can tell me what to do? thanks. From yangdong at ncic.ac.cn Fri Nov 2 10:26:02 2007 From: yangdong at ncic.ac.cn (yangdong) Date: Sat, 03 Nov 2007 01:26:02 +0800 Subject: [ofa-general] librdmacm use problem -- multi qps per cq or a qp per cq? Message-ID: <472B5DAA.7000900@ncic.ac.cn> I want to use multi qps referred to a cq, but in librdmacm. First, i create a cm-event-channel using rdma_create_event_channel, which is referred to a context. Then i create a pd, which is also referred to the same context, then i create comp-channel and cq using ibv_create_comp_channel and ibv_create_cq. i create a cm_id for every connection, which is used as the first param for rdma_create_qp.
i create qp per connection using rdma_create_qp, int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr). But i cannot identify the param pd, when i put the context-pd here, i would get error-can't create qp, because rdma_create_qp needs to check the cm_id->pd and context-pd, they are different, when i create a new pd for qp, i can create qp, but can't transfer data through qp. Alternatively, i can create a qp using ibv_create_qp and do sth., but i can't know how to contact qp with cm_id which has created? And i can't use mechanism provided by librdmacm? Anyone can tell me what to do? thanks. From sean.hefty at intel.com Fri Nov 2 10:17:34 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 2 Nov 2007 10:17:34 -0700 Subject: [ofa-general] rdma cm timeout option, was [iWARP issues] In-Reply-To: References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com><472B4831.7030303@ichips.intel.com> Message-ID: <001301c81d74$426c8840$9c98070a@amr.corp.intel.com> >I still don't understand why you would want to do this. TCP already >implements the best timer you could hope for. Because TCP isn't running on top of IB. And IB doesn't automatically establish connections for the user on the passive side. >But, if all you want to do is abort an in-progress connection attempt, >can't you just run a timer to signal you and thereby interrupt the >connect(2) in progress? Yes - that's one of the options I'm considering. But either the ULP can be responsible for canceling the connection request, or the rdma cm can manage this for the user. These are the possibilities that I see: 1 Leave API unchanged. 2 Allow ULP to set number of connection retries. 3 Allow ULP to set connection timeout. 4 Allow ULP to set timeout per retry and number of retries. The 1st option requires ULP to manage shorter timeouts. From what I can tell, the 2nd option matches a non-portable Linux setsockopt() capability.
The 3rd and 4th options can be applied to IB connections, but do not easily extend to iWarp. Of these, I'm leaning towards the first option. But this doesn't allow for longer timeouts. - Sean From sashak at voltaire.com Fri Nov 2 10:42:15 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 2 Nov 2007 19:42:15 +0200 Subject: [ofa-general] Re: [PATCH 1/7] use lookup_switch_name for all node types. In-Reply-To: <20071101201456.410c7d65.weiny2@llnl.gov> References: <20071101201456.410c7d65.weiny2@llnl.gov> Message-ID: <20071102174215.GL20136@sashak.voltaire.com> On 20:14 Thu 01 Nov , Ira Weiny wrote: > From f451c7b352b44747d1eb6f4627109a3f12b757e6 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 31 Oct 2007 16:22:12 -0700 > Subject: [PATCH] use lookup_switch_name for all node types. > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From xenophontic at quinnart.com Fri Nov 2 10:33:16 2007 From: xenophontic at quinnart.com (Linley Silva) Date: Fri, 02 Nov 2007 18:33:16 +0100 Subject: [ofa-general] realnewsoft . com From rdreier at cisco.com Fri Nov 2 10:37:02 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 02 Nov 2007 10:37:02 -0700 Subject: [ofa-general] librdmacm use problem -- multi qps per cq or a qp per cq? In-Reply-To: <472B5DAA.7000900@ncic.ac.cn> (yangdong@ncic.ac.cn's message of "Sat, 03 Nov 2007 01:26:02 +0800") References: <472B5DAA.7000900@ncic.ac.cn> Message-ID: > I want to use mutli qps referred to a cq, but in librdmacm. > First, i create a cm-event-channel using rdma_create_event_channel, > which is referred to a context. Then i create a pd, which is also > referred to the same context, then i create comp-channel and cq using > ibv_create_comp_channel and ibv_create_cq. > i create a cm_id for every connection, which as the first param for > rdma_create_qp. i create qp per connection using rdma_create_qp, int > rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, struct > ibv_qp_init_attr *qp_init_attr). 
But i cannot identify the param pd, > when i put the context-pd here, i would get error-can't create qp, > because rdma_create_qp need to check the cm_id->pd and context-pd, they > are different, when i create a new pd for qp, i can create qp, but can't > transefer data through qp. > Alternative, i can create a qp using ibv_create_qp and do sth., but i > can't know how to contact qp with cm_id which has created? And i can't > use mechanism provided by librdmacm? I don't really understand your problem. The sequence that works fine for me is: cm_channel = rdma_create_event_channel(); rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP); rdma_resolve_addr(cm_id, NULL, addr, ); rdma_resolve_route(cm_id, ); pd = ibv_alloc_pd(cm_id->verbs); comp_chan = ibv_create_comp_channel(cm_id->verbs); cq = ibv_create_cq(cm_id->verbs, 2, NULL, comp_chan, 0); qp_attr.send_cq = cq; qp_attr.recv_cq = cq; rdma_create_qp(cm_id, pd, &qp_attr); librdmacm internally will use the same libibverbs context for all cm_id structures attached to the same device, so you should be able to use the same CQ for multiple QPs. If you post the code you are trying to run and describe how it fails, we may be able to give better advice. - R. From pradeeps at linux.vnet.ibm.com Fri Nov 2 10:44:17 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Fri, 02 Nov 2007 10:44:17 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> Message-ID: <472B61F1.5060305@linux.vnet.ibm.com> Roland Dreier wrote: > > Ran into some crashes with Slub too. Maybe it is PPC64 specific. What machine > > did you run the mlx4 tests on? > > Intel 2 * quad core Xeon. I don't have any ppc64 systems any more. 
> > > However, here is a stack trace with e1000 (I was running netperf on IB when I > > saw this crash)that indicates that this is unlikely to be an IB issue. > > What do you see that implicates IB here? *unlikely* to be an IB issue is what I stated. Did you misread? Pradeep From rdreier at cisco.com Fri Nov 2 10:45:13 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 02 Nov 2007 10:45:13 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <472B61F1.5060305@linux.vnet.ibm.com> (Pradeep Satyanarayana's message of "Fri, 02 Nov 2007 10:44:17 -0700") References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> Message-ID: > *unlikely* to be an IB issue is what I stated. Did you misread? Yes I did misread, sorry. OTOH it is quite possible that ipoib is corrupting an skb somehow so that when it gets reused by e1000, you see a crash. The fact that you were running netperf on IB when e1000 crashed is somewhat suspicious. - R. From yangdong at ncic.ac.cn Fri Nov 2 10:51:15 2007 From: yangdong at ncic.ac.cn (yangdong) Date: Sat, 03 Nov 2007 01:51:15 +0800 Subject: [Fwd: Re: [ofa-general] librdmacm use problem -- multi qps per cq or a qp per cq?] Message-ID: <472B6393.7050807@ncic.ac.cn> -------------- next part -------------- An embedded message was scrubbed... From: yangdong Subject: Re: [ofa-general] librdmacm use problem -- multi qps per cq or a qp per cq? Date: Sat, 03 Nov 2007 01:47:32 +0800 Size: 3360 URL: From rdreier at cisco.com Fri Nov 2 10:56:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 02 Nov 2007 10:56:04 -0700 Subject: [Fwd: Re: [ofa-general] librdmacm use problem -- multi qps per cq or a qp per cq?] 
In-Reply-To: <472B6393.7050807@ncic.ac.cn> (yangdong@ncic.ac.cn's message of "Sat, 03 Nov 2007 01:51:15 +0800") References: <472B6393.7050807@ncic.ac.cn> Message-ID: > Ok, first i see a context, i cannot create a cm_id ,and i directly create a cm_channel, a comp_channel and a cq: > nic_handle = get_nic_handle(); > ib_ctx = ibv_open_device(nic_handle); > ctx_pd = ibv_alloc_pd(ib_ctx); > cm_channel = rdma_create_event_channel(); > comp_chan = ibv_create_comp_channel(ctx_pd); > cq = ibv_create_cq(pd, 2, NULL, comp_chan, 0); > > then i create a id and do that for every connection in context: > rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP); > rdma_resolve_addr(cm_id, NULL, addr, ); > rdma_resolve_route(cm_id, ); > pd = ibv_alloc_pd(cm_id->verbs); > qp_attr.send_cq = cq; > qp_attr.recv_cq = cq; > rdma_create_qp(cm_id, pd, &qp_attr); > ... Right, that won't work. If you want to use librdmacm, then you have to use the librdmacm-provided verbs context for everything. So rather than using ibv_open_device(), just use cm_id->verbs for everything. - R. From sean.hefty at intel.com Fri Nov 2 10:59:53 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 2 Nov 2007 10:59:53 -0700 Subject: [ofa-general] librdmacm use problem -- multi qps per cq or aqp per cq? In-Reply-To: <472B62B4.508@ncic.ac.cn> References: <472B5DAA.7000900@ncic.ac.cn> <472B62B4.508@ncic.ac.cn> Message-ID: <001901c81d7a$2b8c26c0$9c98070a@amr.corp.intel.com> >Ok, first i see a context, i cannot create a cm_id ,and i directly create a >cm_channel, a comp_channel and a cq: > nic_handle = get_nic_handle(); > ib_ctx = ibv_open_device(nic_handle); Try using rdma_get_devices() here to get the same device list that's used by the rdma cm. 
> ctx_pd = ibv_alloc_pd(ib_ctx); > cm_channel = rdma_create_event_channel(); > comp_chan = ibv_create_comp_channel(ctx_pd); > cq = ibv_create_cq(pd, 2, NULL, comp_chan, 0); > > then i create a id and do that for every connection in context: This process will only work if you have a single device in the system. Otherwise, you will want to follow the steps that Roland gave. > rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP); > rdma_resolve_addr(cm_id, NULL, addr, ); > rdma_resolve_route(cm_id, ); > pd = ibv_alloc_pd(cm_id->verbs); > qp_attr.send_cq = cq; > qp_attr.recv_cq = cq; > rdma_create_qp(cm_id, pd, &qp_attr); Does using ctx_pd here work instead? - Sean From Arkady.Kanevsky at netapp.com Fri Nov 2 11:31:20 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Fri, 2 Nov 2007 14:31:20 -0400 Subject: [ofa-general] rdma cm timeout option, was [iWARP issues] In-Reply-To: <001301c81d74$426c8840$9c98070a@amr.corp.intel.com> References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com><472B4831.7030303@ichips.intel.com> <001301c81d74$426c8840$9c98070a@amr.corp.intel.com> Message-ID: The longer timeout is what I was after. If server need to do fair amount of setup before responding to connection request it times out on the client. Even if RDMA_CM runs on top of TCP it does not help since RDMA client will get a signal that timeout expired. So I was looking for extension to API so client can specify longer timeout. Thanks, Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Sean Hefty [mailto:sean.hefty at intel.com] > Sent: Friday, November 02, 2007 1:18 PM > To: Talpey, Thomas; Sean Hefty > Cc: OpenFabrics General > Subject: [ofa-general] rdma cm timeout option, was [iWARP issues] > > >I still don't understand why you would want to do this. 
TCP already > >implements the best timer you could hope for. > > Because TCP isn't running on top of IB. And IB doesn't > automatically establish connections for the user on the passive side. > > >But, if all you want to do is abort an in-progress > connection attempt, > >can't you just run a timer to signal you and thereby interrupt the > >connect(2) in progress? > > Yes - that's one of the options I'm considering. But either > the ULP can be responsible for canceling the connection > request, or the rdma cm can manage this for the user. > > These are the possibilities that I see: > > 1 Leave API unchanged. > 2 Allow ULP to set number of connection retries. > 3 Allow ULP to set connection timeout. > 4 Allow ULP to set timeout per retry and number of retries. > > The 1st option requires ULP to manage shorter timeouts. From > what I can tell, the 2nd option matches a non-portable Linux > setsockopt() capability. The 3rd and 4th options can be > applied to IB connections, but do not easily extend to iWarp. > > Of these, I'm leaning towards the first option. But this > doesn't allow for longer timeouts. > > - Sean > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Fri Nov 2 11:53:52 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 2 Nov 2007 20:53:52 +0200 Subject: [ofa-general] Re: [PATCH 2/7] Change switch map function names to reflect the new functionality of a simple node name map. In-Reply-To: <20071101201502.6d393646.weiny2@llnl.gov> References: <20071101201502.6d393646.weiny2@llnl.gov> Message-ID: <20071102185352.GM20136@sashak.voltaire.com> On 20:15 Thu 01 Nov , Ira Weiny wrote: > From b4f6bbea815aaa91837d464f882d30405ffe9d98 Mon Sep 17 00:00:00 2001 > From: Ira K. 
Weiny > Date: Wed, 31 Oct 2007 16:34:44 -0700 > Subject: [PATCH] Change switch map function names to reflect the new functionality of a simple > > node name map. > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From mshefty at ichips.intel.com Fri Nov 2 11:52:15 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 02 Nov 2007 11:52:15 -0700 Subject: [ofa-general] rdma cm timeout option, was [iWARP issues] In-Reply-To: References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com><472B4831.7030303@ichips.intel.com> <001301c81d74$426c8840$9c98070a@amr.corp.intel.com> Message-ID: <472B71DF.2090408@ichips.intel.com> Kanevsky, Arkady wrote: > The longer timeout is what I was after. > If server need to do fair amount of setup before > responding to connection request it times out on the client. But doesn't iWarp form the TCP connection without ULP intervention? (This is where it differs from IB.) > So I was looking for extension to API so client can specify longer > timeout. Do you have a proposal for how the API extension would be implemented for iWarp? - Sean From Thomas.Talpey at netapp.com Fri Nov 2 12:08:53 2007 From: Thomas.Talpey at netapp.com (Talpey, Thomas) Date: Fri, 02 Nov 2007 15:08:53 -0400 Subject: [ofa-general] Re: rdma cm timeout option, was [iWARP issues] In-Reply-To: <001301c81d74$426c8840$9c98070a@amr.corp.intel.com> References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> <472B4831.7030303@ichips.intel.com> <001301c81d74$426c8840$9c98070a@amr.corp.intel.com> Message-ID: At 01:17 PM 11/2/2007, Sean Hefty wrote: >>I still don't understand why you would want to do this. TCP already >>implements the best timer you could hope for. > >Because TCP isn't running on top of IB. And IB doesn't automatically establish >connections for the user on the passive side. Sure, the CM needs a timeout when handling its own transport connections as it does over native IB. 
But if it's running over timeout-aware transports such as TCP, I think it should defer to them. >>But, if all you want to do is abort an in-progress connection attempt, >>can't you just run a timer to signal you and thereby interrupt the >>connect(2) in progress? > >Yes - that's one of the options I'm considering. But either the ULP can be >responsible for canceling the connection request, or the rdma cm can >manage this >for the user. > >These are the possibilities that I see: > >1 Leave API unchanged. >2 Allow ULP to set number of connection retries. >3 Allow ULP to set connection timeout. >4 Allow ULP to set timeout per retry and number of retries. Options 2 and 4 are not a good solution, IMO. They don't actually specify a timeout and depend on events beyond the ULP's control. So, my opinion is that it's down to #3. Is the rdma_connect() API interruptible? I.e. if the connection is running in an application with a TTY associated, does it abort if the user types ^C? What else causes the CM client to abort? Tom. > >The 1st option requires ULP to manage shorter timeouts. From what I can tell, >the 2nd option matches a non-portable Linux setsockopt() capability. The 3rd >and 4th options can be applied to IB connections, but do not easily extend to >iWarp. > >Of these, I'm leaning towards the first option. But this doesn't allow for >longer timeouts. > >- Sean From Arkady.Kanevsky at netapp.com Fri Nov 2 12:17:54 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Fri, 2 Nov 2007 15:17:54 -0400 Subject: [ofa-general] rdma cm timeout option, was [iWARP issues] In-Reply-To: <472B71DF.2090408@ichips.intel.com> References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com><472B4831.7030303@ichips.intel.com><001301c81d74$426c8840$9c98070a@amr.corp.intel.com> <472B71DF.2090408@ichips.intel.com> Message-ID: Yes on first. No on second yet. Maybe for iWARP the only acceptable value will be "default"? 
But this still feels that ULPs need to do something transport specific. except for "default" case. But implementing TCP style timeout for IB looks like overkill. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Sean Hefty [mailto:mshefty at ichips.intel.com] > Sent: Friday, November 02, 2007 2:52 PM > To: Kanevsky, Arkady > Cc: Talpey, Thomas; OpenFabrics General > Subject: Re: [ofa-general] rdma cm timeout option, was [iWARP issues] > > Kanevsky, Arkady wrote: > > The longer timeout is what I was after. > > If server need to do fair amount of setup before responding to > > connection request it times out on the client. > > But doesn't iWarp form the TCP connection without ULP intervention? > (This is where it differs from IB.) > > > So I was looking for extension to API so client can specify longer > > timeout. > > Do you have a proposal for how the API extension would be > implemented for iWarp? > > - Sean > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From inveigled at welkgroup.com Fri Nov 2 12:25:57 2007 From: inveigled at welkgroup.com (Tollefsen Elliott) Date: Fri, 02 Nov 2007 22:25:57 +0300 Subject: [ofa-general] realnewsoft . com From outgleam at excelroyals.com Fri Nov 2 14:16:57 2007 From: outgleam at excelroyals.com (Vivek Price) Date: Fri, 02 Nov 2007 22:16:57 +0100 Subject: [ofa-general] realnewsoft . com From arteriograph at sourcearchive.com Fri Nov 2 18:11:05 2007 From: arteriograph at sourcearchive.com (Emil Brown) Date: Fri, 02 Nov 2007 16:11:05 -0900 Subject: [ofa-general] realnewsoft . 
com From pradeeps at linux.vnet.ibm.com Fri Nov 2 19:07:46 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Fri, 02 Nov 2007 19:07:46 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> Message-ID: <472BD7F2.70200@linux.vnet.ibm.com> > > OTOH it is quite possible that ipoib is corrupting an skb somehow so > that when it gets reused by e1000, you see a crash. The fact that you > were running netperf on IB when e1000 crashed is somewhat suspicious. Yes, exactly the lingering suspicions that I had. I ran several iterations of netperf on e1000 and there were no crashes. So, I started looking at the patch more closely. I think I am on to something now. In ipoib_cm_handle_rx_wc() I see two things (I have not yet looked at the latest changes that you mentioned earlier today) : 1. Do not understand the usage and purpose of recv_count (something new that you have introduced). Can you please explain. However, the suspicion being that if somehow the if clause is executed, the rx_ring gets freed and so all the skb pointers are bogus. I have commented out this segment of code. 2. The call to ipoib_cm_alloc_rx_skb() in ipoib_cm_handle_rx_wc() uses an index value of 0 (hard coded) which is incorrect for no srq. I have changed that to index instead. I have been running this for some hours now; no crashes and no errors. This is using Slub. If I get a chance I will run with slab over the weekend and let you know of the results. 
Pradeep From jim at mellanox.com Fri Nov 2 19:45:40 2007 From: jim at mellanox.com (Jim Mott) Date: Fri, 2 Nov 2007 19:45:40 -0700 Subject: [ofa-general] [PATCH 1/1] SDP - Fix reference count bug that prevents mlx4_ib and ib_sdp unload Message-ID: Add code to handle mlx4 device remove call. Signed-off-by: Jim Mott --- diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c index 2bac485..f0b8703 100644 --- a/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/drivers/infiniband/ulp/sdp/sdp_main.c @@ -367,6 +367,8 @@ void sdp_reset(struct sock *sk) { int err; + sdp_dbg(sk, "%s state=%d\n", __func__, sk->sk_state); + if (sk->sk_state != TCP_ESTABLISHED) return; @@ -653,7 +655,8 @@ static int sdp_disconnect(struct sock *sk, int flags) ssk->id = NULL; release_sock(sk); /* release socket since locking semantics is parent inside child */ - rdma_destroy_id(id); + if (id) + rdma_destroy_id(id); list_for_each_entry_safe(s, t, &ssk->backlog_queue, backlog_queue) { sk_common_release(&s->isk.sk); @@ -2231,7 +2234,29 @@ static void sdp_add_device(struct ib_device *device) static void sdp_remove_device(struct ib_device *device) { + struct list_head *p; + struct sdp_sock *ssk; + struct sock *sk; + write_lock(&device_removal_lock); + + spin_lock_irq(&sock_list_lock); + list_for_each(p, &sock_list) { + ssk = list_entry(p, struct sdp_sock, sock_list); + if (ssk->ib_device == device) { + sk = &ssk->isk.sk; + + if (ssk->id) { + rdma_destroy_id(ssk->id); + ssk->id = NULL; + } + + sk->sk_shutdown |= RCV_SHUTDOWN; + sdp_reset(sk); + } + } + spin_unlock_irq(&sock_list_lock); + write_unlock(&device_removal_lock); } From jim at mellanox.com Fri Nov 2 19:49:24 2007 From: jim at mellanox.com (Jim Mott) Date: Fri, 2 Nov 2007 19:49:24 -0700 Subject: [ofa-general] [PATCH 1/1] SDP - Make bzcopy defualt for 2K and larger transfer size Message-ID: In order to be sure we test the new bzcopy code it will be enabled by default. 
The 2K threshold is what my testing shows to be the lowest value that always wins. We may have to adjust this upward if other hardware has worse performance. Signed-off-by: Jim Mott --- diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c index f0b8703..d1eb6ab 100644 --- a/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/drivers/infiniband/ulp/sdp/sdp_main.c @@ -141,7 +141,7 @@ static unsigned int sdp_keepalive_time = SDP_KEEPALIVE_TIME; module_param_named(sdp_keepalive_time, sdp_keepalive_time, uint, 0644); MODULE_PARM_DESC(sdp_keepalive_time, "Default idle time in seconds before keepalive probe sent."); -static int sdp_zcopy_thresh = 0; +static int sdp_zcopy_thresh = 2048; module_param_named(sdp_zcopy_thresh, sdp_zcopy_thresh, int, 0644); MODULE_PARM_DESC(sdp_zcopy_thresh, "Zero copy send threshold; 0=0ff."); From vlad at lists.openfabrics.org Sat Nov 3 02:57:27 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 3 Nov 2007 02:57:27 -0700 (PDT) Subject: [ofa-general] ofa_1_3_kernel 20071103-0200 daily build status Message-ID: <20071103095727.852B0E6086A@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.19 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with 
linux-2.6.16 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.20 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.17 Passed on ppc64 with linux-2.6.12 Passed on x86_64 with linux-2.6.22 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.9-22.ELsmp Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.9-34.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Failed: From photocopyvi1 at auburn.edu Sat Nov 3 03:54:07 2007 From: photocopyvi1 at auburn.edu (Kendra Silva) Date: Sat, 3 Nov 2007 20:54:07 +1000 Subject: [ofa-general] For openib-general Message-ID: <003033044.70206901666166@auburn.edu> An HTML attachment was scrubbed... 
URL: From barr_james at xemail.de Sat Nov 3 05:02:38 2007 From: barr_james at xemail.de (james kain) Date: Sat, 03 Nov 2007 13:02:38 +0100 Subject: [ofa-general] CONTACT GLOWORLD COURRIER EXPRESS COMPANY Message-ID: <799eb33f1dd34ce2ac009de9973bd2a312932472c635e02660@localhost> ARRISTER JAMES KAIN & COUNSELORS AT LAW 06 BP 1409 AKPAKPA DODOMEY COTONOU REPUBLIC OF BENIN Hello Dear , I have Paid the fee for your Cheque Draft.but the manager of Eco Bank Benin Republic told me that before the check will get to you that it will expire. So I told him to cash $1,200 000.00 and all the necessary arrangement of delivering the $1,200 000.00 in cash was made with GLOWORLD COURRIER EXPRESS COMPANY. This in the information they need to delivery your package to you. ATTN: DR.PHILIP ALEE CONTACT EMAIL: gloworld.comp at yahoo.co.uk CONTACT TEL: 00229 9390 8735 CONTACT FAX: 00229-9381-2488 Please, Send them your contacts information to able them locate you immediately they arrived in your country with your BOX. This is what they need from you. 1. YOUR FULL NAME 2. YOUR HOME ADDRESS. 3. YOUR CURRENT HOME TELEPHONE NUMBER. 4. YOUR CURRENT OFFICE TELEPHONE. 5. A COPY OF YOUR PICTURE Please make sure you send this needed info’s to the Director general of GLOWORLD COURRIER EXPRESS COMPANY. with the address given to you. For your informations, Note. The GLOWORLD COURRIER EXPRESS COMPANY.don't know the contents of the Box. I registered it as a Box of an Africa cloths. They don't know it contents money, this is to avoid them delaying with the Box. don't let them know that is money that is in the Box.I am waiting for your urgent response. Thanks and Remain Blessed. Barrister JAMES KAIN _____________________________________________________ Kostenloses E-Mail-Postfach mit 2 GB Speicher sowie 20 SMS pro Monat gratis - http://www.xemail.de Spam? 
xemail at xemail.de From rdreier at cisco.com Sat Nov 3 08:11:15 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 03 Nov 2007 08:11:15 -0700 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <472BD7F2.70200@linux.vnet.ibm.com> (Pradeep Satyanarayana's message of "Fri, 02 Nov 2007 19:07:46 -0700") References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> Message-ID: > 1. Do not understand the usage and purpose of recv_count (something new that > you have introduced). Can you please explain. However, the suspicion being > that if somehow the if clause is executed, the rx_ring gets freed and so > all the skb pointers are bogus. I have commented out this segment of code. We need to keep track of the number of pending receives on a QP so that we wait until all have been flushed before destroying the QP. recv_count starts at ipoib_recvq_size and is decremented every time we get an error completion and don't repost the receive, and when it reaches 0 we move the QP to the reap list. > 2. The call to ipoib_cm_alloc_rx_skb() in ipoib_cm_handle_rx_wc() uses an > index value of 0 (hard coded) which is incorrect for no srq. I have changed > that to index instead. Yes, I saw that and fixed it. Actually I finished rewriting things so that there is no more index and no static table any more. I haven't posted the patches yet because I wanted to read it over a little more, but if you re-pull my for-2.6.25 branch you will get them. I have a feeling that this second bug might be the cause of the problems you are seeing. - R. 
From suboxides at dragonfalls.com Sat Nov 3 10:10:27 2007 From: suboxides at dragonfalls.com (Sidney King) Date: Sat, 03 Nov 2007 18:10:27 +0100 Subject: [ofa-general] realnewsoft . com From grantha at yejiyeon.com Sat Nov 3 13:22:13 2007 From: grantha at yejiyeon.com (Tharen Robinson) Date: Sat, 03 Nov 2007 22:22:13 +0200 Subject: [ofa-general] Free Microsoft Software Message-ID: <000001c81e56$d3d48480$0100007f@localhost> newmicrosoftdeals,com From dwsjekkpunktm at sjekkpunkt.no Sat Nov 3 14:17:34 2007 From: dwsjekkpunktm at sjekkpunkt.no (Shawn Mcmahon) Date: Sat, 3 Nov 2007 22:17:34 +0100 Subject: [ofa-general] You have no need to look for a reliable online drugstore anymore. Message-ID: <01c81e67$559e1f70$f8323c54@dwsjekkpunktm> «CanadianPharmacy» offers a wide selection of 100% generic products to choose from. Great level of service, fast delivery, personal approach to each customer! Security of your information! Absolutely cheap prices!Visit our "CanadianPharmacy" site Choosing «CanadianPharmacy», you choose absolute security and confidentiality. http://wireyellow.cn -------------- next part -------------- An HTML attachment was scrubbed... URL: From hrosenstock at xsigo.com Sat Nov 3 14:58:36 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 03 Nov 2007 14:58:36 -0700 Subject: [ofa-general] [PATCH] infiniband-diags/grouping.c: For Xsigo grouping, fix seg fault when no peer port Message-ID: <1194127116.26246.852.camel@hrosenstock-ws.xsigo.com> infiniband-diags/grouping.c: For Xsigo grouping, fix seg fault when no peer port Signed-off-by: Hal Rosenstock diff --git a/infiniband-diags/src/grouping.c b/infiniband-diags/src/grouping.c index 621d49e..86fd810 100644 --- a/infiniband-diags/src/grouping.c +++ b/infiniband-diags/src/grouping.c @@ -157,6 +157,10 @@ static uint64_t xsigo_chassisguid(Node *node) else return node->sysimgguid; } else { + /* Is there a peer port ? 
*/ + if (!node->ports->remoteport) + return node->sysimgguid; + /* If peer port is Leaf 1, use its chassis GUID */ if (is_xsigo_leafone(node->ports->remoteport->node->sysimgguid)) return node->ports->remoteport->node->sysimgguid & From trichinosis at bartonandwilliams.com Sat Nov 3 14:31:54 2007 From: trichinosis at bartonandwilliams.com (Hsi Davis) Date: Sat, 03 Nov 2007 21:31:54 +0000 Subject: [ofa-general] Free Microsoft Software Message-ID: <000001c81e67$359cea80$0100007f@localhost> newmicrosoftdeals,com From homovanillic at blockislandresorts.com Sat Nov 3 19:18:03 2007 From: homovanillic at blockislandresorts.com (Herbert Stewart) Date: Sat, 03 Nov 2007 17:18:03 -0900 Subject: [ofa-general] Free Microsoft Software Message-ID: <000001c81e77$71d84700$0100007f@localhost> newmicrosoftdeals,com From domiciling at bostonfruitbaskets.com Sun Nov 4 01:02:53 2007 From: domiciling at bostonfruitbaskets.com (Norm Henderson) Date: Sun, 04 Nov 2007 10:02:53 +0200 Subject: [ofa-general] Free Microsoft Software Message-ID: <000001c81eb8$7709b600$0100007f@localhost> kvaka-soft. com From vlad at dev.mellanox.co.il Sun Nov 4 01:59:39 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 04 Nov 2007 10:59:39 +0200 Subject: [ofa-general] Re: [PATCH] [REPOST] ofed_scripts: Add location code fix for older ppc64 kernels In-Reply-To: <200711021359.12949.fenkes@de.ibm.com> References: <200711021359.12949.fenkes@de.ibm.com> Message-ID: <472D89FB.8070404@dev.mellanox.co.il> Joachim Fenkes wrote: > Kernels prior to 2.6.24 have problems with multiple devices sharing the same > location code on ppc64 systems -- only one of these devices would be usable > by ibmebus. This will be a problem on systems with multiple eHCA chips on a > single hardware location. > > For older kernels, this problem can be circumvented by, prior to loading the > eHCA driver, changing the location codes of the offending devices so that > they're not the same anymore. 
This patch adds that circumvention to openibd, > with an additional check to make sure we're on the right architecture and > kernel version. > > Signed-off-by: Joachim Fenkes > --- > > This is a repost of my previous patch, with the changes suggested by > Vladimir included. > > ofed_scripts/openibd | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 files changed, 69 insertions(+), 0 deletions(-) > Applied. Regards, Vladimir From vlad at mellanox.co.il Sun Nov 4 01:51:17 2007 From: vlad at mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 4 Nov 2007 11:51:17 +0200 Subject: [ofa-general] RE: [ANNOUCE] dapl-1.2.3 and dapl-2.0.2 release In-Reply-To: <4728E283.7060206@ichips.intel.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C90168D6D5@mtlexch01.mtl.com> Hi Arlin, Can't download dapl from the download page: Forbidden You don't have permission to access /downloads/dapl/dapl-1.2.3.tar.gz on this server. Forbidden You don't have permission to access /downloads/dapl/dapl-2.0.2.tar.gz on this server. Please fix permissions. Thanks, Vladimir > -----Original Message----- > From: Arlin Davis [mailto:ardavis at ichips.intel.com] > Sent: Wednesday, October 31, 2007 10:16 PM > To: OpenFabrics General; Vladimir Sokolovsky > Cc: James Lentini > Subject: [ANNOUCE] dapl-1.2.3 and dapl-2.0.2 release > > > > There are new releases for DAPL 1.2 and 2.0 available on the OFA > download page and in my git tree. > > md5sum: 6e934d68e4ffbc84fcc9edcf364fdddd dapl-1.2.3.tar.gz > md5sum: 5ba0d27b369f42015f1326084cf3487c dapl-2.0.2.tar.gz > > Vlad, please pull both releases into OFED 1.3 beta, using the > configure > options from the package spec files, and install the > following packages: > > dapl-1.2.3-1 > dapl-2.0.2-1 > dapl-utils-2.0.2-1 > dapl-devel-2.0.2-1 > dapl-debuginfo-2.0.2-1 > > See http://www.openfabrics.org/downloads/dapl/README for more details. 
> > -arlin > From vlad at lists.openfabrics.org Sun Nov 4 02:58:44 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 4 Nov 2007 02:58:44 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071104-0200 daily build status Message-ID: <20071104105844.D368EE6080A@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.17 
Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.9-22.ELsmp Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.9-34.ELsmp Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ppc64 with linux-2.6.18-8.el5 Failed: From sashak at voltaire.com Sun Nov 4 08:07:43 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 4 Nov 2007 18:07:43 +0200 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071101201508.51b5e363.weiny2@llnl.gov> References: <20071101201508.51b5e363.weiny2@llnl.gov> Message-ID: <20071104160743.GX6945@sashak.voltaire.com> On 20:15 Thu 01 Nov , Ira Weiny wrote: > From fe2756789ffbc69466eefea3cdffe200a0718561 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 1 Nov 2007 15:00:37 -0700 > Subject: [PATCH] Move nodenamemap out of infiniband-diags into libosmcomp > > Signed-off-by: Ira K. Weiny Applied. Thanks. I have a question below. > diff --git a/infiniband-diags/configure.in b/infiniband-diags/configure.in > index 0a5f3c8..a24d478 100644 > --- a/infiniband-diags/configure.in > +++ b/infiniband-diags/configure.in > @@ -72,32 +72,6 @@ AC_CHECK_FUNCS([strchr strrchr strtol strtoul memset]) > dnl Checks for typedefs, structures, and compiler characteristics. 
> AC_C_CONST > > -dnl Check for the specification of a default node name map file > -AC_MSG_CHECKING(for --with-node-name-map ) > -AC_ARG_WITH(node-name-map, > - AC_HELP_STRING([--with-node-name-map=file], > - [define a default node name map file]), > - [ case "$withval" in > - no) > - ;; > - *) > - withnodenamemap=yes > - NODENAMEMAPFILE=$withval > - ;; > - esac ] > -) > -AC_MSG_RESULT(${withnodenamemap=no}) > - > -if test $withnodenamemap = "yes"; then > - NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" > - NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" > - NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" > - > - AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, > - ["$NODENAMEMAP"], > - [Define a default node name map file]) > -fi > - > dnl Check for perl and perl install location > AC_MSG_CHECKING(for --with-perl-path ) > AC_ARG_WITH(perl-path, So --with-node-name-map=file configure option is removed completely from diags and how mapping will work by default depends only on how OpenSM (which is separate package) was configured. Do you think it would be useful to keep default map name as configure option for infiniband-diags and to not depend on OpenSM configuration (it seems easily doable - we could put some default_node_name_map_file variable in ibdiag_common.c or so)? And then we probably don't need such configure option for OpenSM? Sasha From monisonlists at gmail.com Sun Nov 4 08:00:19 2007 From: monisonlists at gmail.com (Moni Shoua) Date: Sun, 04 Nov 2007 18:00:19 +0200 Subject: [ofa-general] ib-bonding release 18 Message-ID: <472DEC93.4050105@gmail.com> Hi, Release 18 of ib-bonding-0.9.0 is available. Please take from the usual place (/home/monis/public_html/ofed_1_3). Change Log: ----------- 1. This package is based on the bonding module that comes with linux-2.6.24-rc1. 
Besides from moving forward with the code, the patches that were once applied to an older bonding code from the outside are now part of the kernel. However, due to an instability that was found with bonding of 2.6.24-rc1 some patches were removed (see list below) 2. This rpm patches the OS to enable initscripts (redhat) and sysconfig (sles) to work with bonding support. This achieves the same functionality of persistent configuration as it is being done with the openib script but in a standard way. Please read ib-bonding.txt that comes with the package for detailed usage instructions. Note that this feature is supported for Redhat4 (Update 4 or 5) Redhat5 and SLES10 (sp1) 3. Add option --add-slave in ib-bond for adding slave to existing bonding interface 4. Fix bug: Ethernet bonding interfaces go down when calling ib-bond --stop-all List of patches that were stripped from bonding of 2.6.24-rc11 commit d0e81b7e2246a41d068ecaf15aac9de570816d63 Author: Jay Vosburgh Date: Wed Oct 17 17:37:51 2007 -0700 bonding: Acquire correct locks in alb for promisc change -- commit 6603a6f25e4bca922a7dfbf0bf03072d98850176 Author: Jay Vosburgh Date: Wed Oct 17 17:37:50 2007 -0700 bonding: Convert more locks to _bh, acquire rtnl, for new locking -- commit 059fe7a578fba5bbb0fdc0365bfcf6218fa25eb0 Author: Jay Vosburgh Date: Wed Oct 17 17:37:49 2007 -0700 bonding: Convert locks to _bh, rework alb locking for new locking -- commit 0b0eef66419e9abe6fd62bc958ab7cd0a18f858e Author: Jay Vosburgh Date: Wed Oct 17 17:37:48 2007 -0700 bonding: Convert miimon to new locking -- commit cf5f9044934658dd3ffc628a60cd37c70f8168b1 Author: Jay Vosburgh Date: Wed Oct 17 17:37:47 2007 -0700 bonding: Convert balance-rr transmit to new locking -- commit 1b76b31693d4a6088dec104ff6a6ead54081a3c2 Author: Jay Vosburgh Date: Wed Oct 17 17:37:45 2007 -0700 Convert bonding timers to workqueues -- commit 3a4fa0a25da81600ea0bcd75692ae8ca6050d165 Author: Robert P. J. 
Day Date: Fri Oct 19 23:10:43 2007 +0200 Fix misspellings of "system", "controller", "interrupt" and "necessary". -- commit 1c3f0b8e07de78a86f2dce911f5e245845ce40a8 Author: Mathieu Desnoyers Date: Thu Oct 18 23:41:04 2007 -0700 Change struct marker users From rdreier at cisco.com Sun Nov 4 08:12:21 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sun, 04 Nov 2007 08:12:21 -0800 Subject: [ofa-general] ib-bonding release 18 In-Reply-To: <472DEC93.4050105@gmail.com> (Moni Shoua's message of "Sun, 04 Nov 2007 18:00:19 +0200") References: <472DEC93.4050105@gmail.com> Message-ID: > List of patches that were stripped from bonding of 2.6.24-rc11 Have you reported these problems to get bonding fixed upstream too? From sashak at voltaire.com Sun Nov 4 12:34:12 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 4 Nov 2007 22:34:12 +0200 Subject: [ofa-general] Re: [PATCH 4/7] Change node name map implementation to use qmap in memory storage In-Reply-To: <20071101201514.62bd5ce8.weiny2@llnl.gov> References: <20071101201514.62bd5ce8.weiny2@llnl.gov> Message-ID: <20071104203412.GY6945@sashak.voltaire.com> Hi Ira, On 20:15 Thu 01 Nov , Ira Weiny wrote: > From 2dacfc928856351820fadc416da787350254419e Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 1 Nov 2007 19:29:02 -0700 > Subject: [PATCH] Change node name map implementation to use qmap in memory storage > > Signed-off-by: Ira K. 
Weiny > --- > infiniband-diags/src/ibnetdiscover.c | 18 +++--- > infiniband-diags/src/ibroute.c | 1 + > infiniband-diags/src/ibtracert.c | 16 ++-- > infiniband-diags/src/saquery.c | 12 ++-- > infiniband-diags/src/smpquery.c | 12 ++-- > opensm/complib/cl_nodenamemap.c | 116 ++++++++++++++++++++---------- > opensm/include/complib/cl_nodenamemap.h | 19 ++++- > 7 files changed, 122 insertions(+), 72 deletions(-) > > diff --git a/infiniband-diags/src/ibnetdiscover.c b/infiniband-diags/src/ibnetdiscover.c > index 03ef6f9..8b229c1 100644 > --- a/infiniband-diags/src/ibnetdiscover.c > +++ b/infiniband-diags/src/ibnetdiscover.c > @@ -92,8 +92,8 @@ static FILE *f; > > char *argv0 = "ibnetdiscover"; > > -static char *node_name_map = NULL; > -static FILE *node_name_map_fp = NULL; > +static char *node_name_map_file = NULL; > +static nn_map_t *node_name_map = NULL; > > Node *nodesdist[MAXHOPS+1]; /* last is Ca list */ > Node *mynode; > @@ -460,7 +460,7 @@ void > list_node(Node *node) > { > char *node_type; > - char *nodename = remap_node_name(node_name_map_fp, node->nodeguid, > + char *nodename = remap_node_name(node_name_map, node->nodeguid, > node->nodedesc); > > switch(node->type) { > @@ -537,7 +537,7 @@ out_switch(Node *node, int group, char *chname) > fprintf(f, "%d Chip %d", node->chrecord->slotnum, node->chrecord->anafanum); > } > > - nodename = remap_node_name(node_name_map_fp, node->nodeguid, > + nodename = remap_node_name(node_name_map, node->nodeguid, > node->nodedesc); > > fprintf(f, "\nSwitch\t%d %s\t\t# \"%s\" %s port 0 lid %d lmc %d\n", > @@ -606,7 +606,7 @@ out_switch_port(Port *port, int group) > if (ext_port_str) > fprintf(f, "%s", ext_port_str); > > - rem_nodename = remap_node_name(node_name_map_fp, > + rem_nodename = remap_node_name(node_name_map, > port->remoteport->node->nodeguid, > port->remoteport->node->nodedesc); > > @@ -650,7 +650,7 @@ out_ca_port(Port *port, int group) > if (port->remoteport->node->type != SWITCH_NODE) > fprintf(f, " (%" PRIx64 ") ", 
port->remoteport->portguid); > > - rem_nodename = remap_node_name(node_name_map_fp, > + rem_nodename = remap_node_name(node_name_map, > port->remoteport->node->nodeguid, > port->remoteport->node->nodedesc); > > @@ -890,7 +890,7 @@ main(int argc, char **argv) > break; > switch(ch) { > case 1: > - node_name_map = strdup(optarg); > + node_name_map_file = strdup(optarg); > break; > case 'C': > ca = optarg; > @@ -947,7 +947,7 @@ main(int argc, char **argv) > IBERROR("can't open file %s for writing", argv[0]); > > madrpc_init(ca, ca_port, mgmt_classes, 2); > - node_name_map_fp = open_node_name_map(node_name_map); > + node_name_map = open_node_name_map(node_name_map_file); > > if (discover(&my_portid) < 0) > IBERROR("discover"); > @@ -957,6 +957,6 @@ main(int argc, char **argv) > > dump_topology(list, group); > > - close_node_name_map(node_name_map_fp); > + close_node_name_map(node_name_map); > exit(0); > } > diff --git a/infiniband-diags/src/ibroute.c b/infiniband-diags/src/ibroute.c > index 44d2fc8..664f7f5 100644 > --- a/infiniband-diags/src/ibroute.c > +++ b/infiniband-diags/src/ibroute.c > @@ -50,6 +50,7 @@ > #include > #include > #include > +#include > > #include "ibdiag_common.h" > I think this chunk should be part of patch 3 - I moved it there already. 
> diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c > index c8a7b19..010f45f 100644 > --- a/infiniband-diags/src/ibtracert.c > +++ b/infiniband-diags/src/ibtracert.c > @@ -71,8 +71,8 @@ static FILE *f; > > char *argv0 = "ibtracert"; > > -static char *node_name_map = NULL; > -static FILE *node_name_map_fp = NULL; > +static char *node_name_map_file = NULL; > +static nn_map_t *node_name_map = NULL; > > typedef struct Port Port; > typedef struct Switch Switch; > @@ -205,7 +205,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) > return; > } > > - nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); > + nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", > prompt, > @@ -225,7 +225,7 @@ dump_route(int dump, Node *node, int outport, Port *port) > if (!dump && !verbose) > return; > > - nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); > + nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > if (dump == 1) > fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", > @@ -637,7 +637,7 @@ dump_mcpath(Node *node, int dumplevel) > if (node->upnode) > dump_mcpath(node->upnode, dumplevel); > > - nodename = remap_node_name(node_name_map_fp, node->nodeguid, node->nodedesc); > + nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > if (!node->dist) { > printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", > @@ -741,7 +741,7 @@ main(int argc, char **argv) > break; > switch(ch) { > case 1: > - node_name_map = strdup(optarg); > + node_name_map_file = strdup(optarg); > break; > case 'C': > ca = optarg; > @@ -799,7 +799,7 @@ main(int argc, char **argv) > usage(); > > madrpc_init(ca, ca_port, mgmt_classes, 3); > - node_name_map_fp = open_node_name_map(node_name_map); > + node_name_map = open_node_name_map(node_name_map_file); > > if 
(ib_resolve_portid_str(&src_portid, argv[0], dest_type, sm_id) < 0) > IBERROR("can't resolve source port %s", argv[0]); > @@ -838,6 +838,6 @@ main(int argc, char **argv) > /* dump multicast path */ > dump_mcpath(endnode, dumplevel); > > - close_node_name_map(node_name_map_fp); > + close_node_name_map(node_name_map); > exit(0); > } > diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c > index a8d810f..c6cc0a2 100644 > --- a/infiniband-diags/src/saquery.c > +++ b/infiniband-diags/src/saquery.c > @@ -60,8 +60,8 @@ > > char *argv0 = "saquery"; > > -static char *node_name_map = NULL; > -static FILE *node_name_map_fp = NULL; > +static char *node_name_map_file = NULL; > +static nn_map_t *node_name_map = NULL; > > /** > * Declare some globals because I don't want this to be too complex. > @@ -137,7 +137,7 @@ print_node_record(ib_node_record_t *node_record) > return; > case NAME_OF_LID: > case NAME_OF_GUID: > - name = remap_node_name(node_name_map_fp, > + name = remap_node_name(node_name_map, > cl_ntoh64(p_ni->node_guid), > (char *)p_nd->description); > printf("%s\n", name); > @@ -1143,7 +1143,7 @@ main(int argc, char **argv) > break; > } > case 2: > - node_name_map = strdup(optarg); > + node_name_map_file = strdup(optarg); > break; > case 'p': > query_type = IB_MAD_ATTR_PATH_RECORD; > @@ -1248,7 +1248,7 @@ main(int argc, char **argv) > } > > bind_handle = get_bind_handle(); > - node_name_map_fp = open_node_name_map(node_name_map); > + node_name_map = open_node_name_map(node_name_map_file); > > switch (query_type) { > case IB_MAD_ATTR_NODE_RECORD: > @@ -1294,6 +1294,6 @@ main(int argc, char **argv) > if (dst) > free(dst); > clean_up(); > - close_node_name_map(node_name_map_fp); > + close_node_name_map(node_name_map); > return (status); > } > diff --git a/infiniband-diags/src/smpquery.c b/infiniband-diags/src/smpquery.c > index 7c2c129..89b48f3 100644 > --- a/infiniband-diags/src/smpquery.c > +++ b/infiniband-diags/src/smpquery.c > @@ -85,8 +85,8 @@ 
static const match_rec_t match_tbl[] = { > }; > > char *argv0 = "smpquery"; > -static char *node_name_map = NULL; > -static FILE *node_name_map_fp = NULL; > +static char *node_name_map_file = NULL; > +static nn_map_t *node_name_map = NULL; > > /*******************************************/ > static char * > @@ -108,7 +108,7 @@ node_desc(ib_portid_t *dest, char **argv, int argc) > if (!smp_query(nd, dest, IB_ATTR_NODE_DESC, 0, 0)) > return "node desc query failed"; > > - nodename = remap_node_name(node_name_map_fp, node_guid, nd); > + nodename = remap_node_name(node_name_map, node_guid, nd); > > l = strlen(nodename); > if (l < 32) { > @@ -457,7 +457,7 @@ main(int argc, char **argv) > break; > switch(ch) { > case 1: > - node_name_map = strdup(optarg); > + node_name_map_file = strdup(optarg); > break; > case 'd': > ibdebug++; > @@ -513,7 +513,7 @@ main(int argc, char **argv) > IBERROR("operation '%s' not supported", argv[0]); > > madrpc_init(ca, ca_port, mgmt_classes, 3); > - node_name_map_fp = open_node_name_map(node_name_map); > + node_name_map = open_node_name_map(node_name_map_file); > > if (dest_type != IB_DEST_DRSLID) { > if (ib_resolve_portid_str(&portid, argv[1], dest_type, sm_id) < 0) > @@ -530,6 +530,6 @@ main(int argc, char **argv) > if ((err = fn(&portid, argv+3, argc-3))) > IBERROR("operation %s: %s", argv[0], err); > } > - close_node_name_map(node_name_map_fp); > + close_node_name_map(node_name_map); > exit(0); > } > diff --git a/opensm/complib/cl_nodenamemap.c b/opensm/complib/cl_nodenamemap.c > index 144a7e4..584c78c 100644 > --- a/opensm/complib/cl_nodenamemap.c > +++ b/opensm/complib/cl_nodenamemap.c > @@ -44,67 +44,105 @@ > > #include > > -FILE * > +static nn_map_t * > +read_names(nn_map_t *map) > +{ > + char *line = NULL; > + size_t len = 0; > + name_map_item_t *item; > + > + rewind(map->fp); > + while (getline(&line, &len, map->fp) != -1) { > + char *guid_str = NULL; > + char *name = NULL; > + line[len-1] = '\0'; > + if (line[0] == '#') > + goto 
next_one; > + > + guid_str = strtok(line, "\"#"); > + name = strtok(NULL, "\"#"); > + if (!guid_str || !name) > + goto next_one; > + > + item = malloc(sizeof(*item)); > + if (!item) { > + goto error; > + } > + item->guid = strtoull(guid_str, NULL, 0); > + item->name = strdup(name); > + cl_qmap_insert(&(map->map), item->guid, (cl_map_item_t *)item); > + > +next_one: > + free (line); > + line = NULL; getline() is able to realloc 'line' buffer by itself, so should this repeated free() be moved out of loop (with adding a variable which stores allocated 'line' size)? > + } > + > +error: > + return (map); > +} > + > +nn_map_t * > open_node_name_map(char *node_name_map) > { > - FILE *rc = NULL; > + FILE *tmp_fp = NULL; > + nn_map_t *rc = NULL; > > if (node_name_map != NULL) { > - rc = fopen(node_name_map, "r"); > - if (rc == NULL) { > + tmp_fp = fopen(node_name_map, "r"); > + if (tmp_fp == NULL) { > fprintf(stderr, > "WARNING failed to open switch map \"%s\" (%s)\n", > node_name_map, strerror(errno)); > } > #ifdef HAVE_DEFAULT_NODENAME_MAP > } else { > - rc = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); > + tmp_fp = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); > #endif /* HAVE_DEFAULT_NODENAME_MAP */ > } > - return (rc); > + if (!tmp_fp) > + return (NULL); > + > + rc = malloc(sizeof(*rc)); > + if (!rc) > + return (NULL); > + rc->fp = tmp_fp; > + cl_qmap_init(&(rc->map)); > + return (read_names(rc)); read_names() function cannot fail. Probably it would be cleaner to make it void and just to return rc here. > } > > void > -close_node_name_map(FILE *fp) > +close_node_name_map(nn_map_t *map) > { > - if (fp) > - fclose(fp); > + name_map_item_t *item = NULL; > + > + if (!map) > + return; > + > + item = (name_map_item_t *)cl_qmap_head(&(map->map)); > + while (item != cl_qmap_end(&(map->map))) { There are compilation warning about different pointer types. 
> + item = (name_map_item_t *)cl_qmap_remove(&(map->map), item->guid); > + free(item->name); > + free(item); > + item = (name_map_item_t *)cl_qmap_head(&(map->map)); > + } > + if (map->fp) > + fclose(map->fp); > + free(map); > } > > char * > -remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, char *nodedesc) > +remap_node_name(nn_map_t *map, uint64_t target_guid, char *nodedesc) > { > -#define NAME_LEN (256) > - char *line = NULL; > - size_t len = 0; > - uint64_t guid = 0; > - char *rc = NULL; > - int line_count = 0; > - > - if (node_name_map_fp == NULL) > + char *rc = NULL; > + name_map_item_t *item = NULL; > + > + if (!map) > goto done; > > - rewind(node_name_map_fp); > - for (line_count = 1; > - getline(&line, &len, node_name_map_fp) != -1; > - line_count++) { > - line[len-1] = '\0'; > - if (line[0] == '#') > - goto next_one; > - char *guid_str = strtok(line, "\"#"); > - char *name = strtok(NULL, "\"#"); > - if (!guid_str || !name) > - goto next_one; > - guid = strtoull(guid_str, NULL, 0); > - if (target_guid == guid) { > - rc = strdup(name); > - free (line); > - goto done; > - } > -next_one: > - free (line); > - line = NULL; > - } > + item = (name_map_item_t *)cl_qmap_get(&(map->map), target_guid); > + if (item != cl_qmap_end(&(map->map))) Ditto. Sasha > + rc = strdup(item->name); > + > done: > if (rc == NULL) > rc = strdup(clean_nodedesc(nodedesc)); > diff --git a/opensm/include/complib/cl_nodenamemap.h b/opensm/include/complib/cl_nodenamemap.h > index a4a09f7..9d0b7d4 100644 > --- a/opensm/include/complib/cl_nodenamemap.h > +++ b/opensm/include/complib/cl_nodenamemap.h > @@ -36,17 +36,28 @@ > > #include > #include > +#include > > -/* NOTE: this modifies the parameter "nodedesc". */ > +/* NOTE: this may modify the parameter "nodedesc". 
*/ > char *clean_nodedesc(char *nodedesc); > > +typedef struct _name_map_item { > + cl_map_item_t item; > + uint64_t guid; > + char *name; > +} name_map_item_t; > +typedef struct _node_name_map { > + FILE *fp; > + cl_qmap_t map; > +} nn_map_t; > + > /** > * Node name map interface. > * It is OK to pass NULL for the node_name_map[_fp] parameters. > */ > -FILE *open_node_name_map(char *node_name_map); > -void close_node_name_map(FILE *node_name_map_fp); > -char *remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, > +nn_map_t *open_node_name_map(char *node_name_map); > +void close_node_name_map(nn_map_t *map); > +char *remap_node_name(nn_map_t *map, uint64_t target_guid, > char *nodedesc); > /* NOTE: parameter "nodedesc" may be modified here. */ > > -- > 1.5.1 > From sashak at voltaire.com Sun Nov 4 13:13:52 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 4 Nov 2007 23:13:52 +0200 Subject: [ofa-general] Re: [PATCH 6/7] Add node-name-map support to OpenSM; using the "default" map. In-Reply-To: <20071101201524.3f95a33a.weiny2@llnl.gov> References: <20071101201524.3f95a33a.weiny2@llnl.gov> Message-ID: <20071104211352.GZ6945@sashak.voltaire.com> On 20:15 Thu 01 Nov , Ira Weiny wrote: > From 35280cfd5229ccc8d91b6fd98e0f4b58193d0d03 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 1 Nov 2007 19:41:37 -0700 > Subject: [PATCH] Add node-name-map support to OpenSM; using the "default" map. > > Signed-off-by: Ira K. 
Weiny > --- > opensm/include/opensm/osm_node.h | 2 +- > opensm/include/opensm/osm_opensm.h | 2 ++ > opensm/include/opensm/osm_subnet.h | 1 + > opensm/opensm/osm_node.c | 6 ++++++ > opensm/opensm/osm_node_desc_rcv.c | 14 ++++++++++++-- > opensm/opensm/osm_opensm.c | 4 ++++ > 6 files changed, 26 insertions(+), 3 deletions(-) > > diff --git a/opensm/include/opensm/osm_node.h b/opensm/include/opensm/osm_node.h > index f87e81d..8af5418 100644 > --- a/opensm/include/opensm/osm_node.h > +++ b/opensm/include/opensm/osm_node.h > @@ -106,7 +106,7 @@ typedef struct _osm_node { > ib_node_desc_t node_desc; > uint32_t discovery_count; > uint32_t physp_tbl_size; > - char print_desc[IB_NODE_DESCRIPTION_SIZE + 1]; > + char *print_desc; > osm_physp_t physp_table[1]; > } osm_node_t; > /* > diff --git a/opensm/include/opensm/osm_opensm.h b/opensm/include/opensm/osm_opensm.h > index 1ea1ec2..1b5edb8 100644 > --- a/opensm/include/opensm/osm_opensm.h > +++ b/opensm/include/opensm/osm_opensm.h > @@ -52,6 +52,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -168,6 +169,7 @@ typedef struct _osm_opensm_t { > struct osm_routing_engine routing_engine; > osm_stats_t stats; > osm_console_t console; > + nn_map_t *node_name_map; > } osm_opensm_t; > /* > * FIELDS > diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h > index dada8bf..452098b 100644 > --- a/opensm/include/opensm/osm_subnet.h > +++ b/opensm/include/opensm/osm_subnet.h > @@ -297,6 +297,7 @@ typedef struct _osm_subn_opt { > char *event_db_dump_file; > #endif /* ENABLE_OSM_PERF_MGR */ > char *event_plugin_name; > + char *node_name_map_name; > } osm_subn_opt_t; > /* > * FIELDS > diff --git a/opensm/opensm/osm_node.c b/opensm/opensm/osm_node.c > index 645daa9..f34da1f 100644 > --- a/opensm/opensm/osm_node.c > +++ b/opensm/opensm/osm_node.c > @@ -131,6 +131,7 @@ osm_node_t *osm_node_new(IN const osm_madw_t * const p_madw) > > osm_node_init_physp(p_node, p_madw); > } > + 
p_node->print_desc = ""; > > return (p_node); > } > @@ -146,6 +147,11 @@ static void osm_node_destroy(IN osm_node_t * p_node) > */ > for (i = 0; i < p_node->physp_tbl_size; i++) > osm_physp_destroy(&p_node->physp_table[i]); > + > + /* cleanup printable node_desc field */ > + if (p_node->print_desc) { > + free(p_node->print_desc); > + } > } > > /********************************************************************** p_node->print_desc is initialized as a constant string and later freed as dynamically allocated memory. I think there are paths when osm_node_destroy() could run before __osm_nd_rcv_process_nd(). If so, it looks like a bug? Sasha > diff --git a/opensm/opensm/osm_node_desc_rcv.c b/opensm/opensm/osm_node_desc_rcv.c > index d50883c..f758d5a 100644 > --- a/opensm/opensm/osm_node_desc_rcv.c > +++ b/opensm/opensm/osm_node_desc_rcv.c > @@ -58,6 +58,7 @@ > #include > #include > #include > +#include > #include > > /********************************************************************** > @@ -67,13 +68,22 @@ __osm_nd_rcv_process_nd(IN const osm_nd_rcv_t * const p_rcv, > IN osm_node_t * const p_node, > IN const ib_node_desc_t * const p_nd) > { > + char *tmp_desc; > + char print_desc[IB_NODE_DESCRIPTION_SIZE + 1]; > + > OSM_LOG_ENTER(p_rcv->p_log, __osm_nd_rcv_process_nd); > > memcpy(&p_node->node_desc.description, p_nd, sizeof(*p_nd)); > > /* also set up a printable version */ > - memcpy(&p_node->print_desc, p_nd, sizeof(*p_nd)); > - p_node->print_desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; > + memcpy(print_desc, p_nd, sizeof(*p_nd)); > + print_desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; > + tmp_desc = remap_node_name(p_rcv->p_subn->p_osm->node_name_map, > + cl_ntoh64(osm_node_get_node_guid(p_node)), > + print_desc); > + > + /* make a copy for this node to "own" */ > + p_node->print_desc = strdup(tmp_desc); > > if (osm_log_is_active(p_rcv->p_log, OSM_LOG_VERBOSE)) { > osm_log(p_rcv->p_log, OSM_LOG_VERBOSE, > diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c 
> index 5b45401..9841c75 100644 > --- a/opensm/opensm/osm_opensm.c > +++ b/opensm/opensm/osm_opensm.c > @@ -183,6 +183,8 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm) > osm_subn_destroy(&p_osm->subn); > cl_disp_destroy(&p_osm->disp); > > + close_node_name_map(p_osm->node_name_map); > + > cl_plock_destroy(&p_osm->lock); > > osm_log_destroy(&p_osm->log); > @@ -310,6 +312,8 @@ osm_opensm_init(IN osm_opensm_t * const p_osm, > goto Exit; > } > > + p_osm->node_name_map = open_node_name_map(NULL); > + > Exit: > osm_log(&p_osm->log, OSM_LOG_FUNCS, "osm_opensm_init: ]\n"); /* Format Waived */ > return (status); > -- > 1.5.1 > From sashak at voltaire.com Sun Nov 4 13:20:22 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 4 Nov 2007 23:20:22 +0200 Subject: [ofa-general] Re: [PATCH 0/7] Add Node Name Map support to opensm In-Reply-To: <20071101201448.7317825b.weiny2@llnl.gov> References: <20071101201448.7317825b.weiny2@llnl.gov> Message-ID: <20071104212022.GA6945@sashak.voltaire.com> Hi Ira, On 20:14 Thu 01 Nov , Ira Weiny wrote: > Sasha and I discussed the switch map support patch series and I have changed > the implementation. Here is another series which adds "node name" map support > to both the diags as well as opensm. > > This new functionality allows for any node descriptor to be renamed based on a > "node name map" file. The final implementation uses the qmap data structure > which should be quick enough for large map files. The patches are as follows: > > 0001 - use lookup_switch_name for all node types. > 0002 - Change switch map function names to reflect the new functionality of > a simple node name map. > 0003 - Move nodenamemap out of infiniband-diags into libosmcomp > 0004 - Change node name map implementation to use qmap in memory storage > 0005 - infiniband-diags/src/smpquery.c : special case situation where > nodename len is >= 32 > 0006 - Add node-name-map support to OpenSM; using the "default" map. 
> 0007 - Add node_name_map_name to opts file. > > Patch number 5 can and should be applied on it's own. It fixes a bug found > during testing. I applied patches 1,2,3,5. Have some comments about patches 3,4,6 (sent). Patch 7 looks fine to me, but it requires 4 to be applied before. Thanks for your work. Sasha From sashak at voltaire.com Sun Nov 4 13:22:20 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 4 Nov 2007 23:22:20 +0200 Subject: [ofa-general] Re: [PATCH] infiniband-diags/grouping.c: For Xsigo grouping, fix seg fault when no peer port In-Reply-To: <1194127116.26246.852.camel@hrosenstock-ws.xsigo.com> References: <1194127116.26246.852.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071104212220.GB6945@sashak.voltaire.com> On 14:58 Sat 03 Nov , Hal Rosenstock wrote: > infiniband-diags/grouping.c: For Xsigo grouping, fix seg fault when no > peer port > > Signed-off-by: Hal Rosenstock Applied. Thanks. Sasha From johann.george at qlogic.com Sun Nov 4 19:32:01 2007 From: johann.george at qlogic.com (Johann George) Date: Sun, 4 Nov 2007 19:32:01 -0800 Subject: [ofa-general] Feedback on Developer's Summit Message-ID: <20071105033201.GA28495@cuprite.pathscale.com> We received 12 responses to our mini-survey. Thanks to all who responded. Here are the results: > (1) Are you willing and able to attend if we start at > 11:00am on Thursday rather than at 1:00pm? 7 yes, 4 no, 1 abstention > (2) If we are able to, would you prefer to see simultaneous > tracks and lengthen some of the sessions. 3 yes, 8 no, 1 abstention > (3) Would you like to see additional MPI sessions crammed > into the allotted time? 8 yes, 0 no, 4 abstention > (4) Are you willing and able to stay if we ran later on > Friday? How long? 5 yes, 4 no, 3 abstention Given the support for additional MPI sessions, we have shortened the current ones and have included presentations from Intel and HP. There was mixed reaction to coming earlier on Thursday or staying later on Friday. 
If a particular group is available and would like to hold a discussion during either of these times, let me know and we'll try to make the room available. Thanks. Johann From bramesh at vt.edu Sun Nov 4 21:06:00 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Mon, 5 Nov 2007 00:06:00 -0500 Subject: [ofa-general] Application blocked in mthca_poll_cq Message-ID: <20071105050600.GA20812@vt.edu> Every now and then I notice that my application blocks inside mthca_poll_cq. When I attach gdb to the process I find it's blocking on a call to pthread_spin_lock/pthread_spin_unlock. I am not sure if this is a bug or something wrong with what I am doing. I am calling ibv_poll_cq with the number of entries as 1. Any help on this would be much appreciated. I am not able to replicate it in a separate test program. There is no other call to ibv_poll_cq. Thanks, Bharath --- Bharath Ramesh http://people.cs.vt.edu/~bramesh From ogerlitz at voltaire.com Sun Nov 4 22:08:04 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 05 Nov 2007 08:08:04 +0200 Subject: [ofa-general] [PATCH 0/7] Add Node Name Map support to opensm In-Reply-To: <20071101201448.7317825b.weiny2@llnl.gov> References: <20071101201448.7317825b.weiny2@llnl.gov> Message-ID: <472EB344.9070302@voltaire.com> Ira Weiny wrote: > Sasha and I discussed the switch map support patch series and I have changed > the implementation. Here is another series which adds "node name" map support > to both the diags as well as opensm. Sasha, Ira Can you educate me re the "node name" functionality of opensm and the diags? does the thing is using the node description field of the node info for display purposes? > This new functionality allows for any node descriptor to be renamed based on a > "node name map" file. The final implementation uses the qmap data structure > which should be quick enough for large map files. The patches are as follows: assuming I was correct above, what does this enhancement is about? 
if the use configured the node description why they want to override it? how is this "node name map" implemented is it a mapping file from node GUID to string and this file is read by the diag/opensm before/while running? thanks, Or. From monisonlists at gmail.com Sun Nov 4 22:50:26 2007 From: monisonlists at gmail.com (Moni Shoua) Date: Mon, 05 Nov 2007 08:50:26 +0200 Subject: [ofa-general] ib-bonding release 18 In-Reply-To: References: <472DEC93.4050105@gmail.com> Message-ID: <472EBD32.2030803@gmail.com> Roland Dreier wrote: > > List of patches that were stripped from bonding of 2.6.24-rc11 > > Have you reported these problems to get bonding fixed upstream too? > Yes I did. From vlad at lists.openfabrics.org Mon Nov 5 02:58:50 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 5 Nov 2007 02:58:50 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071105-0200 daily build status Message-ID: <20071105105850.77266E6085D@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.12 Passed on ppc64 with 
linux-2.6.18 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on x86_64 with linux-2.6.20 Passed on ia64 with linux-2.6.21.1 Passed on powerpc with linux-2.6.15 Passed on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.9-22.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.9-34.ELsmp Failed: From ogerlitz at voltaire.com Mon Nov 5 03:27:11 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Mon, 05 Nov 2007 13:27:11 +0200 Subject: [ofa-general] Feedback on Developer's Summit In-Reply-To: <20071105033201.GA28495@cuprite.pathscale.com> References: <20071105033201.GA28495@cuprite.pathscale.com> Message-ID: <472EFE0F.5070101@voltaire.com> Johann George wrote: > There was mixed reaction to coming earlier on Thursday or > staying later on Friday. If a particular group is available > and would like to hold a discussion during either of these > times, let me know and we'll try to make the room available. 
I suggest that the Linux IB developers would meet before the rest of the agenda starts: 11-12: SA cache session 12-1: IPoIB stateless offload issues Sean, Roland, Dror - can you make it? Or. From or.gerlitz at gmail.com Mon Nov 5 03:30:32 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 5 Nov 2007 13:30:32 +0200 Subject: [ofa-general] OpenFabrics Developer's Summit: tentative agenda In-Reply-To: <20071023200329.GA6368@cuprite.pathscale.com> References: <20071023200329.GA6368@cuprite.pathscale.com> Message-ID: <15ddcffd0711050330i23c06268ue206b0046aabd45@mail.gmail.com> On 10/23/07, Johann George wrote: > > Below is a tentative agenda for the upcoming OpenFabrics Developer's > Summit. While most sessions are confirmed, at least a couple of > speakers are attempting to resolve conflicts so they may attend. We > have attempted to accommodate everyone but if you are speaking and see > a conflict with the proposed time of your session, let me know. An > updated agenda will be made available in the future. 18:00 20m Update on NFSoRDMA > James Lentini, Network Appliance > 18:20 20m Lustre > Eric Barton, Sun Microsystems > 18:40 20m Bonding > Or Gerlitz, Voltaire > Johann, I don't need 20 minutes for the bonding update, 10m will be more than enough. Or. -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From kliteyn at mellanox.co.il Sun Nov 4 21:14:49 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 5 Nov 2007 07:14:49 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-05:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-04 OpenSM git rev = Fri_Nov_2_20:55:22_2007 [e032988e75abbd9d6007136254dfc14bab2ec9b4] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From gdror at dev.mellanox.co.il Mon Nov 5 06:02:27 2007 From: gdror at dev.mellanox.co.il (Dror Goldenberg) Date: Mon, 05 Nov 2007 16:02:27 +0200 Subject: [ofa-general] Feedback on Developer's Summit In-Reply-To: <472EFE0F.5070101@voltaire.com> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> Message-ID: <472F2273.6030707@dev.mellanox.co.il> Or Gerlitz wrote: > Johann George wrote: >> There was mixed reaction to coming earlier on Thursday or >> staying later on Friday. 
If a particular group is available >> and would like to hold a discussion during either of these >> times, let me know and we'll try to make the room available. > > I suggest that the Linux IB developers would meet before the rest of > the agenda starts: > > 11-12: SA cache session > 12-1: IPoIB stateless offload issues > > Sean, Roland, Dror - can you make it? I can make it early on Thu. Note that I cannot stay late on Fri. > > Or. > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Mon Nov 5 07:28:41 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 5 Nov 2007 17:28:41 +0200 Subject: [ofa-general] [PATCH 0/7] Add Node Name Map support to opensm In-Reply-To: <472EB344.9070302@voltaire.com> References: <20071101201448.7317825b.weiny2@llnl.gov> <472EB344.9070302@voltaire.com> Message-ID: <20071105152841.GE8766@sashak.voltaire.com> On 08:08 Mon 05 Nov , Or Gerlitz wrote: > > Can you educate me re the "node name" functionality of opensm and the diags? > does the thing is using the node description field of the node info for > display purposes? Node description or name string specified in name map file. > > This new functionality allows for any node descriptor to be renamed based > > on a > > "node name map" file. The final implementation uses the qmap data > > structure > > which should be quick enough for large map files. The patches are as > > follows: > > assuming I was correct above, what does this enhancement is about? if the > use configured the node description why they want to override it? It is about all nodes in a fabric (not just OFED nodes where node description could be configured) including switches, routers, TCAs, etc. 
> how is > this "node name map" implemented is it a mapping file from node GUID to > string and this file is read by the diag/opensm before/while running? Yes. Sasha From tziporet at mellanox.co.il Mon Nov 5 08:17:15 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 5 Nov 2007 18:17:15 +0200 Subject: [ofa-general] OFED Nov 05 meeting agenda on OFED 1.3 beta readiness In-Reply-To: <4727426C.5090504@mellanox.co.il> References: <4727426C.5090504@mellanox.co.il> Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> This is the agenda of OFED 1.3 meeting today: 1. Rebase for kernel 2.6.24-rc1: The backport was more complicated (mainly in IPoIB). The following kernel modules have now backports to all kernels: mthca, mlx4, ehca, core, IPoIB, RDS Kernel modules that need update: Chelsio driver (cxgb3), ipath driver, iSER, SDP, SRP, VNIC Schedule: All new backport patches should be send to Vlad by tomorrow (Tuesday Nov 6). On Wed (Nov 7) we will start to publish the new package based on kernel 2.6.24. Kernel modules that will not pass compilation will be disabled 2. Other Beta tasks status: 1. Fix compilation problems on PPC SLES10 with 32 bits - Vlad (Mellanox) - on work 2. SPEC files should be part of each user space package - each owner should take the spec file 3. Fix all compilation and install issues - All 3. 
Beta schedule: Need to discuss and decide on the beta date Done tasks as of this week: o Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) o ibutils on SLES10 PPC64 (64 bits) - Vlad Previous done tasks: o Add qperf test from Qlogic - Johann (Qlogic) o Support RHEL 5 up1 - Woody & Vlad o Apply patches that fix warning of backport patches - Vlad o New MVAPICH package - Pasha & DK (OSU) o Complete RDS work - Vlad (Mellanox) o Integrate all SDP features - Jim (Mellanox) o nes - updated backport patches - Glenn (NetEffect) Tziporet From rdreier at cisco.com Mon Nov 5 08:40:45 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 05 Nov 2007 08:40:45 -0800 Subject: [ofa-general] Feedback on Developer's Summit In-Reply-To: <472EFE0F.5070101@voltaire.com> (Or Gerlitz's message of "Mon, 05 Nov 2007 13:27:11 +0200") References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> Message-ID: > 11-12: SA cache session > 12-1: IPoIB stateless offload issues > Sean, Roland, Dror - can you make it? I guess I can make it but what are the stateless offload issues? If we want to spend an hour on it I think we need to figure out what if anything we're stuck on -- at this point I don't know of any major problems that need face-to-face time. - R. From swise at opengridcomputing.com Mon Nov 5 08:47:13 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 05 Nov 2007 10:47:13 -0600 Subject: [ofa-general] Re: [ewg] OFED Nov 05 meeting agenda on OFED 1.3 beta readiness In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> Message-ID: <472F4911.5050109@opengridcomputing.com> Tziporet Koren wrote: > This is the agenda of OFED 1.3 meeting today: > > 1. Rebase for kernel 2.6.24-rc1: > The backport was more complicated (mainly in IPoIB). 
> The following kernel modules have now backports to all kernels: > mthca, mlx4, ehca, core, IPoIB, RDS > > Kernel modules that need update: Chelsio driver (cxgb3), ipath > driver, iSER, SDP, SRP, VNIC > I'll get any backport fixes for cxgb3 by EOB tomorrow. I cannot make the call today. That's the only status I have. Steve. From rdreier at cisco.com Mon Nov 5 08:49:26 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 05 Nov 2007 08:49:26 -0800 Subject: [ofa-general] Application blocked in mthca_poll_cq In-Reply-To: <20071105050600.GA20812@vt.edu> (Bharath Ramesh's message of "Mon, 5 Nov 2007 00:06:00 -0500") References: <20071105050600.GA20812@vt.edu> Message-ID: > Every now and then I notice that my application is blocks inside > mthca_poll_cq. When I attach gdb to the process I find its blocking on a > call to pthread_spin_lock/pthread_spin_unlock. I am not sure if this is > a bug or something wrong with what I am doing. I calling ibv_poll_cq > with the number of entries as 1. Any help on this would be much > appreciated. I am not able to replicate it on separate test program. > There is not other call to ibv_poll_cq. What version of libmthca are you using? libmthca 1.0.2 and earlier had a bug that could cause this in rare circumstances (if you destroy two QPs simultaneously from different threads and the two QPs are such that the receive CQ of one QP is the send CQ of the other and vice versa). To be honest I doubt you're hitting this. The only operations in libmthca that hit the CQ spinlock are: - polling the CQ - resizing a CQ - modifying a QP to RESET - destroying a QP all of that code seems to take and release the CQ spinlock properly. I assume your application is multithreaded? When it gets stuck it would be useful to know which other thread is holding the CQ lock that poll_cq is blocked on; I don't know of a really good way to figure that out though. 
Is it possible that you have a use-after-free where you destroy a CQ and then call poll with a pointer to the freed CQ? - R. From parks at lanl.gov Mon Nov 5 08:58:08 2007 From: parks at lanl.gov (Parks Fields) Date: Mon, 05 Nov 2007 09:58:08 -0700 Subject: [ofa-general] OFED Nov 05 meeting agenda on OFED 1.3 beta readiness In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com > References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> Message-ID: <7.0.1.0.2.20071105095741.028ecd30@lanl.gov> What is call in # and passcode At 09:17 AM 11/5/2007, Tziporet Koren wrote: >This is the agenda of OFED 1.3 meeting today: > >1. Rebase for kernel 2.6.24-rc1: > The backport was more complicated (mainly in IPoIB). > The following kernel modules have now backports to all kernels: > mthca, mlx4, ehca, core, IPoIB, RDS > > Kernel modules that need update: Chelsio driver (cxgb3), ipath >driver, iSER, SDP, SRP, VNIC > > Schedule: > All new backport patches should be send to Vlad by tomorrow (Tuesday >Nov 6). > On Wed (Nov 7) we will start to publish the new package based on >kernel 2.6.24. > Kernel modules that will not pass compilation will be disabled > >2. Other Beta tasks status: > 1. Fix compilation problems on PPC SLES10 with 32 bits - Vlad >(Mellanox) - on work > 2. SPEC files should be part of each user space package - each owner >should take the spec file > 3. Fix all compilation and install issues - All > >3. 
Beta schedule: > Need to discuss and decide on the beta date > >Done tasks as of this week: > o Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) > o ibutils on SLES10 PPC64 (64 bits) - Vlad > >Previous done tasks: > o Add qperf test from Qlogic - Johann (Qlogic) > o Support RHEL 5 up1 - Woody & Vlad > o Apply patches that fix warning of backport patches - Vlad > o New MVAPICH package - Pasha & DK (OSU) > o Complete RDS work - Vlad (Mellanox) > o Integrate all SDP features - Jim (Mellanox) > o nes - updated backport patches - Glenn (NetEffect) > > >Tziporet > > > >_______________________________________________ >general mailing list >general at lists.openfabrics.org >http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general ***** Correspondence ***** This email contains no programmatic content that requires independent ADC review From jsquyres at cisco.com Mon Nov 5 09:09:35 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 5 Nov 2007 12:09:35 -0500 Subject: [ewg] Re: [ofa-general] OFED Nov 05 meeting agenda on OFED 1.3 beta readiness In-Reply-To: <7.0.1.0.2.20071105095741.028ecd30@lanl.gov> References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> <7.0.1.0.2.20071105095741.028ecd30@lanl.gov> Message-ID: <7AAF8175-897A-4688-8655-BE6867B2B2B7@cisco.com> The call info reminder was sent earlier this morning; Outlook invites were sent a week or so ago: http://lists.openfabrics.org/pipermail/ewg/2007-November/ 004874.html On Nov 5, 2007, at 11:58 AM, Parks Fields wrote: > What is call in # and passcode > > > > At 09:17 AM 11/5/2007, Tziporet Koren wrote: >> This is the agenda of OFED 1.3 meeting today: >> >> 1. Rebase for kernel 2.6.24-rc1: >> The backport was more complicated (mainly in IPoIB). 
>> The following kernel modules have now backports to all kernels: >> mthca, mlx4, ehca, core, IPoIB, RDS >> >> Kernel modules that need update: Chelsio driver (cxgb3), ipath >> driver, iSER, SDP, SRP, VNIC >> >> Schedule: >> All new backport patches should be send to Vlad by tomorrow >> (Tuesday >> Nov 6). >> On Wed (Nov 7) we will start to publish the new package based on >> kernel 2.6.24. >> Kernel modules that will not pass compilation will be disabled >> >> 2. Other Beta tasks status: >> 1. Fix compilation problems on PPC SLES10 with 32 bits - Vlad >> (Mellanox) - on work >> 2. SPEC files should be part of each user space package - each >> owner >> should take the spec file >> 3. Fix all compilation and install issues - All >> >> 3. Beta schedule: >> Need to discuss and decide on the beta date >> >> Done tasks as of this week: >> o Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) >> o ibutils on SLES10 PPC64 (64 bits) - Vlad >> >> Previous done tasks: >> o Add qperf test from Qlogic - Johann (Qlogic) >> o Support RHEL 5 up1 - Woody & Vlad >> o Apply patches that fix warning of backport patches - Vlad >> o New MVAPICH package - Pasha & DK (OSU) >> o Complete RDS work - Vlad (Mellanox) >> o Integrate all SDP features - Jim (Mellanox) >> o nes - updated backport patches - Glenn (NetEffect) >> >> >> Tziporet >> >> >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > ***** Correspondence ***** > > This email contains no programmatic content that requires > independent ADC review > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg -- Jeff Squyres Cisco Systems From parks at lanl.gov Mon Nov 5 09:16:26 2007 From: parks at lanl.gov (Parks 
Fields) Date: Mon, 05 Nov 2007 10:16:26 -0700 Subject: [ewg] Re: [ofa-general] OFED Nov 05 meeting agenda on OFED 1.3 beta readiness In-Reply-To: <7AAF8175-897A-4688-8655-BE6867B2B2B7@cisco.com> References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> <7.0.1.0.2.20071105095741.028ecd30@lanl.gov> <7AAF8175-897A-4688-8655-BE6867B2B2B7@cisco.com> Message-ID: <7.0.1.0.2.20071105101522.028ed370@lanl.gov> At 10:09 AM 11/5/2007, Jeff Squyres wrote: >The call info reminder was sent earlier this morning; Outlook invites >were sent a week or so ago: Not all of us use Microsoft/outlook :-) > http://lists.openfabrics.org/pipermail/ewg/2007-November/ 004874.html How about just adding the number to the agenda since the passcode always changes. >On Nov 5, 2007, at 11:58 AM, Parks Fields wrote: > >>What is call in # and passcode >> >> >> >>At 09:17 AM 11/5/2007, Tziporet Koren wrote: >>>This is the agenda of OFED 1.3 meeting today: >>> >>>1. Rebase for kernel 2.6.24-rc1: >>> The backport was more complicated (mainly in IPoIB). >>> The following kernel modules have now backports to all kernels: >>> mthca, mlx4, ehca, core, IPoIB, RDS >>> >>> Kernel modules that need update: Chelsio driver (cxgb3), ipath >>>driver, iSER, SDP, SRP, VNIC >>> >>> Schedule: >>> All new backport patches should be send to Vlad by tomorrow >>>(Tuesday >>>Nov 6). >>> On Wed (Nov 7) we will start to publish the new package based on >>>kernel 2.6.24. >>> Kernel modules that will not pass compilation will be disabled >>> >>>2. Other Beta tasks status: >>> 1. Fix compilation problems on PPC SLES10 with 32 bits - Vlad >>>(Mellanox) - on work >>> 2. SPEC files should be part of each user space package - each >>>owner >>>should take the spec file >>> 3. Fix all compilation and install issues - All >>> >>>3. 
Beta schedule: >>> Need to discuss and decide on the beta date >>> >>>Done tasks as of this week: >>> o Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) >>> o ibutils on SLES10 PPC64 (64 bits) - Vlad >>> >>>Previous done tasks: >>> o Add qperf test from Qlogic - Johann (Qlogic) >>> o Support RHEL 5 up1 - Woody & Vlad >>> o Apply patches that fix warning of backport patches - Vlad >>> o New MVAPICH package - Pasha & DK (OSU) >>> o Complete RDS work - Vlad (Mellanox) >>> o Integrate all SDP features - Jim (Mellanox) >>> o nes - updated backport patches - Glenn (NetEffect) >>> >>> >>>Tziporet >>> >>> >>> >>>_______________________________________________ >>>general mailing list >>>general at lists.openfabrics.org >>>http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>> >>>To unsubscribe, please visit >>>http://openib.org/mailman/listinfo/openib-general >> >> ***** Correspondence ***** >> >>This email contains no programmatic content that requires >>independent ADC review >> >>_______________________________________________ >>ewg mailing list >>ewg at lists.openfabrics.org >>http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg > > >-- >Jeff Squyres >Cisco Systems > ***** Correspondence ***** This email contains no programmatic content that requires independent ADC review From jsquyres at cisco.com Mon Nov 5 09:23:29 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 5 Nov 2007 12:23:29 -0500 Subject: [ewg] Re: [ofa-general] OFED Nov 05 meeting agenda on OFED 1.3 beta readiness In-Reply-To: <7.0.1.0.2.20071105101522.028ed370@lanl.gov> References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> <7.0.1.0.2.20071105095741.028ecd30@lanl.gov> <7AAF8175-897A-4688-8655-BE6867B2B2B7@cisco.com> <7.0.1.0.2.20071105101522.028ed370@lanl.gov> Message-ID: On Nov 5, 2007, at 12:16 PM, Parks Fields wrote: >> The call info reminder was sent earlier this morning; Outlook invites >> were sent a week or so ago: 
> > Not all of us use Microsoft/outlook :-) Some of us have to, unfortunately. :-) >> http://lists.openfabrics.org/pipermail/ewg/2007-November/ >> 004874.html > > How about just adding the number to the agenda since the passcode > always changes. I usually send the reminder every Monday morning with that day's code (and the next few, just in case I'm unable to send it for the next teleconference, such as if I'm on a plane or something). Tziporet then usually replies to my post with the agenda. FWIW: I mentioned before that I only send the call info to the EWG list -- not the general list -- because a) the call is OFED-specific, and b) there have been flames about off-topic posts on the general list before. -- Jeff Squyres Cisco Systems From bramesh at vt.edu Mon Nov 5 09:37:04 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Mon, 5 Nov 2007 12:37:04 -0500 Subject: [ofa-general] Application blocked in mthca_poll_cq In-Reply-To: References: <20071105050600.GA20812@vt.edu> Message-ID: <001801c81fd2$7e593d70$7702a8c0@ida> I am not sure about the version of OFED being used, but its most likely OFED-1.2. Is there any way to find the version of OFED used. libmthca.so points to libmthca-rdmav2.so. I am not sure if this helps. My application is multithreaded, every time this happens when I try to attach the process to gdb I find that mthca_poll_cq is the one blocking and sometimes the call is blocking on pthread_spin_unlock. Which is surprising as I wouldnt expect pthread_spin_unlock to be blocking. I am sure that I am not doing any use-after-free. I dont destroy the CQ till the application is terminating. This situation occurs well before the application terminates. 
Thanks, Bharath -----Original Message----- From: Roland Dreier [mailto:rdreier at cisco.com] Sent: Monday, November 05, 2007 11:49 AM To: Bharath Ramesh Cc: OFA-General Subject: Re: [ofa-general] Application blocked in mthca_poll_cq > Every now and then I notice that my application is blocks inside > mthca_poll_cq. When I attach gdb to the process I find its blocking on a > call to pthread_spin_lock/pthread_spin_unlock. I am not sure if this is > a bug or something wrong with what I am doing. I calling ibv_poll_cq > with the number of entries as 1. Any help on this would be much > appreciated. I am not able to replicate it on separate test program. > There is not other call to ibv_poll_cq. What version of libmthca are you using? libmthca 1.0.2 and earlier had a bug that could cause this in rare circumstances (if you destroy two QPs simultaneously from different threads and the two QPs are such that the receive CQ of one QP is the send CQ of the other and vice versa). To be honest I doubt you're hitting this. The only operations in libmthca that hit the CQ spinlock are: - polling the CQ - resizing a CQ - modifying a QP to RESET - destroying a QP all of that code seems to take and release the CQ spinlock properly. I assume your application is multithreaded? When it gets stuck it would be useful to know which other thread is holding the CQ lock that poll_cq is blocked on; I don't know of a really good way to figure that out though. Is it possible that you have a use-after-free where you destroy a CQ and then call poll with a pointer to the freed CQ? - R. 
From weiny2 at llnl.gov Mon Nov 5 10:32:29 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 5 Nov 2007 10:32:29 -0800 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071104160743.GX6945@sashak.voltaire.com> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> Message-ID: <20071105103229.32e41a31.weiny2@llnl.gov> On Sun, 4 Nov 2007 18:07:43 +0200 Sasha Khapyorsky wrote: > On 20:15 Thu 01 Nov , Ira Weiny wrote: > > > diff --git a/infiniband-diags/configure.in b/infiniband-diags/configure.in > > index 0a5f3c8..a24d478 100644 > > --- a/infiniband-diags/configure.in > > +++ b/infiniband-diags/configure.in > > @@ -72,32 +72,6 @@ AC_CHECK_FUNCS([strchr strrchr strtol strtoul memset]) > > dnl Checks for typedefs, structures, and compiler characteristics. > > AC_C_CONST > > > > -dnl Check for the specification of a default node name map file > > -AC_MSG_CHECKING(for --with-node-name-map ) > > -AC_ARG_WITH(node-name-map, > > - AC_HELP_STRING([--with-node-name-map=file], > > - [define a default node name map file]), > > - [ case "$withval" in > > - no) > > - ;; > > - *) > > - withnodenamemap=yes > > - NODENAMEMAPFILE=$withval > > - ;; > > - esac ] > > -) > > -AC_MSG_RESULT(${withnodenamemap=no}) > > - > > -if test $withnodenamemap = "yes"; then > > - NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" > > - NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" > > - NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" > > - > > - AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, > > - ["$NODENAMEMAP"], > > - [Define a default node name map file]) > > -fi > > - > > dnl Check for perl and perl install location > > AC_MSG_CHECKING(for --with-perl-path ) > > AC_ARG_WITH(perl-path, > > So --with-node-name-map=file configure option is removed completely from > diags and how mapping will work by default is only depens on how OpenSM > (which is 
separate package) was configured. > > Do you think it would be useful useful to keep default map name as > configure option for infiniband-diags and to not depend from OpenSM > configuration (it seems easy doable - we could put some > default_node_name_map_file variable in ibdiag_common.c or so)? And then > we probably don't need such configure option for OpenSM? > Actually I would prefer a default which does not have to be configured. Would this be acceptable? Perhaps /ib-node-name-map? Ira From sashak at voltaire.com Mon Nov 5 11:33:58 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 5 Nov 2007 21:33:58 +0200 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071105103229.32e41a31.weiny2@llnl.gov> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> Message-ID: <20071105193358.GM8766@sashak.voltaire.com> On 10:32 Mon 05 Nov , Ira Weiny wrote: > On Sun, 4 Nov 2007 18:07:43 +0200 > Sasha Khapyorsky wrote: > > > On 20:15 Thu 01 Nov , Ira Weiny wrote: > > > > > diff --git a/infiniband-diags/configure.in b/infiniband-diags/configure.in > > > index 0a5f3c8..a24d478 100644 > > > --- a/infiniband-diags/configure.in > > > +++ b/infiniband-diags/configure.in > > > @@ -72,32 +72,6 @@ AC_CHECK_FUNCS([strchr strrchr strtol strtoul memset]) > > > dnl Checks for typedefs, structures, and compiler characteristics. 
> > > AC_C_CONST > > > > > > -dnl Check for the specification of a default node name map file > > > -AC_MSG_CHECKING(for --with-node-name-map ) > > > -AC_ARG_WITH(node-name-map, > > > - AC_HELP_STRING([--with-node-name-map=file], > > > - [define a default node name map file]), > > > - [ case "$withval" in > > > - no) > > > - ;; > > > - *) > > > - withnodenamemap=yes > > > - NODENAMEMAPFILE=$withval > > > - ;; > > > - esac ] > > > -) > > > -AC_MSG_RESULT(${withnodenamemap=no}) > > > - > > > -if test $withnodenamemap = "yes"; then > > > - NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" > > > - NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" > > > - NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" > > > - > > > - AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, > > > - ["$NODENAMEMAP"], > > > - [Define a default node name map file]) > > > -fi > > > - > > > dnl Check for perl and perl install location > > > AC_MSG_CHECKING(for --with-perl-path ) > > > AC_ARG_WITH(perl-path, > > > > So --with-node-name-map=file configure option is removed completely from > > diags and how mapping will work by default is only depens on how OpenSM > > (which is separate package) was configured. > > > > Do you think it would be useful useful to keep default map name as > > configure option for infiniband-diags and to not depend from OpenSM > > configuration (it seems easy doable - we could put some > > default_node_name_map_file variable in ibdiag_common.c or so)? And then > > we probably don't need such configure option for OpenSM? > > > > Actually I would prefer a default which does not have to be configured. Would > this be acceptable? Yes. I'm fine this this. > Perhaps /ib-node-name-map? Or under /etc/ofa/ ? 
Sasha From weiny2 at llnl.gov Mon Nov 5 12:17:44 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 5 Nov 2007 12:17:44 -0800 Subject: [ofa-general] Re: [PATCH 4/7] Change node name map implementation to use qmap in memory storage In-Reply-To: <20071104203412.GY6945@sashak.voltaire.com> References: <20071101201514.62bd5ce8.weiny2@llnl.gov> <20071104203412.GY6945@sashak.voltaire.com> Message-ID: <20071105121744.7c82dfa0.weiny2@llnl.gov> On Sun, 4 Nov 2007 22:34:12 +0200 Sasha Khapyorsky wrote: > Hi Ira, > > On 20:15 Thu 01 Nov , Ira Weiny wrote: > > From 2dacfc928856351820fadc416da787350254419e Mon Sep 17 00:00:00 2001 > > From: Ira K. Weiny > > Date: Thu, 1 Nov 2007 19:29:02 -0700 > > Subject: [PATCH] Change node name map implementation to use qmap in memory storage > > > > Signed-off-by: Ira K. Weiny > > } > > diff --git a/infiniband-diags/src/ibroute.c b/infiniband-diags/src/ibroute.c > > index 44d2fc8..664f7f5 100644 > > --- a/infiniband-diags/src/ibroute.c > > +++ b/infiniband-diags/src/ibroute.c > > @@ -50,6 +50,7 @@ > > #include > > #include > > #include > > +#include > > > > #include "ibdiag_common.h" > > > > I think this chunk should be part of patch 3 - I moved it there already. Thanks. 
> > diff --git a/opensm/complib/cl_nodenamemap.c b/opensm/complib/cl_nodenamemap.c > > index 144a7e4..584c78c 100644 > > --- a/opensm/complib/cl_nodenamemap.c > > +++ b/opensm/complib/cl_nodenamemap.c > > @@ -44,67 +44,105 @@ > > > > #include > > > > -FILE * > > +static nn_map_t * > > +read_names(nn_map_t *map) > > +{ > > + char *line = NULL; > > + size_t len = 0; > > + name_map_item_t *item; > > + > > + rewind(map->fp); > > + while (getline(&line, &len, map->fp) != -1) { > > + char *guid_str = NULL; > > + char *name = NULL; > > + line[len-1] = '\0'; > > + if (line[0] == '#') > > + goto next_one; > > + > > + guid_str = strtok(line, "\"#"); > > + name = strtok(NULL, "\"#"); > > + if (!guid_str || !name) > > + goto next_one; > > + > > + item = malloc(sizeof(*item)); > > + if (!item) { > > + goto error; > > + } > > + item->guid = strtoull(guid_str, NULL, 0); > > + item->name = strdup(name); > > + cl_qmap_insert(&(map->map), item->guid, (cl_map_item_t *)item); > > + > > +next_one: > > + free (line); > > + line = NULL; > > getline() is able to realloc 'line' buffer by itself, so should this > repeated free() be moved out of loop (with adding a variable which > stores allocated 'line' size)? yep, your right. New patch attached. 
> > > + } > > + > > +error: > > + return (map); > > +} > > + > > +nn_map_t * > > open_node_name_map(char *node_name_map) > > { > > - FILE *rc = NULL; > > + FILE *tmp_fp = NULL; > > + nn_map_t *rc = NULL; > > > > if (node_name_map != NULL) { > > - rc = fopen(node_name_map, "r"); > > - if (rc == NULL) { > > + tmp_fp = fopen(node_name_map, "r"); > > + if (tmp_fp == NULL) { > > fprintf(stderr, > > "WARNING failed to open switch map \"%s\" (%s)\n", > > node_name_map, strerror(errno)); > > } > > #ifdef HAVE_DEFAULT_NODENAME_MAP > > } else { > > - rc = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); > > + tmp_fp = fopen(HAVE_DEFAULT_NODENAME_MAP, "r"); > > #endif /* HAVE_DEFAULT_NODENAME_MAP */ > > } > > - return (rc); > > + if (!tmp_fp) > > + return (NULL); > > + > > + rc = malloc(sizeof(*rc)); > > + if (!rc) > > + return (NULL); > > + rc->fp = tmp_fp; > > + cl_qmap_init(&(rc->map)); > > + return (read_names(rc)); > > read_names() function cannot fail. Probably it would be cleaner to make > it void and just to return rc here. Done, new patch attached. > > > } > > > > void > > -close_node_name_map(FILE *fp) > > +close_node_name_map(nn_map_t *map) > > { > > - if (fp) > > - fclose(fp); > > + name_map_item_t *item = NULL; > > + > > + if (!map) > > + return; > > + > > + item = (name_map_item_t *)cl_qmap_head(&(map->map)); > > + while (item != cl_qmap_end(&(map->map))) { > > There are compilation warning about different pointer types. Fixed. 
> > > + item = (name_map_item_t *)cl_qmap_remove(&(map->map), item->guid); > > + free(item->name); > > + free(item); > > + item = (name_map_item_t *)cl_qmap_head(&(map->map)); > > + } > > + if (map->fp) > > + fclose(map->fp); > > + free(map); > > } > > > > char * > > -remap_node_name(FILE *node_name_map_fp, uint64_t target_guid, char *nodedesc) > > +remap_node_name(nn_map_t *map, uint64_t target_guid, char *nodedesc) > > { > > -#define NAME_LEN (256) > > - char *line = NULL; > > - size_t len = 0; > > - uint64_t guid = 0; > > - char *rc = NULL; > > - int line_count = 0; > > - > > - if (node_name_map_fp == NULL) > > + char *rc = NULL; > > + name_map_item_t *item = NULL; > > + > > + if (!map) > > goto done; > > > > - rewind(node_name_map_fp); > > - for (line_count = 1; > > - getline(&line, &len, node_name_map_fp) != -1; > > - line_count++) { > > - line[len-1] = '\0'; > > - if (line[0] == '#') > > - goto next_one; > > - char *guid_str = strtok(line, "\"#"); > > - char *name = strtok(NULL, "\"#"); > > - if (!guid_str || !name) > > - goto next_one; > > - guid = strtoull(guid_str, NULL, 0); > > - if (target_guid == guid) { > > - rc = strdup(name); > > - free (line); > > - goto done; > > - } > > -next_one: > > - free (line); > > - line = NULL; > > - } > > + item = (name_map_item_t *)cl_qmap_get(&(map->map), target_guid); > > + if (item != cl_qmap_end(&(map->map))) > > Ditto. Fixed. Sorry about these I should have caught them. New patch is attached. Thanks, Ira -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Change-node-name-map-implementation-to-use-qmap-in-m.patch Type: application/octet-stream Size: 12955 bytes Desc: not available URL: From weiny2 at llnl.gov Mon Nov 5 12:19:05 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 5 Nov 2007 12:19:05 -0800 Subject: [ofa-general] Re: [PATCH 6/7] Add node-name-map support to OpenSM; using the "default" map. 
In-Reply-To: <20071104211352.GZ6945@sashak.voltaire.com> References: <20071101201524.3f95a33a.weiny2@llnl.gov> <20071104211352.GZ6945@sashak.voltaire.com> Message-ID: <20071105121905.76b35067.weiny2@llnl.gov> On Sun, 4 Nov 2007 23:13:52 +0200 Sasha Khapyorsky wrote: > On 20:15 Thu 01 Nov , Ira Weiny wrote: > > From 35280cfd5229ccc8d91b6fd98e0f4b58193d0d03 Mon Sep 17 00:00:00 2001 > > From: Ira K. Weiny > > Date: Thu, 1 Nov 2007 19:41:37 -0700 > > Subject: [PATCH] Add node-name-map support to OpenSM; using the "default" map. > > > > Signed-off-by: Ira K. Weiny > > diff --git a/opensm/opensm/osm_node.c b/opensm/opensm/osm_node.c > > index 645daa9..f34da1f 100644 > > --- a/opensm/opensm/osm_node.c > > +++ b/opensm/opensm/osm_node.c > > @@ -131,6 +131,7 @@ osm_node_t *osm_node_new(IN const osm_madw_t * const p_madw) > > > > osm_node_init_physp(p_node, p_madw); > > } > > + p_node->print_desc = ""; > > > > return (p_node); > > } > > @@ -146,6 +147,11 @@ static void osm_node_destroy(IN osm_node_t * p_node) > > */ > > for (i = 0; i < p_node->physp_tbl_size; i++) > > osm_physp_destroy(&p_node->physp_table[i]); > > + > > + /* cleanup printable node_desc field */ > > + if (p_node->print_desc) { > > + free(p_node->print_desc); > > + } > > } > > > > /********************************************************************** > > p_node->print_desc is initialized as constant string and later freed as > dynamically allocated memory. I think there are paths when > osm_node_destroy() could run before __osm_nd_rcv_process_nd(). If so it > it looks like a bug? > yep, that is a bug, sorry... New patch with fix is attached. Ira -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0002-Add-node-name-map-support-to-OpenSM-using-the-defa.patch Type: application/octet-stream Size: 4904 bytes Desc: not available URL: From or.gerlitz at gmail.com Mon Nov 5 12:44:58 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 5 Nov 2007 22:44:58 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> Message-ID: <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> On 11/5/07, Roland Dreier wrote: > > > 11-12: SA cache session > > 12-1: IPoIB stateless offload issues > > > Sean, Roland, Dror - can you make it? > > I guess I can make it but what are the stateless offload issues? If > we want to spend an hour on it I think we need to figure out what if > anything we're stuck on -- at this point I don't know of any major > problems that need face-to-face time. As I said earlier on this thread, the open issues I see with the stateless offload series are (A) the non-interoperable checksum offload patch based on the IB ICRC sent by Michael (and if it is inter-operable, I'd like to be educated how) (B) LRO - a pure SW optimization, why does it need to be in the ipoib driver and not at least shared with the network stack code above ipoib. If Dror can't present and address these issues, maybe we can't have this session on this f2f - Dror? Roland - one more issue which was discussed and remained open with Michael is "connection liveness detection code to active side" for UC implementation of the connected mode, see the thread named "IPoIB-CM UC mode" from July 2nd etc at http://lists.openfabrics.org/pipermail/general/2007-July/037649.html - what do you say? Sean - are you fine with the SA cache being discussed over this hour? Or. -------------- next part -------------- An HTML attachment was scrubbed...
URL: From rdreier at cisco.com Mon Nov 5 13:34:42 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 05 Nov 2007 13:34:42 -0800 Subject: [ofa-general] Application blocked in mthca_poll_cq In-Reply-To: <001801c81fd2$7e593d70$7702a8c0@ida> (Bharath Ramesh's message of "Mon, 5 Nov 2007 12:37:04 -0500") References: <20071105050600.GA20812@vt.edu> <001801c81fd2$7e593d70$7702a8c0@ida> Message-ID: > I am not sure about the version of OFED being used, but its most likely > OFED-1.2. Is there any way to find the version of OFED used. libmthca.so > points to libmthca-rdmav2.so. I am not sure if this helps. My application is > multithreaded, every time this happens when I try to attach the process to > gdb I find that mthca_poll_cq is the one blocking and sometimes the call is > blocking on pthread_spin_unlock. Which is surprising as I wouldnt expect > pthread_spin_unlock to be blocking. I am sure that I am not doing any > use-after-free. I dont destroy the CQ till the application is terminating. > This situation occurs well before the application terminates. Yes, it's not really possible for pthread_spin_unlock() to block. In general (on any common architecture -- 32/64-bit x86, powerpc, ia64 -- at least) pthread_spin_unlock() is just a single store to the spinlock memory location. What that says to me is that either gdb is giving you bogus information (quite possible) or perhaps your application is not really stuck -- it is just in a tight loop polling a CQ maybe? (BTW I think you should be able to determine the libmthca version by doing "rpm -qi libmthca") - R. 
From sashak at voltaire.com Mon Nov 5 13:57:30 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 5 Nov 2007 23:57:30 +0200 Subject: [ofa-general] Re: [PATCH 4/7] Change node name map implementation to use qmap in memory storage In-Reply-To: <20071105121744.7c82dfa0.weiny2@llnl.gov> References: <20071101201514.62bd5ce8.weiny2@llnl.gov> <20071104203412.GY6945@sashak.voltaire.com> <20071105121744.7c82dfa0.weiny2@llnl.gov> Message-ID: <20071105215730.GO8766@sashak.voltaire.com> On 12:17 Mon 05 Nov , Ira Weiny wrote: > > New patch attached. And applied. Thanks. Sasha From rdreier at cisco.com Mon Nov 5 13:51:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 05 Nov 2007 13:51:58 -0800 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> (Or Gerlitz's message of "Mon, 5 Nov 2007 22:44:58 +0200") References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> Message-ID: > As I said earlier on this thread, the open issues I see with the stateless > offload series are (A) the non interoperable checksum offload patch based on > the IB ICRC sent by Michael (and if it is inter-operable, I'd like to be > educated how) (B) LRO - a pure SW optimization, why it need to be in the > ipoib driver and not at least shared with the network stack code above > ipoib. For (A): as far as I'm concerned, turning off TCP/IP checksums is not something we want to do in IPoIB. Is there anyone arguing in favor of it at this point? For (B): the LRO implementation posted is exactly using the generic network stack software LRO helpers! - R. 
From rdreier at cisco.com Mon Nov 5 13:57:33 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 05 Nov 2007 13:57:33 -0800 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: (Roland Dreier's message of "Mon, 05 Nov 2007 13:51:58 -0800") References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> Message-ID: > For (B): the LRO implementation posted is exactly using the generic > network stack software LRO helpers! Sorry, I take that back. I just checked again, and the patch that was posted just used the same function names as the upstream LRO generic helpers, which is what confused me when I skimmed it. Yes, clearly we want to use the inet_lro stuff; I don't think there's anything to discuss about that. - R. From bramesh at vt.edu Mon Nov 5 13:59:53 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Mon, 5 Nov 2007 16:59:53 -0500 Subject: [ofa-general] Application blocked in mthca_poll_cq In-Reply-To: References: <20071105050600.GA20812@vt.edu> <001801c81fd2$7e593d70$7702a8c0@ida> Message-ID: <20071105215953.GA25111@vt.edu> * Roland Dreier (rdreier at cisco.com) wrote: > > I am not sure about the version of OFED being used, but its most likely > > OFED-1.2. Is there any way to find the version of OFED used. libmthca.so > > points to libmthca-rdmav2.so. I am not sure if this helps. My application is > > multithreaded, every time this happens when I try to attach the process to > > gdb I find that mthca_poll_cq is the one blocking and sometimes the call is > > blocking on pthread_spin_unlock. Which is surprising as I wouldnt expect > > pthread_spin_unlock to be blocking. I am sure that I am not doing any > > use-after-free. I dont destroy the CQ till the application is terminating. > > This situation occurs well before the application terminates. > > Yes, it's not really possible for pthread_spin_unlock() to block. 
In > general (on any common architecture -- 32/64-bit x86, powerpc, ia64 -- > at least) pthread_spin_unlock() is just a single store to the spinlock > memory location. What that says to me is that either gdb is giving > you bogus information (quite possible) or perhaps your application is > not really stuck -- it is just in a tight loop polling a CQ maybe? I am looping around ibv_poll_cq on a tight loop. I think its quite possible that I am looping but I do quit the loop once I dont have any cq events. I will check that part of the code again. > > (BTW I think you should be able to determine the libmthca version by > doing "rpm -qi libmthca") The version of libmthca I am using is 1.0.4. Thanks, Bharath > > - R. > --- Bharath Ramesh http://people.cs.vt.edu/~bramesh From sashak at voltaire.com Mon Nov 5 14:11:49 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 6 Nov 2007 00:11:49 +0200 Subject: [ofa-general] Re: [PATCH 6/7] Add node-name-map support to OpenSM; using the "default" map. In-Reply-To: <20071105121905.76b35067.weiny2@llnl.gov> References: <20071101201524.3f95a33a.weiny2@llnl.gov> <20071104211352.GZ6945@sashak.voltaire.com> <20071105121905.76b35067.weiny2@llnl.gov> Message-ID: <20071105221149.GP8766@sashak.voltaire.com> On 12:19 Mon 05 Nov , Ira Weiny wrote: > > New patch with fix is attached. Applied. Thanks. Sasha From sashak at voltaire.com Mon Nov 5 14:12:44 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 6 Nov 2007 00:12:44 +0200 Subject: [ofa-general] Re: [PATCH 7/7] Add node_name_map_name to opts file. In-Reply-To: <20071101201531.281fbb7c.weiny2@llnl.gov> References: <20071101201531.281fbb7c.weiny2@llnl.gov> Message-ID: <20071105221244.GQ8766@sashak.voltaire.com> On 20:15 Thu 01 Nov , Ira Weiny wrote: > From 2f88e7db6e2553cac310209e0679e099e1a97576 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Thu, 1 Nov 2007 19:48:40 -0700 > Subject: [PATCH] Add node_name_map_name to opts file. > > Signed-off-by: Ira K. 
Weiny Applied. Thanks. Sasha From jgunthorpe at obsidianresearch.com Mon Nov 5 14:08:17 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Mon, 5 Nov 2007 15:08:17 -0700 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> Message-ID: <20071105220817.GC26372@obsidianresearch.com> On Mon, Nov 05, 2007 at 01:51:58PM -0800, Roland Dreier wrote: > > As I said earlier on this thread, the open issues I see with the stateless > > offload series are (A) the non interoperable checksum offload patch based on > > the IB ICRC sent by Michael (and if it is inter-operable, I'd like to be > > educated how) (B) LRO - a pure SW optimization, why it need to be in the > > ipoib driver and not at least shared with the network stack code above > > ipoib. > For (A): as far as I'm concerned, turning off TCP/IP checksums is not > something we want to do in IPoIB. Is there anyone arguing in favor of > it at this point? I think at the end of the thread it was left sort of hanging.. Certainly, for instance, the qlogic vnic driver does exactly the same thing as was proposed for ipoib when it's hw csum mode is used, so it isn't an unconditionally bad thing. IMHO, what was never addressed was to produce a patch set that made the csum offload process work properly and not be incompatible with existing code. Ie negotiated, and tagged with appropriate CHECKSUM_* marks in all the right places so that forwarding works. Basically make IPoIB CM mode use all the same optimization tricks as we see in a vnic protocol or in something like the xen networking bypass. Unless someone is stepping up to do that work it doesn't seem like there is anything that needs discussion face to face? 
Jason From gdror at dev.mellanox.co.il Mon Nov 5 14:26:16 2007 From: gdror at dev.mellanox.co.il (Dror Goldenberg) Date: Tue, 06 Nov 2007 00:26:16 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> Message-ID: <472F9888.5080301@dev.mellanox.co.il> Or Gerlitz wrote: > On 11/5/07, *Roland Dreier* > wrote: > > > 11-12: SA cache session > > 12-1: IPoIB stateless offload issues > > > Sean, Roland, Dror - can you make it? > > I guess I can make it but what are the stateless offload issues? If > we want to spend an hour on it I think we need to figure out what if > anything we're stuck on -- at this point I don't know of any major > problems that need face-to-face time. > > > As I said earlier on this thread, the open issues I see with the > stateless offload series are (A) the non interoperable checksum > offload patch based on the IB ICRC sent by Michael (and if it is > inter-operable, I'd like to be educated how) (B) LRO - a pure SW > optimization, why it need to be in the ipoib driver and not at least > shared with the network stack code above ipoib. > > If Dror can't present and address these issues, maybe we can't have > this session on this f2f - Dror? I can. a) it's an option that is turned off by default and people that are willing to take the risk can turn it on and get better performance b) indeed we will need to make it work with the kernel lso, no need to have something specific for ipoib Anyway, I am planning to present those things during the IPoIB SO talk. Wouldn't this be good enough ? 
From gdror at dev.mellanox.co.il Mon Nov 5 14:20:01 2007 From: gdror at dev.mellanox.co.il (Dror Goldenberg) Date: Tue, 06 Nov 2007 00:20:01 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> Message-ID: <472F9711.8010307@dev.mellanox.co.il> Roland Dreier wrote: > > As I said earlier on this thread, the open issues I see with the stateless > > offload series are (A) the non interoperable checksum offload patch based on > > the IB ICRC sent by Michael (and if it is inter-operable, I'd like to be > > educated how) (B) LRO - a pure SW optimization, why it need to be in the > > ipoib driver and not at least shared with the network stack code above > > ipoib. > > For (A): as far as I'm concerned, turning off TCP/IP checksums is not > something we want to do in IPoIB. Is there anyone arguing in favor of > it at this point? > > This issue is moot, but it's an option for devices that do not support csum offload and would like to still get better performance. Default is off. If you have a specific installation where it makes sense to turn off csum and rely on IB end to end ICRC then you can turn it on. On the other hand, it breaks the TCP/IP end to end data integrity validation and it violates csum calculation requirement. I can present what is there in the patch, motivation and let people choose. Therefore I think that a reasonable compromise will be to have it as an option and have it turned off by default. From yangdong at ncic.ac.cn Mon Nov 5 18:15:33 2007 From: yangdong at ncic.ac.cn (yangdong) Date: Tue, 06 Nov 2007 10:15:33 +0800 Subject: [ofa-general] ibv_post_send (RDMA READ) Message-ID: <472FCE45.9040805@ncic.ac.cn> Anybody know when ibv_post_send(IBV_RDMA_READ/IBV_SEND_SIGNALED) can return an err -1? 
From pradeeps at linux.vnet.ibm.com Mon Nov 5 19:52:35 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Mon, 05 Nov 2007 19:52:35 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> Message-ID: <472FE503.1010105@linux.vnet.ibm.com> > Yes, I saw that and fixed it. Actually I finished rewriting things so > that there is no more index and no static table any more. I haven't > posted the patches yet because I wanted to read it over a little more, > but if you re-pull my for-2.6.25 branch you will get them. > Quick update: I made several iterations with both slab and slub caches (and combinations). Saw one instance of BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num); in cache_alloc_refill(). This was the first run wherein both the client and server were the latest from your for-2.6.25 tree. After that I could not reproduce this crash in many runs. I am not sure, what I should make of it. So, I guess it looks OK. I might have a few questions, however that is a separate issue. I will follow up on that separately. Pradeep From unshaking at disisdad.com Mon Nov 5 21:59:54 2007 From: unshaking at disisdad.com (Sergiu Smith) Date: Mon, 05 Nov 2007 21:59:54 -0800 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c82039$aa278300$0100007f@localhost> cheapxpsoft2. 
com From dotanb at dev.mellanox.co.il Mon Nov 5 22:23:38 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 06 Nov 2007 08:23:38 +0200 Subject: [ofa-general] ibv_post_send (RDMA READ) In-Reply-To: <472FCE45.9040805@ncic.ac.cn> References: <472FCE45.9040805@ncic.ac.cn> Message-ID: <4730086A.1050906@dev.mellanox.co.il> Hi. yangdong wrote: > Anybody know when ibv_post_send(IBV_RDMA_READ/IBV_SEND_SIGNALED) can > return an err -1? > There are several options for post send failures: 1) the SQ is full (all of the WR in this Queue are outstanding) 2) bad opcode is being used 3) number of sge in the WR > number of sge of the SQ that the QP was created with 4) if inline flag was set: the message size is too big to be sent as inline I believe that your problem is 1) .... Dotan From diego.guella at sircomtech.com Mon Nov 5 23:16:57 2007 From: diego.guella at sircomtech.com (Diego Guella) Date: Tue, 6 Nov 2007 08:16:57 +0100 Subject: [ofa-general] some WinOF questions Message-ID: <007801c82045$0525ba50$05c8a8c0@DIEGO> I have some questions about the Windows code, that now seems to be "WinOF" (tell me if I'm wrong). How much time do you need approximately to port new code/fixes to WinOF? We are particularly interested in SDP Zero Copy. Does WinOF follow OFED or is it a separate working group? Where can I find binaries to install WinOF, and libraries for linking executables against SDP, verbs, rdmacm? Is interoperability between WinOF and OFED guaranteed? Thanks, Diego -------------- next part -------------- An HTML attachment was scrubbed... URL: From antipyretic at olandas.com Tue Nov 6 00:18:50 2007 From: antipyretic at olandas.com (Samir Hernandez) Date: Tue, 06 Nov 2007 16:18:50 +0800 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c8204c$eadcc780$0100007f@localhost> cheapxpsoft2. 
com From ogerlitz at voltaire.com Tue Nov 6 00:28:08 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 06 Nov 2007 10:28:08 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> Message-ID: <47302598.3090203@voltaire.com> Roland Dreier wrote: > > For (B): the LRO implementation posted is exactly using the generic > > network stack software LRO helpers! > > Sorry, I take that back. I just checked again, and the patch that was > posted just used the same function names as the upstream LRO generic > helpers, which is what confused me when I skimmed it. > > Yes, clearly we want to use the inet_lro stuff; I don't think there's > anything to discuss about that. OK, fine. So the way this patch is present in OFED 1.3 is only temporal where the upstream code would be done differently, let it be. Or. From bramesh at vt.edu Tue Nov 6 00:30:55 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Tue, 6 Nov 2007 03:30:55 -0500 Subject: [ofa-general] IBV_WC_STATUS status set as 135 Message-ID: <20071106083055.GA30284@vt.edu> I am getting this error every single time when I poll the CQ, with IBV_WC_STATUS set to 135. The vendor error code is also set to the same value of 135. I am trying to perform a RDMA Write operation. My WR is setup as follows. memcpy (ib_page_send_buf, data, PAGE_SIZE); sge.addr = (uintptr_t) ib_page_send_buf; sge.length = PAGE_SIZE; sge.lkey = ib_page_send_buf_mr->lkey; wr.wr_id = 1; wr.next = NULL; wr.opcode = IBV_WR_RDMA_WRITE; wr.send_flags = IBV_SEND_SIGNALED; wr.num_sge = 1; wr.sg_list = &sge; wr.wr.rdma.remote_addr = host->vaddr[0]; wr.wr.rdma.rkey = host->rkey[0]; It works for the first three iterations and on the fourth iteration it fails with this error. 
I ran the code a whole bunch of times and every single time it fails exactly at the same place with the same error. Any help on this is appreciated. Thanks, Bharath --- Bharath Ramesh http://people.cs.vt.edu/~bramesh From ogerlitz at voltaire.com Tue Nov 6 00:36:46 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 06 Nov 2007 10:36:46 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> Message-ID: <4730279E.6090906@voltaire.com> Roland Dreier wrote: >> As I said earlier on this thread, the open issues I see with the stateless >> offload series are (A) the non interoperable checksum offload patch based on >> the IB ICRC sent by Michael (and if it is inter-operable, I'd like to be >> educated how) > For (A): as far as I'm concerned, turning off TCP/IP checksums is not > something we want to do in IPoIB. Is there anyone arguing in favor of > it at this point? Not that someone argues, but they just went and put the below patch in OFED 1.3 skipping the review process and maintainer acceptance. So if Dror does not want to stand and refine the approach, eg as Jason suggested, maybe we can avoid discussing it at this point - Dror? Or. > Add module option hw_csum: when set, IPoIB will report HW CSUM > and S/G support, and rely on hardware end-to-end transport > checksum (ICRC) instead of software-level protocol checksums. > > Forwarding such packets outside the IB subnet would increase > the risk of data corruption, so it is safest not to set > hw_csum flag on gateways. To reduce the chance of > this routing triggering data corruption by mistake, on RX > we set skb checksum field to CHECKSUM_UNNECESSARY - this way > if such a packet ends up outside the IB network, > it is detected as malformed and dropped. 
> > To enable interoperability with IEEE IPoIB, checksum > for outgoing packets is calculated in software > unless the remote advertises hw_csum capability > by setting a bit in hardware address flag. > > Signed-off-by: Michael S. Tsirkin > > --- > > This patch has to be applied on top of > [PATCH 2/11] IB/ipoib: support for sending gather skbs. > > Updates since v2: > > Enable interoperability with IEEE IPoIB. > Split out S/G support to a separate patch. > > Updates since v1: fixed thinko in setting header flags. > > When applied on top of previously posted mlx4 patches, > and with hw_csum enabled on both ends, this patch speeds up > single-stream netperf bandwidth on connectx DDR from 1000 > to 1250 MBytes/sec. > > Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib.h > =================================================================== > --- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-09-24 16:21:10.000000000 +0200 > +++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2007-09-24 16:21:29.000000000 +0200 > @@ -86,6 +86,7 @@ enum { > IPOIB_MCAST_STARTED = 8, > IPOIB_FLAG_NETIF_STOPPED = 9, > IPOIB_FLAG_ADMIN_CM = 10, > + IPOIB_FLAG_HW_CSUM = 11, > > IPOIB_MAX_BACKOFF_SECONDS = 16, > > @@ -104,9 +105,11 @@ enum { > > /* structs */ > > +#define IPOIB_HEADER_F_HWCSUM 0x1 > + > struct ipoib_header { > __be16 proto; > - u16 reserved; > + __be16 flags; > }; > > struct ipoib_pseudoheader { > @@ -484,6 +487,8 @@ void ipoib_pkey_poll(struct work_struct > int ipoib_pkey_dev_delay_open(struct net_device *dev); > void ipoib_drain_cq(struct net_device *dev); > > +#define IPOIB_FLAGS_HWCSUM 0x01 > + > #ifdef CONFIG_INFINIBAND_IPOIB_CM > > #define IPOIB_FLAGS_RC 0x80 > Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c > =================================================================== > --- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-09-24 16:21:10.000000000 +0200 > +++ 
ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-09-24 16:21:29.000000000 +0200 > @@ -407,6 +407,7 @@ void ipoib_cm_handle_rx_wc(struct net_de > unsigned long flags; > u64 mapping[IPOIB_CM_RX_SG]; > int frags; > + struct ipoib_header *header; > > ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", > wr_id, wc->status); > @@ -469,7 +470,10 @@ void ipoib_cm_handle_rx_wc(struct net_de > > skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); > > - skb->protocol = ((struct ipoib_header *) skb->data)->proto; > + header = (struct ipoib_header *)skb->data; > + skb->protocol = header->proto; > + if (header->flags & cpu_to_be16(IPOIB_HEADER_F_HWCSUM)) > + skb->ip_summed = CHECKSUM_UNNECESSARY; > skb_reset_mac_header(skb); > skb_pull(skb, IPOIB_ENCAP_LEN); > > Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c > =================================================================== > --- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-09-24 16:21:10.000000000 +0200 > +++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-09-24 16:21:29.000000000 +0200 > @@ -170,6 +170,7 @@ static void ipoib_ib_handle_rx_wc(struct > struct ipoib_dev_priv *priv = netdev_priv(dev); > unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV; > struct sk_buff *skb; > + struct ipoib_header *header; > u64 addr; > > ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", > @@ -220,7 +221,10 @@ static void ipoib_ib_handle_rx_wc(struct > skb_put(skb, wc->byte_len); > skb_pull(skb, IB_GRH_BYTES); > > - skb->protocol = ((struct ipoib_header *) skb->data)->proto; > + header = (struct ipoib_header *)skb->data; > + skb->protocol = header->proto; > + if (header->flags & cpu_to_be16(IPOIB_HEADER_F_HWCSUM)) > + skb->ip_summed = CHECKSUM_UNNECESSARY; > skb_reset_mac_header(skb); > skb_pull(skb, IPOIB_ENCAP_LEN); > > Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c > 
=================================================================== > --- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-09-24 16:18:38.000000000 +0200 > +++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-09-24 16:21:29.000000000 +0200 > @@ -55,11 +55,14 @@ MODULE_LICENSE("Dual BSD/GPL"); > > int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; > int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; > +static int ipoib_hw_csum __read_mostly = 0; > > module_param_named(send_queue_size, ipoib_sendq_size, int, 0444); > MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); > module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); > MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); > +module_param_named(hw_csum, ipoib_hw_csum, int, 0444); > +MODULE_PARM_DESC(hw_csum, "Rely on hardware end-to-end checksum (ICRC) if > 0"); > > #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG > int ipoib_debug_level; > @@ -804,11 +807,18 @@ static int ipoib_hard_header(struct sk_b > void *daddr, void *saddr, unsigned len) > { > struct ipoib_header *header; > + struct ipoib_dev_priv *priv = netdev_priv(dev); > > header = (struct ipoib_header *) skb_push(skb, sizeof *header); > > header->proto = htons(type); > - header->reserved = 0; > + if (!test_bit(IPOIB_FLAG_HW_CSUM, &priv->flags) || > + skb->ip_summed != CHECKSUM_PARTIAL) > + header->flags = 0; > + else if (daddr && *((char *)daddr) & IPOIB_FLAGS_HWCSUM) > + header->flags = cpu_to_be16(IPOIB_HEADER_F_HWCSUM); > + else > + skb_checksum_help(skb); > > /* > * If we don't have a neighbour structure, stuff the > @@ -943,6 +953,9 @@ int ipoib_dev_init(struct net_device *de > if (ipoib_ib_dev_init(dev, ca, port)) > goto out_tx_ring_cleanup; > > + if (ipoib_hw_csum) > + dev->dev_addr[0] |= IPOIB_FLAGS_HWCSUM; > + > return 0; > > out_tx_ring_cleanup: > @@ -1006,6 +1019,10 @@ static void ipoib_setup(struct net_devic > dev->type = ARPHRD_INFINIBAND; > 
dev->tx_queue_len = ipoib_sendq_size * 2; > dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; > + if (ipoib_hw_csum) { > + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; > + set_bit(IPOIB_FLAG_HW_CSUM, &priv->flags); > + } > > /* MTU will be reset when mcast join happens */ > dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; From tziporet at dev.mellanox.co.il Tue Nov 6 00:45:08 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 06 Nov 2007 10:45:08 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <472F9888.5080301@dev.mellanox.co.il> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> <472F9888.5080301@dev.mellanox.co.il> Message-ID: <47302994.2030402@mellanox.co.il> Dror Goldenberg wrote: > Or Gerlitz wrote: >> On 11/5/07, *Roland Dreier* > > wrote: >> >> > 12-1: IPoIB stateless offload issues >> > > a) it's an option that is turned off by default and people that are > willing to take the risk can turn it on and get better performance > b) indeed we will need to make it work with the kernel lso, no need to > have something specific for ipoib > > > Anyway, I am planning to present those things during the IPoIB SO > talk. Wouldn't this be good enough ? > I think Roland will not be on the 2nd day (Friday) Maybe we need to switch this session to Thursday - Johann is this possible? 
Thanks, Tziporet From ogerlitz at voltaire.com Tue Nov 6 00:46:28 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 06 Nov 2007 10:46:28 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <472F9888.5080301@dev.mellanox.co.il> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> <472F9888.5080301@dev.mellanox.co.il> Message-ID: <473029E4.2050600@voltaire.com> Dror Goldenberg wrote: > a) it's an option that is turned off by default and people that are > willing to take the risk can turn it on and get better performance > b) indeed we will need to make it work with the kernel lso, no need to > have something specific for ipoib > Anyway, I am planning to present those things during the IPoIB SO talk. > Wouldn't this be good enough ? Dror, I have missed this post of yours, sorry for asking again if you can explain the approach. With what you say here, it seems that discussion on the LRO and non-compliant-checksum-offload patches can continue over the list once they are submitted to review for the upstream kernel. If possible, I'd like to hear more details on what the HW actually knows how to do (eg checksum and LSO offloads - for what IB transports, UDP or TCP, IPv6 or IPv4 only where the IP version matters), the slides in Sonoma were --very-- short to describe this and reviewing the patches without having the complete picture is tough. Can you shed more light (Roland, indeed it's more education than debate on open issues) on what you called in the Sonoma slides RCA (Receive Core Affinity), I understand this is actually multiple EQs? I'd be happy to learn about the motivation and suggested SW usage. Dror/Roland - how about a 30m session between 11:30-12 (before the SA cache)? Or. 
From RAISCH at de.ibm.com Mon Nov 5 23:42:40 2007 From: RAISCH at de.ibm.com (Christoph Raisch) Date: Tue, 6 Nov 2007 08:42:40 +0100 Subject: [ofa-general] problem in follow_hugetlb_page on ppc64 architecture with get_user_pages Message-ID: Hello, if get_user_pages is used on a hugetlb vma, and there was no previous write to the pages, follow_hugetlb_page will call ret = hugetlb_fault(mm, vma, vaddr, 0), although the page should be used for write access in get_user_pages. We currently see this when testing Infiniband on ppc64 with ehca + hugetlbfs. From reading the code this should also be an issue on other architectures. Roland, Adam, are you aware of anything in this area with mellanox Infiniband cards or other usages with I/O adapters? Gruss / Regards Christoph R. + Nam Ng. From ogerlitz at voltaire.com Tue Nov 6 01:02:05 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 06 Nov 2007 11:02:05 +0200 Subject: [ofa-general] [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com><4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com><47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> Message-ID: <47302D8D.8050208@voltaire.com> Sean Hefty wrote: > Fix a couple of errors in the man page documentation and add > infiniband specific text about QP configuration settings. This > is in response to user questions about various settings based > on feedback from Or. > I've tried to address all comments regarding missing or unclear > documentation. I also added IB specific areas to the man pages > in a few areas to clarify how calls are operating over IB. Hi Sean, Great job, thanks. 
Some comments that I had while reading the man pages: 1) I think we want to mention that rdma-cm remote address resolving involves issuing an ARP through the network stack (similar to the path query mentioning in the route resolve page, ARP is not even Infiniband specific) 2) I see you are using a "minimum RNR NAK timer" notation, what does "minimum" mean here? 3) when mentioning the packet-life-time and hca-local-delay, I think that it makes sense to spare a sentence on each and say that the packet lifetime is set by the IB SA per route (path) Or. From ogerlitz at voltaire.com Tue Nov 6 01:03:41 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 06 Nov 2007 11:03:41 +0200 Subject: [ofa-general] [PATCH 0/7] Add Node Name Map support to opensm In-Reply-To: <20071105152841.GE8766@sashak.voltaire.com> References: <20071101201448.7317825b.weiny2@llnl.gov> <472EB344.9070302@voltaire.com> <20071105152841.GE8766@sashak.voltaire.com> Message-ID: <47302DED.9080005@voltaire.com> Sasha Khapyorsky wrote: > Node description or name string specified in name map file. > It is about all nodes in a fabric (not just OFED nodes where node > description could be configured) including switches, routers, TCAs, etc. thanks for the clarifications. Or. From discourses at sullivantwins.com Tue Nov 6 04:17:53 2007 From: discourses at sullivantwins.com (Dustin Randolph) Date: Tue, 06 Nov 2007 12:17:53 +0000 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c8205d$b3c2e200$0100007f@localhost> cheapxpsoft2. 
com From vlad at lists.openfabrics.org Tue Nov 6 03:04:17 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 6 Nov 2007 03:04:17 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071106-0200 daily build status Message-ID: <20071106110417.F15E6E608B8@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.17 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.17 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.12 Passed on 
ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.9-22.ELsmp Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.9-34.ELsmp Failed: From trachelospermum at gralencompany.com Tue Nov 6 04:29:53 2007 From: trachelospermum at gralencompany.com (Kristen Bryan) Date: Tue, 06 Nov 2007 06:29:53 -0600 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c8206f$e675d600$0100007f@localhost> cheapxpsoft2. com From kliteyn at mellanox.co.il Mon Nov 5 21:19:47 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 6 Nov 2007 07:19:47 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-06:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-05 OpenSM git rev = Sat_Nov_3_14:58:36_2007 [a95cd8ff78c5436dc33f26828aa480dbed6f9c75] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree 
gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From agl at us.ibm.com Tue Nov 6 07:05:32 2007 From: agl at us.ibm.com (aglitke) Date: Tue, 06 Nov 2007 09:05:32 -0600 Subject: [ofa-general] Re: problem in follow_hugetlb_page on ppc64 architecture with get_user_pages In-Reply-To: References: Message-ID: <1194361532.20383.4.camel@localhost.localdomain> Please try this patch and see if it helps. commit 6decbd17d6fb70d50f6db2c348bb41d7246a67d1 Author: Adam Litke Date: Tue Nov 6 06:59:12 2007 -0800 hugetlb: follow_hugetlb_page for write access When calling get_user_pages(), a write flag is passed in by the caller to indicate if write access is required on the faulted-in pages. Currently, follow_hugetlb_page() ignores this flag and always faults pages for read-only access. This patch passes the write flag down to follow_hugetlb_page() and makes sure hugetlb_fault() is called with the right write_access parameter. Test patch only. Not Signed-off. 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3a19b03..31fa0a0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -19,7 +19,7 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); -int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); +int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index eab8c42..b645985 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -621,7 +621,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, - unsigned long *position, int *length, int i) + unsigned long *position, int *length, int i, + int write) { unsigned long pfn_offset; unsigned long vaddr = *position; @@ -643,7 +644,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, int ret; spin_unlock(&mm->page_table_lock); - ret = hugetlb_fault(mm, vma, vaddr, 0); + ret = hugetlb_fault(mm, vma, vaddr, write); spin_lock(&mm->page_table_lock); if (!(ret & VM_FAULT_ERROR)) continue; diff --git a/mm/memory.c b/mm/memory.c index f82b359..1bcd444 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1039,7 +1039,7 @@ int 
get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &len, i); + &start, &len, i, write); continue; } On Tue, 2007-11-06 at 08:42 +0100, Christoph Raisch wrote: > Hello, > if get_user_pages is used on a hugetlb vma, and there was no previous write > to the pages, > follow_hugetlb_page will call > ret = hugetlb_fault(mm, vma, vaddr, 0), > although the page should be used for write access in get_user_pages. > > We currently see this when testing Infiniband on ppc64 with ehca + > hugetlbfs. > From reading the code this should also be an issue on other architectures. > Roland, Adam, are you aware of anything in this area with mellanox > Infiniband cards or other usages with I/O adapters? > > Gruss / Regards > Christoph R. + Nam Ng. > > -- Adam Litke - (agl at us.ibm.com) IBM Linux Technology Center From hnguyen at linux.vnet.ibm.com Tue Nov 6 07:06:04 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Tue, 6 Nov 2007 16:06:04 +0100 Subject: [ofa-general] Re: problem in follow_hugetlb_page on ppc64 architecture with get_user_pages In-Reply-To: References: Message-ID: <200711061606.04402.hnguyen@linux.vnet.ibm.com> Hello Roland! > We currently see this when testing Infiniband on ppc64 with ehca + > hugetlbfs. > From reading the code this should also be an issue on other architectures. > Roland, Adam, are you aware of anything in this area with mellanox > Infiniband cards or other usages with I/O adapters? Below is a testcase demonstrating this problem. You need to install libhugetlbfs.so and run it as below: HUGETLB_MORECORE=yes LD_PRELOAD=libhugetlbfs.so ./hugetlb_ibtest 100 This testcase does the following steps (high level desc): 1. malloc two buffers each of 100MB for send and recv 2. register them as memory regions 3. create queue pair QP 4. send data in send buffer using QP to itself (target is then recv buffer) 5. 
compare those buffers content It runs fine without libhugetlbsf. If you call it with libhugetlbfs as above, step 5 will fail. If you do memset() of the buffers before step 2 (register mr), then it runs without errors. It appears that hugetlb_cow() is called when first write access is performed after mrs have been registered. That means the testcase is seeing other pages than the ones registered to the adapter... I was able reproduce this with mthca on 2.6.23/ppc64 and fc6/intel. Regards Nam #include #include #include #include #include #include static unsigned int pagesize; static unsigned int bufsize=1024*1024*19; int cmp_data(void *s, void *d, unsigned long len, unsigned long *fail_pos) { unsigned char *cs = s, *cd = d; assert(cs); assert(cd); assert(fail_pos); *fail_pos = 0; while (len) { if (*cs < *cd) return -1; if (*cs > *cd) return 1; len--; cs++; cd++; *fail_pos += 1; } return 0; } int hugetlb_ibtest(struct ibv_device* device) { struct ibv_context *context = NULL; struct ibv_port_attr port_attr; struct ibv_pd *pd = NULL; struct ibv_cq *send_cq = NULL; struct ibv_cq *recv_cq = NULL; struct ibv_qp *qp = NULL; struct ibv_mr *send_mr = NULL; struct ibv_mr *recv_mr = NULL; unsigned char *send_buffer = NULL; unsigned char *recv_buffer = NULL; int port = 1; // hardcoded for now int rc = 0; context = ibv_open_device(device); assert(context!=NULL); // query port memset(&port_attr, 0, sizeof(port_attr)); rc = ibv_query_port(context, port, &port_attr); assert(rc==0); // pd pd = ibv_alloc_pd(context); assert(pd!=NULL); // ah struct ibv_ah_attr ah_attr = { .is_global = 0, .dlid = port_attr.lid, .sl = 0, .src_path_bits = 0, .port_num = port, .static_rate = 3 }; struct ibv_ah *ah = ibv_create_ah(pd, &ah_attr); assert(ah!=NULL); // send cq send_cq = ibv_create_cq(context, 1, NULL, NULL, 0); assert(send_cq!=NULL); // recv cq recv_cq = ibv_create_cq(context, 1, NULL, NULL, 0); assert(recv_cq!=NULL); // qp struct ibv_qp_init_attr attr = { .send_cq = send_cq, .recv_cq = 
recv_cq, .cap = { .max_send_wr = 2, .max_recv_wr = 2, .max_send_sge = 1, .max_recv_sge = 1 }, .qp_type = IBV_QPT_RC, }; qp = ibv_create_qp(pd, &attr); assert(qp!=NULL); // qp RESET -> INIT struct ibv_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IBV_QPS_INIT; qp_attr.pkey_index = 0; qp_attr.port_num = port; qp_attr.qp_access_flags = 0; rc = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); assert(rc==0); // qp INIT -> RTR memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IBV_QPS_RTR; qp_attr.rq_psn = 0; qp_attr.max_rd_atomic = 1; qp_attr.dest_qp_num = qp->qp_num; qp_attr.path_mtu = IBV_MTU_2048; qp_attr.ah_attr = ah_attr; qp_attr.min_rnr_timer = 0; rc = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_DEST_QPN | IBV_QP_PATH_MTU | IBV_QP_AV | IBV_QP_MIN_RNR_TIMER); assert(rc==0); // qp RTR -> RTS memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.qp_state = IBV_QPS_RTS; qp_attr.sq_psn = 0; qp_attr.max_dest_rd_atomic = 1; qp_attr.timeout = 18; qp_attr.retry_cnt = 1; qp_attr.rnr_retry = 1; rc = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY); assert(rc==0); // mr recv recv_buffer = malloc(bufsize); assert(recv_buffer); unsigned int i; recv_mr = ibv_reg_mr(pd, recv_buffer, bufsize, IBV_ACCESS_LOCAL_WRITE); assert(recv_mr!=NULL); for (i = 0; i < bufsize; i++) recv_buffer[i] = ~(i & 0xff); // qp post_recv rc = ibv_req_notify_cq(recv_cq, 0); struct ibv_sge sge_recv = { .addr = (uintptr_t) recv_buffer, .length = bufsize, .lkey = recv_mr->lkey }; struct ibv_recv_wr recv_wr = { .next = NULL, .wr_id = 0x5003, .sg_list = &sge_recv, .num_sge = 1 }; struct ibv_recv_wr *bad_recv_wr = NULL; rc = ibv_post_recv(qp, &recv_wr, &bad_recv_wr); assert(rc==0); // mr send send_buffer = malloc(bufsize); assert(send_buffer); send_mr = ibv_reg_mr(pd, send_buffer, bufsize, 
IBV_ACCESS_LOCAL_WRITE); assert(send_mr!=NULL); for (i = 0; i < bufsize; i++) send_buffer[i] = (i & 0xff); rc = ibv_req_notify_cq(send_cq, 0); strcpy(send_buffer, "300 lines for one packet"); int slen = strlen(send_buffer); if (bufsize > slen*2+2) strcpy(send_buffer+bufsize-slen-1, send_buffer); struct ibv_sge sge_send = { .addr = (uintptr_t) send_buffer, .length = bufsize, .lkey = send_mr->lkey }; struct ibv_send_wr send_wr = { .wr_id = 0x71032, .sg_list = &sge_send, .num_sge = 1, .opcode = IBV_WR_SEND, .send_flags = IBV_SEND_SIGNALED, }; struct ibv_send_wr *bad_send_wr = NULL; rc = ibv_post_send(qp, &send_wr, &bad_send_wr); assert(rc==0); // poll send completion struct ibv_wc wc; int ne; memset(&wc, 0, sizeof(wc)); do { ne = ibv_poll_cq(send_cq, 1, &wc); } while (ne < 1); assert(ne==1); assert(wc.status==IBV_WC_SUCCESS); // poll recv completion memset(&wc, 0, sizeof(wc)); do { ne = ibv_poll_cq(recv_cq, 1, &wc); } while (ne < 1); assert(ne==1); assert(wc.status==IBV_WC_SUCCESS); // check what we received is what we sent printf("send: \"%s\"\n", send_buffer); printf("recv: \"%s\"\n", recv_buffer); unsigned long fail_pos; rc = cmp_data(send_buffer, recv_buffer, bufsize, &fail_pos); if (rc) { printf("fail_pos=%lx send_buffer=%p recv_buffer=%p " "%02x<>%02x\n", fail_pos, send_buffer, recv_buffer, send_buffer[fail_pos], recv_buffer[fail_pos]); FILE *f = fopen("hugetlb_ibtest.log", "w"); fprintf(f, "fail_pos=%lx send_buffer=%p recv_buffer=%p " "%02x<>%02x\n", fail_pos, send_buffer, recv_buffer, send_buffer[fail_pos], recv_buffer[fail_pos]); for (i = 0; i < bufsize; i += 16) { unsigned int j; fprintf(f, "%016lx %p ", (unsigned long)i, send_buffer + i); for (j = 0; j < 16; j++) fprintf(f, "%02x ", send_buffer[i + j]); fprintf(f, " %p ", recv_buffer + i); for (j = 0; j < 16; j++) fprintf(f, "%02x ", recv_buffer[i + j]); fprintf(f, "\n"); } fclose(f); printf("see log file hugetlb_ibtest.log\n"); } // clean up rc = ibv_dereg_mr(recv_mr); assert(rc==0); rc = 
ibv_dereg_mr(send_mr); assert(rc==0); rc = ibv_destroy_ah(ah); assert(rc==0); rc = ibv_destroy_qp(qp); assert(rc==0); rc = ibv_destroy_cq(send_cq); assert(rc==0); rc = ibv_destroy_cq(recv_cq); assert(rc==0); rc = ibv_dealloc_pd(pd); assert(rc==0); rc = ibv_close_device(context); assert(rc==0); return rc; } int main(int argc, char *argv[]) { struct ibv_device **dev_array = ibv_get_device_list(NULL); struct ibv_device *device = NULL; assert(dev_array!=NULL); device = dev_array[0]; // take first IB device assert(device!=NULL); pagesize = sysconf(_SC_PAGESIZE);; printf("pagesize=0x%x\n", pagesize); if (argc > 1) { int l = atoi(argv[1]); if (l) bufsize = 1024*1024*l; } printf("bufsize=0x%x\n", bufsize); int rc = hugetlb_ibtest(device); assert(rc==0); printf("OK!\n"); return 0; } From tziporet at mellanox.co.il Tue Nov 6 07:39:11 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 6 Nov 2007 17:39:11 +0200 Subject: [ofa-general] OFED Nov 05 meeting summary on OFED 1.3 beta readiness In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E1CC@mtlexch01.mtl.com> Note: there will be no meeting next week - CU all in SC07 Tziporet OFED Nov 05 meeting summary on OFED 1.3 beta readiness 1. Rebase for kernel 2.6.24-rc1: The backport was more complicated (mainly in IPoIB). The following kernel modules have now backports to all kernels: mthca, mlx4, ehca, ipath, core, IPoIB, RDS Kernel modules that need update: Chelsio driver (cxgb3), iSER, SDP, SRP, VNIC Note: Please work on this git branch: git://git.openfabrics.org/ofed_1_3/linux-2.6.git ofed_kernel_2_6_24_rc1 Schedule: All new backport patches should be send to Vlad by Tuesday Nov 6. On Wed (Nov 7) we will start to publish the new package based on kernel 2.6.24. Kernel modules that will not pass compilation will be disabled 2. 
Other Beta tasks status: 1. Fix compilation problems on PPC SLES10 with 32 bits - Vlad (Mellanox) - on work 2. SPEC files should be part of each user space package - each owner should take the spec file 3. Fix all compilation and install issues - All 4. management readiness and open a branch for 1.3 - Sasha 3. Beta schedule: Target: do the beta release by the end of this week (Note: Since in Israel we are not working on Friday it will be done either on Thursday or Sunday) 4. GA schedule: Tziporet to publish the GA schedule - after the beta release will be done The schedule we had is published on the Wiki at https://wiki.openfabrics.org/tiki-index.php?page=OFED+1.3+release+plan+and+features 5. Integration of OFED 1.3 with Redhat: Tziporet to talk to Doug in SC07 Done tasks for the beta: o Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) o ibutils on SLES10 PPC64 (64 bits) - Vlad o Add qperf test from Qlogic - Johann (Qlogic) o Support RHEL 5 up1 - Woody & Vlad o Apply patches that fix warning of backport patches - Vlad o New MVAPICH package - Pasha & DK (OSU) o Complete RDS work - Vlad (Mellanox) o Integrate all SDP features - Jim (Mellanox) o nes - updated backport patches - Glenn (NetEffect) From hrosenstock at xsigo.com Tue Nov 6 07:41:02 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 06 Nov 2007 07:41:02 -0800 Subject: [ofa-general] [Fwd: Outstanding MAD/SMI related patches] Message-ID: <1194363662.26246.940.camel@hrosenstock-ws.xsigo.com> Roland, What's the plan for these patches being pushed upstream ? Did I miss that ? Thanks. -- Hal -------- Forwarded Message -------- From: Hal Rosenstock To: Roland Dreier Cc: general at lists.openfabrics.org Subject: Outstanding MAD/SMI related patches Date: Tue, 23 Oct 2007 14:48:44 -0700 Hi Roland, AFAIK there are three outstanding patches now related to MAD and SMI which have all been acked and I believe are ready to go ahead: 1.
[PATCH] IB/core - remove redundant NULL pointer check in ib_mad_recv_done_handler(): Ralph Campbell 10/17 2. [PATCH V4] infiniband/core: Enable loopback of DR SMP responses from userspace: Steve Welch 10/19 3. [PATCH] IB/ipath - Enable loopback of DR SMP responses from userspace: Ralph Campbell 10/19 Just wanted to be clear on their status as these have floated around for a while now. Thanks. -- Hal From mshefty at ichips.intel.com Tue Nov 6 07:50:52 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 06 Nov 2007 07:50:52 -0800 Subject: [ofa-general] [PATCH 1/1] SDP - Fix reference count bug that prevents mlx4_ib and ib_sdp unload In-Reply-To: References: Message-ID: <47308D5C.3070406@ichips.intel.com> > static void sdp_remove_device(struct ib_device *device) > { > + struct list_head *p; > + struct sdp_sock *ssk; > + struct sock *sk; > + > write_lock(&device_removal_lock); > + > + spin_lock_irq(&sock_list_lock); > + list_for_each(p, &sock_list) { > + ssk = list_entry(p, struct sdp_sock, sock_list); > + if (ssk->ib_device == device) { > + sk = &ssk->isk.sk; > + > + if (ssk->id) { > + rdma_destroy_id(ssk->id); This is a blocking call, and a spin lock is being held. > + ssk->id = NULL; > + } > + > + sk->sk_shutdown |= RCV_SHUTDOWN; > + sdp_reset(sk); > + } > + } > + spin_unlock_irq(&sock_list_lock); > + > write_unlock(&device_removal_lock); > } - Sean From rdreier at cisco.com Tue Nov 6 07:52:36 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 06 Nov 2007 07:52:36 -0800 Subject: [ofa-general] [Fwd: Outstanding MAD/SMI related patches] In-Reply-To: <1194363662.26246.940.camel@hrosenstock-ws.xsigo.com> (Hal Rosenstock's message of "Tue, 06 Nov 2007 07:41:02 -0800") References: <1194363662.26246.940.camel@hrosenstock-ws.xsigo.com> Message-ID: > What's the plan for these patches being pushed upstream ? Did I miss > that ? Thanks. I will queue them for 2.6.25... none of them are fixes, right? 
From tziporet at dev.mellanox.co.il Tue Nov 6 07:53:22 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 06 Nov 2007 17:53:22 +0200 Subject: [ofa-general] New features for OFED 1.4 Message-ID: <47308DF2.70409@mellanox.co.il> I wish to collect requirements for new features for OFED 1.4 Please reply with any request you have (features of existing modules, new modules etc.) Thanks, Tziporet From hrosenstock at xsigo.com Tue Nov 6 07:56:45 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 06 Nov 2007 07:56:45 -0800 Subject: [ofa-general] [Fwd: Outstanding MAD/SMI related patches] In-Reply-To: References: <1194363662.26246.940.camel@hrosenstock-ws.xsigo.com> Message-ID: <1194364605.26246.956.camel@hrosenstock-ws.xsigo.com> On Tue, 2007-11-06 at 07:52 -0800, Roland Dreier wrote: > > What's the plan for these patches being pushed upstream ? Did I miss > > that ? Thanks. > > I will queue them for 2.6.25... Sounds good; thanks > none of them are fixes, right? None are critical fixes. The DR loopback change (and associated ipath change) are but are not critical so IMO this can wait for 2.6.25 From mshefty at ichips.intel.com Tue Nov 6 08:00:07 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 06 Nov 2007 08:00:07 -0800 Subject: [ofa-general] Feedback on Developer's Summit In-Reply-To: <472EFE0F.5070101@voltaire.com> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> Message-ID: <47308F87.7030507@ichips.intel.com> > 11-12: SA cache session > 12-1: IPoIB stateless offload issues > > Sean, Roland, Dror - can you make it? I should be able to make this, but as soon as you start pushing sessions before noon, time should probably be made for lunch. 
From Arkady.Kanevsky at netapp.com Tue Nov 6 08:01:05 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Tue, 6 Nov 2007 11:01:05 -0500 Subject: [ofa-general] New features for OFED 1.4 In-Reply-To: <47308DF2.70409@mellanox.co.il> References: <47308DF2.70409@mellanox.co.il> Message-ID: Tziporet, which Linux version is it going to be based on? 2.6.25? 26? Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Tziporet Koren [mailto:tziporet at dev.mellanox.co.il] > Sent: Tuesday, November 06, 2007 10:53 AM > To: EWG; OpenFabrics General > Subject: [ofa-general] New features for OFED 1.4 > > I wish to collect requirements for new features for OFED 1.4 > Please reply with any request you have (features of existing > modules, new modules etc.) > > Thanks, > Tziporet > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From hnguyen at linux.vnet.ibm.com Tue Nov 6 08:39:34 2007 From: hnguyen at linux.vnet.ibm.com (Hoang-Nam Nguyen) Date: Tue, 6 Nov 2007 17:39:34 +0100 Subject: [ofa-general] Re: problem in follow_hugetlb_page on ppc64 architecture with get_user_pages In-Reply-To: <1194361532.20383.4.camel@localhost.localdomain> References: <1194361532.20383.4.camel@localhost.localdomain> Message-ID: <200711061739.34952.hnguyen@linux.vnet.ibm.com> Hi Adam! On Tuesday 06 November 2007 16:05, aglitke wrote: > Please try this patch and see if it helps. Tested on 2.6.22 (don't have the system with 2.6.23 at the moment) and the testcase ran perfectly. Thanks! 
Nam From mshefty at ichips.intel.com Tue Nov 6 11:09:11 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 06 Nov 2007 11:09:11 -0800 Subject: [ofa-general] [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <47302D8D.8050208@voltaire.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com><4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com><47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> <47302D8D.8050208@voltaire.com> Message-ID: <4730BBD7.6050403@ichips.intel.com> > 1) I think we want to mention that rdma-cm remote address resolving > involves issuing an ARP through the network stack (similar to the path > query mentioning in the route resolve page, ARP is not even Infiniband > specific) I intentionally did not mention ARP here. I anticipate that other address resolution techniques will be needed. Although, I guess that I can call out ARP use for now, and change the documentation later. I will start a thread on possible other techniques soon. > 2) I see you are using a "minimum RNR NAK timer" notation, what does > "minimum" comes to say here? This matches the wording in the IB spec: section 11.2.4.2. page 573. - Sean From swise at opengridcomputing.com Tue Nov 6 11:45:42 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 06 Nov 2007 13:45:42 -0600 Subject: [ofa-general] printk in ofed-1.3 ib_destroy_qp() Message-ID: <4730C466.9010400@opengridcomputing.com> Should this printk be here? 
> int ib_destroy_qp(struct ib_qp *qp) > { > struct ib_pd *pd; > struct ib_cq *scq, *rcq; > struct ib_srq *srq; > struct ib_xrcd *xrcd; > enum ib_qp_type qp_type = qp->qp_type; > int ret; > > pd = qp->pd; > scq = qp->send_cq; > rcq = qp->recv_cq; > srq = qp->srq; > xrcd = qp->xrcd; > > ret = qp->device->destroy_qp(qp); > if (!ret) { > atomic_dec(&pd->usecnt); > atomic_dec(&scq->usecnt); > atomic_dec(&rcq->usecnt); > if (srq) > atomic_dec(&srq->usecnt); > if (qp_type == IB_QPT_XRC) > atomic_dec(&xrcd->usecnt); > else > printk("ib_destroy_qp: type = %d, xrcd = %p\n", qp_type, xrcd); > } > > return ret; > } From sashak at voltaire.com Tue Nov 6 11:57:04 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 6 Nov 2007 21:57:04 +0200 Subject: [ofa-general] [ANNOUNCE] management tarballs release Message-ID: <20071106195704.GF6945@sashak.voltaire.com> Hi, There is a new release of the management (OpenSM and infiniband diagnostics) tarballs available in: http://www.openfabrics.org/downloads/management/ md5sum: e270309f2fb0f948b098f63cb1f13bfb infiniband-diags-1.3.3.tar.gz 25b9491f90c7e851f5bafd556bcac5f6 libibcommon-1.0.6.tar.gz 0fa433e69cb04559efbc76a7157cc700 libibmad-1.1.3.tar.gz b4297b00f3999c951f8b98df6f5e6b19 libibumad-1.1.4.tar.gz 979b05d0534b1ee5f4a2eb12576a76e7 opensm-3.1.6.tar.gz Sasha From tziporet at dev.mellanox.co.il Tue Nov 6 11:59:51 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 06 Nov 2007 21:59:51 +0200 Subject: [ofa-general] New features for OFED 1.4 In-Reply-To: References: <47308DF2.70409@mellanox.co.il> Message-ID: <4730C7B7.7000905@mellanox.co.il> Kanevsky, Arkady wrote: > Tziporet, > which Linux version is it going to be based on? 2.6.25? 26? 
> I guess 2.6.26 Tziporet From tziporet at dev.mellanox.co.il Tue Nov 6 12:02:07 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 06 Nov 2007 22:02:07 +0200 Subject: [ofa-general] Re: [ewg] printk in ofed-1.3 ib_destroy_qp() In-Reply-To: <4730C466.9010400@opengridcomputing.com> References: <4730C466.9010400@opengridcomputing.com> Message-ID: <4730C83F.9040801@mellanox.co.il> Steve Wise wrote: > Should this printk be here? > >> >> else >> printk("ib_destroy_qp: type = %d, xrcd = >> %p\n", qp_type, xrcd); >> } >> >> return ret; >> } I think Jack already fixed this (there was also a bug about it) Jack? Tziporet From ssufficool at rov.sbcounty.gov Tue Nov 6 12:09:19 2007 From: ssufficool at rov.sbcounty.gov (Sufficool, Stanley) Date: Tue, 6 Nov 2007 12:09:19 -0800 Subject: [ofa-general] New features for OFED 1.4 In-Reply-To: <47308DF2.70409@mellanox.co.il> Message-ID: Integrating the SRP Target code into the stable or testing branch for nightly testing with newer kernels & OFED releases would be a plus IMHO. -----Original Message----- From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Tziporet Koren Sent: Tuesday, November 06, 2007 7:53 AM To: EWG; OpenFabrics General Subject: [ofa-general] New features for OFED 1.4 I wish to collect requirements for new features for OFED 1.4 Please reply with any request you have (features of existing modules, new modules etc.) 
Thanks, Tziporet _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From or.gerlitz at gmail.com Tue Nov 6 13:01:15 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Tue, 6 Nov 2007 23:01:15 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <47308F87.7030507@ichips.intel.com> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <47308F87.7030507@ichips.intel.com> Message-ID: <15ddcffd0711061301j34a6113en155ddd8629e5f82e@mail.gmail.com> On 11/6/07, Sean Hefty wrote: > > > 11-12: SA cache session > > 12-1: IPoIB stateless offload issues > > > > Sean, Roland, Dror - can you make it? > > I should be able to make this, but as soon as you start pushing sessions > before noon, time should probably be made for lunch. So what is your suggestion? With all the pushes I have tried, Johann refuses to allocate more than 20m to the two-year-old open SA cache issue in the IB stack, and said that only if we want to meet before 1PM, he might get us the room. So we would hear about windowz, about iwarp, about the logo program, about dapl, about this or that protocol update etc etc and not discuss our open issues? Or. -------------- next part -------------- An HTML attachment was scrubbed...
URL: From hrosenstock at xsigo.com Tue Nov 6 13:04:56 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 06 Nov 2007 13:04:56 -0800 Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 In-Reply-To: <47308DF2.70409@mellanox.co.il> References: <47308DF2.70409@mellanox.co.il> Message-ID: <1194383096.26246.1031.camel@hrosenstock-ws.xsigo.com> On Tue, 2007-11-06 at 17:53 +0200, Tziporet Koren wrote: > I wish to collect requirements for new features for OFED 1.4 > Please reply with any request you have (features of existing modules, > new modules etc.) Xsigo will be contributing its virtual NIC and HBA host drivers for OFED 1.4. -- Hal > Thanks, > Tziporet > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg From johann.george at qlogic.com Tue Nov 6 13:13:44 2007 From: johann.george at qlogic.com (Johann George) Date: Tue, 6 Nov 2007 13:13:44 -0800 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <15ddcffd0711061301j34a6113en155ddd8629e5f82e@mail.gmail.com> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <47308F87.7030507@ichips.intel.com> <15ddcffd0711061301j34a6113en155ddd8629e5f82e@mail.gmail.com> Message-ID: <20071106211344.GC28157@cuprite.pathscale.com> > > I should be able to make this, but as soon as you start pushing sessions > > before noon, time should probably be made for lunch. Perhaps we can include a working lunch for the developers who want to and are able to meet earlier and discuss these issues? Johann From Jeffrey.C.Becker at nasa.gov Tue Nov 6 13:26:53 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Tue, 06 Nov 2007 13:26:53 -0800 Subject: [ofa-general] New features for OFED 1.4 In-Reply-To: References: Message-ID: <4730DC1D.8040308@nasa.gov> Hi Tziporet. I agree with James. Let's plan for NFS-RDMA support in OFED 1.4. 
-jeff James Lentini wrote: > Jeff, > > This looks like a good opportunity to get NFS-RDMA turned on from the > start in OFED 1.4. The client is already part of 2.6.24 and the server > is expected to be in 2.6.25. I assume that OFED 1.4 will be 2.6.25+ > based, which will work out nicely for NFS-RDMA. > > james > > ---------- Forwarded message ---------- > Date: Tue, 06 Nov 2007 17:53:22 +0200 > From: Tziporet Koren > To: EWG , > OpenFabrics General > Subject: [ofa-general] New features for OFED 1.4 > > I wish to collect requirements for new features for OFED 1.4 > Please reply with any request you have (features of existing modules, new > modules etc.) > > Thanks, > Tziporet > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Tue Nov 6 13:36:08 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 06 Nov 2007 13:36:08 -0800 Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 In-Reply-To: <47308DF2.70409@mellanox.co.il> (Tziporet Koren's message of "Tue, 06 Nov 2007 17:53:22 +0200") References: <47308DF2.70409@mellanox.co.il> Message-ID: I would suggest trying to figure out how to make OFED 1.4 the last OFED release. OFED was originally supposed to be a short-term distribution until mainstream Linux distributions caught up and were able to distribute IB/iWARP support. So we should assess how close we are to that goal and how we can put OFED out of business. - R. From jim at mellanox.com Tue Nov 6 14:53:35 2007 From: jim at mellanox.com (Jim Mott) Date: Tue, 6 Nov 2007 14:53:35 -0800 Subject: [ofa-general] [PATCH 1/1 V2] SDP - Fix reference count bug that prevents mlx4_ib and ib_sdp unload Message-ID: Add code to fix a problem found by the Mellanox regression group. 
When mlx4_ib driver is unloaded while SDP connections are active, the unloads would hang. The original fix for this problem called an rdma_cm service that can block with 2 spin locks held. This version does not hold any locks during the call. Signed-off-by: Jim Mott --- --- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-06 14:21:26.000000000 -0800 +++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-06 14:22:19.000000000 -0800 @@ -2234,10 +2234,12 @@ static void sdp_add_device(struct ib_dev static void sdp_remove_device(struct ib_device *device) { - struct list_head *p; - struct sdp_sock *ssk; - struct sock *sk; + struct list_head *p; + struct sdp_sock *ssk; + struct sock *sk; + struct rdma_cm_id *id; +do_next: write_lock(&device_removal_lock); spin_lock_irq(&sock_list_lock); @@ -2245,16 +2247,30 @@ static void sdp_remove_device(struct ib_ ssk = list_entry(p, struct sdp_sock, sock_list); if (ssk->ib_device == device) { sk = &ssk->isk.sk; + id = ssk->id; - if (ssk->id) { - rdma_destroy_id(ssk->id); + if (id) { ssk->id = NULL; + + spin_unlock_irq(&sock_list_lock); + write_unlock(&device_removal_lock); + rdma_destroy_id(id); + + goto do_next; } + } + } + + list_for_each(p, &sock_list) { + ssk = list_entry(p, struct sdp_sock, sock_list); + if (ssk->ib_device == device) { + sk = &ssk->isk.sk; sk->sk_shutdown |= RCV_SHUTDOWN; sdp_reset(sk); } } + spin_unlock_irq(&sock_list_lock); write_unlock(&device_removal_lock); From rdreier at cisco.com Tue Nov 6 14:55:46 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 06 Nov 2007 14:55:46 -0800 Subject: [ofa-general] [PATCH 1/1 V2] SDP - Fix reference count bug that prevents mlx4_ib and ib_sdp unload In-Reply-To: (Jim Mott's message of "Tue, 6 Nov 2007 14:53:35 -0800") References: Message-ID: What does this have to do with mlx4? It seems it is just a bug in SDP related to hot-removing any device, right? - R. 
From rdreier at cisco.com Tue Nov 6 14:58:49 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 06 Nov 2007 14:58:49 -0800 Subject: [ofa-general] IBV_WC_STATUS status set as 135 In-Reply-To: <20071106083055.GA30284@vt.edu> (Bharath Ramesh's message of "Tue, 6 Nov 2007 03:30:55 -0500") References: <20071106083055.GA30284@vt.edu> Message-ID: > I am getting this error every single time when I poll the CQ, with > IBV_WC_STATUS set to 135. The vendor error code is also set to the same > value of 135. I am trying to perform a RDMA Write operation. My WR is > setup as follows. I don't see any way that the libmthca code could set wc.status to 135. Are you sure that the poll operation is succeeding? What is the return value from ibv_poll_cq()? Do you have a complete test case that you could post, which I could build and run? It sounds like this is very reproducible, but it's hard to debug from just the piece you posted. - R. From jsquyres at cisco.com Tue Nov 6 15:08:38 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Tue, 6 Nov 2007 18:08:38 -0500 Subject: [ofa-general] Summit schedule? Message-ID: <17C80901-FA87-48FD-B352-FA5388AE912B@cisco.com> Is there a new strawman version of the schedule available, perchance? I ask because I need to coordinate the OF sessions with some other events at SC, and we're now down to only a few days away. Thanks! -- Jeff Squyres Cisco Systems From Arkady.Kanevsky at netapp.com Tue Nov 6 15:18:07 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Tue, 6 Nov 2007 18:18:07 -0500 Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 In-Reply-To: References: <47308DF2.70409@mellanox.co.il> Message-ID: Sounds like a topic for plenary time at dev conference. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16.
Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Roland Dreier [mailto:rdreier at cisco.com] > Sent: Tuesday, November 06, 2007 4:36 PM > To: Tziporet Koren > Cc: EWG; OpenFabrics General > Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 > > I would suggest trying to figure out how to make OFED 1.4 the > last OFED release. OFED was originally supposed to be a > short-term distribution until mainstream Linux distributions > caught up and were able to distribute IB/iWARP support. So > we should assess how close we are to that goal and how we can > put OFED out of business. > > - R. > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From jim at mellanox.com Tue Nov 6 15:09:49 2007 From: jim at mellanox.com (Jim Mott) Date: Tue, 6 Nov 2007 15:09:49 -0800 Subject: [ofa-general] [PATCH 1/1 V2] SDP - Fix reference count bug that prevents mlx4_ib and ib_sdp unload In-Reply-To: References: Message-ID: It is an SDP bug. The test that found the problem had a symptom where the "rmmod mlx4_ib" command would hang in an uninterruptable sleep in cma_remove_one(). Also any attempt to unload ib_sdp would also hang. The original V1 post of this patch was very light on detail and I took the V2 opportunity to explain. Almost makes up for the stupid mistake in the first patch... 
The process used to duplicate the bug and verify this fix is to use 3 nodes (1 just for SM to not confuse things), and execute the steps 1-7 below: node0: (MLX4) 0) opensm started node1: (MLX4) [With ib_sdp loaded and LD_PRELOAD setup] 1) netserver 3) /sbin/rmmod mlx4_ib && /sbin/modprobe mlx4_ib (in parallel to 2) *** HANGS before fix; Works after *** 6) killall netserver 7) modprobe -r ib_sdp *** HANGS before fix; works after *** node2: (MLX4) [With ib_sdp loaded and LD_PRELOAD setup] 2) netperf -C -c -P 0 -t TCP_STREAM -H green_ib -l 120 -- -m 1000000 4) after failure ^C or just wait for netperf to end on its own with "netperf: cannot shutdown tcp stream socket: Transport endpoint is not connected" 5) /etc/init.d/openibd stop *** WORKS before and after fix *** Thanks, Jim Jim Mott Mellanox Technologies Ltd. mail: jim at mellanox.com Phone: 512-294-5481 -----Original Message----- From: Roland Dreier [mailto:rdreier at cisco.com] Sent: Tuesday, November 06, 2007 4:56 PM To: Jim Mott Cc: openib-general at openib.org Subject: Re: [ofa-general] [PATCH 1/1 V2] SDP - Fix reference count bug that prevents mlx4_ib and ib_sdp unload What does this have to do with mlx4? It seems it is just a bug in SDP related to hot-removing any device, right? - R. From swise at opengridcomputing.com Tue Nov 6 16:43:36 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 06 Nov 2007 18:43:36 -0600 Subject: [ofa-general] [GIT PULL] ofed-1.3/2.6.24 - cxgb backports Message-ID: <47310A38.6010901@opengridcomputing.com> Vlad, Please pull from: git://git.openfabrics.org/~swise/ofed-1.3 stevo I've added cxgb3/iw_cxgb3 backports for: - 2.6.12-2.6.23 kernel.org kernels - rhel4u4 - rhel5/FC6 - sles10 I'll have more to come but this is all I could get done for today's cutoff. If possible, I'd like a few more days to finish all the backports and do some more testing. Thanks, Steve.
From pradeeps at linux.vnet.ibm.com Tue Nov 6 18:13:08 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Tue, 06 Nov 2007 18:13:08 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> Message-ID: <47311F34.6030004@linux.vnet.ibm.com> Roland, A few things that caught my eye (in blue). I hope the colours show up. static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; int i; rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL); if (!rx->rx_ring) return -ENOMEM; spin_lock_irq(&priv->lock); if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { spin_unlock_irq(&priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0); return -EINVAL; } else ++priv->cm.nonsrq_conn_qp; spin_unlock_irq(&priv->lock); for (i = 0; i < ipoib_recvq_size; ++i) { if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, rx->rx_ring[i].mapping)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; /* See generic comments about ipoib_cm_alloc_rx_skb() below */ goto err; } ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); if (ret) { ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " "failed for buf %d\n", i); ret = -EIO; /* See generic comments about ipoib_cm_alloc_rx_skb() below */ goto err; } } rx->recv_count = ipoib_recvq_size; return 0; err: spin_lock_irq(&priv->lock); --priv->cm.nonsrq_conn_qp; spin_unlock_irq(&priv->lock); kfree(rx_ring) is missing return ret; } Generic comment about ipoib_cm_alloc_rx_skb() which is true for both the srq and non srq cases 
(except in the receive wc handler): I find that there will be skb leakage when ipoib_cm_alloc_rx_skb() fails say before all the rx skbs are allocated. We must undo those allocations and mappings. Probably we should call ipoib_cm_dev_cleanup() and free the skbs and do the unmap there. void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_rx_buf *rx_ring; unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); struct sk_buff *skb, *newskb; struct ipoib_cm_rx *p; unsigned long flags; u64 mapping[IPOIB_CM_RX_SG]; int frags; ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); if (unlikely(wr_id >= ipoib_recvq_size)) { printk(KERN_WARNING "Inside rx wc with wr_id=0x%x\n", wr_id); if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); ipoib_cm_start_rx_drain(priv); ----> I do not understand why we this is required. We have already received a wc with RX_DRAIN set. 
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); spin_unlock_irqrestore(&priv->lock, flags); } else ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", wr_id, ipoib_recvq_size); return; } p = wc->qp->qp_context; if (ipoib_cm_has_srq(dev)) rx_ring = priv->cm.srq_ring; else rx_ring = p->rx_ring; skb = rx_ring[wr_id].skb; if (unlikely(wc->status != IB_WC_SUCCESS)) { ipoib_dbg(priv, "cm recv error " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); ++dev->stats.rx_dropped; if (!p->rx_ring) goto repost; ---->If rx_ring is indeed NULL, we should insert a BUG_ON and call panic() else { if (!--p->recv_count) { printk(KERN_WARNING "recv_count=0x%x\n", p->recv_count); spin_lock_irqsave(&priv->lock, flags); list_move(&p->list, &priv->cm.rx_reap_list); spin_unlock_irqrestore(&priv->lock, flags); queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); } return; } } if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { spin_lock_irqsave(&priv->lock, flags); p->jiffies = jiffies; /* Move this entry to list head, but do not re-add it * if it has been moved out of list. */ if (p->state == IPOIB_CM_RX_LIVE) list_move(&p->list, &priv->cm.passive_ids); spin_unlock_irqrestore(&priv->lock, flags); } } frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); if (unlikely(!newskb)) { /* * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. 
*/ ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); ++dev->stats.rx_dropped; goto repost; } ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); skb->protocol = ((struct ipoib_header *) skb->data)->proto; skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); dev->last_rx = jiffies; ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; skb->dev = dev; /* XXX get correct PACKET_ type here */ skb->pkt_type = PACKET_HOST; netif_receive_skb(skb); repost: if (p->rx_ring) { -----> This should be if(!ipoib_cm_has_srq()) if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { --p->recv_count; ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " "for buf %d\n", wr_id); } } else { if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id))) ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " "for buf %d\n", wr_id); } } Pradeep From dwg at au1.ibm.com Tue Nov 6 14:31:19 2007 From: dwg at au1.ibm.com (David Gibson) Date: Wed, 7 Nov 2007 09:31:19 +1100 Subject: [ofa-general] Re: problem in follow_hugetlb_page on ppc64 architecture with get_user_pages In-Reply-To: <200711061606.04402.hnguyen@linux.vnet.ibm.com> References: <200711061606.04402.hnguyen@linux.vnet.ibm.com> Message-ID: <20071106223119.GC31367@localhost.localdomain> On Tue, Nov 06, 2007 at 04:06:04PM +0100, Hoang-Nam Nguyen wrote: > Hello Roland! > > We currently see this when testing Infiniband on ppc64 with ehca + > > hugetlbfs. > > From reading the code this should also be an issue on other architectures. > > Roland, Adam, are you aware of anything in this area with mellanox > > Infiniband cards or other usages with I/O adapters? > Below is a testcase demonstrating this problem. 
You need to install > libhugetlbfs.so and run it as below: > HUGETLB_MORECORE=yes LD_PRELOAD=libhugetlbfs.so ./hugetlb_ibtest 100 > > This testcase does the following steps (high level desc): > 1. malloc two buffers each of 100MB for send and recv > 2. register them as memory regions > 3. create queue pair QP > 4. send data in send buffer using QP to itself (target is then recv buffer) > 5. compare those buffers content > > It runs fine without libhugetlbsf. If you call it with libhugetlbfs as > above, step 5 will fail. If you do memset() of the buffers before step 2 > (register mr), then it runs without errors. > It appears that hugetlb_cow() is called when first write access is performed > after mrs have been registered. That means the testcase is seeing other pages > than the ones registered to the adapter... > > I was able reproduce this with mthca on 2.6.23/ppc64 and fc6/intel. We should cut this down to the bare necessary and fold it into the libhugetlbfs testsuite. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson From bramesh at vt.edu Tue Nov 6 20:30:20 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Tue, 6 Nov 2007 23:30:20 -0500 Subject: [ofa-general] IBV_WC_STATUS status set as 135 In-Reply-To: References: <20071106083055.GA30284@vt.edu> Message-ID: <20071107043020.GA3759@vt.edu> * Roland Dreier (rdreier at cisco.com) wrote: > > I am getting this error every single time when I poll the CQ, with > > IBV_WC_STATUS set to 135. The vendor error code is also set to the same > > value of 135. I am trying to perform a RDMA Write operation. My WR is > > setup as follows. > > I don't see any way that the libmthca code could set wc.status to > 135. Are you sure that the poll operation is succeeding? What is the > return value from ibv_poll_cq()? The return value of ibv_poll_cq was always 1. 
> > Do you have a complete test case that you could post, which I could > build and run? It sounds like this is very reproducible, but it's > hard to debug from just the piece you posted. I think the problem that was my SRQ buffer was hitting low watermark and before I could post additional buffers to the SRQ my QP was going into an error state, a race condition that I had missed. Thanks, Bharath > > - R. > --- Bharath Ramesh http://people.cs.vt.edu/~bramesh From jackm at dev.mellanox.co.il Tue Nov 6 22:28:11 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 7 Nov 2007 08:28:11 +0200 Subject: [ofa-general] Re: [ewg] printk in ofed-1.3 ib_destroy_qp() In-Reply-To: <4730C83F.9040801@mellanox.co.il> References: <4730C466.9010400@opengridcomputing.com> <4730C83F.9040801@mellanox.co.il> Message-ID: <200711070828.11537.jackm@dev.mellanox.co.il> On Tuesday 06 November 2007 22:02, Tziporet Koren wrote: > Steve Wise wrote: > > Should this printk be here? > > > >> > >> else > >> printk("ib_destroy_qp: type = %d, xrcd = > >> %p\n", qp_type, xrcd); > >> } > >> > >> return ret; > >> } > I think Jack already fixed this (there was also a bug about it) > > Jack? > > Tziporet > Fixed on Oct 31. From ogerlitz at voltaire.com Tue Nov 6 23:03:22 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 07 Nov 2007 09:03:22 +0200 Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 In-Reply-To: References: <47308DF2.70409@mellanox.co.il> Message-ID: <4731633A.4010403@voltaire.com> Roland Dreier wrote: > I would suggest trying to figure out how to make OFED 1.4 the last > OFED release. OFED was originally supposed to be a short-term > distribution until mainstream Linux distributions caught up and were > able to distribute IB/iWARP support. So we should assess how close we > are to that goal and how we can put OFED out of business. 
I suggest putting this in a separate agenda item to be discussed in the first day (eg following the distribution people feedback and OFED 1.3 update by Tziporet). Johann - this can't be put aside under the excuse of no time. If needed, I suggest to carve 5 minutes from each session, etc. Or. From johann.george at qlogic.com Tue Nov 6 23:31:06 2007 From: johann.george at qlogic.com (Johann George) Date: Tue, 6 Nov 2007 23:31:06 -0800 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <47302994.2030402@mellanox.co.il> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> <472F9888.5080301@dev.mellanox.co.il> <47302994.2030402@mellanox.co.il> Message-ID: <20071107073106.GA3605@cuprite.pathscale.com> > I think Roland will not be on the 2nd day (Friday) > Maybe we need to switch this session to Thursday - Johann is this possible? Are you referring to the IPoIB Stateless Offloads session? Yes. I can move that to Thursday. Johann From johann.george at qlogic.com Tue Nov 6 23:49:23 2007 From: johann.george at qlogic.com (Johann George) Date: Tue, 6 Nov 2007 23:49:23 -0800 Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 In-Reply-To: References: <47308DF2.70409@mellanox.co.il> Message-ID: <20071107074923.GB3605@cuprite.pathscale.com> Tziporet, > So we should assess how close we are to that goal and how we can put > OFED out of business. Could you cover this topic during your session on "OFED 1.3: Procedure and Review"? It seems that this would be the right place to bring it up and we can attempt to extend your session to allow for it. Johann On Tue, Nov 06, 2007 at 01:36:08PM -0800, Roland Dreier wrote: > I would suggest trying to figure out how to make OFED 1.4 the last > OFED release. OFED was originally supposed to be a short-term > distribution until mainstream Linux distributions caught up and were > able to distribute IB/iWARP support. 
So we should assess how close we > are to that goal and how we can put OFED out of business. > > - R. > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From merton.daughtee at sport-steger.com Wed Nov 7 00:12:29 2007 From: merton.daughtee at sport-steger.com (Jermaine Albright) Date: Wed, 7 Nov 2007 09:12:29 +0100 Subject: [ofa-general] I show you how far the rabbit hole goes Message-ID: <01c8211e$523db390$6efc0657@merton.daughtee> -------------- next part -------------- A non-text attachment was scrubbed... Name: abcd.gif Type: image/gif Size: 6656 bytes Desc: not available URL: From RAISCH at de.ibm.com Wed Nov 7 00:35:44 2007 From: RAISCH at de.ibm.com (Christoph Raisch) Date: Wed, 7 Nov 2007 09:35:44 +0100 Subject: [ofa-general] Re: problem in follow_hugetlb_page on ppc64 architecture with get_user_pages In-Reply-To: <20071106223119.GC31367@localhost.localdomain> References: <200711061606.04402.hnguyen@linux.vnet.ibm.com> <20071106223119.GC31367@localhost.localdomain> Message-ID: general-bounces at lists.openfabrics.org wrote on 06.11.2007 23:31:19: > We should cut this down to the bare necessary and fold it into the > libhugetlbfs testsuite. Well, this testcase is already pretty close to the bare minimum what's needed to run IB/RDMA queues. You can compare this to for example ibv_rc_pingpong in libibverbs... Maybe it's possible to test this with anything else than IB? > -- Gruss / Regards Christoph R From chickadeeus8 at bigpond.net.au Wed Nov 7 02:41:17 2007 From: chickadeeus8 at bigpond.net.au (Ernie Bellamy) Date: Wed, 7 Nov 2007 13:41:17 +0300 Subject: [ofa-general] Britney Spears shows it again! Message-ID: <374586134.14713252788122@bigpond.net.au> An HTML attachment was scrubbed... 
URL: From vlad at lists.openfabrics.org Wed Nov 7 03:03:46 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 7 Nov 2007 03:03:46 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071107-0200 daily build status Message-ID: <20071107110346.97847E608A8@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.17 Passed on x86_64 with linux-2.6.17 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.17 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on x86_64 with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on x86_64 with 
linux-2.6.16.21-0.8-smp Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.9-22.ELsmp Passed on ia64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-34.ELsmp Failed: From precordiality at q8usa.com Wed Nov 7 04:37:54 2007 From: precordiality at q8usa.com (Svante Hayes) Date: Wed, 07 Nov 2007 06:37:54 -0600 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c8213a$4f298500$0100007f@localhost> cheapxpsoft3. com From varietally at adriancunningham.com Wed Nov 7 04:53:27 2007 From: varietally at adriancunningham.com (Tolerant Bradley) Date: Wed, 07 Nov 2007 13:53:27 +0100 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c8213c$7f722300$0100007f@localhost> cheapxpsoft9. 
com From kliteyn at mellanox.co.il Tue Nov 6 21:20:12 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 7 Nov 2007 07:20:12 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-07:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-06 OpenSM git rev = Tue_Nov_6_19:09:16_2007 [dcad36c34e71a25d328e8c2c6fc7862751b24a34] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=519 Fail=1 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 12 Multicast IS3-128.topo Failures: 1 Multicast IS3-128.topo From ogerlitz at voltaire.com Wed Nov 7 06:00:43 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 07 Nov 2007 16:00:43 +0200 Subject: [ofa-general] [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <4730BBD7.6050403@ichips.intel.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com><4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com><47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> 
<47302D8D.8050208@voltaire.com> <4730BBD7.6050403@ichips.intel.com> Message-ID: <4731C50B.8010007@voltaire.com> Sean Hefty wrote: >> 1) I think we want to mention that rdma-cm remote address resolving >> involves issuing an ARP through the network stack (similar to the path >> query mentioning in the route resolve page, ARP is not even Infiniband >> specific) > > I intentionally did not mention ARP here. I anticipate that other > address resolution techniques will be needed. Although, I guess that I > can call out ARP use for now, and change the documentation later. > > I will start a thread on possible other techniques soon. Interesting, I'd be happy to see what you are upto... >> 2) I see you are using a "minimum RNR NAK timer" notation, what does >> "minimum" comes to say here? > This matches the wording in the IB spec: section 11.2.4.2. page 573. OK, thanks. Or. From ogerlitz at voltaire.com Wed Nov 7 06:47:01 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 07 Nov 2007 16:47:01 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <20071106211344.GC28157@cuprite.pathscale.com> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <47308F87.7030507@ichips.intel.com> <15ddcffd0711061301j34a6113en155ddd8629e5f82e@mail.gmail.com> <20071106211344.GC28157@cuprite.pathscale.com> Message-ID: <4731CFE5.8000504@voltaire.com> Johann George wrote: >>> I should be able to make this, but as soon as you start pushing sessions >>> before noon, time should probably be made for lunch. > > Perhaps we can include a working lunch for the developers who want to > and are able to meet earlier and discuss these issues? We can, but I prefer it to be just a session and not session over lunch. However, I will be fine with anything said by Sean and Roland and ofcourse everybody else that wants to join. Roland? Sean? Or. 
From swise at opengridcomputing.com Wed Nov 7 06:53:37 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 07 Nov 2007 08:53:37 -0600 Subject: [ofa-general] Re: [ewg] printk in ofed-1.3 ib_destroy_qp() In-Reply-To: <200711070828.11537.jackm@dev.mellanox.co.il> References: <4730C466.9010400@opengridcomputing.com> <4730C83F.9040801@mellanox.co.il> <200711070828.11537.jackm@dev.mellanox.co.il> Message-ID: <4731D171.7090102@opengridcomputing.com> Perhaps I'm missing something, but I don't see this in the kernel tree. Can you give me a git commit id? Thanks, Steve. Jack Morgenstein wrote: > On Tuesday 06 November 2007 22:02, Tziporet Koren wrote: >> Steve Wise wrote: >>> Should this printk be here? >>> >>>> else >>>> printk("ib_destroy_qp: type = %d, xrcd = >>>> %p\n", qp_type, xrcd); >>>> } >>>> >>>> return ret; >>>> } >> I think Jack already fixed this (there was also a bug about it) >> >> Jack? >> >> Tziporet >> > > Fixed on Oct 31. From swise at opengridcomputing.com Wed Nov 7 06:59:45 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 07 Nov 2007 08:59:45 -0600 Subject: [ofa-general] Re: [ewg] printk in ofed-1.3 ib_destroy_qp() In-Reply-To: <4731D171.7090102@opengridcomputing.com> References: <4730C466.9010400@opengridcomputing.com> <4730C83F.9040801@mellanox.co.il> <200711070828.11537.jackm@dev.mellanox.co.il> <4731D171.7090102@opengridcomputing.com> Message-ID: <4731D2E1.10800@opengridcomputing.com> nevermind. I'm confused.... :) Steve Wise wrote: > Perhaps I'm missing something, but I don't see this in the kernel tree. > Can you give me a git commit id? > > Thanks, > > Steve. > > > Jack Morgenstein wrote: >> On Tuesday 06 November 2007 22:02, Tziporet Koren wrote: >>> Steve Wise wrote: >>>> Should this printk be here? >>>> >>>>> else >>>>> printk("ib_destroy_qp: type = %d, xrcd = >>>>> %p\n", qp_type, xrcd); >>>>> } >>>>> >>>>> return ret; >>>>> } >>> I think Jack already fixed this (there was also a bug about it) >>> >>> Jack? 
>>> >>> Tziporet >>> >> >> Fixed on Oct 31. > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg From David.Chevalier at ge.com Wed Nov 7 07:00:45 2007 From: David.Chevalier at ge.com (Chevalier, David (GE Healthcare)) Date: Wed, 7 Nov 2007 10:00:45 -0500 Subject: [ofa-general] rdma cm poll() POLLHUP? Message-ID: <68D58DEFB8673048A64DE1FBE56BEE1806078EA3@CINMLVEM11.e2k.ad.ge.com> Hi, I'm using rdma cm (OFED 1.2.5, kernel 2.6.22) to establish an rdma connection: rdma_event_channel *cm_channel = rdma_create_event_channel(); rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP); ... // various rdma_bind, listen, etc ibv_comp_channel *ch = ibv_comp_channel(context); // follows ib_rdma_bw example init sequence with ibv_ calls ... // now connection is established and known to work sending/receiving data ... // and I've pre-posted some wr's to the receive queue on the server ... // next I want the server to wait for data from remote client: struct pollfd fds[1]; fds[0].fd = ch->fd; fds[0].events = POLLIN; // also have tried explicitly adding POLLHUP and POLLERR but no change fds[0].revents = 0; ret = poll(fds,1,-1); This returns with revents POLLIN as expected when the remote side sends something, but poll() does not seem to return POLLHUP (or anything) when the remote side gets killed. For a tcp socket, this would normally cause a POLLHUP event, but for rdma cm there appears to be no indication, we just wait in this case indefinitely for some input. My question is, should it be possible to get a disconnected-type notification with rdma cm, and what do I need to do to detect it? I've searched the general archive, but haven't found anything on this. 
Thanks, Dave From tziporet at dev.mellanox.co.il Wed Nov 7 07:10:05 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 07 Nov 2007 17:10:05 +0200 Subject: [ewg] Re: [ofa-general] Feedback on Developer's Summit In-Reply-To: <20071107073106.GA3605@cuprite.pathscale.com> References: <20071105033201.GA28495@cuprite.pathscale.com> <472EFE0F.5070101@voltaire.com> <15ddcffd0711051244r1511de93n67790c0047615391@mail.gmail.com> <472F9888.5080301@dev.mellanox.co.il> <47302994.2030402@mellanox.co.il> <20071107073106.GA3605@cuprite.pathscale.com> Message-ID: <4731D54D.1030704@mellanox.co.il> Johann George wrote: >> I think Roland will not be on the 2nd day (Friday) >> Maybe we need to switch this session to Thursday - Johann is this possible? >> > > Are you referring to the IPoIB Stateless Offloads session? Yes. I > can move that to Thursday. > > Johann > > yes - I referred to IPoIB Stateless Offloads Thanks, Tziporet From tziporet at dev.mellanox.co.il Wed Nov 7 08:08:26 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 07 Nov 2007 18:08:26 +0200 Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 In-Reply-To: <20071107074923.GB3605@cuprite.pathscale.com> References: <47308DF2.70409@mellanox.co.il> <20071107074923.GB3605@cuprite.pathscale.com> Message-ID: <4731E2FA.1060109@mellanox.co.il> Johann George wrote: > Tziporet, > > >> So we should assess how close we are to that goal and how we can put >> OFED out of business. >> > > Could you cover this topic during your session on "OFED 1.3: Procedure > and Review"? It seems that this would be the right place to bring it > up and we can attempt to extend your session to allow for it. 
> > I think it's more appropriate in the OFED 1.4 session. But maybe instead of talking about 1.3 status (which everybody can see from the weekly meeting reports) I should talk about OFED in the future. However, I need some input from the distros Tziporet From robert at leblancnet.us Wed Nov 7 08:11:36 2007 From: robert at leblancnet.us (Robert LeBlanc) Date: Wed, 07 Nov 2007 09:11:36 -0700 Subject: [ofa-general] Debian/Ubuntu status Message-ID: In doing some research, I've noticed that the wiki states for 1.3 that Ubuntu is under the Supported OSes for basic testing. What does that mean? I've looked through the documentation and everything still looks very RPM based. I've noticed a couple of messages in the archive directed to Vlad about the status of certain Debian/Ubuntu issues, but really didn't see any responses. We hacked our own scripts to compile 1.1, which have been working well for us (MPI, IPoIB and Lustre), but we are unsure if we are missing anything since we are new at this. It would be real nice to be able to build .debs straight from the OFED source. We would be willing to help where we can. A nice overview of the build process for RPMs would help us translate it (not extremely familiar with RPM distros). Thanks, Robert Robert LeBlanc College of Life Sciences Computer Support Brigham Young University leblanc at byu.edu (801)422-1882 From rdreier at cisco.com Wed Nov 7 10:05:17 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 07 Nov 2007 10:05:17 -0800 Subject: [ofa-general] Debian/Ubuntu status In-Reply-To: (Robert LeBlanc's message of "Wed, 07 Nov 2007 09:11:36 -0700") References: Message-ID: libibverbs and libmthca are already in the Ubuntu archive (universe I believe). I plan to try to get libmlx4 into Debian and thus into Ubuntu quite soon, and I'm thinking about packaging librdmacm as well.
From rdreier at cisco.com Wed Nov 7 10:08:51 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 07 Nov 2007 10:08:51 -0800 Subject: [ofa-general] IBV_WC_STATUS status set as 135 In-Reply-To: <20071107043020.GA3759@vt.edu> (Bharath Ramesh's message of "Tue, 6 Nov 2007 23:30:20 -0500") References: <20071106083055.GA30284@vt.edu> <20071107043020.GA3759@vt.edu> Message-ID: > I think the problem that was my SRQ buffer was hitting low watermark and > before I could post additional buffers to the SRQ my QP was going into > an error state, a race condition that I had missed. There's still something fishy -- I don't see how ibv_poll_cq() could ever set wc.status to 135. Do you understand why you weren't getting a valid error code? - R. From mshefty at ichips.intel.com Wed Nov 7 10:10:25 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 07 Nov 2007 10:10:25 -0800 Subject: [ofa-general] rdma cm poll() POLLHUP? In-Reply-To: <68D58DEFB8673048A64DE1FBE56BEE1806078EA3@CINMLVEM11.e2k.ad.ge.com> References: <68D58DEFB8673048A64DE1FBE56BEE1806078EA3@CINMLVEM11.e2k.ad.ge.com> Message-ID: <4731FF91.4010203@ichips.intel.com> > rdma_event_channel *cm_channel = rdma_create_event_channel(); > rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP); > ... // various rdma_bind, listen, etc > ibv_comp_channel *ch = ibv_comp_channel(context); // follows ib_rdma_bw > example init sequence with ibv_ calls > ... // now connection is established and known to work sending/receiving > data > ... // and I've pre-posted some wr's to the receive queue on the server > > ... 
// next I want the server to wait for data from remote client: > struct pollfd fds[1]; > fds[0].fd = ch->fd; > fds[0].events = POLLIN; // also have tried explicitly adding POLLHUP and > POLLERR but no change > fds[0].revents = 0; > ret = poll(fds,1,-1); > > This returns with revents POLLIN as expected when the remote side sends > something, > but poll() does not seem to return POLLHUP (or anything) when > the remote side gets killed. > > For a tcp socket, this would normally cause a POLLHUP event, but for > rdma cm there appears to be no indication, > we just wait in this case indefinitely for some input. > > My question is, should it be possible to get a disconnected-type > notification > with rdma cm, and what do I need to do to detect it? The rdma cm events should be reported on your 'cm_channel', separate from the ibv_comp_channel. A disconnect notification will be received when the remote side disconnects. - Sean From rdreier at cisco.com Wed Nov 7 10:10:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 07 Nov 2007 10:10:30 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <47311F34.6030004@linux.vnet.ibm.com> (Pradeep Satyanarayana's message of "Tue, 06 Nov 2007 18:13:08 -0800") References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> <47311F34.6030004@linux.vnet.ibm.com> Message-ID: > Roland, A few things that caught my eye (in blue). I hope the colours show up. No, since your email has content type text/plain, there's no way the color could show up. Could you please resend your comments in a standard format using email quoting like: > if (blah) > // code you're commenting on A comment on the code Thanks... 
From jgunthorpe at obsidianresearch.com Wed Nov 7 10:23:00 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Wed, 7 Nov 2007 11:23:00 -0700 Subject: [ofa-general] Debian/Ubuntu status In-Reply-To: References: Message-ID: <20071107182300.GB6530@obsidianresearch.com> On Wed, Nov 07, 2007 at 10:05:17AM -0800, Roland Dreier wrote: > libibverbs and libmthca are already in the Ubuntu archive (universe > I believe). I plan to try to get libmlx4 into Debian and thus into > Ubuntu quite soon, and I'm thinking about packaging librdmacm as > well. Do you know if anyone has made a WNPP for the rest of what is in OFED? The mellanox support is nice, but opensm and all the ib utilities would be even better :) If it gets into Debian, Ubuntu tends to recompile it for universe, i.e. libibverbs 1.1 is available on gutsy universe. Jason From robert at leblancnet.us Wed Nov 7 10:29:08 2007 From: robert at leblancnet.us (Robert LeBlanc) Date: Wed, 07 Nov 2007 11:29:08 -0700 Subject: [ofa-general] Debian/Ubuntu status In-Reply-To: Message-ID: Our big need is IPoIB and to be able to compile Lustre and OpenMPI against it. I'm just not sure if all this exists in ibverbs. I understand that libmthca is for the HCA which we have. Thanks, Robert On 11/7/07 11:05 AM, "Roland Dreier" wrote: libibverbs and libmthca are already in the Ubuntu archive (universe I believe). I plan to try to get libmlx4 into Debian and thus into Ubuntu quite soon, and I'm thinking about packaging librdmacm as well. Robert LeBlanc College of Life Sciences Computer Support Brigham Young University leblanc at byu.edu (801)422-1882 From bramesh at vt.edu Wed Nov 7 10:42:45 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Wed, 7 Nov 2007 13:42:45 -0500 Subject: [ofa-general] IBV_WC_STATUS status set as 135 In-Reply-To: References: <20071106083055.GA30284@vt.edu> <20071107043020.GA3759@vt.edu> Message-ID: <001501c8216e$0025b710$7702a8c0@ida> I can always reproduce the problem by creating the race condition.
I am trying to hit a deadline this week. I will try to create a test code that reproduces the same which you can run to test it. I will have it to you hopefully by the weekend. Bharath -----Original Message----- From: Roland Dreier [mailto:rdreier at cisco.com] Sent: Wednesday, November 07, 2007 1:09 PM To: Bharath Ramesh Cc: OFA-General Subject: Re: [ofa-general] IBV_WC_STATUS status set as 135 > I think the problem that was my SRQ buffer was hitting low watermark and > before I could post additional buffers to the SRQ my QP was going into > an error state, a race condition that I had missed. There's still something fishy -- I don't see how ibv_poll_cq() could ever set wc.status to 135. Do you understand why you weren't getting a valid error code? - R. From swise at opengridcomputing.com Wed Nov 7 11:12:07 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 07 Nov 2007 13:12:07 -0600 Subject: [ofa-general] Re: [ewg] OFED Nov 05 meeting summary on OFED 1.3 beta readiness In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E1CC@mtlexch01.mtl.com> References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> <6C2C79E72C305246B504CBA17B5500C90282E1CC@mtlexch01.mtl.com> Message-ID: <47320E07.6040600@opengridcomputing.com> I get this failure trying to configure the 2.6.24 tree against sles9sp3/x86_64. Is this a known issue with umad? > patching file drivers/infiniband/core/sysfs.c > Hunk #1 FAILED at 442. > 1 out of 1 hunk FAILED -- saving rejects to file drivers/infiniband/core/sysfs.c.rej > patching file drivers/infiniband/core/user_mad.c > Hunk #1 FAILED at 45. > Hunk #2 succeeded at 736 (offset 150 lines). > Hunk #3 FAILED at 830. > Hunk #4 succeeded at 1194 (offset 179 lines). > Hunk #5 succeeded at 1227 (offset 179 lines). 
> 2 out of 5 hunks FAILED -- saving rejects to file drivers/infiniband/core/user_mad.c.rej > patching file drivers/infiniband/core/uverbs_main.c > Hunk #2 succeeded at 122 (offset 7 lines). > patching file drivers/infiniband/core/umem.c > Hunk #1 succeeded at 182 (offset 85 lines). > Failed to apply patch: /usr/local/src/ofa_1_3_kernel-20071107-0842/kernel_patches/backport/2.6.5_sles9_sp3/core_4807_to_2_6_9.patch > > Failed executing /usr/local/src/ofa_1_3_kernel-20071107-0842/ofed_scripts/ofed_patch.sh > > vic11:/usr/local/src/ofa_1_3_kernel-20071107-0842 # Tziporet Koren wrote: > Note: there will be no meeting next week - CU all in SC07 > Tziporet > > > OFED Nov 05 meeting summary on OFED 1.3 beta readiness > > 1. Rebase for kernel 2.6.24-rc1: > The backport was more complicated (mainly in IPoIB). > The following kernel modules have now backports to all kernels: > mthca, mlx4, ehca, ipath, core, IPoIB, RDS > > Kernel modules that need update: Chelsio driver (cxgb3), iSER, SDP, > SRP, VNIC > > Note: Please work on this git branch: > git://git.openfabrics.org/ofed_1_3/linux-2.6.git > ofed_kernel_2_6_24_rc1 > > Schedule: > All new backport patches should be send to Vlad by Tuesday Nov 6. > On Wed (Nov 7) we will start to publish the new package based on > kernel 2.6.24. > Kernel modules that will not pass compilation will be disabled > > 2. Other Beta tasks status: > 1. Fix compilation problems on PPC SLES10 with 32 bits - Vlad > (Mellanox) - on work > 2. SPEC files should be part of each user space package - each owner > should take the spec file > 3. Fix all compilation and install issues - All > 4. management readiness and open a branch for 1.3 - Sasha > > 3. Beta schedule: > Target: do the beta release by the end of this week > (Note: Since in Israel we are not working on Friday it will be done > either on Thursday or Sunday) > > 4. 
GA schedule: > Tziporet to publish the GA schedule - after the beta release will be > done > The schedule we had is published on the Wiki at > > https://wiki.openfabrics.org/tiki-index.php?page=OFED+1.3+release+plan+a > nd+features > > 5. Integration of OFED 1.3 with Redhat: > Tziporet to talk to Doug in SC07 > > > Done tasks for the beta: > o Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) > o ibutils on SLES10 PPC64 (64 bits) - Vlad > o Add qperf test from Qlogic - Johann (Qlogic) > o Support RHEL 5 up1 - Woody & Vlad > o Apply patches that fix warning of backport patches - Vlad > o New MVAPICH package - Pasha & DK (OSU) > o Complete RDS work - Vlad (Mellanox) > o Integrate all SDP features - Jim (Mellanox) > o nes - updated backport patches - Glenn (NetEffect) > > > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg From rdreier at cisco.com Wed Nov 7 11:21:45 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 07 Nov 2007 11:21:45 -0800 Subject: [ofa-general] Debian/Ubuntu status In-Reply-To: (Robert LeBlanc's message of "Wed, 07 Nov 2007 11:29:08 -0700") References: Message-ID: > Our big need is IPoIB and to be able to compile Lustre and OpenMPI against > it. I'm just not sure if all this exists in ibverbs. I understand that > libmthca is for the HCA which we have. IPoIB is enabled in any Ubuntu/Debian kernel from the past year or two. I don't know what Lustre needs but I suspect it just needs a modern kernel. The version of openmpi in Debian lenny and Ubuntu hardy is 1.2.4 and is already built with IB support. - R. 
From tziporet at dev.mellanox.co.il Wed Nov 7 11:30:06 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 07 Nov 2007 21:30:06 +0200 Subject: [ofa-general] Re: [ewg] OFED Nov 05 meeting summary on OFED 1.3 beta readiness In-Reply-To: <47320E07.6040600@opengridcomputing.com> References: <4727426C.5090504@mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E195@mtlexch01.mtl.com> <6C2C79E72C305246B504CBA17B5500C90282E1CC@mtlexch01.mtl.com> <47320E07.6040600@opengridcomputing.com> Message-ID: <4732123E.7050003@mellanox.co.il> Steve Wise wrote: > I get this failure trying to configure the 2.6.24 tree against > sles9sp3/x86_64. Is this a known issue with umad? >> >> yes - we do not support SLES9 in OFED 1.3 Vlad will get it out of the make-dist script Tziporet From mshefty at ichips.intel.com Wed Nov 7 11:30:45 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Wed, 07 Nov 2007 11:30:45 -0800 Subject: [ofa-general] librdmacm feature request In-Reply-To: References: <1191767680.19888.310.camel@firewall.xsintricity.com> <470A632D.1050001@ichips.intel.com> <1191894507.19888.360.camel@firewall.xsintricity.com> Message-ID: <47321265.5070702@ichips.intel.com> > Hmm, how do you move events? Keep in mind that there may be an > arbitrary number of pending events that belong to other cm_ids that > are queued before the events you want to move. And you can't really > do anything too funky with the event channel fd, because you don't > want to mess up some other thread that might be waiting for events in > poll() or whatever. After spending some time looking into this, I'm not seeing an easy way to move events between fd's or guarantee that a user has permission to transfer a cm_id to a new fd. The modify requires access to two fd's, but only one is available in the write... Anyone have any ideas? Assuming that there is a solution for these issues, we already track the number of events reported for a given cm_id, in order to handle destruction. 
We _might_ be able to use this same mechanism to avoid races reporting events on the old channel after rdma_modify_channel() returned. (I'm not entirely sure on this; there are several race conditions to handle.) - Sean From rdreier at cisco.com Wed Nov 7 11:42:18 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 07 Nov 2007 11:42:18 -0800 Subject: [Fwd: [ofa-general] [PATCH] IB/ipath - Enable loopback of DR SMP responses from userspace] In-Reply-To: <1193177261.18113.250.camel@hrosenstock-ws.xsigo.com> (Hal Rosenstock's message of "Tue, 23 Oct 2007 15:07:41 -0700") References: <1193177261.18113.250.camel@hrosenstock-ws.xsigo.com> Message-ID: thanks, queued these 3 patches for 2.6.25. From rdreier at cisco.com Wed Nov 7 11:45:49 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 07 Nov 2007 11:45:49 -0800 Subject: [ofa-general] Re: [PATCH 2/2] IB/ehca: Fix static rate calculation In-Reply-To: <200711021441.50158.fenkes@de.ibm.com> (Joachim Fenkes's message of "Fri, 2 Nov 2007 15:41:49 +0200") References: <200711021432.50203.fenkes@de.ibm.com> <200711021441.50158.fenkes@de.ibm.com> Message-ID: thanks, applied both patches. From johann.george at qlogic.com Wed Nov 7 12:00:15 2007 From: johann.george at qlogic.com (Johann George) Date: Wed, 7 Nov 2007 12:00:15 -0800 Subject: [ofa-general] OpenFabrics Developer's Summit: reminder to register Message-ID: <20071107200014.GA13275@cuprite.pathscale.com> I have attached a printable agenda for the Developer's Summit. As usual, we are asking everyone who is participating to register. It allows us to pay for the cost of putting it on. You can register by clicking on the following link: http://www.acteva.com/booking.cfm?bevaid=143964 The Developer's Summit is being held on November 15-16, 2007 at the Boomtown Hotel near Reno. Dinner will be provided on Thursday as well as breakfast and lunch on Friday. Registration is $195 with a student rate of $95. 
As always, the agenda is subject to change as we attempt to accommodate the speakers and attendees. If you are presenting, please send your slides ahead of time to Jeff Becker so that we can have them all on one laptop and transition through the sessions more quickly. Thank you. Johann -------------- next part -------------- A non-text attachment was scrubbed... Name: agenda.pdf Type: application/pdf Size: 5491 bytes Desc: not available URL: From tziporet at dev.mellanox.co.il Wed Nov 7 12:26:35 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Wed, 07 Nov 2007 22:26:35 +0200 Subject: [ofa-general] Re: [ewg] OpenFabrics Developer's Summit: reminder to register In-Reply-To: <20071107200014.GA13275@cuprite.pathscale.com> References: <20071107200014.GA13275@cuprite.pathscale.com> Message-ID: <47321F7B.1060204@mellanox.co.il> Johann George wrote: > I have attached a printable agenda for the Developer's > Summit. As usual, we are asking everyone who is > participating to register. It allows us to pay for the cost > of putting it on. > Johann, I see you gave me 40 minutes twice and I don't think I need so much time (unless you think there will be a hot discussions on each session) Tziporet From pradeeps at linux.vnet.ibm.com Wed Nov 7 13:30:47 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 07 Nov 2007 13:30:47 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> <47311F34.6030004@linux.vnet.ibm.com> Message-ID: <47322E87.60409@linux.vnet.ibm.com> Sorry. Resending comments as requested. This is based on the for-2.6.25 git tree that I pulled from, and so is not in patch format. 
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, struct ipoib_cm_rx *rx) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; int i; rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL); if (!rx->rx_ring) return -ENOMEM; spin_lock_irq(&priv->lock); if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { spin_unlock_irq(&priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0); return -EINVAL; } else ++priv->cm.nonsrq_conn_qp; spin_unlock_irq(&priv->lock); for (i = 0; i < ipoib_recvq_size; ++i) { if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, rx->rx_ring[i].mapping)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; This will cause an skb leak- see generic comments about ipoib_cm_alloc_rx_skb() below goto err; } ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); if (ret) { ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " "failed for buf %d\n", i); ret = -EIO; This will cause an skb leak- see generic comments about ipoib_cm_alloc_rx_skb() below goto err; } } rx->recv_count = ipoib_recvq_size; return 0; err: spin_lock_irq(&priv->lock); --priv->cm.nonsrq_conn_qp; spin_unlock_irq(&priv->lock); kfree(rx_ring) is missing return ret; } Generic comment about ipoib_cm_alloc_rx_skb() which is true for both the srq and non srq cases (except in the receive wc handler): I find that there will be skb leakage if ipoib_cm_alloc_rx_skb() fails before all the rx skbs are allocated. We must undo those allocations and mappings. Probably we should call ipoib_cm_dev_cleanup() and free the skbs and do the unmap in that routine. 
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_rx_buf *rx_ring; unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); struct sk_buff *skb, *newskb; struct ipoib_cm_rx *p; unsigned long flags; u64 mapping[IPOIB_CM_RX_SG]; int frags; ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", wr_id, wc->status); if (unlikely(wr_id >= ipoib_recvq_size)) { printk(KERN_WARNING "Inside rx wc with wr_id=0x%x\n", wr_id); if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); ipoib_cm_start_rx_drain(priv); I do not understand why we need to call ipoib_cm_start_rx_drain(). We have already received a work completion with RX_DRAIN set. queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); spin_unlock_irqrestore(&priv->lock, flags); } else ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", wr_id, ipoib_recvq_size); return; } p = wc->qp->qp_context; if (ipoib_cm_has_srq(dev)) rx_ring = priv->cm.srq_ring; else rx_ring = p->rx_ring; skb = rx_ring[wr_id].skb; if (unlikely(wc->status != IB_WC_SUCCESS)) { ipoib_dbg(priv, "cm recv error " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); ++dev->stats.rx_dropped; if (!p->rx_ring) If rx_ring is indeed NULL, there is no point in continuing further, we will probably crash sooner or later. We should insert a BUG_ON and thus crash the system. 
goto repost; else { if (!--p->recv_count) { printk(KERN_WARNING "recv_count=0x%x\n", p->recv_count); spin_lock_irqsave(&priv->lock, flags); list_move(&p->list, &priv->cm.rx_reap_list); spin_unlock_irqrestore(&priv->lock, flags); queue_work(ipoib_workqueue, &priv->cm.rx_reap_task); } return; } } if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { spin_lock_irqsave(&priv->lock, flags); p->jiffies = jiffies; /* Move this entry to list head, but do not re-add it * if it has been moved out of list. */ if (p->state == IPOIB_CM_RX_LIVE) list_move(&p->list, &priv->cm.passive_ids); spin_unlock_irqrestore(&priv->lock, flags); } } frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); if (unlikely(!newskb)) { /* * If we can't allocate a new RX buffer, dump * this packet and reuse the old buffer. */ ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); ++dev->stats.rx_dropped; goto repost; } ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); skb->protocol = ((struct ipoib_header *) skb->data)->proto; skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); dev->last_rx = jiffies; ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; skb->dev = dev; /* XXX get correct PACKET_ type here */ skb->pkt_type = PACKET_HOST; netif_receive_skb(skb); repost: if (p->rx_ring) { Shouldn't this be if(!ipoib_cm_has_srq())? 
if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { --p->recv_count; ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " "for buf %d\n", wr_id); } } else { if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id))) ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " "for buf %d\n", wr_id); } } From bramesh at vt.edu Wed Nov 7 16:28:31 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Wed, 7 Nov 2007 19:28:31 -0500 Subject: [ofa-general] IB post send lost. Message-ID: <20071108002831.GA8339@vt.edu> I have a multi-threaded application. My application has its own message exchange protocol, it uses IB as the communication layer. I send a lot of messages which are normally of the order of few ten thousands. After sometime it seems like one message from one of the node is lost. I am using RC QP type. This causes the thread to deadlock. The other threads are still able to communicate exchanging messages without any problem over the same QP. Both ends are using SRQs and there is sufficient buffers posted so that I dont run out of buffers. I even tried doubling the buffers posted I see the same problem again. One message being lost. The ibv_post_send doesnt report any error. I am trying to get this done for a conference deadline early next week. I would really appreciate any help in suggesting any possibilities which might cause the message to be dropped without any error being returned. Thanks, Bharath --- Bharath Ramesh http://people.cs.vt.edu/~bramesh From weiny2 at llnl.gov Wed Nov 7 18:58:17 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 7 Nov 2007 18:58:17 -0800 Subject: [ofa-general] [PATCH 0/5] More log output improvements Message-ID: <20071107185817.404baaef.weiny2@llnl.gov> The following 5 patches improve osm log output mostly by adding node name printing. 
0001-Fix-log-messages-in-perfmgr-to-use-perfmgr-in-name.patch 0002-Maintain-a-name-of-the-node-in-the-monitored-node-st.patch 0003-Use-monitored-map-lookup-to-get-the-name-of-the-node.patch 0004-use-the-monitored-node-s-redirect-information-for-th.patch 0005-opensm-opensm-osm_state_mgr.c-update-log-messages-w.patch 1 and 5 are stand alone. 2-4 need to be applied in order and add a node name member to the perfmgr monitored node structure. Ira From weiny2 at llnl.gov Wed Nov 7 18:58:19 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 7 Nov 2007 18:58:19 -0800 Subject: [ofa-general] [PATCH 1/5] Fix log messages in perfmgr to use "perfmgr" in name, _not_ "pm" Message-ID: <20071107185819.2103ad72.weiny2@llnl.gov> >From e89613ccb9386c4f1360246d9171e756aff664eb Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Tue, 6 Nov 2007 18:22:06 -0800 Subject: [PATCH] Fix log messages in perfmgr to use "perfmgr" in name, _not_ "pm" Signed-off-by: Ira K. Weiny --- opensm/opensm/osm_perfmgr.c | 28 ++++++++++++++-------------- 1 files changed, 14 insertions(+), 14 deletions(-) diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index fcfb2ab..89fe7a5 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -168,7 +168,7 @@ osm_perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context, { osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; - OSM_LOG_ENTER(pm->log, osm_pm_mad_recv_callback); + OSM_LOG_ENTER(pm->log, osm_perfmgr_mad_recv_callback); osm_madw_copy_context(p_madw, p_req_madw); osm_mad_pool_put(pm->mad_pool, p_req_madw); @@ -197,7 +197,7 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw) uint64_t node_guid = context->perfmgr_context.node_guid; uint8_t port = context->perfmgr_context.port; - OSM_LOG_ENTER(pm->log, osm_pm_mad_send_err_callback); + OSM_LOG_ENTER(pm->log, osm_perfmgr_mad_send_err_callback); osm_log(pm->log, OSM_LOG_ERROR, "osm_perfmgr_mad_send_err_callback: ERR 4C02: 0x%" PRIx64 @@ 
-251,11 +251,11 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) osm_bind_info_t bind_info; ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(pm->log, osm_pm_bind); + OSM_LOG_ENTER(pm->log, osm_perfmgr_bind); if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) { osm_log(pm->log, OSM_LOG_ERROR, - "osm_pm_mad_ctrl_bind: ERR 4C03: Multiple binds not allowed\n"); + "osm_perfmgr_mad_ctrl_bind: ERR 4C03: Multiple binds not allowed\n"); status = IB_ERROR; goto Exit; } @@ -270,7 +270,7 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE; osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_pm_mad_bind: " + "osm_perfmgr_mad_bind: " "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); pm->bind_handle = osm_vendor_bind(pm->vendor, @@ -283,7 +283,7 @@ osm_perfmgr_bind(osm_perfmgr_t * const pm, const ib_net64_t port_guid) if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { status = IB_ERROR; osm_log(pm->log, OSM_LOG_ERROR, - "osm_pm_mad_bind: ERR 4C04: Vendor specific bind failed (%s)\n", + "osm_perfmgr_mad_bind: ERR 4C04: Vendor specific bind failed (%s)\n", ib_get_err_str(status)); goto Exit; } @@ -301,7 +301,7 @@ static void osm_perfmgr_mad_unbind(osm_perfmgr_t * const pm) OSM_LOG_ENTER(pm->log, osm_sa_mad_ctrl_unbind); if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { osm_log(pm->log, OSM_LOG_ERROR, - "osm_pm_mad_unbind: ERR 4C05: No previous bind\n"); + "osm_perfmgr_mad_unbind: ERR 4C05: No previous bind\n"); goto Exit; } osm_vendor_unbind(pm->bind_handle); @@ -471,13 +471,13 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) uint64_t node_guid = 0; ib_net32_t remote_qp; - OSM_LOG_ENTER(pm->log, __osm_pm_query_counters); + OSM_LOG_ENTER(pm->log, __osm_perfmgr_query_counters); cl_plock_acquire(pm->lock); node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); if (!node) { osm_log(pm->log, OSM_LOG_ERROR, - "__osm_pm_query_counters: ERR 4C07: 
Node guid 0x%" + "__osm_perfmgr_query_counters: ERR 4C07: Node guid 0x%" PRIx64 " no longer exists so removing from PerfMgr monitoring\n", mon_node->guid); @@ -493,7 +493,7 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) node->print_desc) != PERFMGR_EVENT_DB_SUCCESS) { osm_log(pm->log, OSM_LOG_ERROR, - "__osm_pm_query_counters: ERR 4C08: DB create entry failed for 0x%" + "__osm_perfmgr_query_counters: ERR 4C08: DB create entry failed for 0x%" PRIx64 " (%s) : %s\n", node_guid, node->print_desc, strerror(errno)); goto Exit; @@ -515,7 +515,7 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) lid = get_lid(node, port, mon_node); if (lid == 0) { osm_log(pm->log, OSM_LOG_DEBUG, - "__osm_pm_query_counters: WARN: node 0x%" PRIx64 + "__osm_perfmgr_query_counters: WARN: node 0x%" PRIx64 " port %d (%s): port out of range, skipping\n", cl_ntoh64(node->node_info.node_guid), port, node->print_desc); @@ -531,7 +531,7 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) gettimeofday(&(mad_context.perfmgr_context.query_start), NULL); #endif osm_log(pm->log, OSM_LOG_VERBOSE, - "__osm_pm_query_counters: Getting stats for node 0x%" + "__osm_perfmgr_query_counters: Getting stats for node 0x%" PRIx64 " port %d (lid %X) (%s)\n", node_guid, port, cl_ntoh16(lid), node->print_desc); status = @@ -539,7 +539,7 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) IB_MAD_METHOD_GET, &mad_context); if (status != IB_SUCCESS) osm_log(pm->log, OSM_LOG_ERROR, - "__osm_pm_query_counters: ERR 4C09: Failed to issue port counter query for node 0x%" + "__osm_perfmgr_query_counters: ERR 4C09: Failed to issue port counter query for node 0x%" PRIx64 " port %d (%s)\n", node->node_info.node_guid, port, node->print_desc); @@ -1250,7 +1250,7 @@ osm_perfmgr_init(osm_perfmgr_t * const pm, { ib_api_status_t status = IB_SUCCESS; - OSM_LOG_ENTER(log, osm_pm_init); + OSM_LOG_ENTER(log, 
osm_perfmgr_init); osm_log(log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Fix-log-messages-in-perfmgr-to-use-perfmgr-in-name.patch Type: application/octet-stream Size: 5657 bytes Desc: not available URL: From weiny2 at llnl.gov Wed Nov 7 18:58:20 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 7 Nov 2007 18:58:20 -0800 Subject: [ofa-general] [PATCH 2/5] Maintain a name of the node in the monitored node structure for log messages. Message-ID: <20071107185820.4f529d7b.weiny2@llnl.gov> >From f75aa716b3918989876d61863a8f03aff221405f Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Tue, 6 Nov 2007 18:34:28 -0800 Subject: [PATCH] Maintain a name of the node in the monitored node structure for log messages. Signed-off-by: Ira K. Weiny --- opensm/include/opensm/osm_perfmgr.h | 1 + opensm/opensm/osm_perfmgr.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/opensm/include/opensm/osm_perfmgr.h b/opensm/include/opensm/osm_perfmgr.h index 89d4fd8..0dd3ce4 100644 --- a/opensm/include/opensm/osm_perfmgr.h +++ b/opensm/include/opensm/osm_perfmgr.h @@ -101,6 +101,7 @@ typedef struct _monitored_node { cl_map_item_t map_item; struct _monitored_node *next; uint64_t guid; + char *name; uint32_t redir_tbl_size; redir_t redir_port[1]; /* redirection on a per port basis */ } __monitored_node_t; diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index 89fe7a5..d78d747 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -147,6 +147,9 @@ static inline void __remove_marked_nodes(osm_perfmgr_t * pm) cl_qmap_remove_item(&(pm->monitored_map), (cl_map_item_t *) (pm->remove_list)); + + if (pm->remove_list->name) + free(pm->remove_list->name); free(pm->remove_list); pm->remove_list = next; } @@ -440,12 +443,13 @@ static void __collect_guids(cl_map_item_t * const p_map_item, void *context) mon_node = 
malloc(sizeof(*mon_node) + sizeof(redir_t) * size); if (!mon_node) { osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: __collect_guids ERR 4C06: malloc failed so not handling node GUID 0x%" - PRIx64 "\n", node_guid); + "PerfMgr: __collect_guids ERR 4C06: malloc failed: not handling node %s" + "(GUID 0x%" PRIx64 ")\n", node->print_desc, node_guid); goto Exit; } memset(mon_node, 0, sizeof(*mon_node) + sizeof(redir_t) * size); mon_node->guid = node_guid; + mon_node->name = strdup(node->print_desc); mon_node->redir_tbl_size = size + 1; cl_qmap_insert(&(pm->monitored_map), node_guid, (cl_map_item_t *) mon_node); @@ -477,10 +481,10 @@ __osm_perfmgr_query_counters(cl_map_item_t * const p_map_item, void *context) node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); if (!node) { osm_log(pm->log, OSM_LOG_ERROR, - "__osm_perfmgr_query_counters: ERR 4C07: Node guid 0x%" + "__osm_perfmgr_query_counters: ERR 4C07: Node \"%s\" (guid 0x%" PRIx64 - " no longer exists so removing from PerfMgr monitoring\n", - mon_node->guid); + ") no longer exists so removing from PerfMgr monitoring\n", + mon_node->name, mon_node->guid); __mark_for_removal(pm, mon_node); goto Exit; } -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0002-Maintain-a-name-of-the-node-in-the-monitored-node-st.patch Type: application/octet-stream Size: 2751 bytes Desc: not available URL: From weiny2 at llnl.gov Wed Nov 7 18:58:22 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 7 Nov 2007 18:58:22 -0800 Subject: [ofa-general] [PATCH 4/5] use the monitored node's redirect information for this get_lid call because it is now available Message-ID: <20071107185822.1eb174f1.weiny2@llnl.gov> >From ae65158c0f481936be940899c6e4782973888215 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 7 Nov 2007 11:22:44 -0800 Subject: [PATCH] use the monitored node's redirect information for this get_lid call because it is now available Signed-off-by: Ira K. 
Weiny --- opensm/opensm/osm_perfmgr.c | 4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index 767ba8d..6b84cff 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -1024,9 +1024,7 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node, cl_plock_acquire(pm->lock); p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); - /* Could find monitored node for this rather than */ - /* potentially redoing redirection */ - lid = get_lid(p_node, port, NULL); + lid = get_lid(p_node, port, mon_node); cl_plock_release(pm->lock); if (lid == 0) { osm_log(pm->log, OSM_LOG_ERROR, -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0004-use-the-monitored-node-s-redirect-information-for-th.patch Type: application/octet-stream Size: 1047 bytes Desc: not available URL: From weiny2 at llnl.gov Wed Nov 7 18:58:21 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 7 Nov 2007 18:58:21 -0800 Subject: [ofa-general] [PATCH 3/5] Use monitored map lookup to get the name of the node for recieved mad processing in perfmgr Message-ID: <20071107185821.73af99c5.weiny2@llnl.gov> >From 003d4eb171cbad92c61fb4f0fd4c96b7efe3ff6a Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Tue, 6 Nov 2007 19:10:10 -0800 Subject: [PATCH] Use monitored map lookup to get the name of the node for recieved mad processing in perfmgr Signed-off-by: Ira K. 
Weiny --- opensm/opensm/osm_perfmgr.c | 153 +++++++++++++++++++++++-------------------- 1 files changed, 82 insertions(+), 71 deletions(-) diff --git a/opensm/opensm/osm_perfmgr.c b/opensm/opensm/osm_perfmgr.c index d78d747..767ba8d 100644 --- a/opensm/opensm/osm_perfmgr.c +++ b/opensm/opensm/osm_perfmgr.c @@ -199,36 +199,38 @@ osm_perfmgr_mad_send_err_callback(void *bind_context, osm_madw_t * p_madw) osm_madw_context_t *context = &(p_madw->context); uint64_t node_guid = context->perfmgr_context.node_guid; uint8_t port = context->perfmgr_context.port; + cl_map_item_t *p_node; + __monitored_node_t *p_mon_node; OSM_LOG_ENTER(pm->log, osm_perfmgr_mad_send_err_callback); + /* go ahead and get the monitored node struct to have the printable + * name if needed in messages + */ + if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == + cl_qmap_end(&(pm->monitored_map))) { + osm_log(pm->log, OSM_LOG_ERROR, + "osm_pc_rcv_process: ERR 4C12: GUID 0x%016" + PRIx64 " not found in monitored map\n", + node_guid); + goto Exit; + } + p_mon_node = (__monitored_node_t *) p_node; + osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_mad_send_err_callback: ERR 4C02: 0x%" PRIx64 - " port %d\n", node_guid, port); + "osm_perfmgr_mad_send_err_callback: ERR 4C02: %s (0x%" PRIx64 + ") port %d\n", p_mon_node->name, p_mon_node->guid, port); if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { - cl_map_item_t *p_node; - __monitored_node_t *p_mon_node; - /* First, find the node in the monitored map */ cl_plock_acquire(pm->lock); - if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == - cl_qmap_end(&(pm->monitored_map))) { - cl_plock_release(pm->lock); - osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_mad_send_err_callback: ERR 4C15: GUID 0x%016" - PRIx64 " not found in monitored map\n", - node_guid); - goto Exit; - } - p_mon_node = (__monitored_node_t *) p_node; /* Now, validate port number */ if (port > p_mon_node->redir_tbl_size) { cl_plock_release(pm->lock); 
osm_log(pm->log, OSM_LOG_ERROR, - "osm_perfmgr_mad_send_err_callback: ERR 4C16: Invalid port num %d for GUID 0x%016" - PRIx64 " num ports %d\n", port, node_guid, - p_mon_node->redir_tbl_size); + "osm_perfmgr_mad_send_err_callback: ERR 4C16: Invalid port num %d for %s (GUID 0x%016" + PRIx64 ") num ports %d\n", port, p_mon_node->name, + p_mon_node->guid, p_mon_node->redir_tbl_size); goto Exit; } /* Clear redirection info */ @@ -902,18 +904,19 @@ void osm_perfmgr_destroy(osm_perfmgr_t * const pm) * will be missed. **********************************************************************/ static void -osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, uint64_t node_guid, +osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t port, perfmgr_db_err_reading_t * cr, perfmgr_db_data_cnt_reading_t * dc) { perfmgr_db_err_reading_t prev_err; perfmgr_db_data_cnt_reading_t prev_dc; - if (perfmgr_db_get_prev_err(pm->db, node_guid, port, &prev_err) + if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err) != PERFMGR_EVENT_DB_SUCCESS) { osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_perfmgr_check_oob_clear: Failed to find previous error reading for 0x%" - PRIx64 " port %u\n", node_guid, port); + "osm_perfmgr_check_oob_clear: Failed to find previous " + "error reading for %s (guid 0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); return; } @@ -930,17 +933,19 @@ osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, uint64_t node_guid, cr->buffer_overrun < prev_err.buffer_overrun || cr->vl15_dropped < prev_err.vl15_dropped) { osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 4C0A: Detected an out of band error clear on node 0x%" - PRIx64 " port %u\n", node_guid, port); - perfmgr_db_clear_prev_err(pm->db, node_guid, port); + "PerfMgr: ERR 4C0A: Detected an out of band error clear " + "on %s (0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); + perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); } /* FIXME handle extended 
counters */ - if (perfmgr_db_get_prev_dc(pm->db, node_guid, port, &prev_dc) + if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc) != PERFMGR_EVENT_DB_SUCCESS) { osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_perfmgr_check_oob_clear: Failed to find previous data count reading for 0x%" - PRIx64 " port %u\n", node_guid, port); + "osm_perfmgr_check_oob_clear: Failed to find previous data count " + "reading for %s (0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); return; } @@ -949,9 +954,10 @@ osm_perfmgr_check_oob_clear(osm_perfmgr_t * pm, uint64_t node_guid, dc->xmit_pkts < prev_dc.xmit_pkts || dc->rcv_pkts < prev_dc.rcv_pkts) { osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 4C0B: Detected an out of band data counter clear on node 0x%" - PRIx64 " port %u\n", node_guid, port); - perfmgr_db_clear_prev_dc(pm->db, node_guid, port); + "PerfMgr: ERR 4C0B: Detected an out of band data counter " + "clear on node %s (0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); + perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); } } @@ -983,7 +989,7 @@ int counter_overflow_32(ib_net32_t val) * MAD to the port. 
**********************************************************************/ static void -osm_perfmgr_check_overflow(osm_perfmgr_t * pm, uint64_t node_guid, +osm_perfmgr_check_overflow(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t port, ib_port_counters_t * pc) { osm_madw_context_t mad_context; @@ -1012,26 +1018,27 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, uint64_t node_guid, ib_net16_t lid = 0; osm_log(pm->log, OSM_LOG_INFO, - "PerfMgr: Counter overflow: 0x%" PRIx64 - " port %d; clearing counters\n", node_guid, port); + "PerfMgr: Counter overflow: %s (0x%" PRIx64 + ") port %d; clearing counters\n", + mon_node->name, mon_node->guid, port); cl_plock_acquire(pm->lock); - p_node = osm_get_node_by_guid(pm->subn, cl_hton64(node_guid)); + p_node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); /* Could find monitored node for this rather than */ /* potentially redoing redirection */ lid = get_lid(p_node, port, NULL); cl_plock_release(pm->lock); if (lid == 0) { osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 4C0C: Failed to clear counters for node 0x%" - PRIx64 " port %d; failed to get lid\n", - node_guid, port); + "PerfMgr: ERR 4C0C: Failed to clear counters for %s (0x%" + PRIx64 ") port %d; failed to get lid\n", + mon_node->name, mon_node->guid, port); goto Exit; } remote_qp = get_qp(NULL, port); - mad_context.perfmgr_context.node_guid = node_guid; + mad_context.perfmgr_context.node_guid = mon_node->guid; mad_context.perfmgr_context.port = port; mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; /* clear port counters */ @@ -1040,10 +1047,11 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, uint64_t node_guid, IB_MAD_METHOD_SET, &mad_context); if (status != IB_SUCCESS) osm_log(pm->log, OSM_LOG_ERROR, - "PerfMgr: ERR 4C11: Failed to send clear counters MAD for node 0x%" - PRIx64 " port %d\n", node_guid, port); + "PerfMgr: ERR 4C11: Failed to send clear counters MAD for %s (0x%" + PRIx64 ") port %d\n", + mon_node->name, mon_node->guid, 
port); - perfmgr_db_clear_prev_dc(pm->db, node_guid, port); + perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); } Exit: @@ -1054,18 +1062,19 @@ osm_perfmgr_check_overflow(osm_perfmgr_t * pm, uint64_t node_guid, * Check values for logging of errors **********************************************************************/ static void -osm_perfmgr_log_events(osm_perfmgr_t * pm, uint64_t node_guid, uint8_t port, +osm_perfmgr_log_events(osm_perfmgr_t * pm, __monitored_node_t *mon_node, uint8_t port, perfmgr_db_err_reading_t * reading) { perfmgr_db_err_reading_t prev_read; time_t time_diff = 0; perfmgr_db_err_t err = - perfmgr_db_get_prev_err(pm->db, node_guid, port, &prev_read); + perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read); if (err != PERFMGR_EVENT_DB_SUCCESS) { osm_log(pm->log, OSM_LOG_VERBOSE, - "osm_perfmgr_log_events: Failed to find previous reading for 0x%" - PRIx64 " port %u\n", node_guid, port); + "osm_perfmgr_log_events: Failed to find previous " + "reading for %s (0x%" PRIx64 ") port %u\n", + mon_node->name, mon_node->guid, port); return; } time_diff = (reading->time - prev_read.time); @@ -1075,26 +1084,26 @@ osm_perfmgr_log_events(osm_perfmgr_t * pm, uint64_t node_guid, uint8_t port, if (reading->symbol_err_cnt > prev_read.symbol_err_cnt) osm_log(pm->log, OSM_LOG_ERROR, "osm_perfmgr_log_events: ERR 4C0D: " - "Found %" PRIu64 " Symbol errors in %lu sec on node 0x%" - PRIx64 " port %u\n", + "Found %" PRIu64 " Symbol errors in %lu sec on %s (0x%" + PRIx64 ") port %u\n", (reading->symbol_err_cnt - prev_read.symbol_err_cnt), - time_diff, node_guid, port); + time_diff, mon_node->name, mon_node->guid, port); if (reading->rcv_err > prev_read.rcv_err) osm_log(pm->log, OSM_LOG_ERROR, "osm_perfmgr_log_events: ERR 4C0E: " "Found %" PRIu64 - " Receive errors in %lu sec on node 0x%" PRIx64 - " port %u\n", (reading->rcv_err - prev_read.rcv_err), - time_diff, node_guid, port); + " Receive errors in %lu sec on %s (0x%" PRIx64 + ") port %u\n", 
(reading->rcv_err - prev_read.rcv_err), + time_diff, mon_node->name, mon_node->guid, port); if (reading->xmit_discards > prev_read.xmit_discards) osm_log(pm->log, OSM_LOG_ERROR, "osm_perfmgr_log_events: ERR 4C0F: " - "Found %" PRIu64 " Xmit Discards in %lu sec on node 0x%" - PRIx64 " port %u\n", + "Found %" PRIu64 " Xmit Discards in %lu sec on %s (0x%" + PRIx64 ") port %u\n", (reading->xmit_discards - prev_read.xmit_discards), - time_diff, node_guid, port); + time_diff, mon_node->name, mon_node->guid, port); } /********************************************************************** @@ -1114,9 +1123,24 @@ static void osm_pc_rcv_process(void *context, void *data) uint8_t port = mad_context->perfmgr_context.port; perfmgr_db_err_reading_t err_reading; perfmgr_db_data_cnt_reading_t data_reading; + cl_map_item_t *p_node; + __monitored_node_t *p_mon_node; OSM_LOG_ENTER(pm->log, osm_pc_rcv_process); + /* go ahead and get the monitored node struct to have the printable + * name if needed in messages + */ + if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == + cl_qmap_end(&(pm->monitored_map))) { + osm_log(pm->log, OSM_LOG_ERROR, + "osm_pc_rcv_process: ERR 4C12: GUID 0x%016" + PRIx64 " not found in monitored map\n", + node_guid); + goto Exit; + } + p_mon_node = (__monitored_node_t *) p_node; + osm_log(pm->log, OSM_LOG_VERBOSE, "osm_pc_rcv_process: Processing received MAD status 0x%x context 0x%" PRIx64 " port %u\n", p_mad->status, node_guid, port); @@ -1127,8 +1151,6 @@ static void osm_pc_rcv_process(void *context, void *data) ib_class_port_info_t *cpi = (ib_class_port_info_t *) & (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); - cl_map_item_t *p_node; - __monitored_node_t *p_mon_node; ib_api_status_t status; osm_log(pm->log, OSM_LOG_VERBOSE, @@ -1152,18 +1174,7 @@ static void osm_pc_rcv_process(void *context, void *data) goto ReIssue; /* LID redirection support (easier than GID redirection) */ - /* First, find the node in the monitored map */ 
cl_plock_acquire(pm->lock); - if ((p_node = cl_qmap_get(&(pm->monitored_map), node_guid)) == - cl_qmap_end(&(pm->monitored_map))) { - cl_plock_release(pm->lock); - osm_log(pm->log, OSM_LOG_ERROR, - "osm_pc_rcv_process: ERR 4C12: GUID 0x%016" - PRIx64 " not found in monitored map\n", - node_guid); - goto Exit; - } - p_mon_node = (__monitored_node_t *) p_node; /* Now, validate port number */ if (port > p_mon_node->redir_tbl_size) { cl_plock_release(pm->lock); @@ -1203,11 +1214,11 @@ static void osm_pc_rcv_process(void *context, void *data) /* detect an out of band clear on the port */ if (mad_context->perfmgr_context.mad_method != IB_MAD_METHOD_SET) - osm_perfmgr_check_oob_clear(pm, node_guid, port, + osm_perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading, &data_reading); /* log any critical events from this reading */ - osm_perfmgr_log_events(pm, node_guid, port, &err_reading); + osm_perfmgr_log_events(pm, p_mon_node, port, &err_reading); if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { perfmgr_db_add_err_reading(pm->db, node_guid, port, @@ -1219,7 +1230,7 @@ static void osm_pc_rcv_process(void *context, void *data) perfmgr_db_clear_prev_dc(pm->db, node_guid, port); } - osm_perfmgr_check_overflow(pm, node_guid, port, wire_read); + osm_perfmgr_check_overflow(pm, p_mon_node, port, wire_read); #if ENABLE_OSM_PERF_MGR_PROFILE do { -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0003-Use-monitored-map-lookup-to-get-the-name-of-the-node.patch Type: application/octet-stream Size: 13485 bytes Desc: not available URL: From weiny2 at llnl.gov Wed Nov 7 18:58:23 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 7 Nov 2007 18:58:23 -0800 Subject: [ofa-general] [PATCH 5/5] opensm/opensm/osm_state_mgr.c: update log messages with node names Message-ID: <20071107185823.4fae4ea9.weiny2@llnl.gov> >From 8a3bd3c02ad07f90da5d9ac92752d0a9e9c45742 Mon Sep 17 00:00:00 2001 From: Ira K. 
Weiny Date: Wed, 7 Nov 2007 16:32:39 -0800 Subject: [PATCH] opensm/opensm/osm_state_mgr.c: update log messages with node names Signed-off-by: Ira K. Weiny --- opensm/opensm/osm_state_mgr.c | 41 +++++++++++++++++++++++++---------------- 1 files changed, 25 insertions(+), 16 deletions(-) diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index d0ce37d..c849741 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -453,8 +453,9 @@ static void __osm_state_mgr_reset_node_count(IN cl_map_item_t * if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { osm_log(p_mgr->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_reset_node_count: " - "Resetting discovery count for node 0x%" PRIx64 "\n", - cl_ntoh64(osm_node_get_node_guid(p_node))); + "Resetting discovery count for node 0x%" PRIx64 "(%s)\n", + cl_ntoh64(osm_node_get_node_guid(p_node)), + p_node->print_desc); } p_node->discovery_count = 0; @@ -471,8 +472,9 @@ static void __osm_state_mgr_reset_port_count(IN cl_map_item_t * if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { osm_log(p_mgr->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_reset_port_count: " - "Resetting discovery count for port 0x%" PRIx64 "\n", - cl_ntoh64(osm_port_get_guid(p_port))); + "Resetting discovery count for port 0x%" PRIx64 "(node %s)\n", + cl_ntoh64(osm_port_get_guid(p_port)), + p_port->p_node ? 
p_port->p_node->print_desc : "UNKNOWN"); } p_port->discovery_count = 0; @@ -490,8 +492,9 @@ __osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item, if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) { osm_log(p_mgr->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_reset_switch_count: " - "Resetting discovery count for switch 0x%" PRIx64 "\n", - cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); + "Resetting discovery count for switch 0x%" PRIx64 " (%s)\n", + cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), + p_sw->p_node->print_desc); } p_sw->discovery_count = 0; @@ -749,8 +752,9 @@ static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_state_mgr_t * if (!p_port) { osm_log(p_mgr->p_log, OSM_LOG_ERROR, "__osm_state_mgr_is_sm_port_down: ERR 3309: " - "SM port with GUID:%016" PRIx64 " is unknown\n", - cl_ntoh64(port_guid)); + "SM port with GUID:%016" PRIx64 " (%s) is unknown\n", + cl_ntoh64(port_guid), + p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN"); state = IB_LINK_DOWN; CL_PLOCK_RELEASE(p_mgr->p_lock); goto Exit; @@ -899,8 +903,9 @@ static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_state_mgr_t * default: osm_log(p_mgr->p_log, OSM_LOG_ERROR, - "__osm_state_mgr_sweep_hop_1: ERR 3313: Unknown node type %d\n", - osm_node_get_type(p_node)); + "__osm_state_mgr_sweep_hop_1: ERR 3313: Unknown node type %d (%s)\n", + osm_node_get_type(p_node), + p_node->print_desc); } Exit: @@ -958,9 +963,9 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_state_mgr_t * "__osm_state_mgr_light_sweep_start: ERR 0108: " "Unknown remote side for node 0x%016" PRIx64 - " port %u. Adding to light sweep sampling list\n", + "(%s) port %u. 
Adding to light sweep sampling list\n", cl_ntoh64(osm_node_get_node_guid - (p_node)), port_num); + (p_node)), p_node->print_desc, port_num); osm_dump_dr_path(p_mgr->p_log, osm_physp_get_dr_path_ptr @@ -1084,7 +1089,8 @@ static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, "__osm_state_mgr_exists_other_master_sm: " "Found remote master SM with guid:0x%016" PRIx64 - "\n", cl_ntoh64(p_sm->smi.guid)); + " (node %s)\n", cl_ntoh64(p_sm->smi.guid), + p_sm->p_port->p_node ? p_sm->p_port->p_node->print_desc : "UNKNOWN"); p_sm_res = p_sm; goto Exit; } @@ -1144,8 +1150,10 @@ static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_state_mgr_t * if (p_highest_sm != NULL) { osm_log(p_mgr->p_log, OSM_LOG_DEBUG, "__osm_state_mgr_get_highest_sm: " - "Found higher SM with guid: %016" PRIx64 "\n", - cl_ntoh64(p_highest_sm->smi.guid)); + "Found higher SM with guid: %016" PRIx64 " (node %s)\n", + cl_ntoh64(p_highest_sm->smi.guid), + p_highest_sm->p_port->p_node ? + p_highest_sm->p_port->p_node->print_desc : "UNKNOWN"); } OSM_LOG_EXIT(p_mgr->p_log); @@ -1196,7 +1204,8 @@ __osm_state_mgr_send_handover(IN osm_state_mgr_t * const p_mgr, osm_log(p_mgr->p_log, OSM_LOG_VERBOSE, "__osm_state_mgr_send_handover: " "Handing over mastership. Updating sm_state_mgr master_guid: %016" - PRIx64 "\n", cl_ntoh64(p_port->guid)); + PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid), + p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN"); p_mgr->p_sm_state_mgr->master_guid = p_port->guid; context.smi_context.port_guid = p_port->guid; -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0005-opensm-opensm-osm_state_mgr.c-update-log-messages-w.patch Type: application/octet-stream Size: 5038 bytes Desc: not available URL: From weiny2 at llnl.gov Wed Nov 7 19:16:03 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 7 Nov 2007 19:16:03 -0800 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071105193358.GM8766@sashak.voltaire.com> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> Message-ID: <20071107191603.490b3121.weiny2@llnl.gov> On Mon, 5 Nov 2007 21:33:58 +0200 Sasha Khapyorsky wrote: > On 10:32 Mon 05 Nov , Ira Weiny wrote: > > On Sun, 4 Nov 2007 18:07:43 +0200 > > Sasha Khapyorsky wrote: > > > So --with-node-name-map=file configure option is removed completely from > > > diags and how mapping will work by default is only depens on how OpenSM > > > (which is separate package) was configured. > > > > > > Do you think it would be useful useful to keep default map name as > > > configure option for infiniband-diags and to not depend from OpenSM > > > configuration (it seems easy doable - we could put some > > > default_node_name_map_file variable in ibdiag_common.c or so)? And then > > > we probably don't need such configure option for OpenSM? > > > > > > > Actually I would prefer a default which does not have to be configured. Would > > this be acceptable? > > Yes. I'm fine this this. > > > Perhaps /ib-node-name-map? > > Or under /etc/ofa/ ? I think is appropriate. OFED can specify /etc/ofa if they wish. The patch is attached, Ira >From f395cae1c20daa15ce4ab5674df266bf28c2d318 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 7 Nov 2007 19:12:57 -0800 Subject: [PATCH] Specify a default node-name-map of /ib-node-name-map Signed-off-by: Ira K. 
Weiny --- opensm/configure.in | 19 +++++++++---------- 1 files changed, 9 insertions(+), 10 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index abf36c5..2d5d72c 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -70,7 +70,9 @@ OPENIB_OSM_CONSOLE_SOCKET_SEL dnl select performance manager or not OPENIB_OSM_PERF_MGR_SEL -dnl Check for the specification of a default node name map file +dnl Check for a different default node name map file +dnl default {sysconfdir}/ib-node-name-map +NODENAMEMAPFILE=ib-node-name-map AC_MSG_CHECKING(for --with-node-name-map ) AC_ARG_WITH(node-name-map, AC_HELP_STRING([--with-node-name-map=file], @@ -86,15 +88,12 @@ AC_ARG_WITH(node-name-map, ) AC_MSG_RESULT(${withnodenamemap=no}) -if test $withnodenamemap = "yes"; then - NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" - NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" - NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" - - AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, - ["$NODENAMEMAP"], - [Define a default node name map file]) -fi +NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" +NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" +NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, + ["$NODENAMEMAP"], + [Define a default node name map file]) dnl select example event plugin or not OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Specify-a-default-node-name-map-of-sysconfdir-ib-n.patch Type: application/octet-stream Size: 1797 bytes Desc: not available URL: From talecarrying at dkrims.com Wed Nov 7 20:16:43 2007 From: talecarrying at dkrims.com (Norbert Smith) Date: Thu, 08 Nov 2007 07:16:43 +0300 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c821bd$6493c300$0100007f@localhost> cheapxpsoft3. 
com From dotanb at dev.mellanox.co.il Wed Nov 7 22:07:41 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Thu, 08 Nov 2007 08:07:41 +0200 Subject: [ofa-general] IB post send lost. In-Reply-To: <20071108002831.GA8339@vt.edu> References: <20071108002831.GA8339@vt.edu> Message-ID: <4732A7AD.4020405@dev.mellanox.co.il> Hi. Bharath Ramesh wrote: > I have a multi-threaded application. My application has its own message > exchange protocol, it uses IB as the communication layer. I send a lot > of messages which are normally of the order of few ten thousands. After > sometime it seems like one message from one of the node is lost. I am > using RC QP type. This causes the thread to deadlock. The other threads > are still able to communicate exchanging messages without any problem > over the same QP. Both ends are using SRQs and there is sufficient > buffers posted so that I dont run out of buffers. I even tried doubling > the buffers posted I see the same problem again. One message being lost. > The ibv_post_send doesnt report any error. I am trying to get this done > for a conference deadline early next week. I would really appreciate any > help in suggesting any possibilities which might cause the message to be > dropped without any error being returned. > If you don't have any bugs in your code, the described scenario should work. I need some more info in order to try to help you: Do you use the same QP from several threads (and post send from all of them)? How do you poll the CQ (several threads/one)? which HW/SW do you use? thanks Dotan From bramesh at vt.edu Wed Nov 7 22:19:10 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Thu, 8 Nov 2007 01:19:10 -0500 Subject: [ofa-general] IB post send lost. In-Reply-To: <4732A7AD.4020405@dev.mellanox.co.il> References: <20071108002831.GA8339@vt.edu> <4732A7AD.4020405@dev.mellanox.co.il> Message-ID: <20071108061910.GA9863@vt.edu> * Dotan Barak (dotanb at dev.mellanox.co.il) wrote: > Hi. 
> > Bharath Ramesh wrote: >> I have a multi-threaded application. My application has its own message >> exchange protocol, it uses IB as the communication layer. I send a lot >> of messages which are normally of the order of few ten thousands. After >> sometime it seems like one message from one of the node is lost. I am >> using RC QP type. This causes the thread to deadlock. The other threads >> are still able to communicate exchanging messages without any problem >> over the same QP. Both ends are using SRQs and there is sufficient >> buffers posted so that I dont run out of buffers. I even tried doubling >> the buffers posted I see the same problem again. One message being lost. >> The ibv_post_send doesnt report any error. I am trying to get this done >> for a conference deadline early next week. I would really appreciate any >> help in suggesting any possibilities which might cause the message to be >> dropped without any error being returned. >> > If you don't have any bugs in your code, the described scenario should > work. > > I need some more info in order to try to help you: > > Do you use the same QP from several threads (and post send from all of > them)? Yes, I use the same the QP from three threads. The application has close to 5 threads. The receives are handled by a single thread. Most of the sends are posted by a single thread. Occasionally a third thread posts a few sends to the QP. The same QP is also used for RDMA Writes. Majority of the RDMA Writes are also performed by the same thread that posts majority of the send messages. > How do you poll the CQ (several threads/one)? I have two CQs, one for receive and the other for send. The receive CQ is polled only by the receive thread. The send CQ is polled by the three threads. Occasionally by the receiver thread to clear out an send CQEs because I use IBV_SEND_SIGNALED for every 16 IBV_SEND_INLINEs. Otherwise the send CQ is polled by the single thread that does majority of the sends. 
Occasionally the third thread when doing a send might poll the send CQ as well for completion CQE in case of a RDMA Write. > > which HW/SW do you use? I am using Yellow Dog Linux 5.0 on Apple Xserves. Thanks, Bharath --- Bharath Ramesh http://people.cs.vt.edu/~bramesh From vlad at dev.mellanox.co.il Thu Nov 8 01:50:00 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 08 Nov 2007 11:50:00 +0200 Subject: [ofa-general] Debian/Ubuntu status In-Reply-To: References: Message-ID: <4732DBC8.1040105@dev.mellanox.co.il> Robert LeBlanc wrote: > In doing some research, I've noticed that the wiki states for 1.3 that > Ubuntu is under the Supported OSes for basic testing. What does that mean? > I've looked through the documentation and everything still looks very RPM > based. I've noticed a couple of messages in the archive directed to Vlad > about the status of certain Debian/Ubuntu issues, but really didn't see any > responses. We hacked our own scripts to compile 1.1, which have been working > well for us (MPI, IPoIB and Lustre), but we are unsure if we are missing > anything since we are new at this. It would be real nice to be able to build > .debs straight from the OFED source. We would be willing to help where we > can. A nice overview of the build process for RPMs would help us translate > it (not extremely familiar with RPM distros). > > Thanks, > Robert > > Robert LeBlanc > College of Life Sciences Computer Support > Brigham Young University > leblanc at byu.edu > (801)422-1882 > Hi Robert, Currently deb packages are not supported by OFED, but you can try to install OFED-1.3 using RPMs. First, install 'rpm' package on your Ubuntu and then run: OFED-1.3/install.pl --without-depcheck. 
Regards, Vladimir From vlad at lists.openfabrics.org Thu Nov 8 03:03:22 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 8 Nov 2007 03:03:22 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071108-0200 daily build status Message-ID: <20071108110322.8A37CE603A5@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.18 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.17 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.20 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on ia64 with 
linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: From sashak at voltaire.com Thu Nov 8 05:18:40 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 8 Nov 2007 15:18:40 +0200 Subject: [ofa-general] [PATCH] opensm: print error details when OpenSM opt cache file open fails Message-ID: <20071108131840.GI6153@sashak.voltaire.com> Be more verbose about OpenSM options cache file opening failures. Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_subnet.c | 24 +++++++++++++++++++----- 1 files changed, 19 insertions(+), 5 deletions(-) diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index 7114305..8da3139 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -702,8 +702,14 @@ ib_api_status_t osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn) strcat(file_name, "/opensm.opts"); opts_file = fopen(file_name, "r"); - if (!opts_file) - return (errno == ENOENT) ? IB_SUCCESS : IB_ERROR; + if (!opts_file) { + if (errno == ENOENT) + return IB_SUCCESS; + osm_log(&p_subn->p_osm->log, OSM_LOG_ERROR, + "cannot open file \'%s\': %s\n", + file_name, strerror(errno)); + return IB_ERROR; + } while (fgets(line, 1023, opts_file) != NULL) { /* get the first token */ @@ -1055,8 +1061,13 @@ ib_api_status_t osm_subn_parse_conf_file(IN osm_subn_opt_t * const p_opts) strcat(file_name, "/opensm.opts"); opts_file = fopen(file_name, "r"); - if (!opts_file) - return (errno == ENOENT) ? 
IB_SUCCESS : IB_ERROR; + if (!opts_file) { + if (errno == ENOENT) + return IB_SUCCESS; + printf("cannot open file \'%s\': %s\n", + file_name, strerror(errno)); + return IB_ERROR; + } while (fgets(line, 1023, opts_file) != NULL) { /* get the first token */ @@ -1298,8 +1309,11 @@ ib_api_status_t osm_subn_write_conf_file(IN osm_subn_opt_t * const p_opts) strcat(file_name, "/opensm.opts"); opts_file = fopen(file_name, "w"); - if (!opts_file) + if (!opts_file) { + printf("cannot open file \'%s\' for writing: %s\n", + file_name, strerror(errno)); return IB_ERROR; + } fprintf(opts_file, "#\n# DEVICE ATTRIBUTES OPTIONS\n#\n" -- 1.5.3.rc2.29.gc4640f From tbfvoc at blueplanettech.com Thu Nov 8 05:38:34 2007 From: tbfvoc at blueplanettech.com (Cole Winston) Date: Thu, 8 Nov 2007 16:38:34 +0300 Subject: [ofa-general] Style up your life with classy replica watches! Message-ID: <01c82225$cdd65810$f3596a4e@tbfvoc> Genuine luxury watches cost a fortune, cheap fakes you will find on the streets are of low quality. We offer you truly undetectable replica watches which look classy and professional. You will definitely find the watch to your taste from our list of brands. The perfect place to buy replica watches as we offer best quality, excellent service, money back guarantee in case you are not satisfied and have many other strong points, such as fast delivery, helpful and caring customer service. http://joomaya.com Quality replica watch is a perfect gift! From synarthrosis at laserdrivers.com Thu Nov 8 06:26:05 2007 From: synarthrosis at laserdrivers.com (Dion Bryan) Date: Thu, 08 Nov 2007 15:26:05 +0100 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c82212$515f1280$0100007f@localhost> cheapxpsoft4. com From dotanb at dev.mellanox.co.il Thu Nov 8 06:56:35 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Thu, 08 Nov 2007 16:56:35 +0200 Subject: [ofa-general] IB post send lost. 
In-Reply-To: <20071108061910.GA9863@vt.edu> References: <20071108002831.GA8339@vt.edu> <4732A7AD.4020405@dev.mellanox.co.il> <20071108061910.GA9863@vt.edu> Message-ID: <473323A3.1020500@dev.mellanox.co.il> Hi. i need some more info. Which IB HW do you use? (you can get this info from ibv_devinfo) Which IB SW do you use? (you can get this info from ofed_info) Dotan Bharath Ramesh wrote: > * Dotan Barak (dotanb at dev.mellanox.co.il) wrote: > >> Hi. >> >> Bharath Ramesh wrote: >> >>> I have a multi-threaded application. My application has its own message >>> exchange protocol, it uses IB as the communication layer. I send a lot >>> of messages which are normally of the order of few ten thousands. After >>> sometime it seems like one message from one of the node is lost. I am >>> using RC QP type. This causes the thread to deadlock. The other threads >>> are still able to communicate exchanging messages without any problem >>> over the same QP. Both ends are using SRQs and there is sufficient >>> buffers posted so that I dont run out of buffers. I even tried doubling >>> the buffers posted I see the same problem again. One message being lost. >>> The ibv_post_send doesnt report any error. I am trying to get this done >>> for a conference deadline early next week. I would really appreciate any >>> help in suggesting any possibilities which might cause the message to be >>> dropped without any error being returned. >>> >>> >> If you don't have any bugs in your code, the described scenario should >> work. >> >> I need some more info in order to try to help you: >> >> Do you use the same QP from several threads (and post send from all of >> them)? >> > > Yes, I use the same the QP from three threads. The application has close > to 5 threads. The receives are handled by a single thread. Most of the > sends are posted by a single thread. Occasionally a third thread posts a > few sends to the QP. The same QP is also used for RDMA Writes. 
Majority > of the RDMA Writes are also performed by the same thread that posts > majority of the send messages. > > >> How do you poll the CQ (several threads/one)? >> > > I have two CQs, one for receive and the other for send. The receive CQ > is polled only by the receive thread. The send CQ is polled by the three > threads. Occasionally by the receiver thread to clear out an send CQEs > because I use IBV_SEND_SIGNALED for every 16 IBV_SEND_INLINEs. Otherwise > the send CQ is polled by the single thread that does majority of the > sends. Occasionally the third thread when doing a send might poll the > send CQ as well for completion CQE in case of a RDMA Write. > > >> which HW/SW do you use? >> > > I am using Yellow Dog Linux 5.0 on Apple Xserves. > > Thanks, > > Bharath > > --- > Bharath Ramesh http://people.cs.vt.edu/~bramesh > > > From yangdong at ncic.ac.cn Thu Nov 8 07:05:10 2007 From: yangdong at ncic.ac.cn (yangdong) Date: Thu, 08 Nov 2007 23:05:10 +0800 Subject: [ofa-general] ibv_post_recv error Message-ID: <473325A6.1000106@ncic.ac.cn> when i use ibv_poll_cq to poll recv op completion ( while ((ret = ibv_poll_cq(cq, 1, &wc)) > 0) {...} ), i can find wc.status != IBV_WC_SUCCESS, it is IBV_WC_LOC_PROT_ERR, could someone tell me what means of kinds of ibv_wc_status? 
enum ibv_wc_status { IBV_WC_SUCCESS, IBV_WC_LOC_LEN_ERR, IBV_WC_LOC_QP_OP_ERR, IBV_WC_LOC_EEC_OP_ERR, IBV_WC_LOC_PROT_ERR, IBV_WC_WR_FLUSH_ERR, IBV_WC_MW_BIND_ERR, IBV_WC_BAD_RESP_ERR, IBV_WC_LOC_ACCESS_ERR, IBV_WC_REM_INV_REQ_ERR, IBV_WC_REM_ACCESS_ERR, IBV_WC_REM_OP_ERR, IBV_WC_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR, IBV_WC_LOC_RDD_VIOL_ERR, IBV_WC_REM_INV_RD_REQ_ERR, IBV_WC_REM_ABORT_ERR, IBV_WC_INV_EECN_ERR, IBV_WC_INV_EEC_STATE_ERR, IBV_WC_FATAL_ERR, IBV_WC_RESP_TIMEOUT_ERR, IBV_WC_GENERAL_ERR }; From yangdong at ncic.ac.cn Thu Nov 8 07:07:01 2007 From: yangdong at ncic.ac.cn (yangdong) Date: Thu, 08 Nov 2007 23:07:01 +0800 Subject: [ofa-general] ibv_post_recv error Message-ID: <47332615.1020602@ncic.ac.cn> when i use ibv_poll_cq to poll recv op completion ( while ((ret = ibv_poll_cq(cq, 1, &wc)) > 0) {...} ), i can find wc.status != IBV_WC_SUCCESS, it is IBV_WC_LOC_PROT_ERR, could someone tell me what means of kinds of ibv_wc_status? enum ibv_wc_status { IBV_WC_SUCCESS, IBV_WC_LOC_LEN_ERR, IBV_WC_LOC_QP_OP_ERR, IBV_WC_LOC_EEC_OP_ERR, IBV_WC_LOC_PROT_ERR, IBV_WC_WR_FLUSH_ERR, IBV_WC_MW_BIND_ERR, IBV_WC_BAD_RESP_ERR, IBV_WC_LOC_ACCESS_ERR, IBV_WC_REM_INV_REQ_ERR, IBV_WC_REM_ACCESS_ERR, IBV_WC_REM_OP_ERR, IBV_WC_RETRY_EXC_ERR, IBV_WC_RNR_RETRY_EXC_ERR, IBV_WC_LOC_RDD_VIOL_ERR, IBV_WC_REM_INV_RD_REQ_ERR, IBV_WC_REM_ABORT_ERR, IBV_WC_INV_EECN_ERR, IBV_WC_INV_EEC_STATE_ERR, IBV_WC_FATAL_ERR, IBV_WC_RESP_TIMEOUT_ERR, IBV_WC_GENERAL_ERR }; From dotanb at dev.mellanox.co.il Thu Nov 8 08:16:32 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Thu, 08 Nov 2007 18:16:32 +0200 Subject: [ofa-general] ibv_post_recv error In-Reply-To: <47332615.1020602@ncic.ac.cn> References: <47332615.1020602@ncic.ac.cn> Message-ID: <47333660.5020603@dev.mellanox.co.il> Please check if there is a mismatch in the Work Request between the lkey and the address. (the address that you gave is not part of this Memory Region which "own" this lkey). 
Dotan yangdong wrote: > when i use ibv_poll_cq to poll recv op completion ( while ((ret = > ibv_poll_cq(cq, 1, &wc)) > 0) {...} ), i can find wc.status != > IBV_WC_SUCCESS, > it is IBV_WC_LOC_PROT_ERR, could someone tell me what means of kinds of > ibv_wc_status? > enum ibv_wc_status { > IBV_WC_SUCCESS, > IBV_WC_LOC_LEN_ERR, > IBV_WC_LOC_QP_OP_ERR, > IBV_WC_LOC_EEC_OP_ERR, > IBV_WC_LOC_PROT_ERR, > IBV_WC_WR_FLUSH_ERR, > IBV_WC_MW_BIND_ERR, > IBV_WC_BAD_RESP_ERR, > IBV_WC_LOC_ACCESS_ERR, > IBV_WC_REM_INV_REQ_ERR, > IBV_WC_REM_ACCESS_ERR, > IBV_WC_REM_OP_ERR, > IBV_WC_RETRY_EXC_ERR, > IBV_WC_RNR_RETRY_EXC_ERR, > IBV_WC_LOC_RDD_VIOL_ERR, > IBV_WC_REM_INV_RD_REQ_ERR, > IBV_WC_REM_ABORT_ERR, > IBV_WC_INV_EECN_ERR, > IBV_WC_INV_EEC_STATE_ERR, > IBV_WC_FATAL_ERR, > IBV_WC_RESP_TIMEOUT_ERR, > IBV_WC_GENERAL_ERR > }; > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From bramesh at vt.edu Thu Nov 8 09:20:50 2007 From: bramesh at vt.edu (Bharath Ramesh) Date: Thu, 8 Nov 2007 12:20:50 -0500 Subject: [ofa-general] IB post send lost. In-Reply-To: <473323A3.1020500@dev.mellanox.co.il> References: <20071108002831.GA8339@vt.edu> <4732A7AD.4020405@dev.mellanox.co.il> <20071108061910.GA9863@vt.edu> <473323A3.1020500@dev.mellanox.co.il> Message-ID: <20071108172050.GA12397@vt.edu> * Dotan Barak (dotanb at dev.mellanox.co.il) wrote: > Hi. > > i need some more info. > > Which IB HW do you use? > (you can get this info from ibv_devinfo) The IB HW used are the Mellanox Cougar Cards. output of ibv_devinfo: hca_id: mthca0 fw_ver: 3.5.0 node_guid: 0002:c901:08fe:76a0 sys_image_guid: 0002:c901:08fe:76a3 vendor_id: 0x02c9 vendor_part_id: 23108 hw_ver: 0xA1 board_id: MT_0000000001 phys_port_cnt: 2 > > Which IB SW do you use? 
> (you can get this info from ofed_info) The IB SW I am using is OFED 1.2. The linux kernel used are 2.6.21.1-xserve I am not sure if this might help. Basically every time I send a message I wait for an ack to be received. I wait on a pthread_cond_wait. Since the message gets dropped my thread is blocked on pthread_cond_wait forever. The other thread which occasionally sends messages is still able to send/receive messages over the QP. Block for the ack and receive the ack while this thread never receives the ack because of the dropped message. To verify if the messages were being dropped I printed every single message being sent and received on either ends. The dropped message is sent but the receiver never receives it. Thanks, Bharath > > > Dotan > > Bharath Ramesh wrote: >> * Dotan Barak (dotanb at dev.mellanox.co.il) wrote: >> >>> Hi. >>> >>> Bharath Ramesh wrote: >>> >>>> I have a multi-threaded application. My application has its own message >>>> exchange protocol, it uses IB as the communication layer. I send a lot >>>> of messages which are normally of the order of few ten thousands. After >>>> sometime it seems like one message from one of the node is lost. I am >>>> using RC QP type. This causes the thread to deadlock. The other threads >>>> are still able to communicate exchanging messages without any problem >>>> over the same QP. Both ends are using SRQs and there is sufficient >>>> buffers posted so that I dont run out of buffers. I even tried doubling >>>> the buffers posted I see the same problem again. One message being lost. >>>> The ibv_post_send doesnt report any error. I am trying to get this done >>>> for a conference deadline early next week. I would really appreciate any >>>> help in suggesting any possibilities which might cause the message to be >>>> dropped without any error being returned. >>>> >>> If you don't have any bugs in your code, the described scenario should >>> work. 
>>> >>> I need some more info in order to try to help you: >>> >>> Do you use the same QP from several threads (and post send from all of >>> them)? >>> >> >> Yes, I use the same the QP from three threads. The application has close >> to 5 threads. The receives are handled by a single thread. Most of the >> sends are posted by a single thread. Occasionally a third thread posts a >> few sends to the QP. The same QP is also used for RDMA Writes. Majority >> of the RDMA Writes are also performed by the same thread that posts >> majority of the send messages. >> >> >>> How do you poll the CQ (several threads/one)? >>> >> >> I have two CQs, one for receive and the other for send. The receive CQ >> is polled only by the receive thread. The send CQ is polled by the three >> threads. Occasionally by the receiver thread to clear out an send CQEs >> because I use IBV_SEND_SIGNALED for every 16 IBV_SEND_INLINEs. Otherwise >> the send CQ is polled by the single thread that does majority of the >> sends. Occasionally the third thread when doing a send might poll the >> send CQ as well for completion CQE in case of a RDMA Write. >> >> >>> which HW/SW do you use? >>> >> >> I am using Yellow Dog Linux 5.0 on Apple Xserves. 
>> >> Thanks, >> >> Bharath >> >> --- >> Bharath Ramesh >> http://people.cs.vt.edu/~bramesh >> >> >> > --- Bharath Ramesh http://people.cs.vt.edu/~bramesh From kliteyn at mellanox.co.il Wed Nov 7 21:18:25 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 8 Nov 2007 07:18:25 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-08:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-07 OpenSM git rev = Tue_Nov_6_19:09:16_2007 [dcad36c34e71a25d328e8c2c6fc7862751b24a34] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From swise at opengridcomputing.com Thu Nov 8 11:56:52 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 08 Nov 2007 13:56:52 -0600 Subject: [ofa-general] ofed--1.3 rdma_connect problems Message-ID: <47336A04.7090806@opengridcomputing.com> Sean, I'm testing iwarp usermode on ofed-1.3 and I always get a -22 error from rdma_connect(). I tried rping and a home brew unit test program and both hit this error.
I'm diving in now to see who's returning it, but wanted to give you a heads up... Stay tuned... Steve. From swise at opengridcomputing.com Thu Nov 8 12:15:02 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 08 Nov 2007 14:15:02 -0600 Subject: [ofa-general] ofed--1.3 rdma_connect problems In-Reply-To: <47336A04.7090806@opengridcomputing.com> References: <47336A04.7090806@opengridcomputing.com> Message-ID: <47336E46.4070007@opengridcomputing.com> Looks like the following commit exposed a bug in the chelsio driver. iw_cxgb3 was _not_ setting the max_qp_init_rd_atom attribute. > commit 487a52078fe1ba322273a6b893d31e0caaa69a57 > Author: Sean Hefty > Date: Tue Oct 16 14:59:21 2007 -0700 > > librdmacm/cma: provide sanity checks for max outstanding rdma ops > > Ensure that the responder_resources and initiator_depth values > provided by the user are supported by the local hardware. This > traps errors sooner during connection establishment (when calling > rdma_connect), rather than waiting until the modify QP fails > (after calling rdma_accept). > > Signed-off-by: Sean Hefty I've opened bug 777 to fix this. Tziporet/Vlad, can we get this in beta? I will provide a patch shortly. Steve. Steve Wise wrote: > Sean, > > I'm testing iwarp usermode on ofed-1.3 and I always get a -22 error from > rdma_connect(). I tried rping and a home brew unit test program and both > hit this error. I'm diving in now to see who's returning it, but > wanted to give you a heads up... > > Stay tuned... > > Steve.
> > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From sean.hefty at intel.com Thu Nov 8 12:31:02 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 8 Nov 2007 12:31:02 -0800 Subject: [ofa-general] question about using in_words in ib_uverbs_write() Message-ID: <000001c82246$47f087e0$ff0da8c0@amr.corp.intel.com> At the end of ib_uverbs_write(): return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, hdr.in_words * 4, hdr.out_words * 4); Since buf is adjusted by sizeof hdr, should hdr.in_words * 4 also be adjusted by - sizeof hdr? This is just a question based on looking at the code, and not in response to any seen problem. (I doubt this would cause any real harm anywhere.) - Sean From bs at q-leap.de Thu Nov 8 12:41:52 2007 From: bs at q-leap.de (Bernd Schubert) Date: Thu, 8 Nov 2007 21:41:52 +0100 Subject: [ofa-general] MT25418 Message-ID: <200711082141.53113.bs@q-leap.de> Hi, we have a card here that is not supported by 2.6.22, I haven't tested 2.6.23 yet, but if I'm not mistaken this pci-id is not defined in 2.6.23 too. 09:00.0 InfiniBand: Mellanox Technologies Unknown device 634a (rev a0) Subsystem: Mellanox Technologies Unknown device 634a Flags: bus master, fast devsel, latency 0, IRQ 18 Memory at d8800000 (64-bit, non-prefetchable) [size=1M] Memory at d8000000 (64-bit, prefetchable) [size=8M] Memory at d8900000 (64-bit, non-prefetchable) [size=8K] Capabilities: [40] Power Management version 3 Capabilities: [48] Vital Product Data Capabilities: [84] MSI-X: Enable- Mask- TabSize=256 Capabilities: [60] Express Endpoint IRQ 0 I tried to grep for mlx4 devices, but don't find any definition at all. Any help is appreciated. 
Thanks, Bernd -- Bernd Schubert Q-Leap Networks GmbH From rdreier at cisco.com Thu Nov 8 13:34:08 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 08 Nov 2007 13:34:08 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <200711082141.53113.bs@q-leap.de> (Bernd Schubert's message of "Thu, 8 Nov 2007 21:41:52 +0100") References: <200711082141.53113.bs@q-leap.de> Message-ID: > we have a card here that is not supported by 2.6.22, I haven't tested 2.6.23 > yet, but if I'm not mistaken this pci-id is not defined in 2.6.23 too. > 09:00.0 InfiniBand: Mellanox Technologies Unknown device 634a (rev a0) As far as I can tell, device ID 634a was defined in the initial mlx4 merge, which went in before 2.6.22. What happens if you do "modprobe mlx4_ib" with your kernel? - R. From bs at q-leap.de Thu Nov 8 14:06:32 2007 From: bs at q-leap.de (Bernd Schubert) Date: Thu, 8 Nov 2007 23:06:32 +0100 Subject: [ofa-general] MT25418 In-Reply-To: References: <200711082141.53113.bs@q-leap.de> Message-ID: <20071108220632.GA18389@lanczos.q-leap.de> On Thu, Nov 08, 2007 at 01:34:08PM -0800, Roland Dreier wrote: > > we have a card here that is not supported by 2.6.22, I haven't tested 2.6.23 > > yet, but if I'm not mistaken this pci-id is not defined in 2.6.23 too. > > > 09:00.0 InfiniBand: Mellanox Technologies Unknown device 634a (rev a0) > > As far as I can tell, device ID 634a was defined in the initial mlx4 > merge, which went in before 2.6.22. What happens if you do "modprobe mlx4_ib" > with your kernel? Roland, thanks for your help. On modprobing this module simply nothing does happen, absolutely nothing in dmesg. Can you tell me where the mlx4 ids are defined? 
Thanks, Bernd From moshek at voltaire.com Thu Nov 8 14:11:41 2007 From: moshek at voltaire.com (Moshe Kazir) Date: Fri, 9 Nov 2007 00:11:41 +0200 Subject: [ofa-general] MT25418 References: <200711082141.53113.bs@q-leap.de> <20071108220632.GA18389@lanczos.q-leap.de> Message-ID: <39C75744D164D948A170E9792AF8E7CA0D14E9@exil.voltaire.com> Sometimes, when an mlx4 card has been burned with an old firmware version, it does not respond properly on the PCI bus. In this case you have to re-burn it using mlxburn. Moshe -----Original Message----- From: general-bounces at lists.openfabrics.org on behalf of Bernd Schubert Sent: Fri 11/9/2007 12:06 AM To: Roland Dreier Cc: general at openib.org Subject: Re: [ofa-general] MT25418 On Thu, Nov 08, 2007 at 01:34:08PM -0800, Roland Dreier wrote: > > we have a card here that is not supported by 2.6.22, I haven't tested 2.6.23 > > yet, but if I'm not mistaken this pci-id is not defined in 2.6.23 too. > > > 09:00.0 InfiniBand: Mellanox Technologies Unknown device 634a (rev a0) > > As far as I can tell, device ID 634a was defined in the initial mlx4 > merge, which went in before 2.6.22. What happens if you do "modprobe mlx4_ib" > with your kernel? Roland, thanks for your help. On modprobing this module simply nothing does happen, absolutely nothing in dmesg. Can you tell me where the mlx4 ids are defined? Thanks, Bernd _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From rdreier at cisco.com Thu Nov 8 14:27:03 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 08 Nov 2007 14:27:03 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <20071108220632.GA18389@lanczos.q-leap.de> (Bernd Schubert's message of "Thu, 8 Nov 2007 23:06:32 +0100") References: <200711082141.53113.bs@q-leap.de> <20071108220632.GA18389@lanczos.q-leap.de> Message-ID: > Roland, thanks for your help.
On modprobing this module simply nothing does > happen, absolutely nothing in dmesg. Do you know if mlx4_core was already loaded? It probably would be loaded by driver autoloading based on matching the PCI device. If so then mlx4_ib wouldn't print anything further, but IB stuff should work, eg you should see a /sys/class/infiniband/mlx4_0 directory, etc. > Can you tell me where the mlx4 ids are defined? drivers/net/mlx4/main.c - R. From bs at q-leap.de Thu Nov 8 14:39:44 2007 From: bs at q-leap.de (Bernd Schubert) Date: Thu, 8 Nov 2007 23:39:44 +0100 Subject: [ofa-general] MT25418 In-Reply-To: References: <200711082141.53113.bs@q-leap.de> <20071108220632.GA18389@lanczos.q-leap.de> Message-ID: <20071108223944.GA18515@lanczos.q-leap.de> On Thu, Nov 08, 2007 at 02:27:03PM -0800, Roland Dreier wrote: > > Roland, thanks for your help. On modprobing this module simply nothing does > > happen, absolutely nothing in dmesg. > > Do you know if mlx4_core was already loaded? It probably would be > loaded by driver autoloading based on matching the PCI device. If so > then mlx4_ib wouldn't print anything further, but IB stuff should > work, eg you should see a /sys/class/infiniband/mlx4_0 directory, etc. > Ah, mlx4_core was already loaded. After unloading it and loading mlx4_ib again I also get kernel messages. [10010.988484] mlx4_core: Mellanox ConnectX core driver v0.01 (May 1, 2007) [10010.988490] mlx4_core: Initializing 0000:09:00.0 [10010.988532] ACPI: PCI Interrupt 0000:09:00.0[A] -> GSI 18 (level, low) -> IRQ 18 [10010.988555] PCI: Setting latency timer of device 0000:09:00.0 to 64 At least defining the ib0 interface works now, too. At present I can't test it any further, since it's connected to a flaky mts2400 switch, which needs a reset. > > Can you tell me where the mlx4 ids are defined? > > drivers/net/mlx4/main.c Hmm, I already grepped there. Will look tomorrow again. Btw, does the mlx4 driver in 2.6.22 work reliably or have there been major issues fixed in 2.6.23?
Thanks a lot for your help, Bernd From rdreier at cisco.com Thu Nov 8 14:50:55 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 08 Nov 2007 14:50:55 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <20071108223944.GA18515@lanczos.q-leap.de> (Bernd Schubert's message of "Thu, 8 Nov 2007 23:39:44 +0100") References: <200711082141.53113.bs@q-leap.de> <20071108220632.GA18389@lanczos.q-leap.de> <20071108223944.GA18515@lanczos.q-leap.de> Message-ID: > Btw, does the mlx4 driver in 2.6.22 work reliable or have > their been major issues fixed in 2.6.23? There is a data corruption bug fix in 2.6.23, but it is probably hard to trigger. Other than that, nothing is too major. From mshefty at ichips.intel.com Thu Nov 8 15:50:50 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 08 Nov 2007 15:50:50 -0800 Subject: [ofa-general] librdmacm feature request In-Reply-To: <47321265.5070702@ichips.intel.com> References: <1191767680.19888.310.camel@firewall.xsintricity.com> <470A632D.1050001@ichips.intel.com> <1191894507.19888.360.camel@firewall.xsintricity.com> <47321265.5070702@ichips.intel.com> Message-ID: <4733A0DA.10301@ichips.intel.com> The only idea I've been able to come up with for moving an rdma_cm_id between fd's is to perform a two-step process. (The two steps would be done by a single API call.) 1. prepare to modify(old_fd, rdma_cm_id) Somehow mark that the rdma_cm_id will migrate to a new fd. 2. commit modify(new_fd, rdma_cm_id) Migrates the rdma_cm_id to the new fd and moves the events. Using two steps provides the kernel code the file context that it needs, and should protect against the fd's being closed. The biggest hurdle to this is ensuring that the same user owns both fd's, possibly by validating some sort of key between the two steps. I just haven't figured out what works as a key. - Sean From meier3 at llnl.gov Thu Nov 8 16:35:08 2007 From: meier3 at llnl.gov (Timothy A. 
Meier) Date: Thu, 08 Nov 2007 16:35:08 -0800 Subject: [ofa-general] [PATCH] opensm: osm_log - added the log message type to the message Message-ID: <4733AB3C.8040707@llnl.gov> Sasha I have been doing a bit of parsing through osm log files lately, and these changes would help me understand the context (current log level) of the log file, and the nature (log type) of the individual messages. 1. The act of changing the log filter or verbosity level is logged 2. Each log message includes its level. From cea8ddbc9b591aefab31a6012a9d43081903ddb5 Mon Sep 17 00:00:00 2001 From: Tim Meier Date: Thu, 8 Nov 2007 16:18:41 -0800 Subject: [PATCH] opensm: osm_log - added the log message type to the message Inserted the log message type (verbosity level) into the log, so it will be more obvious why a particular message appears there. Also, the act of setting or changing the logging level is logged. Signed-off-by: Tim Meier --- opensm/include/opensm/osm_log.h | 12 +++++++----- opensm/opensm/osm_log.c | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/opensm/include/opensm/osm_log.h b/opensm/include/opensm/osm_log.h index 6c487aa..4db96fb 100644 --- a/opensm/include/opensm/osm_log.h +++ b/opensm/include/opensm/osm_log.h @@ -99,6 +99,7 @@ typedef uint8_t osm_log_level_t; #define OSM_LOG_FUNCS 0x10 #define OSM_LOG_FRAMES 0x20 #define OSM_LOG_ROUTING 0x40 +#define OSM_LOG_ALL 0x7f #define OSM_LOG_SYS 0x80 /* @@ -283,6 +284,11 @@ osm_log_init(IN osm_log_t * const p_log, * Same as osm_log_init_v2() but without max_size parameter */ +void +osm_log(IN osm_log_t * const p_log, + IN const osm_log_level_t verbosity, + IN const char *p_str, ...) 
STRICT_OSM_LOG_FORMAT; + /****f* OpenSM: Log/osm_log_get_level * NAME * osm_log_get_level @@ -326,6 +332,7 @@ static inline void osm_log_set_level(IN osm_log_t * const p_log, IN const osm_log_level_t level) { p_log->level = level; + osm_log(p_log, OSM_LOG_ALL, "Setting log level to: 0x%02x\n", level); } /* @@ -386,11 +393,6 @@ extern int osm_log_printf(osm_log_t * p_log, osm_log_level_t level, const char *fmt, ...); void -osm_log(IN osm_log_t * const p_log, - IN const osm_log_level_t verbosity, - IN const char *p_str, ...) STRICT_OSM_LOG_FORMAT; - -void osm_log_raw(IN osm_log_t * const p_log, IN const osm_log_level_t verbosity, IN const char *p_buf); diff --git a/opensm/opensm/osm_log.c b/opensm/opensm/osm_log.c index 7efe93e..97f8920 100644 --- a/opensm/opensm/osm_log.c +++ b/opensm/opensm/osm_log.c @@ -184,19 +184,19 @@ osm_log(IN osm_log_t * const p_log, _retry: ret = fprintf(p_log->out_port, - "[%02d:%02d:%02d:%03d][%04X] -> %s", st.wHour, + "[%02d:%02d:%02d:%03d][%04X] 0x%02x -> %s", st.wHour, st.wMinute, st.wSecond, st.wMilliseconds, pid, - buffer); + verbosity, buffer); #else pid = pthread_self(); _retry: ret = fprintf(p_log->out_port, - "%s %02d %02d:%02d:%02d %06d [%04X] -> %s", + "%s %02d %02d:%02d:%02d %06d [%04X] 0x%02x -> %s", (result.tm_mon < 12 ? month_str[result.tm_mon] : "???"), result.tm_mday, result.tm_hour, result.tm_min, - result.tm_sec, usecs, pid, buffer); + result.tm_sec, usecs, pid, verbosity, buffer); #endif /* flush log */ -- 1.5.1 -- Timothy A. Meier Computer Scientist ICCD/High Performance Computing 925.422.3341 meier3 at llnl.gov -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... 
Name: 0001-opensm-osm_log-added-the-log-message-type-to-the.patch URL: From cyclohexyl at fbainsurance.com Fri Nov 9 00:11:49 2007 From: cyclohexyl at fbainsurance.com (Kevin Zimmerman) Date: Fri, 09 Nov 2007 08:11:49 +0000 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c822a5$c3cd0280$0100007f@localhost> cheapxpsoft6. com From sashak at voltaire.com Fri Nov 9 02:57:05 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 12:57:05 +0200 Subject: [ofa-general] Re: [PATCH 1/5] Fix log messages in perfmgr to use "perfmgr" in name, _not_ "pm" In-Reply-To: <20071107185819.2103ad72.weiny2@llnl.gov> References: <20071107185819.2103ad72.weiny2@llnl.gov> Message-ID: <20071109105705.GQ6153@sashak.voltaire.com> On 18:58 Wed 07 Nov , Ira Weiny wrote: > From e89613ccb9386c4f1360246d9171e756aff664eb Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Tue, 6 Nov 2007 18:22:06 -0800 > Subject: [PATCH] Fix log messages in perfmgr to use "perfmgr" in name, _not_ "pm" > > > Signed-off-by: Ira K. Weiny Applied. Thanks. 
Sasha From vlad at lists.openfabrics.org Fri Nov 9 02:56:19 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 9 Nov 2007 02:56:19 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071109-0200 daily build status Message-ID: <20071109105619.B1B6BE60854@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel_2_6_24_rc1 Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.13 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.13 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.18 Passed 
on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: 
In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: (Each undeclared identifier is reported only once /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: for each function it appears in.) 
/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: too many arguments to function 'dev_get_by_name' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1481: warning: assignment from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1495: warning: passing argument 1 of 'transport_class_unregister' from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c: In function 'iscsi_transport_exit': /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1506: warning: passing argument 1 of 'transport_class_unregister' from incompatible pointer type make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi_f.o] Error 1 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: 
In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071109-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From sendoa.daughtee at suell.dk Fri Nov 9 02:58:58 2007 From: sendoa.daughtee at suell.dk (May Allred) Date: Fri, 9 Nov 2007 19:58:58 +0900 Subject: [ofa-general] Magic stick Message-ID: <01c8230a$f71ce410$4a0fbddd@sendoa.daughtee> -------------- next part -------------- A non-text attachment was scrubbed... Name: ab.gif Type: image/gif Size: 6140 bytes Desc: not available URL: From sashak at voltaire.com Fri Nov 9 03:24:46 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 13:24:46 +0200 Subject: [ofa-general] Re: [PATCH 2/5] Maintain a name of the node in the monitored node structure for log messages. In-Reply-To: <20071107185820.4f529d7b.weiny2@llnl.gov> References: <20071107185820.4f529d7b.weiny2@llnl.gov> Message-ID: <20071109112446.GR6153@sashak.voltaire.com> On 18:58 Wed 07 Nov , Ira Weiny wrote: > From f75aa716b3918989876d61863a8f03aff221405f Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Tue, 6 Nov 2007 18:34:28 -0800 > Subject: [PATCH] Maintain a name of the node in the monitored node structure for log messages. > > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From sashak at voltaire.com Fri Nov 9 03:25:07 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 13:25:07 +0200 Subject: [ofa-general] Re: [PATCH 3/5] Use monitored map lookup to get the name of the node for recieved mad processing in perfmgr In-Reply-To: <20071107185821.73af99c5.weiny2@llnl.gov> References: <20071107185821.73af99c5.weiny2@llnl.gov> Message-ID: <20071109112507.GS6153@sashak.voltaire.com> On 18:58 Wed 07 Nov , Ira Weiny wrote: > From 003d4eb171cbad92c61fb4f0fd4c96b7efe3ff6a Mon Sep 17 00:00:00 2001 > From: Ira K. 
Weiny > Date: Tue, 6 Nov 2007 19:10:10 -0800 > Subject: [PATCH] Use monitored map lookup to get the name of the node for recieved mad > processing in perfmgr > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From sashak at voltaire.com Fri Nov 9 03:25:23 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 13:25:23 +0200 Subject: [ofa-general] Re: [PATCH 4/5] use the monitored node's redirect information for this get_lid call because it is now available In-Reply-To: <20071107185822.1eb174f1.weiny2@llnl.gov> References: <20071107185822.1eb174f1.weiny2@llnl.gov> Message-ID: <20071109112523.GT6153@sashak.voltaire.com> On 18:58 Wed 07 Nov , Ira Weiny wrote: > From ae65158c0f481936be940899c6e4782973888215 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 7 Nov 2007 11:22:44 -0800 > Subject: [PATCH] use the monitored node's redirect information for this get_lid call because it > is now available > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From sashak at voltaire.com Fri Nov 9 03:30:42 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 13:30:42 +0200 Subject: [ofa-general] Re: [PATCH 5/5] opensm/opensm/osm_state_mgr.c: update log messages with node names In-Reply-To: <20071107185823.4fae4ea9.weiny2@llnl.gov> References: <20071107185823.4fae4ea9.weiny2@llnl.gov> Message-ID: <20071109113042.GU6153@sashak.voltaire.com> On 18:58 Wed 07 Nov , Ira Weiny wrote: > From 8a3bd3c02ad07f90da5d9ac92752d0a9e9c45742 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 7 Nov 2007 16:32:39 -0800 > Subject: [PATCH] opensm/opensm/osm_state_mgr.c: update log messages with node names > > > Signed-off-by: Ira K. Weiny Applied. Thanks. 
Sasha From sashak at voltaire.com Fri Nov 9 03:46:42 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 13:46:42 +0200 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071107191603.490b3121.weiny2@llnl.gov> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> <20071107191603.490b3121.weiny2@llnl.gov> Message-ID: <20071109114642.GV6153@sashak.voltaire.com> On 19:16 Wed 07 Nov , Ira Weiny wrote: > > > > > Perhaps /ib-node-name-map? > > > > Or under /etc/ofa/ ? > > I think is appropriate. OFED can specify /etc/ofa if they wish. But then they cannot do it in configure time. I think I will apply the patch as is now. And later I will add something like --opensm-config-dir for all config files, not just for node name map. Sasha From sashak at voltaire.com Fri Nov 9 03:55:42 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 13:55:42 +0200 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071107191603.490b3121.weiny2@llnl.gov> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> <20071107191603.490b3121.weiny2@llnl.gov> Message-ID: <20071109115542.GW6153@sashak.voltaire.com> On 19:16 Wed 07 Nov , Ira Weiny wrote: > > The patch is attached, > Ira > > > From f395cae1c20daa15ce4ab5674df266bf28c2d318 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 7 Nov 2007 19:12:57 -0800 > Subject: [PATCH] Specify a default node-name-map of /ib-node-name-map > > > Signed-off-by: Ira K. Weiny Applied. Thanks. 
Sasha From sashak at voltaire.com Fri Nov 9 04:03:24 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 9 Nov 2007 14:03:24 +0200 Subject: [ofa-general] Re: [PATCH] opensm: osm_log - added the log message type to the message In-Reply-To: <4733AB3C.8040707@llnl.gov> References: <4733AB3C.8040707@llnl.gov> Message-ID: <20071109120324.GX6153@sashak.voltaire.com> On 16:35 Thu 08 Nov , Timothy A. Meier wrote: > Sasha > > I have been doing a bit of parsing through osm log files lately, > and these changes would help me understand the context (current log level) > of the log file, and the nature (log type) of the individual messages. > > 1. The act of changing the log filter or verbosity level is logged > 2. Each log message includes its level. > > From cea8ddbc9b591aefab31a6012a9d43081903ddb5 Mon Sep 17 00:00:00 2001 > From: Tim Meier > Date: Thu, 8 Nov 2007 16:18:41 -0800 > Subject: [PATCH] opensm: osm_log - added the log message type to the message > > Inserted the log message type (verbosity level) into the > log, so it will be more obvious why a particular message > appears there. Also, the act of setting or changing the > logging level is logged. > > Signed-off-by: Tim Meier Applied. Thanks. (and your mailer still corrupt patches). Sasha From drkotey_occ at earthlink.net Fri Nov 9 04:16:13 2007 From: drkotey_occ at earthlink.net (Dr James) Date: Fri, 9 Nov 2007 04:16:13 -0800 (GMT-08:00) Subject: [ofa-general] Dear Friend, Message-ID: <14065274.1194610573260.JavaMail.root@elwamui-muscovy.atl.sa.earthlink.net> Dear Friend, On behalf of the board and management of Overseas Credit Commission(OCC).London UK, I Dr. James Kotey Operations Manager wishes to inform you that your Consignment/fund tagged diplomatic luggage 122 with Ref: No1226/X42/206 which was deposited in our vault for safe keeping by a Diplomatic courier company(Global) is due for immediate collection. 
Be Informed that we have concluded all arrangements to deliver your consignment at your doorstep through diplomatic means. In line with the binding diplomatic consignment delivery policies, kindly furnish us with the following as set forth. A copy of your international passport or any other means of identification as the true consignee. The address where the above cargo/funds should be delivered to and your phone number. List the nearest international airport to your address location. Meanwhile, we urge you to treat the above requirement with utmost urgency to enable us dispense our duties and obligation accordingly thereby allowing us to serve you in a timely fashion. Upon satisfactory receipt of All the above mentioned, you Will be further acquainted with the detailed delivery itinerary including information of the diplomat who will accompany your consignment. As always, feel very free to contact us should you have any further question as our customer's rights are continuou sly protected.We pledge our best service at all times Yours Sincerely Dr. James Kotey. Foreign Operations Manager From miasmatology at educationaltoys.com Fri Nov 9 06:09:29 2007 From: miasmatology at educationaltoys.com (Avery Fong) Date: Fri, 09 Nov 2007 15:09:29 +0100 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c822d9$520eac00$0100007f@localhost> cheapxpsoft6. 
com From kliteyn at mellanox.co.il Thu Nov 8 21:07:57 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 9 Nov 2007 07:07:57 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-09:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-08 OpenSM git rev = Tue_Nov_6_19:09:16_2007 [dcad36c34e71a25d328e8c2c6fc7862751b24a34] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From swise at opengridcomputing.com Fri Nov 9 07:21:58 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 09 Nov 2007 09:21:58 -0600 Subject: [ofa-general] [PATCH 2.6.24] RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. Message-ID: <20071109152158.21493.24110.stgit@dell3.ogc.int> RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. Attribute max_qp_init_rd_atom is not getting set. Version 1.0.4 of librdmacm now validates the user's requested initiator and responder resources vs the max supported by the device. 
Since iw_cxgb3 wasn't setting this attribute (and it defaulted to 0), all rdma_connect()s fail if there are initiator resources requested by the app. Signed-off-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index f0c7775..b5436ca 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1000,6 +1000,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->max_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_cq = dev->attr.max_cqs; props->max_cqe = dev->attr.max_cqes_per_cq; props->max_mr = dev->attr.max_mem_regs; From swise at opengridcomputing.com Fri Nov 9 07:33:57 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 09 Nov 2007 09:33:57 -0600 Subject: [ofa-general] librdmacm feature request In-Reply-To: <4733A0DA.10301@ichips.intel.com> References: <1191767680.19888.310.camel@firewall.xsintricity.com> <470A632D.1050001@ichips.intel.com> <1191894507.19888.360.camel@firewall.xsintricity.com> <47321265.5070702@ichips.intel.com> <4733A0DA.10301@ichips.intel.com> Message-ID: <47347DE5.5040404@opengridcomputing.com> Just thinking out loud: What if each cm_id has its own fd? Then they could be associated with a channel, which is just an object that tracks which fds are in the channel and uses select()/poll() on the entire set of fds for rdma_get_cm_event(). So moving a cm_id to another channel is simple and doesn't involve moving the events since they will be queued on the cm_id's fd. You just move the fd from one channel to another and events keep flowing as normal... Steve. 
Sean Hefty wrote: > The only idea I've been able to come up with for moving an rdma_cm_id > between fd's is to perform a two-step process. (The two steps would be > done by a single API call.) > > 1. prepare to modify(old_fd, rdma_cm_id) > Somehow mark that the rdma_cm_id will migrate to a new fd. > 2. commit modify(new_fd, rdma_cm_id) > Migrates the rdma_cm_id to the new fd and moves the events. > > Using two steps provides the kernel code the file context that it needs, > and should protect against the fd's being closed. The biggest hurdle to > this is ensuring that the same user owns both fd's, possibly by > validating some sort of key between the two steps. I just haven't > figured out what works as a key. > > - Sean > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From weiny2 at llnl.gov Fri Nov 9 08:21:51 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Fri, 9 Nov 2007 08:21:51 -0800 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071109114642.GV6153@sashak.voltaire.com> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> <20071107191603.490b3121.weiny2@llnl.gov> <20071109114642.GV6153@sashak.voltaire.com> Message-ID: <20071109082151.6efb0256.weiny2@llnl.gov> On Fri, 9 Nov 2007 13:46:42 +0200 Sasha Khapyorsky wrote: > On 19:16 Wed 07 Nov , Ira Weiny wrote: > > > > > > > Perhaps /ib-node-name-map? > > > > > > Or under /etc/ofa/ ? > > > > I think is appropriate. OFED can specify /etc/ofa if they wish. > > But then they cannot do it in configure time. > > I think I will apply the patch as is now. 
And later I will add something > like --opensm-config-dir for all config files, not just for node name > map. > I don't understand what you mean? They can use: --sysconfdir=DIR read-only single-machine data [PREFIX/etc] This works for me. Perhaps I should change the other config files to use sysconfdir? I have not done so because of legacy reasons, but I think it would be more correct, no? Ira From Lan.Tran at 3leafsystems.com Fri Nov 9 09:01:37 2007 From: Lan.Tran at 3leafsystems.com (Lan Tran) Date: Fri, 9 Nov 2007 09:01:37 -0800 Subject: [ofa-general] Lost out-of-svc trap notifications during SM handover References: <20071108235054.014FFE2807F@openfabrics.org> Message-ID: <7C1D552561AF0544ACC7CF6F10E4966EAFDCCF@chronus.3leafnetworks.corp> Hi Sasha, I'm seeing a problem with missing out-of-svc trap notifications when a Master SM port is disabled. I'm taking a look into it now, but if you have any pointers or ideas of what might be going on or how to resolve it, that would be much appreciated! I am subscribing to be informed of out-of-service trap events (i.e. trap 65), registering my own callback. When I disable an IB port of a remote node that is running the Standby SM, then, as expected, my trap callback function is called. But when I disable the IB port of the remote node that is the Master SM, my trap 65 callback is never called. From looking at the opensm logs it seems what is happening is: 1) I disable port running Master SM 2) SM handover starts --> during Standby SM's heavy sweep, osm_drop_mgr_process() detects that the old Master SM port is down ... 
but at this point no subscribers to be informed because they are all subscribed with the old Master SM ---> Standby SM enters Master SM state, so now new Master SM 3) Several seconds later, I subscribe with the new Master SM for trap 65 notification (I do this whenever I receive IB_EVENT_CLIENT_REREGISTER event), but this is too late as the report notice for the dropped old Master SM port already occurred earlier. It seems I need to somehow make sure that I have subscribed for a trap 65 notification with the to-be new Master SM when it decides to report that the old Master SM port goes down. Not quite sure if this is possible though :) Thanks again! Lan -------------- next part -------------- A non-text attachment was scrubbed... Name: winmail.dat Type: application/ms-tnef Size: 3438 bytes Desc: not available URL: From mshefty at ichips.intel.com Fri Nov 9 09:11:59 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Fri, 09 Nov 2007 09:11:59 -0800 Subject: [ofa-general] librdmacm feature request In-Reply-To: <47347DE5.5040404@opengridcomputing.com> References: <1191767680.19888.310.camel@firewall.xsintricity.com> <470A632D.1050001@ichips.intel.com> <1191894507.19888.360.camel@firewall.xsintricity.com> <47321265.5070702@ichips.intel.com> <4733A0DA.10301@ichips.intel.com> <47347DE5.5040404@opengridcomputing.com> Message-ID: <473494DF.8020200@ichips.intel.com> > What if each cm_id has its own fd? Then they could be associated with a > channel, which is just an object that tracks which fds are in the > channel and uses select()/poll() on the entire set of fds for > rdma_get_cm_event(). Hmm... I'll give this some thought. This might change the kernel ABI, but would affect the library ABI (which exposes the event channel fd), bumping the major version. I don't know without doing some research how to handle new connection requests, but that should be possible. 
> So moving a cm_id to another channel is simple and doesn't involve > moving the events since they will be queued on the cm_id's fd. You just > move the fd from one channel to another and events keep flowing as > normal... We still need to handle the window where an event is pulled from one channel immediately before the cm_id is migrated to a new channel. Maybe we do this through documentation, but it would be nice to have the interface behave in a way that's easy to program to. (I don't think an fd per cm_id makes this any more difficult.) What I did finally find was a kernel call fget(fd) that at least seems to be usable for what I need. - Sean From bs at q-leap.de Fri Nov 9 10:00:20 2007 From: bs at q-leap.de (Bernd Schubert) Date: Fri, 9 Nov 2007 19:00:20 +0100 Subject: [ofa-general] MT25418 In-Reply-To: References: <200711082141.53113.bs@q-leap.de> <20071108223944.GA18515@lanczos.q-leap.de> Message-ID: <200711091900.20292.bs@q-leap.de> On Thursday 08 November 2007 23:50:55 Roland Dreier wrote: > > Btw, does the mlx4 driver in 2.6.22 work reliable or have > > their been major issues fixed in 2.6.23? > > There is a data corruption bug fix in 2.6.23, but it is probably hard > to trigger. Other than that, nothing is too major. Can you tell me which git commit this is? 
23f1b38481596ad77e5f51562977b12c8418eee3: IB/mlx4: Fix error path in create_qp_common() 0981582dbfae86ba0306406f1af329bb702752d2: mlx4_core: Change command token on timeout 7f5eb9bb8c7fb3bd411674b856872d7ab4a7b1a3: IB/mlx4: Return receive queue sizes for userspace QPs from query QP Cheers, Bernd -- Bernd Schubert Q-Leap Networks GmbH From bs at q-leap.de Fri Nov 9 10:05:14 2007 From: bs at q-leap.de (Bernd Schubert) Date: Fri, 9 Nov 2007 19:05:14 +0100 Subject: [ofa-general] MT25418 In-Reply-To: <20071108223944.GA18515@lanczos.q-leap.de> References: <200711082141.53113.bs@q-leap.de> <20071108223944.GA18515@lanczos.q-leap.de> Message-ID: <200711091905.14279.bs@q-leap.de> On Thursday 08 November 2007 23:39:44 Bernd Schubert wrote: > At least defining the ib0 interface works now, too. Presently can't further > test it now, since its connected to a flaky mts2400 switch, which needs a > reset. In principle the cards do work, but only port-1. There's a problem with port-2. I get a connection to the switch, can run ibnetdiscover and see full topology, but IPoIB doesn't work. Best, Bernd -- Bernd Schubert Q-Leap Networks GmbH From dermestes at sequoiabeverage.com Fri Nov 9 13:21:20 2007 From: dermestes at sequoiabeverage.com (Earle Brown) Date: Fri, 09 Nov 2007 16:21:20 -0500 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c82304$f5c18180$0100007f@localhost> cheapxpsoft6. com From rdreier at cisco.com Fri Nov 9 12:00:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 09 Nov 2007 12:00:58 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <200711091900.20292.bs@q-leap.de> (Bernd Schubert's message of "Fri, 9 Nov 2007 19:00:20 +0100") References: <200711082141.53113.bs@q-leap.de> <20071108223944.GA18515@lanczos.q-leap.de> <200711091900.20292.bs@q-leap.de> Message-ID: > > There is a data corruption bug fix in 2.6.23, but it is probably hard > > to trigger. Other than that, nothing is too major. 
> > Can you tell me which git commit this is? The one with "Fix data corruption" in the description :) 6e694ea3 ("IB/mlx4: Fix data corruption triggered by wrong headroom marking order") From rdreier at cisco.com Fri Nov 9 12:01:52 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 09 Nov 2007 12:01:52 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <200711091905.14279.bs@q-leap.de> (Bernd Schubert's message of "Fri, 9 Nov 2007 19:05:14 +0100") References: <200711082141.53113.bs@q-leap.de> <20071108223944.GA18515@lanczos.q-leap.de> <200711091905.14279.bs@q-leap.de> Message-ID: > In principal the cards do work, but only port-1. There's a problem with > port-2. I get a connection to the switch, can run ibnetdiscover and see full > topology, but IPoIB doesn't work. Are you trying to use IPoIB on both ports at once, with both ports connected to the same fabric? If so you're probably running into the standard ARP filtering issue... From bs at q-leap.de Fri Nov 9 12:15:51 2007 From: bs at q-leap.de (Bernd Schubert) Date: Fri, 9 Nov 2007 21:15:51 +0100 Subject: [ofa-general] MT25418 In-Reply-To: References: <200711082141.53113.bs@q-leap.de> <200711091905.14279.bs@q-leap.de> Message-ID: <200711092115.51939.bs@q-leap.de> On Friday 09 November 2007 21:01:52 Roland Dreier wrote: > > In principal the cards do work, but only port-1. There's a problem with > > port-2. I get a connection to the switch, can run ibnetdiscover and see > > full topology, but IPoIB doesn't work. > > Are you trying to use IPoIB on both ports at once, with both ports > connected to the same fabric? If so you're probably running into the > standard ARP filtering issue... No, only one port connected. 
Cheers, Bernd -- Bernd Schubert Q-Leap Networks GmbH From bs at q-leap.de Fri Nov 9 12:16:27 2007 From: bs at q-leap.de (Bernd Schubert) Date: Fri, 9 Nov 2007 21:16:27 +0100 Subject: [ofa-general] MT25418 In-Reply-To: References: <200711082141.53113.bs@q-leap.de> <200711091900.20292.bs@q-leap.de> Message-ID: <200711092116.27835.bs@q-leap.de> On Friday 09 November 2007 21:00:58 Roland Dreier wrote: > > > There is a data corruption bug fix in 2.6.23, but it is probably hard > > > to trigger. Other than that, nothing is too major. > > > > Can you tell me which git commit this is? > > The one with "Fix data corruption" in the description :) > > 6e694ea3 ("IB/mlx4: Fix data corruption triggered by wrong headroom marking > order") Hmm, sometimes I'm simply blind ;) Thanks for the hint! Cheers, Bernd -- Bernd Schubert Q-Leap Networks GmbH From rdreier at cisco.com Fri Nov 9 12:32:47 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 09 Nov 2007 12:32:47 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <200711092115.51939.bs@q-leap.de> (Bernd Schubert's message of "Fri, 9 Nov 2007 21:15:51 +0100") References: <200711082141.53113.bs@q-leap.de> <200711091905.14279.bs@q-leap.de> <200711092115.51939.bs@q-leap.de> Message-ID: > No, only one port connected. So it works fine if you connect port 1, and if you connect port 2 it doesn't work? I can't think of any theory to explain that behavior other than perhaps a slightly flaky connector or something like that. From bs at q-leap.de Fri Nov 9 12:41:50 2007 From: bs at q-leap.de (Bernd Schubert) Date: Fri, 9 Nov 2007 21:41:50 +0100 Subject: [ofa-general] MT25418 In-Reply-To: References: <200711082141.53113.bs@q-leap.de> <200711092115.51939.bs@q-leap.de> Message-ID: <200711092141.51243.bs@q-leap.de> On Friday 09 November 2007 21:32:47 you wrote: > > No, only one port connected. > > So it works fine if you connect port 1, and if you connect port 2 it > doesn't work? 
I can't think of any theory to explain that behavior > other than perhaps a slightly flaky connector or something like that. Yes exactly and reproducible on all 6 nodes with connectX presently here in our test lab. Just by accident I first always had connected port 2. Shortly before I already thought it doesn't work at all, I tried the other port... Cheers, Bernd -- Bernd Schubert Q-Leap Networks GmbH From rdreier at cisco.com Fri Nov 9 13:48:19 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 09 Nov 2007 13:48:19 -0800 Subject: [ofa-general] Re: [PATCH 2.6.24] RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. In-Reply-To: <20071109152158.21493.24110.stgit@dell3.ogc.int> (Steve Wise's message of "Fri, 09 Nov 2007 09:21:58 -0600") References: <20071109152158.21493.24110.stgit@dell3.ogc.int> Message-ID: thanks, applied. From jenos at ncsa.uiuc.edu Fri Nov 9 19:10:29 2007 From: jenos at ncsa.uiuc.edu (Jeremy Enos) Date: Fri, 09 Nov 2007 21:10:29 -0600 Subject: [ofa-general] rhel5 updated, ofed 1.2.5.2 breaks Message-ID: <47352125.2040206@ncsa.uiuc.edu> 232 updates to rhel5 in the last 2 weeks, so I decided to update. Went from kernel-2.6.18-53.el5 to kernel-2.6.18-8.1.15.el5. Now ofed build barks about various kernel headers during the build. See build log at: http://yams.ncsa.uiuc.edu/~jenos/OFED.build.26335.log Any ideas? thx- Jeremy Enos From pentosides at selnrealt.com Fri Nov 9 19:25:15 2007 From: pentosides at selnrealt.com (Marguerite Phillips) Date: Sat, 10 Nov 2007 11:25:15 +0800 Subject: [ofa-general] Adobe Master Suite for $299, Retail Price $2499, Save $2200 Message-ID: <000001c82348$7e154600$0100007f@localhost> cheapxpsoft5. 
com From troy at scl.ameslab.gov Fri Nov 9 21:30:27 2007 From: troy at scl.ameslab.gov (Troy Benjegerdes) Date: Fri, 09 Nov 2007 23:30:27 -0600 Subject: [ofa-general] saquery hangs/timeouts Message-ID: <473541F3.60401@scl.ameslab.gov> What reasons could cause the following: [root at sm1 infiniband]# saquery -d Nov 09 22:30:00 692541 [C94EDFB0] -> osm_vendor_bind: Binding to port 0x2c9021b701236 Nov 09 22:30:04 779705 [41001940] -> umad_receiver: ERR 5409: send completed with error (method=0x12 attr=0x11 trans_id=0x6400000001) -- dropping Nov 09 22:30:04 779716 [41001940] -> umad_receiver: ERR 5410: class 0x3 LID 0x12 Query SA failed: IB_TIMEOUT This occurs on a machine which has had both a mthca and mlx4 card, and an almost identical machine with another mlx4 card works just fine. The only real difference I can tell is that the machine that works previously had OFED-1.3 alpha 1 installed, and the one that does not work has not had OFED-1.3 installed. I also get the hang on my debian systems that I built the kernel, libibverbs, libmthca, etc myself. The debian system gets the following behavior: bash-3.1# /opt/sc07/sbin/saquery -d Nov 09 22:36:14 336986 [F7EDD6C0] -> osm_vendor_bind: Binding to port 0x2c90300001dd1 NodeRecord dump: lid.....................0xA1 reserved................0x0 base_version............0x From troy at scl.ameslab.gov Fri Nov 9 21:38:54 2007 From: troy at scl.ameslab.gov (Troy Benjegerdes) Date: Fri, 09 Nov 2007 23:38:54 -0600 Subject: [ofa-general] Re: saquery hangs/timeouts In-Reply-To: <473541F3.60401@scl.ameslab.gov> References: <473541F3.60401@scl.ameslab.gov> Message-ID: <473543EE.4010407@scl.ameslab.gov> saquery apparently very much dislikes having two ports active at once. If I pull the cable off the second port it works. 
Troy Benjegerdes wrote: > What reasons could cause the following: > > [root at sm1 infiniband]# saquery -d > Nov 09 22:30:00 692541 [C94EDFB0] -> osm_vendor_bind: Binding to port > 0x2c9021b701236 > Nov 09 22:30:04 779705 [41001940] -> umad_receiver: ERR 5409: send > completed with error (method=0x12 attr=0x11 trans_id=0x6400000001) -- > dropping > Nov 09 22:30:04 779716 [41001940] -> umad_receiver: ERR 5410: class > 0x3 LID 0x12 > Query SA failed: IB_TIMEOUT > > This occurs on a machine which has had both a mthca and mlx4 card, and > an almost identical machine with another mlx4 card works just fine. > > The only real difference I can tell is that the machine that works > previously had OFED-1.3 alpha 1 installed, and the one that does not > work has not had OFED-1.3 installed. I also get the hang on my debian > systems that I built the kernel, libibverbs, libmthca, etc myself. > > The debian system gets the following behavior: > > bash-3.1# /opt/sc07/sbin/saquery -d > Nov 09 22:36:14 336986 [F7EDD6C0] -> osm_vendor_bind: Binding to port > 0x2c90300001dd1 > NodeRecord dump: > lid.....................0xA1 > reserved................0x0 > base_version............0x > From vlad at lists.openfabrics.org Sat Nov 10 02:49:52 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 10 Nov 2007 02:49:52 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071110-0200 daily build status Message-ID: <20071110104952.9D0F3E6087F@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel_2_6_24_rc1 Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 
with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.19 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.20 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.22 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.17 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on ia64 with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: (Each undeclared identifier is reported only once /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: for each function it appears in.) 
/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: too many arguments to function 'dev_get_by_name' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 
'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1481: warning: assignment from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1495: warning: passing argument 1 of 'transport_class_unregister' from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c: In function 'iscsi_transport_exit': /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1506: warning: passing argument 1 of 'transport_class_unregister' from incompatible pointer type make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi_f.o] Error 1 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast 
/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071110-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From hal.rosenstock at gmail.com Sat Nov 10 03:41:48 2007 From: hal.rosenstock at gmail.com (Hal Rosenstock) Date: Sat, 10 Nov 2007 07:41:48 -0400 Subject: [ofa-general] Re: saquery hangs/timeouts In-Reply-To: <473543EE.4010407@scl.ameslab.gov> References: <473541F3.60401@scl.ameslab.gov> <473543EE.4010407@scl.ameslab.gov> Message-ID: On 11/10/07, Troy Benjegerdes wrote: > saquery apparently very much dislikes having two ports active at once. > If I pull the cable off the second port it works. It's indicating a timeout querying the SA (for node records which is the default query). What SM/SA ? Can you provide an ibnetdiscover output of the topology ? Does it always work without the -d (with both ports plugged in) ? Unfortunately, I don't have a machine on which to look at this right now but perhaps it can be looked at in simulation. How critical is this ? 
-- Hal > Troy Benjegerdes wrote: > > What reasons could cause the following: > > > > [root at sm1 infiniband]# saquery -d > > Nov 09 22:30:00 692541 [C94EDFB0] -> osm_vendor_bind: Binding to port > > 0x2c9021b701236 > > Nov 09 22:30:04 779705 [41001940] -> umad_receiver: ERR 5409: send > > completed with error (method=0x12 attr=0x11 trans_id=0x6400000001) -- > > dropping > > Nov 09 22:30:04 779716 [41001940] -> umad_receiver: ERR 5410: class > > 0x3 LID 0x12 > > Query SA failed: IB_TIMEOUT > > > > This occurs on a machine which has had both a mthca and mlx4 card, and > > an almost identical machine with another mlx4 card works just fine. > > > > The only real difference I can tell is that the machine that works > > previously had OFED-1.3 alpha 1 installed, and the one that does not > > work has not had OFED-1.3 installed. I also get the hang on my debian > > systems that I built the kernel, libibverbs, libmthca, etc myself. > > > > The debian system gets the following behavior: > > > > bash-3.1# /opt/sc07/sbin/saquery -d > > Nov 09 22:36:14 336986 [F7EDD6C0] -> osm_vendor_bind: Binding to port > > 0x2c90300001dd1 > > NodeRecord dump: > > lid.....................0xA1 > > reserved................0x0 > > base_version............0x > > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Sat Nov 10 06:50:25 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 10 Nov 2007 16:50:25 +0200 Subject: [ofa-general] [PATCH] opensm: simplify PortInfo.LinkSpeed setup flow Message-ID: <20071110145025.GE6493@sashak.voltaire.com> Simplify setup of PortInfo.LinkSpeed setup flow based on opt.force_link_speed OpenSM option. 
Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_link_mgr.c | 25 +++++++------------------ 1 files changed, 7 insertions(+), 18 deletions(-) diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index d5be7b5..19d03d9 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -312,24 +312,13 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, sizeof(p_pi->link_width_enabled))) send_set = TRUE; - if (p_mgr->p_subn->opt.force_link_speed) { - if (p_mgr->p_subn->opt.force_link_speed == 15) { /* LinkSpeedSupported */ - if (ib_port_info_get_link_speed_enabled - (p_old_pi) != - ib_port_info_get_link_speed_sup(p_pi)) - ib_port_info_set_link_speed_enabled - (p_pi, - IB_PORT_LINK_SPEED_ENABLED_MASK); - else - ib_port_info_set_link_speed_enabled - (p_pi, - ib_port_info_get_link_speed_enabled - (p_old_pi)); - } else - ib_port_info_set_link_speed_enabled(p_pi, - p_mgr-> - p_subn->opt. - force_link_speed); + if (p_mgr->p_subn->opt.force_link_speed && + (p_mgr->p_subn->opt.force_link_speed != 15 || + ib_port_info_get_link_speed_enabled(p_pi) != + ib_port_info_get_link_speed_sup(p_pi))) { + ib_port_info_set_link_speed_enabled(p_pi, + p_mgr->p_subn->opt. + force_link_speed); if (memcmp(&p_pi->link_speed, &p_old_pi->link_speed, sizeof(p_pi->link_speed))) send_set = TRUE; -- 1.5.3.4.206.g58ba4 From sashak at voltaire.com Sat Nov 10 06:51:45 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 10 Nov 2007 16:51:45 +0200 Subject: [ofa-general] [PATCH] opensm: PortInfo set decision flow simplification Message-ID: <20071110145145.GF6493@sashak.voltaire.com> This simplifies (but doesn't change) flow for PortInfo set decision in lid and link mgrs - mostly to make the code more readable. 
Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_lid_mgr.c | 9 +++++---- opensm/opensm/osm_link_mgr.c | 15 ++++++--------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c index 9527185..9da6fcf 100644 --- a/opensm/opensm/osm_lid_mgr.c +++ b/opensm/opensm/osm_lid_mgr.c @@ -1184,9 +1184,11 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, 3. got_set_resp on the physical port is FALSE. This means we haven't seen this port before and we need to send Set of PortInfo to it. */ - if (send_set || p_mgr->p_subn->first_time_master_sweep == TRUE || - p_physp->got_set_resp == FALSE) { + if (p_mgr->p_subn->first_time_master_sweep == TRUE || + p_physp->got_set_resp == FALSE) + send_set = TRUE; + if (send_set) { p_mgr->send_set_reqs = TRUE; status = osm_req_set(p_mgr->p_req, osm_physp_get_dr_path_ptr(p_physp), @@ -1199,8 +1201,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, Exit: OSM_LOG_EXIT(p_mgr->p_log); - return (send_set || p_mgr->p_subn->first_time_master_sweep == TRUE || - p_physp->got_set_resp == FALSE); + return send_set; } /********************************************************************** diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index 19d03d9..b151c76 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -389,15 +389,12 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, b. got_set_resp on the physical port is FALSE. This means we haven't seen this port before - need to send PortInfoSet to it. 
*/ - if (send_set || - (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH - && p_physp->got_set_resp == FALSE) - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH - && port_num == 0 && p_physp->got_set_resp == FALSE) - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH - && port_num != 0 - && (p_mgr->p_subn->first_time_master_sweep == TRUE - || p_physp->got_set_resp == FALSE))) { + if (p_physp->got_set_resp == FALSE + || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num + && p_mgr->p_subn->first_time_master_sweep == TRUE)) + send_set = TRUE; + + if (send_set) { p_mgr->send_set_reqs = TRUE; status = osm_req_set(p_mgr->p_req, osm_physp_get_dr_path_ptr(p_physp), -- 1.5.3.4.206.g58ba4 From loo_juyt at web.de Sat Nov 10 06:41:12 2007 From: loo_juyt at web.de (Ju Lo) Date: Sat, 10 Nov 2007 15:41:12 +0100 Subject: [ofa-general] Attention: Winner, Message-ID: <1174011281@web.de> Attention: Winner, Congratulations to you as we bring to your notice the result of Euromillion Loteria Español 2007 promotions.We are happy to inform you that your email address have emerged a winner of Five hundred and fifty thousand euro(550, 000.00)Euros. NOTE: to file for your claim, please contact the claim department below, REDBOATH FINANCE AND SECURITIES,S.L Contact person: Mr.Javier Lopez Tell: +34 696 756 270 Email: milloooffice at aim.com [mailto:milloooffice at aim.com] Find below your promotion date, Reference and Batch numbers. Remember to quote these numbers in your correspondence with your claims agent(Mr.Javier Lopez ) PROMOTION DATE:08th of Nov. 2007. REFERENCE NUMBER: LSLUK/2031/8161/07 BATCH NUMBER: 15/051/IPD Note that you will be required to pay for the legalization of your winning prize. Congratulations once again from all our staff and thank you for being part of our promotions program. Sincerely Yours, MARIA LEO. (promotion secretary) Jetzt neu! Schützen Sie Ihren PC mit McAfee und WEB.DE. 3 Monate kostenlos testen. 
*http://www.pc-sicherheit.web.de/startseite/?mc=022220* [http://www.pc-sicherheit.web.de/startseite/?mc=022220] -------------- next part -------------- An HTML attachment was scrubbed... URL: From kliteyn at mellanox.co.il Fri Nov 9 21:16:58 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 10 Nov 2007 07:16:58 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-10:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-09 OpenSM git rev = Fri_Nov_9_14:01:33_2007 [8dcda9c86675275f373859b2ffdf2cc391f92283] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From sashak at voltaire.com Sun Nov 11 00:20:02 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 11 Nov 2007 10:20:02 +0200 Subject: [ofa-general] Re: [PATCH 3/7] Move nodenamemap out of infiniband-diags into libosmcomp In-Reply-To: <20071109082151.6efb0256.weiny2@llnl.gov> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> 
<20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> <20071107191603.490b3121.weiny2@llnl.gov> <20071109114642.GV6153@sashak.voltaire.com> <20071109082151.6efb0256.weiny2@llnl.gov> Message-ID: <20071111082002.GE8073@sashak.voltaire.com> On 08:21 Fri 09 Nov , Ira Weiny wrote: > On Fri, 9 Nov 2007 13:46:42 +0200 > Sasha Khapyorsky wrote: > > > On 19:16 Wed 07 Nov , Ira Weiny wrote: > > > > > > > > > Perhaps /ib-node-name-map? > > > > > > > > Or under /etc/ofa/ ? > > > > > > I think is appropriate. OFED can specify /etc/ofa if they wish. > > > > But then they cannot do it in configure time. > > > > I think I will apply the patch as is now. And later I will add something > > like --opensm-config-dir for all config files, not just for node name > > map. > > > > I don't understand what you mean? They can use: > > --sysconfdir=DIR read-only single-machine data [PREFIX/etc] > > This works for me. Great! > Perhaps I should change the other config files to use > sysconfdir? Yes, that's what I thought. But clearly it is a general issue and not related to node name map. > I have not done so because of legacy reasons, but I think it would > be more correct, no? I think so. Or maybe OpenSM's own parameter (which will be defaulted to $sysconfdir/opensm or $sysconfdir/ofa) - didn't think about it a lot yet. 
Sasha From vlad at lists.openfabrics.org Sun Nov 11 02:49:38 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 11 Nov 2007 02:49:38 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071111-0200 daily build status Message-ID: <20071111104939.23758E60A01@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.21.1 Passed on powerpc with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed 
on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Failed: Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: (Each undeclared identifier is reported only once /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: for each function it appears in.) /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: too many arguments to function 'dev_get_by_name' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': 
/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': 
/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1481: warning: assignment from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1495: warning: passing argument 1 of 'transport_class_unregister' from incompatible pointer type /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c: In function 'iscsi_transport_exit': /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi.c:1506: warning: passing argument 1 of 'transport_class_unregister' from incompatible pointer type make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi/scsi_transport_iscsi_f.o] Error 1 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/scsi] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From kliteyn at dev.mellanox.co.il Sun Nov 11 05:55:16 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 11 Nov 2007 15:55:16 +0200 Subject: [ofa-general] OpenSM: Bug in handling SubnAdmGet of PortInfoRecord Message-ID: <473709C4.6000001@dev.mellanox.co.il> Hi Sasha, I found a bug in the logic of handling SubnAdmGet method when 
attribute is PortInfoRecord: When the request comes without lid/base lid in the component mask here's what OpenSM does: foreach *port* in fabric get node of this port for each port of this node check if it fits the request if it does - add it to list More specifically, if there's a request with only a port_number in the component mask, here's what OpenSM will do: foreach *port* in fabric get node of this port get port with the required port_num on this node check if it fits the request if it does - add it to list So if there is a node with two ports in the same subnet, and there's a request for port_num 1, OpenSM will add the same port to the list twice. Any particular reason why we're scanning the ports this way? Perhaps OpenSM should iterate through nodes instead of ports if there's no request for a specific lid/guid? Alternatively, we can check the list for duplicated guids, but it will consume some runtime. Also, do you know any other places in the code where the same bug may happen? -- Yevgeny From swise at opengridcomputing.com Sun Nov 11 06:01:44 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Sun, 11 Nov 2007 08:01:44 -0600 Subject: [ofa-general] [PATCH 2.6.24] RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. In-Reply-To: <20071109152158.21493.24110.stgit@dell3.ogc.int> References: <20071109152158.21493.24110.stgit@dell3.ogc.int> Message-ID: <47370B48.5000104@opengridcomputing.com> Hey Vlad, Can you pull this in for 1.3 beta? Roland has merged it for 2.6.24, so it can be removed if we rebase and get it that way, but rping and most other rdma/iwarp apps are dead over chelsio without this fix. Please pull from: git://git.openfabrics.org/~swise/ofed-1.3 stevo Thanks, Steve. Steve Wise wrote: > RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. > > Attribute max_qp_init_rd_atom is not getting set. Version 1.0.4 of > librdmacm now validates the user's requested initiator and responder > resources vs the max supported by the device. 
Since iw_cxgb3 wasn't > setting this attribute (and it defaulted to 0), all rdma_connect()s fail > if there are initiator resources requested by the app. > > Signed-off-by: Steve Wise > --- > > drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + > 1 files changed, 1 insertions(+), 0 deletions(-) > > diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c > index f0c7775..b5436ca 100644 > --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c > +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c > @@ -1000,6 +1000,7 @@ static int iwch_query_device(struct ib_device *ibdev, > props->max_sge = dev->attr.max_sge_per_wr; > props->max_sge_rd = 1; > props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; > + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; > props->max_cq = dev->attr.max_cqs; > props->max_cqe = dev->attr.max_cqes_per_cq; > props->max_mr = dev->attr.max_mem_regs; > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From vlad at dev.mellanox.co.il Sun Nov 11 06:37:30 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 11 Nov 2007 16:37:30 +0200 Subject: [ewg] Re: [ofa-general] [PATCH 2.6.24] RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. In-Reply-To: <47370B48.5000104@opengridcomputing.com> References: <20071109152158.21493.24110.stgit@dell3.ogc.int> <47370B48.5000104@opengridcomputing.com> Message-ID: <473713AA.8090507@dev.mellanox.co.il> Steve Wise wrote: > Hey Vlad, > > Can you pull this in for 1.3 beta? Roland has merged it for 2.6.24, so > it can be removed if we rebase and get it that way, but rping and most > other rdma/iwarp apps are dead over chelsio without this fix. > > Please pull from: > > git://git.openfabrics.org/~swise/ofed-1.3 stevo > > Thanks, > > Steve. 
Hi Steve, Merged into ofed_1_3/linux-2.6.git ofed_kernel_2_6_24_rc1. Please check (ofed_kernel_2_6_24_rc1 branch): Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: (Each undeclared identifier is reported only once /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: for each function it appears in.) /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: error: too many arguments to function 'dev_get_by_name' make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 Regards, Vladimir From jim at mellanox.com Sun Nov 11 07:56:06 2007 From: jim at mellanox.com (Jim Mott) Date: Sun, 11 Nov 2007 07:56:06 -0800 Subject: [ofa-general] [PATCH 1/1] SDP - Fix bug where zcopy bcopy returns before data copied Message-ID: Mellanox regression testing for data correctness started failing after the recent addition of bzcopy. This was because sdp_sendmsg returned before all in-flight RC transfers completed. This allowed user space to modify buffers that had not been sent. A big oops. This fixes that bug. Small frame bandwidth is even worse now, but small frame latency is lower which is good. 
The default transfer size that triggers bzcopy has been increased to the bandwidth crossover point found in MLX4-MLX4 tests. More work will be required to find the best value for the release. Signed-off-by: Jim Mott --- Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp.h =================================================================== --- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp.h 2007-11-11 07:33:36.000000000 -0800 +++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp.h 2007-11-11 07:36:47.000000000 -0800 @@ -148,6 +148,8 @@ struct sdp_sock { unsigned rx_tail; unsigned mseq_ack; unsigned bufs; + unsigned max_bufs; /* Initial buffers offered by other side */ + unsigned min_bufs; /* Low water mark to wake senders */ int remote_credits; @@ -168,13 +170,28 @@ struct sdp_sock { int recv_frags; /* max skb frags in recv packets */ int send_frags; /* max skb frags in send packets */ - /* ZCOPY data */ - int zcopy_thresh; + /* BZCOPY data */ + int zcopy_thresh; + void *zcopy_context; struct ib_sge ibsge[SDP_MAX_SEND_SKB_FRAGS + 1]; struct ib_wc ibwc[SDP_NUM_WC]; }; +/* Context used for synchronous zero copy bcopy (BZCOY) */ +struct bzcopy_state { + unsigned char __user *u_base; + int u_len; + int left; + int page_cnt; + int cur_page; + int cur_offset; + int busy; + struct sdp_sock *ssk; + struct page **pages; +}; + + extern struct proto sdp_proto; extern struct workqueue_struct *sdp_workqueue; @@ -246,5 +263,6 @@ void sdp_remove_large_sock(struct sdp_so int sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size); void sdp_post_keepalive(struct sdp_sock *ssk); void sdp_start_keepalive_timer(struct sock *sk); +void sdp_bzcopy_write_space(struct sdp_sock *ssk); #endif Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_bcopy.c =================================================================== --- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_bcopy.c 2007-11-11 07:33:36.000000000 -0800 +++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_bcopy.c 2007-11-11 07:36:47.000000000 -0800 @@ -240,6 
+240,19 @@ struct sk_buff *sdp_send_completion(stru ssk->snd_una += TCP_SKB_CB(skb)->end_seq; ++ssk->tx_tail; + + /* TODO: AIO and real zcopy cdoe; add their context support here */ + if (ssk->zcopy_context && skb->data_len) { + struct bzcopy_state *bz; + struct sdp_bsdh *h; + + h = (struct sdp_bsdh *)skb->data; + if (h->mid == SDP_MID_DATA) { + bz = (struct bzcopy_state *)ssk->zcopy_context; + bz->busy--; + } + } + return skb; } @@ -668,8 +681,6 @@ static void sdp_handle_wc(struct sdp_soc wake_up(&ssk->wq); } } - - sk_stream_write_space(&ssk->isk.sk); } else { sdp_cnt(sdp_keepalive_probes_sent); @@ -688,11 +699,6 @@ static void sdp_handle_wc(struct sdp_soc return; } - if (likely(!wc->status)) { - sdp_post_recvs(ssk); - sdp_post_sends(ssk, 0); - } - if (ssk->time_wait && !ssk->isk.sk.sk_send_head && ssk->tx_head == ssk->tx_tail) { sdp_dbg(&ssk->isk.sk, "%s: destroy in time wait state\n", @@ -719,6 +725,21 @@ int sdp_poll_cq(struct sdp_sock *ssk, st ret = 0; } } while (n == SDP_NUM_WC); + + if (!ret) { + struct sock *sk = &ssk->isk.sk; + + sdp_post_recvs(ssk); + sdp_post_sends(ssk, 0); + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { + if (ssk->zcopy_context) + sdp_bzcopy_write_space(ssk); + else + sk_stream_write_space(&ssk->isk.sk); + } + } + return ret; } Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_cma.c =================================================================== --- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_cma.c 2007-11-11 07:33:36.000000000 -0800 +++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_cma.c 2007-11-11 07:36:47.000000000 -0800 @@ -234,16 +234,19 @@ int sdp_connect_handler(struct sock *sk, return rc; } - sdp_sk(child)->bufs = ntohs(h->bsdh.bufs); + sdp_sk(child)->max_bufs = sdp_sk(child)->bufs = ntohs(h->bsdh.bufs); + sdp_sk(child)->min_bufs = sdp_sk(child)->bufs / 4; sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh); sdp_sk(child)->send_frags = PAGE_ALIGN(sdp_sk(child)->xmit_size_goal) / PAGE_SIZE; 
sdp_resize_buffers(sdp_sk(child), ntohl(h->desremrcvsz)); - sdp_dbg(child, "%s bufs %d xmit_size_goal %d\n", __func__, + sdp_dbg(child, "%s bufs %d xmit_size_goal %d send trigger %d\n", + __func__, sdp_sk(child)->bufs, - sdp_sk(child)->xmit_size_goal); + sdp_sk(child)->xmit_size_goal, + sdp_sk(child)->min_bufs); id->context = child; sdp_sk(child)->id = id; @@ -276,15 +279,18 @@ static int sdp_response_handler(struct s return 0; h = event->param.conn.private_data; - sdp_sk(sk)->bufs = ntohs(h->bsdh.bufs); + sdp_sk(sk)->max_bufs = sdp_sk(sk)->bufs = ntohs(h->bsdh.bufs); + sdp_sk(sk)->min_bufs = sdp_sk(sk)->bufs / 4; sdp_sk(sk)->xmit_size_goal = ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh); sdp_sk(sk)->send_frags = PAGE_ALIGN(sdp_sk(sk)->xmit_size_goal) / PAGE_SIZE; - sdp_dbg(sk, "%s bufs %d xmit_size_goal %d\n", __func__, + sdp_dbg(sk, "%s bufs %d xmit_size_goal %d send trigger %d\n", + __func__, sdp_sk(sk)->bufs, - sdp_sk(sk)->xmit_size_goal); + sdp_sk(sk)->xmit_size_goal, + sdp_sk(sk)->min_bufs); ib_req_notify_cq(sdp_sk(sk)->cq, IB_CQ_NEXT_COMP); Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== --- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-11 07:33:36.000000000 -0800 +++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-11 07:36:47.000000000 -0800 @@ -74,16 +74,6 @@ unsigned int csum_partial_copy_from_user #include "sdp.h" #include -struct bzcopy_state { - unsigned char __user *u_base; - int u_len; - int left; - int page_cnt; - int cur_page; - int cur_offset; - struct page **pages; -}; - MODULE_AUTHOR("Michael S. 
Tsirkin"); MODULE_DESCRIPTION("InfiniBand SDP module"); MODULE_LICENSE("Dual BSD/GPL"); @@ -141,7 +131,7 @@ static unsigned int sdp_keepalive_time = module_param_named(sdp_keepalive_time, sdp_keepalive_time, uint, 0644); MODULE_PARM_DESC(sdp_keepalive_time, "Default idle time in seconds before keepalive probe sent."); -static int sdp_zcopy_thresh = 2048; +static int sdp_zcopy_thresh = 8192; module_param_named(sdp_zcopy_thresh, sdp_zcopy_thresh, int, 0644); MODULE_PARM_DESC(sdp_zcopy_thresh, "Zero copy send threshold; 0=0ff."); @@ -1213,9 +1203,12 @@ void sdp_push_one(struct sock *sk, unsig { } -static struct bzcopy_state *sdp_bz_cleanup(struct bzcopy_state *bz) +static inline struct bzcopy_state *sdp_bz_cleanup(struct bzcopy_state *bz) { int i; + struct sdp_sock *ssk = (struct sdp_sock *)bz->ssk; + + ssk->zcopy_context = NULL; if (bz->pages) { for (i = bz->cur_page; i < bz->page_cnt; i++) @@ -1266,6 +1259,8 @@ static struct bzcopy_state *sdp_bz_setup bz->u_len = len; bz->left = len; bz->cur_offset = addr & ~PAGE_MASK; + bz->busy = 0; + bz->ssk = ssk; bz->page_cnt = PAGE_ALIGN(len + bz->cur_offset) >> PAGE_SHIFT; bz->pages = kcalloc(bz->page_cnt, sizeof(struct page *), GFP_KERNEL); @@ -1287,6 +1282,7 @@ static struct bzcopy_state *sdp_bz_setup } up_write(&current->mm->mmap_sem); + ssk->zcopy_context = bz; return bz; @@ -1398,6 +1394,7 @@ static inline int sdp_bzcopy_get(struct int this_page, left; struct sdp_sock *ssk = sdp_sk(sk); + /* Push the first chunk to page align all following - TODO: review */ if (skb_shinfo(skb)->nr_frags == ssk->send_frags) { sdp_mark_push(ssk, skb); return SDP_NEW_SEG; @@ -1449,9 +1446,110 @@ static inline int sdp_bzcopy_get(struct } bz->left -= copy; + bz->busy++; return copy; } +static inline int slots_free(struct sdp_sock *ssk) +{ + int min_free; + + min_free = SDP_TX_SIZE - (ssk->tx_head - ssk->tx_tail); + if (ssk->bufs < min_free) + min_free = ssk->bufs; + min_free -= (min_free < SDP_MIN_BUFS) ? 
min_free : SDP_MIN_BUFS; + + return min_free; +}; + +/* like sk_stream_memory_free - except measures remote credits */ +static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk) +{ + struct bzcopy_state *bz = (struct bzcopy_state *)ssk->zcopy_context; + + BUG_ON(!bz); + return slots_free(ssk) > bz->busy; +} + +/* like sk_stream_wait_memory - except waits on remote credits */ +static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p) +{ + struct sock *sk = &ssk->isk.sk; + struct bzcopy_state *bz = (struct bzcopy_state *)ssk->zcopy_context; + int err = 0; + long vm_wait = 0; + long current_timeo = *timeo_p; + DEFINE_WAIT(wait); + + BUG_ON(!bz); + + if (sdp_bzcopy_slots_avail(ssk)) + current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; + + while (1) { + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + + if (unlikely(sk->sk_err | (sk->sk_shutdown & SEND_SHUTDOWN))) { + err = -EPIPE; + break; + } + + if (unlikely(!*timeo_p)) { + err = -EAGAIN; + break; + } + + if (unlikely(signal_pending(current))) { + err = sock_intr_errno(*timeo_p); + break; + } + + clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + if (sdp_bzcopy_slots_avail(ssk)) + break; + + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + sk->sk_write_pending++; + sk_wait_event(sk, &current_timeo, + sdp_bzcopy_slots_avail(ssk) && vm_wait); + sk->sk_write_pending--; + + if (vm_wait) { + vm_wait -= current_timeo; + current_timeo = *timeo_p; + if (current_timeo != MAX_SCHEDULE_TIMEOUT && + (current_timeo -= vm_wait) < 0) + current_timeo = 0; + vm_wait = 0; + } + *timeo_p = current_timeo; + } + + finish_wait(sk->sk_sleep, &wait); + return err; +} + +/* like sk_stream_write_space - execpt measures remote credits */ +void sdp_bzcopy_write_space(struct sdp_sock *ssk) +{ + struct sock *sk = &ssk->isk.sk; + struct socket *sock = sk->sk_socket; + + if (ssk->bufs >= ssk->min_bufs && + ssk->tx_head == ssk->tx_tail && + sock != NULL) { + 
clear_bit(SOCK_NOSPACE, &sock->flags); + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible(sk->sk_sleep); + if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) + sock_wake_async(sock, 2, POLL_OUT); + } +} + /* Like tcp_sendmsg */ /* TODO: check locking */ @@ -1510,11 +1608,20 @@ int sdp_sendmsg(struct kiocb *iocb, stru (copy = size_goal - skb->len) <= 0) { new_segment: - /* Allocate new segment. If the interface is SG, - * allocate skb fitting to single page. + /* + * Allocate a new segment + * For bcopy, we stop sending once we have + * SO_SENDBUF bytes in flight. For bzcopy + * we stop sending once we run out of remote + * receive credits. */ - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; + if (bz) { + if (!sdp_bzcopy_slots_avail(ssk)) + goto wait_for_sndbuf; + } else { + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; + } skb = sk_stream_alloc_pskb(sk, select_size(sk, ssk), 0, sk->sk_allocation); @@ -1586,7 +1693,9 @@ wait_for_memory: if (copied) sdp_push(sk, ssk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + err = (bz) ? 
sdp_bzcopy_wait_memory(ssk, &timeo) : + sk_stream_wait_memory(sk, &timeo); + if (err) goto do_error; mss_now = sdp_current_mss(sk, !(flags&MSG_OOB)); @@ -1595,12 +1704,30 @@ wait_for_memory: } out: - if (bz) - bz = sdp_bz_cleanup(bz); - if (copied) + if (copied) { sdp_push(sk, ssk, flags, mss_now, ssk->nonagle); - if (size > send_poll_thresh) - poll_send_cq(sk); + if (bz) { + int max_retry; + + /* Wait for in-flight sends; should be quick */ + for (max_retry = 0; max_retry < 10000; max_retry++) { + if (!bz->busy) + break; + + poll_send_cq(sk); + } + + if (bz->busy) + sdp_warn(sk, + "Could not reap %d in-flight sends\n", + bz->busy); + + bz = sdp_bz_cleanup(bz); + } else + if (size > send_poll_thresh) + poll_send_cq(sk); + } + release_sock(sk); return copied; From sashak at voltaire.com Sun Nov 11 09:02:02 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 11 Nov 2007 19:02:02 +0200 Subject: [ofa-general] Re: OpenSM: Bug in handling SubnAdmGet of PortInfoRecord In-Reply-To: <473709C4.6000001@dev.mellanox.co.il> References: <473709C4.6000001@dev.mellanox.co.il> Message-ID: <20071111170202.GA9776@sashak.voltaire.com> Hi Yevgeny, On 15:55 Sun 11 Nov , Yevgeny Kliteynik wrote: > > I found a bug in the logic of handling SubnAdmGet method when > attribute is PortInfoRecord: > > When the requests comes without lid/base lid in the component > mask here's what OpenSM does: > > foreach *port* in fabric > get node of this port > for each port of this node > check if it fits the request > if it does - add it to list > > More specifically, if there's a request with only a port_number > in the component mask, here's what OpenSM will do: > > foreach *port* in fabric > get node of this port > get port with the required port_num on this node > check if it fits the request > if it does - add it to list > > So if there is a node with two ports in the same subnet, and > there's a request for port_num 1, OpenSM will add the same > port to the list twice. 
Looks like a "typical" bug (similar broken logic was used with SubnetUp report printing). > Any particular reason why we're scanning the ports this way? Don't know, I didn't work with OpenSM then. > Perhaps OpenSM should iterate through nodes instead of ports > if there's no request for a specific lid/guid? Yes, I agree. Will you care about patch? > Alternatively, we can check the list for duplicated guids, but > it will consume some runtime. > > Also, do you know any other places in the where the same bug may happen? This seems to be a "typical" OpenSM bug, I guess there could be more similar places - we need to search. Sasha From supermechanical at fergusonroad.com Sun Nov 11 09:01:53 2007 From: supermechanical at fergusonroad.com (Cyrus Campbell) Date: Sun, 11 Nov 2007 19:01:53 +0200 Subject: [ofa-general] Autodesk 3D Studio Max 9 for XP for 149, Retails @ 6720 (You save 6590) Message-ID: <000001c82484$24199b00$0100007f@localhost> adobe dreamweaver cs3 - 59 sonic scenarist 3.0 - 49 microsoft visio 2007 professional - 39 sony vegas 6 - 69 sonic scenarist 3.0 - 49 adobe photoshop cs2 v 9.0 - 69 systran 6 premium translator - 159 luxology modo 301 for mac - 129 type cheapxpsoft7 .com in Internet Explorer From sashak at voltaire.com Sun Nov 11 09:33:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 11 Nov 2007 19:33:35 +0200 Subject: [ofa-general] Re: Lost out-of-svc trap notifications during SM handover In-Reply-To: <7C1D552561AF0544ACC7CF6F10E4966EAFDCCF@chronus.3leafnetworks.corp> References: <20071108235054.014FFE2807F@openfabrics.org> <7C1D552561AF0544ACC7CF6F10E4966EAFDCCF@chronus.3leafnetworks.corp> Message-ID: <20071111173335.GB9776@sashak.voltaire.com> Hi Lan, On 19:01 Fri 09 Nov , Lan Tran wrote: > > I'm seeing a problem with missing out-of-svc trap notifications when a Master SM port is disabled. 
I'm taking a look into it now, but if you have any pointers or ideas of what might be going on or how to resolve it, that would be much appreciated! > > I am subscribing to be informed of out-of-service trap events (i.e. trap 65), registering my own callback. When I disable an IB port of a remote node that is running the Standby SM, then, as expected, my trap callback function is called. But when I disable the IB port of the remote node that is the Master SM, my trap 65 callback is never called. From looking at the opensm logs it seems what is happening is: > 1) I disable port running Master SM > 2) SM handover starts > --> during Standby SM's heavy sweep, osm_drop_mgr_process() detects that the old Master SM port is down ... but at this point no subscribers to be informed because they are all subscribed with the old Master SM > ---> Standby SM enters Master SM state, so now new Master SM > 3) Several seconds later, I subscribe with the new Master SM for trap 65 notification (I do this whenever I receive IB_EVENT_CLIENT_REREGISTER event), but this is too late as the report notice for the dropped old Master SM port already occurred earlier. Right, it is how things work now. Stand-by OpenSM doesn't track subnet changes, so it will not send any notices on first sweep when becoming master (OpenSM which is doing master->stand-by transition sends, but in your case its port is disconnected). > It seems I need to somehow make sure that I have subscribed for a trap 65 notification with the to-be new Master SM when it decides to report that the old Master SM port goes down. Not quite sure if this is possible though :) This will not help. OpenSM doesn't send in/out service traps at first sweep. I don't see an easy solution here - we will need replicate SM and SA databases somehow. OTOH even then a trap can be lost due to transmission errors, etc.. 
Sasha From dledford at redhat.com Sun Nov 11 19:27:05 2007 From: dledford at redhat.com (Doug Ledford) Date: Sun, 11 Nov 2007 22:27:05 -0500 Subject: [ofa-general] Re: [ewg] New features for OFED 1.4 In-Reply-To: <4731E2FA.1060109@mellanox.co.il> References: <47308DF2.70409@mellanox.co.il> <20071107074923.GB3605@cuprite.pathscale.com> <4731E2FA.1060109@mellanox.co.il> Message-ID: <1194838025.9602.3.camel@firewall.xsintricity.com> On Wed, 2007-11-07 at 18:08 +0200, Tziporet Koren wrote: > Johann George wrote: > > Tziporet, > > > > > >> So we should assess how close we are to that goal and how we can put > >> OFED out of business. > >> > > > > Could you cover this topic during your session on "OFED 1.3: Procedure > > and Review"? It seems that this would be the right place to bring it > > up and we can attempt to extend your session to allow for it. > > > > > I think its more appropriate in the OFED 1.4 session > But maybe instead of talking about 1.3 status (which everybody can see > from the weekly meeting reports) I should talk about OFED in the future > > However I need some input from the distros Splitting the RPMs up was a *huge* step in the right direction. I think my last emails on the topic relayed why we aren't able to just directly import spec files over and over again, so once we have released tarballs and a single spec import (well, if ever on the spec import, a lot of times we just write our own that does what we want), then we are good. Beyond that, future thinking, is just that a collection of known interoperable tarballs is best for us. So, as Roland has mentioned many time, what's needed from me is a release, not a distribution. And the release need only consist of: dapl-2.0.3 + dapl-1.2.2 + ibverbs-1.1 + mthca-1.0.4 + blah, blah, blah are all known to work properly together. From that point, I just grab the appropriate tarballs that contain the releases mentioned, and I build them all through the build system and that's our release cycle. 
> Tziporet > > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg -- Doug Ledford GPG KeyID: CFBFF194 http://people.redhat.com/dledford Infiniband specific RPMs available at http://people.redhat.com/dledford/Infiniband -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 189 bytes Desc: This is a digitally signed message part URL: From dledford at redhat.com Sun Nov 11 19:27:58 2007 From: dledford at redhat.com (Doug Ledford) Date: Sun, 11 Nov 2007 22:27:58 -0500 Subject: [ofa-general] Re: [ewg] [ANNOUNCE] management tarballs release In-Reply-To: <20071106195704.GF6945@sashak.voltaire.com> References: <20071106195704.GF6945@sashak.voltaire.com> Message-ID: <1194838078.9602.5.camel@firewall.xsintricity.com> On Tue, 2007-11-06 at 21:57 +0200, Sasha Khapyorsky wrote: > Hi, > > There is a new release of the management (OpenSM and infiniband > diagnostics) tarballs available in: > > http://www.openfabrics.org/downloads/management/ > > md5sum: > > e270309f2fb0f948b098f63cb1f13bfb infiniband-diags-1.3.3.tar.gz > 25b9491f90c7e851f5bafd556bcac5f6 libibcommon-1.0.6.tar.gz > 0fa433e69cb04559efbc76a7157cc700 libibmad-1.1.3.tar.gz > b4297b00f3999c951f8b98df6f5e6b19 libibumad-1.1.4.tar.gz > 979b05d0534b1ee5f4a2eb12576a76e7 opensm-3.1.6.tar.gz Thank you, this is *exactly* what I need ;-) -- Doug Ledford GPG KeyID: CFBFF194 http://people.redhat.com/dledford Infiniband specific RPMs available at http://people.redhat.com/dledford/Infiniband -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 189 bytes Desc: This is a digitally signed message part URL: From keshetti85-student at yahoo.co.in Mon Nov 12 00:45:12 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Mon, 12 Nov 2007 14:15:12 +0530 Subject: [ofa-general] [openSM] Pkey index and Pkey value Message-ID: <829ded920711120045s1c03e008k5c8fa3034744601b@mail.gmail.com> If I load the below partition configuration file using openSM, Default=0x7fff : ALL, SELF=full ; Partition1 = 0x0001 : 0x1234=full, 0x5678=limi, 0x9012 ; Partition2 = 0x0002 : 0x2468=full, 0x5678=limi, 0x9012 ; I am finding the pkey value 0x0002 at the pkey index 2 in the ports with guids 0x5678 and 0x9012 where as port with guid 0x2468 has the same pkey value i.e.0x0002 at different pkey index i.e. 1. Shouldn't the pkey value 0x0002 be loaded at the pkey index 2 in the port with guid 0x2468? regards, Mahesh From erkke at bradigans.com Mon Nov 12 02:18:47 2007 From: erkke at bradigans.com (Glen Ledbetter) Date: Mon, 12 Nov 2007 13:18:47 +0300 Subject: [ofa-general] Experience masturbation like never before. Message-ID: <01c8252e$8eae4c90$2e1b7a5b@erkke> No other sex toy will give you such a real feel of tight soft and warm pussy as the Personal Puss! as it is designed and made from super stretchable and soft materials to ensure the best possible sensations of a good fuck. It doesn't hurt and it can't cause allergy. Order your Personal Puss! with us and you'll get a sex toy that feels like tight, soft, warm and wet pussy as it is specially made from a super soft silicone which is textured and filled with an innovative lubricant to ensure the feel of a real thing. http://qddckj.com It gets the job done! 
From vlad at lists.openfabrics.org Mon Nov 12 02:57:48 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 12 Nov 2007 02:57:48 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071112-0200 daily build status Message-ID: <20071112105748.4DCCAE60886@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.18 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.17 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.18 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.12 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.15 Passed on 
ppc64 with linux-2.6.16 Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: From kliteyn at mellanox.co.il Sun Nov 11 21:16:18 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 12 Nov 2007 07:16:18 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-12:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-11 OpenSM git rev = Fri_Nov_9_14:01:33_2007 [8dcda9c86675275f373859b2ffdf2cc391f92283] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From 
hal.rosenstock at gmail.com Mon Nov 12 06:24:01 2007 From: hal.rosenstock at gmail.com (Hal Rosenstock) Date: Mon, 12 Nov 2007 09:24:01 -0500 Subject: [ofa-general] [openSM] Pkey index and Pkey value In-Reply-To: <829ded920711120045s1c03e008k5c8fa3034744601b@mail.gmail.com> References: <829ded920711120045s1c03e008k5c8fa3034744601b@mail.gmail.com> Message-ID: On 11/12/07, Keshetti Mahesh wrote: > If I load the below partition configuration file using openSM, > > Default=0x7fff : ALL, SELF=full ; > Partition1 = 0x0001 : 0x1234=full, 0x5678=limi, 0x9012 ; > Partition2 = 0x0002 : 0x2468=full, 0x5678=limi, 0x9012 ; > > I am finding the pkey value 0x0002 at the pkey index 2 in the ports > with guids 0x5678 and 0x9012 where as port with guid 0x2468 has the > same pkey value i.e.0x0002 at different pkey index i.e. 1. > Shouldn't the pkey value 0x0002 be loaded at the pkey index 2 in the > port with guid 0x2468? No; there is no requirement for a pkey value to be in the same index in different ports. > > regards, > Mahesh > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From dotanb at dev.mellanox.co.il Mon Nov 12 06:43:26 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Mon, 12 Nov 2007 16:43:26 +0200 Subject: [ofa-general] IB post send lost. In-Reply-To: <20071108172050.GA12397@vt.edu> References: <20071108002831.GA8339@vt.edu> <4732A7AD.4020405@dev.mellanox.co.il> <20071108061910.GA9863@vt.edu> <473323A3.1020500@dev.mellanox.co.il> <20071108172050.GA12397@vt.edu> Message-ID: <4738668E.3010606@dev.mellanox.co.il> Hi. how much times does it take to reproduce this failure? thanks Dotan Bharath Ramesh wrote: > * Dotan Barak (dotanb at dev.mellanox.co.il) wrote: > >> Hi. >> >> i need some more info. >> >> Which IB HW do you use? 
>> (you can get this info from ibv_devinfo) >> > > The IB HW used are the Mellanox Cougar Cards. > > output of ibv_devinfo: > hca_id: mthca0 > fw_ver: 3.5.0 > node_guid: 0002:c901:08fe:76a0 > sys_image_guid: 0002:c901:08fe:76a3 > vendor_id: 0x02c9 > vendor_part_id: 23108 > hw_ver: 0xA1 > board_id: MT_0000000001 > phys_port_cnt: 2 > > >> Which IB SW do you use? >> (you can get this info from ofed_info) >> > > The IB SW I am using is OFED 1.2. The linux kernel used are > 2.6.21.1-xserve > > I am not sure if this might help. Basically every time I send a message > I wait for an ack to be received. I wait on a pthread_cond_wait. Since > the message gets dropped my thread is blocked on pthread_cond_wait > forever. The other thread which occasionally sends messages is still > able to send/receive messages over the QP. Block for the ack and receive > the ack while this thread never receives the ack because of the dropped > message. To verify if the messages were being dropped I printed every > single message being sent and received on either ends. The dropped > message is sent but the receiver never receives it. > > Thanks, > > Bharath > > >> Dotan >> >> Bharath Ramesh wrote: >> >>> * Dotan Barak (dotanb at dev.mellanox.co.il) wrote: >>> >>> >>>> Hi. >>>> >>>> Bharath Ramesh wrote: >>>> >>>> >>>>> I have a multi-threaded application. My application has its own message >>>>> exchange protocol, it uses IB as the communication layer. I send a lot >>>>> of messages which are normally of the order of few ten thousands. After >>>>> sometime it seems like one message from one of the node is lost. I am >>>>> using RC QP type. This causes the thread to deadlock. The other threads >>>>> are still able to communicate exchanging messages without any problem >>>>> over the same QP. Both ends are using SRQs and there is sufficient >>>>> buffers posted so that I dont run out of buffers. I even tried doubling >>>>> the buffers posted I see the same problem again. One message being lost. 
>>>>> The ibv_post_send doesnt report any error. I am trying to get this done >>>>> for a conference deadline early next week. I would really appreciate any >>>>> help in suggesting any possibilities which might cause the message to be >>>>> dropped without any error being returned. >>>>> >>>>> >>>> If you don't have any bugs in your code, the described scenario should >>>> work. >>>> >>>> I need some more info in order to try to help you: >>>> >>>> Do you use the same QP from several threads (and post send from all of >>>> them)? >>>> >>>> >>> Yes, I use the same the QP from three threads. The application has close >>> to 5 threads. The receives are handled by a single thread. Most of the >>> sends are posted by a single thread. Occasionally a third thread posts a >>> few sends to the QP. The same QP is also used for RDMA Writes. Majority >>> of the RDMA Writes are also performed by the same thread that posts >>> majority of the send messages. >>> >>> >>> >>>> How do you poll the CQ (several threads/one)? >>>> >>>> >>> I have two CQs, one for receive and the other for send. The receive CQ >>> is polled only by the receive thread. The send CQ is polled by the three >>> threads. Occasionally by the receiver thread to clear out an send CQEs >>> because I use IBV_SEND_SIGNALED for every 16 IBV_SEND_INLINEs. Otherwise >>> the send CQ is polled by the single thread that does majority of the >>> sends. Occasionally the third thread when doing a send might poll the >>> send CQ as well for completion CQE in case of a RDMA Write. >>> >>> >>> >>>> which HW/SW do you use? >>>> >>>> >>> I am using Yellow Dog Linux 5.0 on Apple Xserves. 
>>> >>> Thanks, >>> >>> Bharath >>> >>> --- >>> Bharath Ramesh >>> http://people.cs.vt.edu/~bramesh >>> >>> >>> >>> > > --- > Bharath Ramesh http://people.cs.vt.edu/~bramesh > > > From swise at opengridcomputing.com Mon Nov 12 08:49:46 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 12 Nov 2007 10:49:46 -0600 Subject: [ewg] Re: [ofa-general] [PATCH 2.6.24] RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. In-Reply-To: <473713AA.8090507@dev.mellanox.co.il> References: <20071109152158.21493.24110.stgit@dell3.ogc.int> <47370B48.5000104@opengridcomputing.com> <473713AA.8090507@dev.mellanox.co.il> Message-ID: <4738842A.3010205@opengridcomputing.com> I haven't submitted a rhel4u5 backport yet for cxgb3. I'll do this today. Stay tuned. Steve. Vladimir Sokolovsky wrote: > Steve Wise wrote: >> Hey Vlad, >> >> Can you pull this in for 1.3 beta? Roland has merged it for 2.6.24, >> so it can be removed if we rebase and get it that way, but rping and >> most other rdma/iwarp apps are dead over chelsio without this fix. >> >> Please pull from: >> >> git://git.openfabrics.org/~swise/ofed-1.3 stevo >> >> Thanks, >> >> Steve. > > Hi Steve, > Merged into ofed_1_3/linux-2.6.git ofed_kernel_2_6_24_rc1. > > Please check (ofed_kernel_2_6_24_rc1 branch): > Build failed on x86_64 with linux-2.6.9-55.ELsmp > Log: > /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: > error: (Each undeclared identifier is reported only once > /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: > error: for each function it appears in.) 
> /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: > error: too many arguments to function 'dev_get_by_name' > make[4]: *** > [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.o] > Error 1 > make[3]: *** > [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3] > Error 2 > make[2]: *** > [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] > Error 2 > make[1]: *** > [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check] > Error 2 > make[1]: Leaving directory > `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' > make: *** [kernel] Error 2 > > Regards, > Vladimir From bs at q-leap.de Mon Nov 12 10:13:19 2007 From: bs at q-leap.de (Bernd Schubert) Date: Mon, 12 Nov 2007 19:13:19 +0100 Subject: [ofa-general] opensm --lcm Message-ID: <200711121913.19719.bs@q-leap.de> Hi, so far we always did run opensm without the --lmc option, but just recently I read in opensm's manpage about it. This option specifies the subnet's LMC value. The number of LIDs assigned to each port is 2^LMC. The LMC value must be in the range 0-7. LMC values > 0 allow multiple paths between ports. LMC values > 0 should only be used if the subnet topology actually provides multiple paths between ports, i.e. multiple interconnects between switches. Without -l, OpenSM defaults to LMC = 0, which allows one path between any two ports. In one of our configurations we do have a mts2400 (master) switch and connected to it are two mts14400 switches (each with 3 connections to the the mts2400). I think from the description of the opensm manpage we need to give the "--lmc" to make use of the 3 connections from each mts14400 switch to the mts2400 switch, don't we? 
Furthermore, from manpage I would think we even need this option for proper inter-communication between the switch modules? Thanks in advance, Bernd -- Bernd Schubert Q-Leap Networks GmbH From johann.george at qlogic.com Mon Nov 12 10:51:22 2007 From: johann.george at qlogic.com (Johann George) Date: Mon, 12 Nov 2007 10:51:22 -0800 Subject: [ofa-general] OpenFabrics Developer's Summit: this Thursday and Friday Message-ID: <20071112185122.GA24885@cuprite.pathscale.com> A reminder that the OpenFabrics Developer's Summit is being held this Thursday and Friday at the Boomtown Hotel in Reno, Nevada. So far, we have almost 60 registrants from 27 organizations. To register: http://www.acteva.com/booking.cfm?bevaid=143964 Registration is $195 with a student rate of $95. Dinner will be provided on Thursday as well as breakfast and lunch on Friday. Boomtown Hotel: 2100 Garson Road Reno, Nevada 89509 (800) 648-3790 (775) 345-6000 Directions to Boomtown Hotel from Convention Center: * Head north on S Virginia St toward W Peckham Ln (0.6 miles) * Turn right at E Moana Ln (0.7 miles, 2 minutes) * Turn left onto the ramp to Susanville (0.1 miles) * Merge onto US-395 N (2.8 mi, 3 minutes) * Take exit 68 to merge onto I-80 W toward Reno (10.5 miles, 11 minutes) * Take exit 4 for Boomtown/Garson Rd toward Boomtown (0.1 miles) * Keep right at the fork, follow signs for I-80 E/Reno (269 feet) * Turn right at Boomtown/Garson Rd (0.3 miles) * Slight right at Garson Rd/NF-100 (0.9 miles) If you are presenting, remember to send your slides to Jeff Becker . Thanks. 
Johann From hrosenstock at xsigo.com Mon Nov 12 11:24:22 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 12 Nov 2007 11:24:22 -0800 Subject: [ofa-general] Re: [PATCH] opensm: PortInfo set decision flow simplification In-Reply-To: <20071110145145.GF6493@sashak.voltaire.com> References: <20071110145145.GF6493@sashak.voltaire.com> Message-ID: <1194895462.6542.52.camel@hrosenstock-ws.xsigo.com> Hi Sasha, On Sat, 2007-11-10 at 16:51 +0200, Sasha Khapyorsky wrote: > This simplifies (but doesn't change) flow for PortInfo set decision in > lid and link mgrs - mostly to make the code more readable. > > Signed-off-by: Sasha Khapyorsky > --- > opensm/opensm/osm_lid_mgr.c | 9 +++++---- > opensm/opensm/osm_link_mgr.c | 15 ++++++--------- > 2 files changed, 11 insertions(+), 13 deletions(-) > > diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c > index 9527185..9da6fcf 100644 > --- a/opensm/opensm/osm_lid_mgr.c > +++ b/opensm/opensm/osm_lid_mgr.c > @@ -1184,9 +1184,11 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, > 3. got_set_resp on the physical port is FALSE. This means we haven't seen > this port before and we need to send Set of PortInfo to it. 
> */ > - if (send_set || p_mgr->p_subn->first_time_master_sweep == TRUE || > - p_physp->got_set_resp == FALSE) { > + if (p_mgr->p_subn->first_time_master_sweep == TRUE || > + p_physp->got_set_resp == FALSE) > + send_set = TRUE; > > + if (send_set) { > p_mgr->send_set_reqs = TRUE; > status = osm_req_set(p_mgr->p_req, > osm_physp_get_dr_path_ptr(p_physp), > @@ -1199,8 +1201,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, > > Exit: > OSM_LOG_EXIT(p_mgr->p_log); > - return (send_set || p_mgr->p_subn->first_time_master_sweep == TRUE || > - p_physp->got_set_resp == FALSE); > + return send_set; > } > > /********************************************************************** > diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c > index 19d03d9..b151c76 100644 > --- a/opensm/opensm/osm_link_mgr.c > +++ b/opensm/opensm/osm_link_mgr.c > @@ -389,15 +389,12 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, > b. got_set_resp on the physical port is FALSE. This means we haven't > seen this port before - need to send PortInfoSet to it. > */ > - if (send_set || > - (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH > - && p_physp->got_set_resp == FALSE) > - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH > - && port_num == 0 && p_physp->got_set_resp == FALSE) > - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH > - && port_num != 0 > - && (p_mgr->p_subn->first_time_master_sweep == TRUE > - || p_physp->got_set_resp == FALSE))) { > + if (p_physp->got_set_resp == FALSE > + || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num > + && p_mgr->p_subn->first_time_master_sweep == TRUE)) This doesn't look logically the same to me. I think it sets send_set in some cases where it wasn't before. 
-- Hal > + send_set = TRUE; > + > + if (send_set) { > p_mgr->send_set_reqs = TRUE; > status = osm_req_set(p_mgr->p_req, > osm_physp_get_dr_path_ptr(p_physp), From swise at opengridcomputing.com Mon Nov 12 11:30:07 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 12 Nov 2007 13:30:07 -0600 Subject: [ewg] Re: [ofa-general] [PATCH 2.6.24] RDMA/cxgb3: Set the max_qp_init_rd_atom attribute. In-Reply-To: <473713AA.8090507@dev.mellanox.co.il> References: <20071109152158.21493.24110.stgit@dell3.ogc.int> <47370B48.5000104@opengridcomputing.com> <473713AA.8090507@dev.mellanox.co.il> Message-ID: <4738A9BF.3040605@opengridcomputing.com> Vlad, I added an rhel4u5 backport for cxgb3. Please pull from: git://git.openfabrics.org/~swise/ofed-1.3 stevo Thanks, Steve. Vladimir Sokolovsky wrote: > Steve Wise wrote: >> Hey Vlad, >> >> Can you pull this in for 1.3 beta? Roland has merged it for 2.6.24, >> so it can be removed if we rebase and get it that way, but rping and >> most other rdma/iwarp apps are dead over chelsio without this fix. >> >> Please pull from: >> >> git://git.openfabrics.org/~swise/ofed-1.3 stevo >> >> Thanks, >> >> Steve. > > Hi Steve, > Merged into ofed_1_3/linux-2.6.git ofed_kernel_2_6_24_rc1. > > Please check (ofed_kernel_2_6_24_rc1 branch): > Build failed on x86_64 with linux-2.6.9-55.ELsmp > Log: > /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: > error: (Each undeclared identifier is reported only once > /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: > error: for each function it appears in.) 
> /home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.c:921: > error: too many arguments to function 'dev_get_by_name' > make[4]: *** > [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3/cxio_hal.o] > Error 1 > make[3]: *** > [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/hw/cxgb3] > Error 2 > make[2]: *** > [/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] > Error 2 > make[1]: *** > [_module_/home/vlad/tmp/ofa_1_3_kernel-20071111-0200_linux-2.6.9-55.ELsmp_x86_64_check] > Error 2 > make[1]: Leaving directory > `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' > make: *** [kernel] Error 2 > > Regards, > Vladimir From sashak at voltaire.com Mon Nov 12 12:09:13 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 12 Nov 2007 22:09:13 +0200 Subject: [ofa-general] Re: [PATCH] opensm: PortInfo set decision flow simplification In-Reply-To: <1194895462.6542.52.camel@hrosenstock-ws.xsigo.com> References: <20071110145145.GF6493@sashak.voltaire.com> <1194895462.6542.52.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071112200913.GB8289@sashak.voltaire.com> On 11:24 Mon 12 Nov , Hal Rosenstock wrote: > > diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c > > index 19d03d9..b151c76 100644 > > --- a/opensm/opensm/osm_link_mgr.c > > +++ b/opensm/opensm/osm_link_mgr.c > > @@ -389,15 +389,12 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, > > b. got_set_resp on the physical port is FALSE. This means we haven't > > seen this port before - need to send PortInfoSet to it. 
> > */ > > - if (send_set || > > - (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH > > - && p_physp->got_set_resp == FALSE) > > - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH > > - && port_num == 0 && p_physp->got_set_resp == FALSE) > > - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH > > - && port_num != 0 > > - && (p_mgr->p_subn->first_time_master_sweep == TRUE > > - || p_physp->got_set_resp == FALSE))) { > > + if (p_physp->got_set_resp == FALSE > > + || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num > > + && p_mgr->p_subn->first_time_master_sweep == TRUE)) > > This doesn't look logically the same to me. I think it sets send_set in > some cases where it wasn't before. Could you elaborate? When? Sasha From sashak at voltaire.com Mon Nov 12 12:21:32 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 12 Nov 2007 22:21:32 +0200 Subject: [ofa-general] opensm --lcm In-Reply-To: <200711121913.19719.bs@q-leap.de> References: <200711121913.19719.bs@q-leap.de> Message-ID: <20071112202132.GC8289@sashak.voltaire.com> Hi, On 19:13 Mon 12 Nov , Bernd Schubert wrote: > > so far we always did run opensm without the --lmc option, but just recently I > read in opensm's manpage about it. > > > This option specifies the subnet's LMC value. The number of LIDs assigned to > each port is 2^LMC. > The LMC value must be in the range 0-7. LMC values > 0 allow multiple paths > between ports. LMC values > 0 should only be used if the subnet topology > actually provides multiple paths between ports, i.e. multiple > interconnects between switches. Without -l, OpenSM defaults to LMC = 0, > which allows one path between any two ports. > > > In one of our configurations we do have a mts2400 (master) switch and > connected to it are two mts14400 switches (each with 3 connections to the the > mts2400). 
> I think from the description of the opensm manpage we need to give the "--lmc" > to make use of the 3 connections from each mts14400 switch to the mts2400 > switch, don't we? In theory (AFAIR by default OpenSM will not assign LMC for Switch port 0 due to some chip bug), when target is switch itself. But if you have end nodes connected to this switch and targets are end nodes then OpenSM will try to use all links for it. > Furthermore, from manpage I would think we even need this > option for proper inter-communication between the switch modules? For switch modules yes, but again OpenSM will not assign LMC for switches (even when --lmc is specified), only for end nodes. In order to enable it you will need to turn on 'lmc_esp0' option in OpenSM options file. But as far as I remember it is not supported by switches now. Sasha From hrosenstock at xsigo.com Mon Nov 12 12:11:59 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 12 Nov 2007 12:11:59 -0800 Subject: [ofa-general] Re: [PATCH] opensm: PortInfo set decision flow simplification In-Reply-To: <20071112200913.GB8289@sashak.voltaire.com> References: <20071110145145.GF6493@sashak.voltaire.com> <1194895462.6542.52.camel@hrosenstock-ws.xsigo.com> <20071112200913.GB8289@sashak.voltaire.com> Message-ID: <1194898319.6542.70.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-12 at 22:09 +0200, Sasha Khapyorsky wrote: > On 11:24 Mon 12 Nov , Hal Rosenstock wrote: > > > diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c > > > index 19d03d9..b151c76 100644 > > > --- a/opensm/opensm/osm_link_mgr.c > > > +++ b/opensm/opensm/osm_link_mgr.c > > > @@ -389,15 +389,12 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, > > > b. got_set_resp on the physical port is FALSE. This means we haven't > > > seen this port before - need to send PortInfoSet to it. 
> > > */ > > > - if (send_set || > > > - (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH > > > - && p_physp->got_set_resp == FALSE) > > > - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH > > > - && port_num == 0 && p_physp->got_set_resp == FALSE) > > > - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH > > > - && port_num != 0 > > > - && (p_mgr->p_subn->first_time_master_sweep == TRUE > > > - || p_physp->got_set_resp == FALSE))) { > > > + if (p_physp->got_set_resp == FALSE > > > + || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num > > > + && p_mgr->p_subn->first_time_master_sweep == TRUE)) > > > > This doesn't look logically the same to me. I think it sets send_set in > > some cases where it wasn't before. > > Could you elaborate? When? Never mind; I read it wrong; it looks OK to me now. -- Hal > Sasha From jenos at ncsa.uiuc.edu Mon Nov 12 12:13:31 2007 From: jenos at ncsa.uiuc.edu (Jeremy Enos) Date: Mon, 12 Nov 2007 14:13:31 -0600 Subject: [ofa-general] rhel5 updated, ofed 1.2.5.2 breaks In-Reply-To: <47352125.2040206@ncsa.uiuc.edu> References: <47352125.2040206@ncsa.uiuc.edu> Message-ID: <4738B3EB.8030304@ncsa.uiuc.edu> Technically, after the updates, I guess I'm working with rhel5.1 now. (x86_64, if that matters) thx- Jeremy Jeremy Enos wrote: > 232 updates to rhel5 in the last 2 weeks, so I decided to update. > Went from kernel-2.6.18-53.el5 to kernel-2.6.18-8.1.15.el5. Now ofed > build barks about various kernel headers during the build. > See build log at: > http://yams.ncsa.uiuc.edu/~jenos/OFED.build.26335.log > > Any ideas? 
thx- > > Jeremy Enos > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Mon Nov 12 12:14:44 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 12 Nov 2007 12:14:44 -0800 Subject: [ofa-general] opensm --lcm In-Reply-To: <20071112202132.GC8289@sashak.voltaire.com> References: <200711121913.19719.bs@q-leap.de> <20071112202132.GC8289@sashak.voltaire.com> Message-ID: <1194898484.6542.74.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-12 at 22:21 +0200, Sasha Khapyorsky wrote: > Hi, > > On 19:13 Mon 12 Nov , Bernd Schubert wrote: > > > > so far we always did run opensm without the --lmc option, but just recently I > > read in opensm's manpage about it. > > > > > > This option specifies the subnet's LMC value. The number of LIDs assigned to > > each port is 2^LMC. > > The LMC value must be in the range 0-7. LMC values > 0 allow multiple paths > > between ports. LMC values > 0 should only be used if the subnet topology > > actually provides multiple paths between ports, i.e. multiple > > interconnects between switches. Without -l, OpenSM defaults to LMC = 0, > > which allows one path between any two ports. > > > > > > In one of our configurations we do have a mts2400 (master) switch and > > connected to it are two mts14400 switches (each with 3 connections to the the > > mts2400). > > I think from the description of the opensm manpage we need to give the "--lmc" > > to make use of the 3 connections from each mts14400 switch to the mts2400 > > switch, don't we? > > In theory (AFAIR by default OpenSM will not assign LMC for Switch port 0 Non 0 LMC is only valid (per spec) for enhanced SP0. LMC must be 0 for base SP0. > due to some chip bug), when target is switch itself. 
But if you have > end nodes connected to this switch and targets are end nodes when OpenSM > will try to use all links for it. > > > Furthermore, from manpage I would think we even need this > > option for proper inter-communication between the switch modules? > > For switch modules yes, but again OpenSM will not assign LMC for > switches (even then --lmc specified), only for end nodes. In order to > enable it you will need to turn on 'lmc_esp0' option in OpenSM options > file. But as far as I remember it is not supported by switches now. It didn't work last time I tried it and I don't think it's been fixed. -- Hal > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sashak at voltaire.com Mon Nov 12 12:36:38 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 12 Nov 2007 22:36:38 +0200 Subject: [ofa-general] opensm --lcm In-Reply-To: <1194898484.6542.74.camel@hrosenstock-ws.xsigo.com> References: <200711121913.19719.bs@q-leap.de> <20071112202132.GC8289@sashak.voltaire.com> <1194898484.6542.74.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071112203638.GE8289@sashak.voltaire.com> On 12:14 Mon 12 Nov , Hal Rosenstock wrote: > On Mon, 2007-11-12 at 22:21 +0200, Sasha Khapyorsky wrote: > > Hi, > > > > On 19:13 Mon 12 Nov , Bernd Schubert wrote: > > > > > > so far we always did run opensm without the --lmc option, but just recently I > > > read in opensm's manpage about it. > > > > > > > > > This option specifies the subnet's LMC value. The number of LIDs assigned to > > > each port is 2^LMC. > > > The LMC value must be in the range 0-7. LMC values > 0 allow multiple paths > > > between ports. LMC values > 0 should only be used if the subnet topology > > > actually provides multiple paths between ports, i.e. 
multiple > > > interconnects between switches. Without -l, OpenSM defaults to LMC = 0, > > > which allows one path between any two ports. > > > > > > > > > In one of our configurations we do have a mts2400 (master) switch and > > > connected to it are two mts14400 switches (each with 3 connections to the the > > > mts2400). > > > I think from the description of the opensm manpage we need to give the "--lmc" > > > to make use of the 3 connections from each mts14400 switch to the mts2400 > > > switch, don't we? > > > > In theory (AFAIR by default OpenSM will not assign LMC for Switch port 0 > > Non 0 LMC is only valid (per spec) for enhanced SP0. LMC must be 0 for > base SP0. Of course, this is too. But also it doesn't work for enhanced SP0 with current switches. Sasha > > due to some chip bug), when target is switch itself. But if you have > > end nodes connected to this switch and targets are end nodes when OpenSM > > will try to use all links for it. > > > > > Furthermore, from manpage I would think we even need this > > > option for proper inter-communication between the switch modules? > > > > For switch modules yes, but again OpenSM will not assign LMC for > > switches (even then --lmc specified), only for end nodes. In order to > > enable it you will need to turn on 'lmc_esp0' option in OpenSM options > > file. But as far as I remember it is not supported by switches now. > > It didn't work last time I tried it and I don't think it's been fixed. 
> > -- Hal > > > Sasha > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Mon Nov 12 12:25:45 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 12 Nov 2007 12:25:45 -0800 Subject: [ofa-general] opensm --lcm In-Reply-To: <20071112203638.GE8289@sashak.voltaire.com> References: <200711121913.19719.bs@q-leap.de> <20071112202132.GC8289@sashak.voltaire.com> <1194898484.6542.74.camel@hrosenstock-ws.xsigo.com> <20071112203638.GE8289@sashak.voltaire.com> Message-ID: <1194899145.6542.77.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-12 at 22:36 +0200, Sasha Khapyorsky wrote: > On 12:14 Mon 12 Nov , Hal Rosenstock wrote: > > On Mon, 2007-11-12 at 22:21 +0200, Sasha Khapyorsky wrote: > > > Hi, > > > > > > On 19:13 Mon 12 Nov , Bernd Schubert wrote: > > > > > > > > so far we always did run opensm without the --lmc option, but just recently I > > > > read in opensm's manpage about it. > > > > > > > > > > > > This option specifies the subnet's LMC value. The number of LIDs assigned to > > > > each port is 2^LMC. > > > > The LMC value must be in the range 0-7. LMC values > 0 allow multiple paths > > > > between ports. LMC values > 0 should only be used if the subnet topology > > > > actually provides multiple paths between ports, i.e. multiple > > > > interconnects between switches. Without -l, OpenSM defaults to LMC = 0, > > > > which allows one path between any two ports. > > > > > > > > > > > > In one of our configurations we do have a mts2400 (master) switch and > > > > connected to it are two mts14400 switches (each with 3 connections to the the > > > > mts2400). 
> > > > I think from the description of the opensm manpage we need to give the "--lmc" > > > > to make use of the 3 connections from each mts14400 switch to the mts2400 > > > > switch, don't we? > > > > > > In theory (AFAIR by default OpenSM will not assign LMC for Switch port 0 > > > > Non 0 LMC is only valid (per spec) for enhanced SP0. LMC must be 0 for > > base SP0. > > Of course, this is too. That's important because I think its a configuration option with IS3. It can show up as either base or enhanced SP0. > But also it doesn't work for enhanced SP0 with > current switches. Right. -- Hal > Sasha > > > > due to some chip bug), when target is switch itself. But if you have > > > end nodes connected to this switch and targets are end nodes when OpenSM > > > will try to use all links for it. > > > > > > > Furthermore, from manpage I would think we even need this > > > > option for proper inter-communication between the switch modules? > > > > > > For switch modules yes, but again OpenSM will not assign LMC for > > > switches (even then --lmc specified), only for end nodes. In order to > > > enable it you will need to turn on 'lmc_esp0' option in OpenSM options > > > file. But as far as I remember it is not supported by switches now. > > > > It didn't work last time I tried it and I don't think it's been fixed. > > > > -- Hal > > > > > Sasha > > > _______________________________________________ > > > general mailing list > > > general at lists.openfabrics.org > > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sashak at voltaire.com Mon Nov 12 13:09:07 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 12 Nov 2007 23:09:07 +0200 Subject: [ofa-general] [PATCH] opensm: fix PortInfo update issues. 
Message-ID: <20071112210907.GH8289@sashak.voltaire.com> This fixes PortInfo update issues: - handle switch's port 0 (esp0 in link_mgr) as end port - remove check which is always FALSE (since *p_pi = *p_old_pi) Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_lid_mgr.c | 22 ++-------------------- opensm/opensm/osm_link_mgr.c | 13 ++----------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c index d25605f..076d0e9 100644 --- a/opensm/opensm/osm_lid_mgr.c +++ b/opensm/opensm/osm_lid_mgr.c @@ -959,15 +959,9 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, */ memset(payload, 0, IB_SMP_DATA_SIZE); - - /* Correction by FUJITSU */ - if (port_num != 0) - memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); + memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); /* - Correction following a bug injected by the previous - FUJITSU line: - Should never write back a value that is bigger then 3 in the PortPhysicalState field, so cannot simply copy! @@ -976,19 +970,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, link down default state = polling port state - no change */ - /* these values can be set only for ca ports, so if we are - on a switch node, set these values to zero */ - if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) - p_pi->state_info2 = 0x0; - else { - p_pi->state_info2 = 0x02; - /* Check to see if the value we are setting is different than - the value in the port_info. 
If it is, turn on send_set flag */ - if (ib_port_info_get_link_down_def_state(p_pi) != - ib_port_info_get_link_down_def_state(p_old_pi)) - send_set = TRUE; - } - + p_pi->state_info2 = 0x02; ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); p_pi->m_key = p_mgr->p_subn->opt.m_key; diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index b151c76..80e34b2 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -166,27 +166,19 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, p_old_pi = &p_physp->port_info; memset(payload, 0, IB_SMP_DATA_SIZE); - - /* Correction by FUJITSU */ memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); /* - Correction following a bug injected by the previous - FUJITSU line: - Should never write back a value that is bigger then 3 in the PortPhysicalState field - so can not simply copy! Actually we want to write there: port physical state - no change, link down default state = polling - port state - no change + port state - as requested. 
*/ p_pi->state_info2 = 0x02; - ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); - if (ib_port_info_get_link_down_def_state(p_pi) != - ib_port_info_get_link_down_def_state(p_old_pi)) - send_set = TRUE; + ib_port_info_set_port_state(p_pi, port_state); /* we only change port fields if we do not change state */ if (port_state == IB_LINK_NO_CHANGE) { @@ -358,7 +350,6 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, */ context.pi_context.ignore_errors = FALSE; - ib_port_info_set_port_state(p_pi, port_state); if (port_state != IB_LINK_NO_CHANGE && ib_port_info_get_port_state(p_pi) != ib_port_info_get_port_state(p_old_pi)) { -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Mon Nov 12 13:11:08 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 12 Nov 2007 23:11:08 +0200 Subject: [ofa-general] [PATCH] opensm: eliminate some unneeded PortInfo Set requests In-Reply-To: <20071112210907.GH8289@sashak.voltaire.com> References: <20071112210907.GH8289@sashak.voltaire.com> Message-ID: <20071112211108.GJ8289@sashak.voltaire.com> This removes osm_physp_t got_set_resp flag and in this way eliminates some unneeded PortInfo Set requests. Originally this flag becomes TRUE only when PortInfo SetResp is arrived, and unless it is TRUE PortInfo Set request will be enforced (which means each port on a subnet will get PortInfo Set at least once). OTOH port could be already configured properly, in this case Set is not needed. 
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_port.h | 11 ----------- opensm/opensm/osm_lid_mgr.c | 9 +++++---- opensm/opensm/osm_link_mgr.c | 26 +++++++++++--------------- opensm/opensm/osm_port_info_rcv.c | 3 --- 4 files changed, 16 insertions(+), 33 deletions(-) diff --git a/opensm/include/opensm/osm_port.h b/opensm/include/opensm/osm_port.h index f2cfe18..ea60bfe 100644 --- a/opensm/include/opensm/osm_port.h +++ b/opensm/include/opensm/osm_port.h @@ -121,7 +121,6 @@ typedef struct _osm_physp { osm_pkey_tbl_t pkeys; ib_vl_arb_table_t vl_arb[4]; cl_ptr_vector_t slvl_by_port; - boolean_t got_set_resp; } osm_physp_t; /* * FIELDS @@ -174,16 +173,6 @@ typedef struct _osm_physp { * On switches have an entry for every other input port (inc SMA=0). * On CAs only one per port. * -* got_set_resp -* Marks whether or not we got a PortInfoSetResp from this port or not. -* This is used for minimizing the number of PortInfoSet requests sent. -* If we already got a set response from this port, then we will -* send a PortInfoSet only if the values we are updating are -* different than the ones on the port. If the haven't gotten a set -* response - then we want to send the request anyways - since -* every we need at least one PortInfoSet request for every port -* (by a new SM). -* * SEE ALSO * Port *********/ diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c index 076d0e9..41e42bd 100644 --- a/opensm/opensm/osm_lid_mgr.c +++ b/opensm/opensm/osm_lid_mgr.c @@ -973,6 +973,10 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, p_pi->state_info2 = 0x02; ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); + /* didn't get PortInfo before */ + if (!ib_port_info_get_port_state(p_old_pi)) + send_set = TRUE; + p_pi->m_key = p_mgr->p_subn->opt.m_key; if (memcmp(&p_pi->m_key, &p_old_pi->m_key, sizeof(p_pi->m_key))) send_set = TRUE; @@ -1144,11 +1148,8 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, 2. 
first_time_master_sweep flag on the subnet is TRUE. This means the SM just became master, and it then needs to send a PortInfo Set to every port. - 3. got_set_resp on the physical port is FALSE. This means we haven't seen - this port before and we need to send Set of PortInfo to it. */ - if (p_mgr->p_subn->first_time_master_sweep == TRUE || - p_physp->got_set_resp == FALSE) + if (p_mgr->p_subn->first_time_master_sweep == TRUE) send_set = TRUE; if (send_set) { diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index 80e34b2..768d4c4 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -180,6 +180,10 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, p_pi->state_info2 = 0x02; ib_port_info_set_port_state(p_pi, port_state); + /* didn't get PortInfo before */ + if (!ib_port_info_get_port_state(p_old_pi)) + send_set = TRUE; + /* we only change port fields if we do not change state */ if (port_state == IB_LINK_NO_CHANGE) { /* The following fields are relevant only for CA port, router, or Enh. SP0 */ @@ -351,8 +355,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, context.pi_context.ignore_errors = FALSE; if (port_state != IB_LINK_NO_CHANGE && - ib_port_info_get_port_state(p_pi) != - ib_port_info_get_port_state(p_old_pi)) { + port_state != ib_port_info_get_port_state(p_old_pi)) { send_set = TRUE; if (port_state == IB_LINK_ACTIVE) context.pi_context.active_transition = TRUE; @@ -369,20 +372,13 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, /* We need to send the PortInfoSet request with the new sm_lid in the following cases: 1. There is a change in the values (send_set == TRUE) - 2. This is an ca port or a switch port 0 and got_set_resp is FALSE - (in this case we sent a PortInfoSet in the osm_lid_mgr, but for some - reason we didn't get a response) - try and re-send. - 3. This is a switch port and: - a. first_time_master_sweep flag on the subnet is TRUE. 
This means the - SM just became master, and it then needs to send at PortInfoSet to - every port (and this is the first time we can send a PortInfoSet to - switch external ports). - b. got_set_resp on the physical port is FALSE. This means we haven't - seen this port before - need to send PortInfoSet to it. + 2. This is a switch external port (so it wasn't handled yet by + osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE, + which means the SM just became master, and it then needs to send at + PortInfoSet to every port. */ - if (p_physp->got_set_resp == FALSE - || (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num - && p_mgr->p_subn->first_time_master_sweep == TRUE)) + if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num + && p_mgr->p_subn->first_time_master_sweep == TRUE) send_set = TRUE; if (send_set) { diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c index 68a04b9..70ee7df 100644 --- a/opensm/opensm/osm_port_info_rcv.c +++ b/opensm/opensm/osm_port_info_rcv.c @@ -563,9 +563,6 @@ osm_pi_rcv_process_set(IN const osm_pi_rcv_t * const p_rcv, osm_physp_set_port_info(p_physp, p_pi); - /* We got a PortInfoSetResp - set the got_set_resp flag to TRUE */ - p_physp->got_set_resp = TRUE; - OSM_LOG_EXIT(p_rcv->p_log); } -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Mon Nov 12 13:11:56 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 12 Nov 2007 23:11:56 +0200 Subject: [ofa-general] Re: [PATCH] opensm: eliminate some unneeded PortInfo Set requests In-Reply-To: <20071112211108.GJ8289@sashak.voltaire.com> References: <20071112210907.GH8289@sashak.voltaire.com> <20071112211108.GJ8289@sashak.voltaire.com> Message-ID: <20071112211156.GK8289@sashak.voltaire.com> >From 3fc4260eb97bbc9af20ed1f401c5925fe245a7c7 Mon Sep 17 00:00:00 2001 From: Sasha Khapyorsky Date: Sun, 11 Nov 2007 17:49:36 +0200 Subject: [PATCH] opensm/osm_link_mgr: use return status when PortInfo is updated This adds return 
status to __osm_link_mgr_set_physp_pi() function (instead of stored at link_mgr structure flag), which indicates PortInfo Set request sending. Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_link_mgr.h | 5 ----- opensm/opensm/osm_link_mgr.c | 21 ++++++++------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/opensm/include/opensm/osm_link_mgr.h b/opensm/include/opensm/osm_link_mgr.h index 214dd80..11a7352 100644 --- a/opensm/include/opensm/osm_link_mgr.h +++ b/opensm/include/opensm/osm_link_mgr.h @@ -98,8 +98,6 @@ typedef struct _osm_link_mgr { osm_req_t *p_req; osm_log_t *p_log; cl_plock_t *p_lock; - boolean_t send_set_reqs; - } osm_link_mgr_t; /* * FIELDS @@ -115,9 +113,6 @@ typedef struct _osm_link_mgr { * p_lock * Pointer to the serializing lock. * -* send_set_reqs -* Boolean to indicate whether any set requests sent. -* * SEE ALSO * Link Manager object *********/ diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index 768d4c4..6fcff72 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -102,7 +102,7 @@ osm_link_mgr_init(IN osm_link_mgr_t * const p_mgr, /********************************************************************** **********************************************************************/ -static void +static boolean_t __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, IN osm_physp_t * const p_physp, IN uint8_t const port_state) @@ -381,8 +381,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, && p_mgr->p_subn->first_time_master_sweep == TRUE) send_set = TRUE; - if (send_set) { - p_mgr->send_set_reqs = TRUE; + if (send_set) status = osm_req_set(p_mgr->p_req, osm_physp_get_dr_path_ptr(p_physp), payload, @@ -390,10 +389,10 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, IB_MAD_ATTR_PORT_INFO, cl_hton32(port_num), CL_DISP_MSGID_NONE, &context); - } Exit: OSM_LOG_EXIT(p_mgr->p_log); + return send_set; } 
/********************************************************************** @@ -435,7 +434,6 @@ __osm_link_mgr_process_node(IN osm_link_mgr_t * const p_mgr, continue; current_state = osm_physp_get_port_state(p_physp); - if (current_state == IB_LINK_DOWN) continue; @@ -444,19 +442,16 @@ __osm_link_mgr_process_node(IN osm_link_mgr_t * const p_mgr, then required state. However, we need to send update if no state change required. */ - if ((link_state == IB_LINK_NO_CHANGE) || - (current_state < link_state)) { - p_mgr->send_set_reqs = FALSE; - __osm_link_mgr_set_physp_pi(p_mgr, p_physp, link_state); - - if (p_mgr->send_set_reqs == TRUE) - signal = OSM_SIGNAL_DONE_PENDING; - } else if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) + if (link_state != IB_LINK_NO_CHANGE && + link_state <= current_state) osm_log(p_mgr->p_log, OSM_LOG_DEBUG, "__osm_link_mgr_process_node: " "Physical port 0x%X already %s. Skipping\n", p_physp->port_num, ib_get_port_state_str(current_state)); + else if (__osm_link_mgr_set_physp_pi(p_mgr, p_physp, + link_state)) + signal = OSM_SIGNAL_DONE_PENDING; } OSM_LOG_EXIT(p_mgr->p_log); -- 1.5.3.rc2.29.gc4640f From rdreier at cisco.com Mon Nov 12 13:16:48 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 12 Nov 2007 13:16:48 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <200711092141.51243.bs@q-leap.de> (Bernd Schubert's message of "Fri, 9 Nov 2007 21:41:50 +0100") References: <200711082141.53113.bs@q-leap.de> <200711092115.51939.bs@q-leap.de> <200711092141.51243.bs@q-leap.de> Message-ID: > Yes exactly and reproducable on all 6 nodes with connectX presently here in > our test lab. > Just by accident I first always had connected port 2. Shortly before I already > thought it doesn't work at all, I tried the other port... Hmm, I don't see anything obvious in the code that could cause that, and I'm away from my lab this week to go to SC07. I'll try to debug when I return. - R. 
From hrosenstock at xsigo.com Mon Nov 12 13:40:25 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 12 Nov 2007 13:40:25 -0800 Subject: [ofa-general] Re: [PATCH] opensm: fix PortInfo update issues. In-Reply-To: <20071112210907.GH8289@sashak.voltaire.com> References: <20071112210907.GH8289@sashak.voltaire.com> Message-ID: <1194903625.6542.93.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-12 at 23:09 +0200, Sasha Khapyorsky wrote: > This fixes PortInfo update issues: > - handle switch's port 0 (esp0 in link_mgr) as end port > - remove check which is always FALSE (since *p_pi = *p_old_pi) > > Signed-off-by: Sasha Khapyorsky > --- > opensm/opensm/osm_lid_mgr.c | 22 ++-------------------- > opensm/opensm/osm_link_mgr.c | 13 ++----------- > 2 files changed, 4 insertions(+), 31 deletions(-) > > diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c > index d25605f..076d0e9 100644 > --- a/opensm/opensm/osm_lid_mgr.c > +++ b/opensm/opensm/osm_lid_mgr.c > @@ -959,15 +959,9 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, > */ > > memset(payload, 0, IB_SMP_DATA_SIZE); > - > - /* Correction by FUJITSU */ > - if (port_num != 0) > - memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); > + memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); > > /* > - Correction following a bug injected by the previous > - FUJITSU line: > - > Should never write back a value that is bigger then 3 in > the PortPhysicalState field, so cannot simply copy! > > @@ -976,19 +970,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, > link down default state = polling > port state - no change > */ > - /* these values can be set only for ca ports, so if we are > - on a switch node, set these values to zero */ > - if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) > - p_pi->state_info2 = 0x0; > - else { > - p_pi->state_info2 = 0x02; > - /* Check to see if the value we are setting is different than > - the value in the port_info. 
If it is, turn on send_set flag */ > - if (ib_port_info_get_link_down_def_state(p_pi) != > - ib_port_info_get_link_down_def_state(p_old_pi)) > - send_set = TRUE; > - } > - > + p_pi->state_info2 = 0x02; Isn't this a potential send_set change (as this could change LDDS) ? > ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); > > p_pi->m_key = p_mgr->p_subn->opt.m_key; > diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c > index b151c76..80e34b2 100644 > --- a/opensm/opensm/osm_link_mgr.c > +++ b/opensm/opensm/osm_link_mgr.c > @@ -166,27 +166,19 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, > p_old_pi = &p_physp->port_info; > > memset(payload, 0, IB_SMP_DATA_SIZE); > - > - /* Correction by FUJITSU */ > memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); > > /* > - Correction following a bug injected by the previous > - FUJITSU line: > - > Should never write back a value that is bigger then 3 in > the PortPhysicalState field - so can not simply copy! > > Actually we want to write there: > port physical state - no change, > link down default state = polling > - port state - no change > + port state - as requested. > */ > p_pi->state_info2 = 0x02; > - ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); > - if (ib_port_info_get_link_down_def_state(p_pi) != > - ib_port_info_get_link_down_def_state(p_old_pi)) > - send_set = TRUE; Same as above. 
> + ib_port_info_set_port_state(p_pi, port_state); > > /* we only change port fields if we do not change state */ > if (port_state == IB_LINK_NO_CHANGE) { > @@ -358,7 +350,6 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, > */ > context.pi_context.ignore_errors = FALSE; > > - ib_port_info_set_port_state(p_pi, port_state); > if (port_state != IB_LINK_NO_CHANGE && > ib_port_info_get_port_state(p_pi) != > ib_port_info_get_port_state(p_old_pi)) { From sashak at voltaire.com Mon Nov 12 14:17:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 00:17:35 +0200 Subject: [ofa-general] Re: [PATCH] opensm: fix PortInfo update issues. In-Reply-To: <1194903625.6542.93.camel@hrosenstock-ws.xsigo.com> References: <20071112210907.GH8289@sashak.voltaire.com> <1194903625.6542.93.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071112221735.GO8289@sashak.voltaire.com> On 13:40 Mon 12 Nov , Hal Rosenstock wrote: > On Mon, 2007-11-12 at 23:09 +0200, Sasha Khapyorsky wrote: > > This fixes PortInfo update issues: > > - handle switch's port 0 (esp0 in link_mgr) as end port > > - remove check which is always FALSE (since *p_pi = *p_old_pi) > > > > Signed-off-by: Sasha Khapyorsky > > --- > > opensm/opensm/osm_lid_mgr.c | 22 ++-------------------- > > opensm/opensm/osm_link_mgr.c | 13 ++----------- > > 2 files changed, 4 insertions(+), 31 deletions(-) > > > > diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c > > index d25605f..076d0e9 100644 > > --- a/opensm/opensm/osm_lid_mgr.c > > +++ b/opensm/opensm/osm_lid_mgr.c > > @@ -959,15 +959,9 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, > > */ > > > > memset(payload, 0, IB_SMP_DATA_SIZE); > > - > > - /* Correction by FUJITSU */ > > - if (port_num != 0) > > - memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); > > + memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); > > > > /* > > - Correction following a bug injected by the previous > > - FUJITSU line: > > - > > Should 
never write back a value that is bigger then 3 in > > the PortPhysicalState field, so cannot simply copy! > > > > @@ -976,19 +970,7 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, > > link down default state = polling > > port state - no change > > */ > > - /* these values can be set only for ca ports, so if we are > > - on a switch node, set these values to zero */ > > - if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) > > - p_pi->state_info2 = 0x0; > > - else { > > - p_pi->state_info2 = 0x02; > > - /* Check to see if the value we are setting is different than > > - the value in the port_info. If it is, turn on send_set flag */ > > - if (ib_port_info_get_link_down_def_state(p_pi) != > > - ib_port_info_get_link_down_def_state(p_old_pi)) > > - send_set = TRUE; > > - } > > - > > + p_pi->state_info2 = 0x02; > > Isn't this a potential send_set change (as this could change LDDS) ? Yes, I think you are right. I will change it in the patch and resubmit. Sasha From dwsnuggletownm at snuggletown.com Mon Nov 12 14:13:31 2007 From: dwsnuggletownm at snuggletown.com (Danielle Morzo) Date: Tue, 13 Nov 2007 01:13:31 +0300 Subject: [ofa-general] Quality medications can be cheap! Message-ID: <01c82592$67896690$0cff4c5b@dwsnuggletownm> There is no need to buy medications in America at sky-high prices. Purchase them in Canada! All you need is to find a trustworthy online drugstore. Prompt and discreet shipping directly to your doorstep! Confidentiality is guaranteed. http://throughbefore.cn Forget about high prices, order with ŤCanadianPharmacyť! Danielle Morzo From sashak at voltaire.com Mon Nov 12 14:34:42 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 00:34:42 +0200 Subject: [ofa-general] Re: [PATCH v2] opensm: fix PortInfo update issues. 
In-Reply-To: <20071112210907.GH8289@sashak.voltaire.com> References: <20071112210907.GH8289@sashak.voltaire.com> Message-ID: <20071112223442.GP8289@sashak.voltaire.com> This fixes PortInfo update for switch port 0 (esp0 in link_mgr) - handle it as end port. Signed-off-by: Sasha Khapyorsky --- opensm/opensm/osm_lid_mgr.c | 26 ++++++-------------------- opensm/opensm/osm_link_mgr.c | 11 +++-------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c index d25605f..c85f6f6 100644 --- a/opensm/opensm/osm_lid_mgr.c +++ b/opensm/opensm/osm_lid_mgr.c @@ -959,15 +959,9 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, */ memset(payload, 0, IB_SMP_DATA_SIZE); - - /* Correction by FUJITSU */ - if (port_num != 0) - memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); + memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); /* - Correction following a bug injected by the previous - FUJITSU line: - Should never write back a value that is bigger then 3 in the PortPhysicalState field, so cannot simply copy! @@ -976,21 +970,13 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, link down default state = polling port state - no change */ - /* these values can be set only for ca ports, so if we are - on a switch node, set these values to zero */ - if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) - p_pi->state_info2 = 0x0; - else { - p_pi->state_info2 = 0x02; - /* Check to see if the value we are setting is different than - the value in the port_info. 
If it is, turn on send_set flag */ - if (ib_port_info_get_link_down_def_state(p_pi) != - ib_port_info_get_link_down_def_state(p_old_pi)) - send_set = TRUE; - } - + p_pi->state_info2 = 0x02; ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); + if (ib_port_info_get_link_down_def_state(p_pi) != + ib_port_info_get_link_down_def_state(p_old_pi)) + send_set = TRUE; + p_pi->m_key = p_mgr->p_subn->opt.m_key; if (memcmp(&p_pi->m_key, &p_old_pi->m_key, sizeof(p_pi->m_key))) send_set = TRUE; diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index b151c76..5cfd93b 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -166,24 +166,20 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, p_old_pi = &p_physp->port_info; memset(payload, 0, IB_SMP_DATA_SIZE); - - /* Correction by FUJITSU */ memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); /* - Correction following a bug injected by the previous - FUJITSU line: - Should never write back a value that is bigger then 3 in the PortPhysicalState field - so can not simply copy! Actually we want to write there: port physical state - no change, link down default state = polling - port state - no change + port state - as requested. 
*/ p_pi->state_info2 = 0x02; - ib_port_info_set_port_state(p_pi, IB_LINK_NO_CHANGE); + ib_port_info_set_port_state(p_pi, port_state); + if (ib_port_info_get_link_down_def_state(p_pi) != ib_port_info_get_link_down_def_state(p_old_pi)) send_set = TRUE; @@ -358,7 +354,6 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, */ context.pi_context.ignore_errors = FALSE; - ib_port_info_set_port_state(p_pi, port_state); if (port_state != IB_LINK_NO_CHANGE && ib_port_info_get_port_state(p_pi) != ib_port_info_get_port_state(p_old_pi)) { -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Mon Nov 12 15:18:20 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 01:18:20 +0200 Subject: [ofa-general] [PATCH] opensm/Makefile: add libopensm -> opensm dependency Message-ID: <20071112231820.GR8289@sashak.voltaire.com> This makes opensm program be explicitly dependent from libopensm. Otherwise opensm build can fail on multiprocessor machines when option -jN (where N > 1) is used with make. This is reported in Bug#774 (https://bugs.openfabrics.org/show_bug.cgi?id=774). 
Signed-off-by: Sasha Khapyorsky --- opensm/opensm/Makefile.am | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/opensm/opensm/Makefile.am b/opensm/opensm/Makefile.am index 2895d18..146bc04 100644 --- a/opensm/opensm/Makefile.am +++ b/opensm/opensm/Makefile.am @@ -29,6 +29,7 @@ libopensm_la_LDFLAGS = -version-info $(opensm_api_version) \ libopensm_la_DEPENDENCIES = $(srcdir)/libopensm.map sbin_PROGRAMS = opensm +opensm_DEPENDENCIES = libopensm.la opensm_SOURCES = main.c osm_console.c osm_db_files.c \ osm_db_pack.c osm_drop_mgr.c osm_fwd_tbl.c \ osm_inform.c osm_lid_mgr.c osm_lin_fwd_rcv.c \ -- 1.5.3.4.206.g58ba4 From ralph.campbell at qlogic.com Mon Nov 12 15:15:02 2007 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Mon, 12 Nov 2007 15:15:02 -0800 Subject: [ofa-general] [PATCH] libibverbs - return valid bad_wr from ibv_cmd_post_send() Message-ID: <1194909302.3612.74.camel@brick.pathscale.com> There are error cases in ib_uverbs_post_send() where the return value is negative (i.e., an error) and yet a non-zero resp.bad_wr is not written back to ibv_cmd_post_send(). In this case, ibv_cmd_post_send() should still set the bad_wr pointer. It seems to me that this should go into OFED 1.3. I will leave the mechanics of how that happens to Roland and Vlad. Signed-off-by: Ralph Campbell diff --git a/src/cmd.c b/src/cmd.c index 6d4331f..305e263 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -884,7 +884,8 @@ int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, while (--wr_count) i = i->next; *bad_wr = i; - } + } else if (ret) + *bad_wr = wr; return ret; } From sashak at voltaire.com Mon Nov 12 15:40:09 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 01:40:09 +0200 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release Message-ID: <20071112234009.GU8289@sashak.voltaire.com> Hi, There is a new release of ibsim - infiniband fabric simulator. 
Tarball is available in: http://www.openfabrics.org/downloads/management/ibsim-0.4.tar.gz md5sum: 13cb3338d0fa374cc01416df7735414e ibsim-0.4.tar.gz Sasha From hrosenstock at xsigo.com Mon Nov 12 15:35:18 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 12 Nov 2007 15:35:18 -0800 Subject: [ofa-general] Re: [ewg] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <20071112234009.GU8289@sashak.voltaire.com> References: <20071112234009.GU8289@sashak.voltaire.com> Message-ID: <1194910518.6542.119.camel@hrosenstock-ws.xsigo.com> Hi Sasha, On Tue, 2007-11-13 at 01:40 +0200, Sasha Khapyorsky wrote: > Hi, > > There is a new release of ibsim - infiniband fabric simulator. > Tarball is available in: > > http://www.openfabrics.org/downloads/management/ibsim-0.4.tar.gz > > md5sum: 13cb3338d0fa374cc01416df7735414e ibsim-0.4.tar.gz Excellent. Thanks! Do you know if this is also part of the OFED 1.3 daily build ? -- Hal > Sasha > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg From sashak at voltaire.com Mon Nov 12 15:51:11 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 01:51:11 +0200 Subject: [ofa-general] Re: [ewg] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <1194910518.6542.119.camel@hrosenstock-ws.xsigo.com> References: <20071112234009.GU8289@sashak.voltaire.com> <1194910518.6542.119.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071112235111.GV8289@sashak.voltaire.com> On 15:35 Mon 12 Nov , Hal Rosenstock wrote: > > > > There is a new release of ibsim - infiniband fabric simulator. > > Tarball is available in: > > > > http://www.openfabrics.org/downloads/management/ibsim-0.4.tar.gz > > > > md5sum: 13cb3338d0fa374cc01416df7735414e ibsim-0.4.tar.gz > > Excellent. Thanks! > > Do you know if this is also part of the OFED 1.3 daily build ? Yes, as far as I know. 
Sasha From sashak at voltaire.com Mon Nov 12 17:49:09 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 03:49:09 +0200 Subject: [ofa-general] [PATCH] opensm.init: startup script fixes Message-ID: <20071113014909.GX8289@sashak.voltaire.com> Don't use redhat specific "daemon" function Fix opensm path to /usr/sbin Signed-off-by: Sasha Khapyorsky --- opensm/scripts/opensm.init | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/scripts/opensm.init b/opensm/scripts/opensm.init index 46b3a89..d717279 100644 --- a/opensm/scripts/opensm.init +++ b/opensm/scripts/opensm.init @@ -55,7 +55,7 @@ fi start () { echo -n "Starting opensm: " - daemon /usr/bin/opensm -B $OPTIONS + /usr/sbin/opensm -B $OPTIONS > /dev/null if [[ $RETVAL -eq 0 ]]; then touch /var/lock/subsys/opensm success -- 1.5.3.4.206.g58ba4 From keshetti85-student at yahoo.co.in Mon Nov 12 21:26:02 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Tue, 13 Nov 2007 10:56:02 +0530 Subject: [ofa-general] Re: [openSM] Pkey index and Pkey value In-Reply-To: <829ded920711120045s1c03e008k5c8fa3034744601b@mail.gmail.com> References: <829ded920711120045s1c03e008k5c8fa3034744601b@mail.gmail.com> Message-ID: <829ded920711122126k2bd7f3adu8859799442701602@mail.gmail.com> >No; there is no requirement for a pkey value to be in the same index >in different ports. But if you want to run an MPI application in all the ports belonging to one partition (e.g: partition2 in my previous example) a conflict will arise as most of the MPI implementations have provision of allowing only one Pkey index (e.g: ib_pkey_ix in openMPI). This problem can be easily solved by storing pkey value at the same pkey index in all ports belonging to the same partition. 
-Mahesh From dotanb at dev.mellanox.co.il Mon Nov 12 22:23:06 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 13 Nov 2007 08:23:06 +0200 Subject: [ofa-general] Re: [ewg] [PATCH] libibverbs - return valid bad_wr from ibv_cmd_post_send() In-Reply-To: <1194909302.3612.74.camel@brick.pathscale.com> References: <1194909302.3612.74.camel@brick.pathscale.com> Message-ID: <473942CA.8070007@dev.mellanox.co.il> Hi. I believe that the same bug exactly exists in ibv_cmd_post_recv and ibv_post_srq_recv. thanks Dotan Ralph Campbell wrote: > There are error cases in ib_uverbs_post_send() where the return > value is negative (i.e., an error) and yet a non-zero resp.bad_wr > is not written back to ibv_cmd_post_send(). In this case, > ibv_cmd_post_send() should still set the bad_wr pointer. > > It seems to me that this should go into OFED 1.3. > I will leave the mechanics of how that happens to Roland and Vlad. > > Signed-off-by: Ralph Campbell > > diff --git a/src/cmd.c b/src/cmd.c > index 6d4331f..305e263 100644 > --- a/src/cmd.c > +++ b/src/cmd.c > @@ -884,7 +884,8 @@ int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, > while (--wr_count) > i = i->next; > *bad_wr = i; > - } > + } else if (ret) > + *bad_wr = wr; > > return ret; > } > > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg > > From kliteyn at mellanox.co.il Mon Nov 12 23:48:41 2007 From: kliteyn at mellanox.co.il (Yevgeny Kliteynik) Date: Tue, 13 Nov 2007 09:48:41 +0200 Subject: [ofa-general] [PATCH] opensm/Makefile: add libopensm -> opensm dependency In-Reply-To: <20071112231820.GR8289@sashak.voltaire.com> References: <20071112231820.GR8289@sashak.voltaire.com> Message-ID: <473956D9.9060406@mellanox.co.il> Great, it does fix the problem. -- Yevgeny Sasha Khapyorsky wrote: > This makes opensm program be explicitly dependent from libopensm. 
> Otherwise opensm build can fail on multiprocessor machines when option > -jN (where N > 1) is used with make. This is reported in Bug#774 > (https://bugs.openfabrics.org/show_bug.cgi?id=774). > > Signed-off-by: Sasha Khapyorsky > --- > opensm/opensm/Makefile.am | 1 + > 1 files changed, 1 insertions(+), 0 deletions(-) > > diff --git a/opensm/opensm/Makefile.am b/opensm/opensm/Makefile.am > index 2895d18..146bc04 100644 > --- a/opensm/opensm/Makefile.am > +++ b/opensm/opensm/Makefile.am > @@ -29,6 +29,7 @@ libopensm_la_LDFLAGS = -version-info $(opensm_api_version) \ > libopensm_la_DEPENDENCIES = $(srcdir)/libopensm.map > > sbin_PROGRAMS = opensm > +opensm_DEPENDENCIES = libopensm.la > opensm_SOURCES = main.c osm_console.c osm_db_files.c \ > osm_db_pack.c osm_drop_mgr.c osm_fwd_tbl.c \ > osm_inform.c osm_lid_mgr.c osm_lin_fwd_rcv.c \ > From ogerlitz at voltaire.com Tue Nov 13 00:09:32 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Tue, 13 Nov 2007 10:09:32 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <473822FD.20208@Voltaire.COM> References: <473822FD.20208@Voltaire.COM> Message-ID: Yevgeny, iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port space as does RDS. The RDMA-CM signature is something which I am sure exists, you can look on the RDMA-CM IB spec Annex to see if such thing indeed exist or I am wrong. The TCP port is the 16 bit port portion of the ip:port address provided by a ULP that uses the RDMA-CM to rdma_resolve_addr(), again the annex explained how the port is embedded into the SID, I don't remember the location within the 64 bit string. Or. -------- Original Message -------- Subject: Re: QoS for iSER Date: Mon, 12 Nov 2007 11:41:43 +0200 From: Yevgeny Kliteynik Hi Erez, Erez Zilber wrote: > to create the SID, the rdma cm combines > > 1) the port space What is the port space for iSER? For SDP it's 0x10000 - 0x1FFFF. For RDS it's 0x1060000 - 0x106FFFF For iSER it's ...? 
> 2) the rdma cm signature Do you mean something iSER-specific, or just the way the cm builds the service ID out of port space and tcp port? Can you give an example? > 3) the destination tcp port provided to rdma_resolve_addr I guess that tcp port is in the lower 4 nibs of the service ID, similar to SDP. Right? -- Yevgeny From or.gerlitz at gmail.com Tue Nov 13 00:13:26 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Tue, 13 Nov 2007 10:13:26 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: References: <473822FD.20208@Voltaire.COM> Message-ID: <15ddcffd0711130013r700db22ai655331bdb9bfc674@mail.gmail.com> On Nov 13, 2007 10:09 AM, Or Gerlitz wrote: > iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port > space as does RDS. The RDMA-CM signature is something which I am sure > exists, I meant to say, --not-- sure Or. -------------- next part -------------- An HTML attachment was scrubbed... URL: From keshetti85-student at yahoo.co.in Tue Nov 13 00:25:02 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Tue, 13 Nov 2007 13:55:02 +0530 Subject: [ofa-general] Credit loops in IB and ibdm utility Message-ID: <829ded920711130025p7a2c3394g41b5886da94251a3@mail.gmail.com> I have some basic questions about credit loops in infiniband networks. Can anyone of you tell me in brief what are credit loops, when they occur and how does IBDM utility verify the network for credit loops? n2------S2\ | \ | \ | S1------ n1 | / | / n3-----S3 In the above topology (S-switch and n-node ), I have loaded all the possible paths using the openSM's file based ucast manager. When I ran an MPI job which sends data from both n3 and n1 to n2 I have observed a credit loop. Why is it happening here ? -Mahesh From monisonlists at gmail.com Tue Nov 13 01:23:46 2007 From: monisonlists at gmail.com (Moni Shoua) Date: Tue, 13 Nov 2007 11:23:46 +0200 Subject: [ofa-general] ib-bonding release 19 is available. 
Message-ID: <47396D22.9040006@gmail.com> Hi, Please take from the usual place (latest.txt is up-to-date) Change Log: ------------- 1. Fix bug 733 (https://bugs.openfabrics.org/show_bug.cgi?id=773) thanks MoniS From vlad at lists.openfabrics.org Tue Nov 13 02:59:56 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 13 Nov 2007 02:59:56 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071113-0200 daily build status Message-ID: <20071113105956.A6DBAE6088A@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.20 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on powerpc with linux-2.6.15 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.13 Passed on 
ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: From hal.rosenstock at gmail.com Tue Nov 13 04:07:55 2007 From: hal.rosenstock at gmail.com (Hal Rosenstock) Date: Tue, 13 Nov 2007 07:07:55 -0500 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: References: <473822FD.20208@Voltaire.COM> Message-ID: Or, On 11/13/07, Or Gerlitz wrote: > Yevgeny, > > iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port > space as does RDS. The RDMA-CM signature is something which I am sure > exists, you can look on the RDMA-CM IB spec Annex to see if such thing > indeed exist or I am wrong. Did you really look at the annex for this ? > The TCP port is the 16 bit port portion of > the > ip:port address provided by a ULP that uses the RDMA-CM to > rdma_resolve_addr(), again the annex explained how the port is embedded > into the SID, I don't remember the location within the 64 bit string. It's in the low 16 bits (bytes 6-7) of the SID as the annex indicates. > Or. 
> > -------- Original Message -------- > Subject: > Re: QoS for iSER > Date: > Mon, 12 Nov 2007 11:41:43 +0200 > From: Yevgeny Kliteynik > > Hi Erez, > > Erez Zilber wrote: > > to create the SID, the rdma cm combines > > > > 1) the port space > > What is the port space for iSER? > For SDP it's 0x10000 - 0x1FFFF. > For RDS it's 0x1060000 - 0x106FFFF > For iSER it's ...? These numbers are too large for just "port space". iSER SID is 0x000000000106035c in your nomenclature, I guess 0x106035c 01 says RDMA aware ULP service ID range 06 says IP protocol is TCP 0x035c (port 860) is the well known TCP port for iSCSI -- Hal > > 2) the rdma cm signature > > Do you mean something iSER-specific, or just the way the cm > builds the service ID out of port space and tcp port? > Can you give an example? > > > 3) the destination tcp port provided to rdma_resolve_addr > > I guess that tcp port is in the lower 4 nibs of the service ID, > similar to SDP. Right? > -- Yevgeny > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From hrosenstock at xsigo.com Tue Nov 13 04:41:11 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 13 Nov 2007 04:41:11 -0800 Subject: [ofa-general] [PATCH][TRIVIAL] OpenSM/osm_subnet.c: Cosmetic format changes to opensm.opts file Message-ID: <1194957671.6542.167.camel@hrosenstock-ws.xsigo.com> OpenSM/osm_subnet.c: Cosmetic format changes to opensm.opts file Signed-off-by: Hal Rosenstock diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index 8da3139..0f109a5 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -1330,8 +1330,8 @@ ib_api_status_t osm_subn_write_conf_file(IN osm_subn_opt_t * const p_opts) "# The LMC value used on this subnet\n" "lmc %u\n\n" "# lmc_esp0 determines whether LMC value used on 
subnet is used for\n" - "#enhanced switch port 0. If TRUE, LMC value for subnet is used for\n" - "#ESP0. Otherwise, LMC value for ESP0s is 0.\n" + "# enhanced switch port 0. If TRUE, LMC value for subnet is used for\n" + "# ESP0. Otherwise, LMC value for ESP0s is 0.\n" "lmc_esp0 %s\n\n" "# The code of maximal time a packet can live in a switch\n" "# The actual time is 4.096usec * 2^\n" @@ -1359,7 +1359,7 @@ ib_api_status_t osm_subn_write_conf_file(IN osm_subn_opt_t * const p_opts) "# Force link speed enable on switch links\n" "# If 0, don't modify PortInfo:LinkSpeedEnabled on switch port\n" "# Otherwise, use value for PortInfo:LinkSpeedEnabled on switch port\n" - "# Default is 15 (to set to PortInfo:LinkSpeedSupported\n\n" + "# Default is 15 (to set to PortInfo:LinkSpeedSupported)\n\n" "force_link_speed %u\n\n" "# The subnet_timeout code that will be set for all the ports\n" "# The actual timeout is 4.096usec * 2^\n" From hrosenstock at xsigo.com Tue Nov 13 04:54:06 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 13 Nov 2007 04:54:06 -0800 Subject: [ofa-general] libibmad/dump.c: LinkSpeed/Width extension support Message-ID: <1194958446.6542.178.camel@hrosenstock-ws.xsigo.com> Hi Sasha, There are some changes in the 1.2.1 spec affecting LinkSpeedEnabled which make what previously looked like previous valid values now invalid. Also, there are a number of hardware vendors offering extensions in LinkSpeed/Width. Would you accept a patch supporting these ? I would add something like "(IBA extension)" to the end of the decode for these to indicate that these are "beyond the IBA spec". Make sense ? 
-- Hal From sashak at voltaire.com Tue Nov 13 06:07:48 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 16:07:48 +0200 Subject: [ofa-general] Re: [PATCH][TRIVIAL] OpenSM/osm_subnet.c: Cosmetic format changes to opensm.opts file In-Reply-To: <1194957671.6542.167.camel@hrosenstock-ws.xsigo.com> References: <1194957671.6542.167.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071113140748.GY8289@sashak.voltaire.com> On 04:41 Tue 13 Nov , Hal Rosenstock wrote: > OpenSM/osm_subnet.c: Cosmetic format changes to opensm.opts file > > Signed-off-by: Hal Rosenstock Applied. Thanks. Sasha From sashak at voltaire.com Tue Nov 13 06:09:48 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 13 Nov 2007 16:09:48 +0200 Subject: [ofa-general] Re: libibmad/dump.c: LinkSpeed/Width extension support In-Reply-To: <1194958446.6542.178.camel@hrosenstock-ws.xsigo.com> References: <1194958446.6542.178.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071113140948.GZ8289@sashak.voltaire.com> Hi Hal, On 04:54 Tue 13 Nov , Hal Rosenstock wrote: > > There are some changes in the 1.2.1 spec affecting LinkSpeedEnabled > which make what previously looked like previous valid values now > invalid. Also, there are a number of hardware vendors offering > extensions in LinkSpeed/Width. Would you accept a patch supporting > these ? Yes, of course. Thanks! > I would add something like "(IBA extension)" to the end of the > decode for these to indicate that these are "beyond the IBA spec". Make > sense ? Yes, seems good. 
Sasha From kliteyn at mellanox.co.il Mon Nov 12 21:18:47 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 13 Nov 2007 07:18:47 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-13:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-12 OpenSM git rev = Sun_Nov_11_19:10:57_2007 [6ad3e888850a68ebe9d328b6f5ec8ec89d10074c] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From barrowssix at uwclub.net Tue Nov 13 07:01:24 2007 From: barrowssix at uwclub.net (UK-LOTTO ORGANIZATION) Date: Tue, 13 Nov 2007 15:01:24 -0000 (GMT) Subject: [ofa-general] OFFICIAL PRIZE NOTIFICATION Message-ID: <49067.64.214.231.141.1194966084.squirrel@webmail.uwclub.net> UK-LOTTO ORGANIZATION Ticket Free/Online Winnings and Notification Department Ref Number: 1648UK Batch Number: B522XG WINNINGS NOTIFICATION OF £1.5M We happily announce to you the result of the U.K Lotto Organization's online draws and sweepstakes program held in Bangkok-Thailand on the 13th of November, 2007. 
Your e-mail address attached to ticket number: 569245398266LQ with Serial number: 546-532829 drew the lucky numbers: 42-90-58-33-84 which subsequently won you the lottery in the first category. You have therefore been approved to claim a total sum of £1.5M (One Million, Five Hundred Thousand GREAT BRITISH POUNDS) in cash credited tofile FST/26487710-95.This is from a total cash prize of £15,000,000.00 shared amongst the first Ten (10) lucky winners in this category.All participants were selected randomly from World Wide Web site through computer online draw system and extracted from over 100,000 companies. The below courier company has been given the contract to deliver all our winners cheques from the first to last category. ALPHA SECURITY AND FINANCE COURIER SERVICES Ltd,22Plumstead Road,London. Contact Agent: Mr.Brain Hunt (Director of Operations and Logistics) Email address: alphacourierservice07 at gmail.com Tell: +44 704 570 0112 You are to contact Mr.Brain Hunt on the above email address for quick delivery of your Uk-Lotto certfied winnings cashiers cheque. To avoid delay in the delivery of your winnings check to you, include the below highlighted information in your contact mail to Mr.Brain Hunt. -Your country -Your complete official names -Your nationality -Your age -Your sex -Your contact telephone, mobile and fax numbers -Your occupation -Your address where your wish to receive your winnings cheque -Your Ref Number and Batch Numbers -Your Ticket Number and Lucky Numbers -Your Serial and File Numbers -Date and venue of draw. Do not be scared of anything happening to your winnings cheque with the ALPHA SECURITY AND FINANCE COURIER SERVICES Ltd as all the cheques was processed in a such a way that no sum can be deducted from it till it gets to the beneficiary/winner's address and deposited in any bank of choice in your country or any in any part of the world for onward payment by the bank. 
For safety measures and to avoid fraudulent claims you are advised to keep all about your winnings information and notification confidential and away from all third parties till your cheque arrives your address and cashed. Regards, Mrs.Monet Bronze Online Co-ordinator/Notification Officer UK-LOTTERY ORGANIZATION Sweepstakes International Program. From hrosenstock at xsigo.com Tue Nov 13 07:38:43 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 13 Nov 2007 07:38:43 -0800 Subject: [ofa-general] [PATCH] libibmad/dump.c: Support link speed and width vendor extensions Message-ID: <1194968323.6542.213.camel@hrosenstock-ws.xsigo.com> libibmad/dump.c: Support link speed and width vendor extensions When decoding values, handle vendor extensions to link speed and width including accommodating a "documentation" change between IBA 1.2 and 1.2.1 Signed-off-by: Hal Rosenstock diff --git a/libibmad/src/dump.c b/libibmad/src/dump.c index d743215..9628eba 100644 --- a/libibmad/src/dump.c +++ b/libibmad/src/dump.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004-2007 Voltaire Inc. All rights reserved. + * Copyright (c) 2007 Xsigo Systems Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -235,12 +236,21 @@ mad_dump_linkwidthsup(char *buf, int bufsz, void *val, int valsz) case 1: snprintf(buf, bufsz, "1X"); break; + case 2: + snprintf(buf, bufsz, "4X (IBA extension)"); + break; case 3: snprintf(buf, bufsz, "1X or 4X"); break; + case 4: + snprintf(buf, bufsz, "8X (IBA extension)"); + break; case 7: snprintf(buf, bufsz, "1X or 4X or 8X"); break; + case 8: + snprintf(buf, bufsz, "12X (IBA extension)"); + break; case 11: snprintf(buf, bufsz, "1X or 4X or 12X"); break; @@ -304,9 +314,15 @@ mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) case 1: snprintf(buf, bufsz, "2.5 Gbps"); break; + case 2: + snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); + break; case 3: snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); break; + case 4: + snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); + break; case 5: snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); break; @@ -329,13 +345,13 @@ mad_dump_linkspeeden(char *buf, int bufsz, void *val, int valsz) snprintf(buf, bufsz, "2.5 Gbps"); break; case 2: - snprintf(buf, bufsz, "5.0 Gbps"); + snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); break; case 3: snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); break; case 4: - snprintf(buf, bufsz, "10.0 Gbps"); + snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); break; case 5: snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); From matthias.kaehlcke at gmail.com Tue Nov 13 10:45:03 2007 From: matthias.kaehlcke at gmail.com (Matthias Kaehlcke) Date: Tue, 13 Nov 2007 19:45:03 +0100 Subject: [ofa-general] [PATCH] QLogic InfiniPath: convert ipath_eep_sem to mutex Message-ID: <20071113184503.GE30483@traven> QLogic InfiniPath: convert the semaphore ipath_eep_sem to the mutex API Signed-off-by: Matthias Kaehlcke -- diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index e7c25db..a5b6299 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ 
-510,10 +510,10 @@ int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -524,10 +524,10 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -616,9 +616,9 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) goto bail; } - down(&dd->ipath_eep_sem); + mutex_lock(&dd->ipath_eep_lock); eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (eep_stat) { ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); @@ -764,14 +764,14 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) /* Grab semaphore and read current EEPROM. If we get an * error, let go, but if not, keep it until we finish write. 
*/ - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (ret) { ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); goto free_bail; } ret = ipath_eeprom_internal_read(dd, 0, buf, len); if (ret) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "Unable read EEPROM for logging\n"); goto free_bail; } @@ -779,7 +779,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", csum, ifp->if_csum); ret = 1; @@ -849,7 +849,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 1); ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); } - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (ret) ipath_dev_err(dd, "Failed updating EEPROM\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 9dd0bac..9e9d6fa 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -348,7 +348,7 @@ static int init_chip_first(struct ipath_devdata *dd, spin_lock_init(&dd->ipath_gpio_lock); spin_lock_init(&dd->ipath_eep_st_lock); - sema_init(&dd->ipath_eep_sem, 1); + mutex_init(&dd->ipath_eep_lock); done: *pdp = pd; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 8786dd7..a6e7a60 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -616,7 +617,7 @@ struct ipath_devdata { /* control access to actual counters, timer */ spinlock_t ipath_eep_st_lock; /* control high-level access to EEPROM */ - struct semaphore ipath_eep_sem; + struct mutex ipath_eep_lock; /* Below inc'd by 
ipath_snap_cntrs(), locked by ipath_eep_st_lock */ uint64_t ipath_traffic_wds; /* active time is kept in seconds, but logged in hours */ -- Matthias Kaehlcke Linux Application Developer Barcelona La libertad es como la mañana. Hay quienes esperan dormidos a que llegue, pero hay quienes desvelan y caminan la noche para alcanzarla (Subcomandante Marcos) .''`. using free software / Debian GNU/Linux | http://debian.org : :' : `. `'` gpg --keyserver pgp.mit.edu --recv-keys 47D8E5D4 `- From tziporet at mellanox.co.il Tue Nov 13 12:25:39 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 13 Nov 2007 22:25:39 +0200 Subject: [ofa-general] OFED 1.3 beta status on kernel 2.6.24 rebase Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E28C@mtlexch01.mtl.com> Hi All, We just fixed a critical bug in IPoIB and we start to run our internal regression tests on the new ofed_2_6_24 branch to get some confidence level with it. If the regression will pass we will start the daily build of OFED 1.3 based on the new kernel tomorrow. Vlad will send an email updating on this Tziporet -------------- next part -------------- An HTML attachment was scrubbed... URL: From auke-jan.h.kok at intel.com Tue Nov 13 13:28:14 2007 From: auke-jan.h.kok at intel.com (Kok, Auke) Date: Tue, 13 Nov 2007 13:28:14 -0800 Subject: [ofa-general] Re: [PATCH 10/10 REV5] [E1000] Implement batching In-Reply-To: <20070914090442.17589.23005.sendpatchset@K50wks273871wss.in.ibm.com> References: <20070914090058.17589.80352.sendpatchset@K50wks273871wss.in.ibm.com> <20070914090442.17589.23005.sendpatchset@K50wks273871wss.in.ibm.com> Message-ID: <473A16EE.6080101@intel.com> Krishna Kumar wrote: > E1000: Implement batching capability (ported thanks to changes taken from > Jamal). > > Signed-off-by: Krishna Kumar this doesn't apply anymore and it would help if you could re-spin this for e1000e. 
I don't know what the status for merging of the batched xmit patches is right now but it would help if you could rewrite them against e1000e, which I assume is what most people want to test with. There are also significant changes upstream right now in jgarzik/netdev-2.6 #upstream... I'm still very interested in these patches BTW. Auke > --- > e1000_main.c | 104 ++++++++++++++++++++++++++++++++++++++++++----------------- > 1 files changed, 75 insertions(+), 29 deletions(-) > > diff -ruNp org/drivers/net/e1000/e1000_main.c new/drivers/net/e1000/e1000_main.c > --- org/drivers/net/e1000/e1000_main.c 2007-09-14 10:30:57.000000000 +0530 > +++ new/drivers/net/e1000/e1000_main.c 2007-09-14 10:31:02.000000000 +0530 > @@ -990,7 +990,7 @@ e1000_probe(struct pci_dev *pdev, > if (pci_using_dac) > netdev->features |= NETIF_F_HIGHDMA; > > - netdev->features |= NETIF_F_LLTX; > + netdev->features |= NETIF_F_LLTX | NETIF_F_BATCH_SKBS; > > adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); > > @@ -3092,6 +3092,17 @@ e1000_tx_map(struct e1000_adapter *adapt > return count; > } > > +static void e1000_kick_DMA(struct e1000_adapter *adapter, > + struct e1000_tx_ring *tx_ring, int i) > +{ > + wmb(); > + > + writel(i, adapter->hw.hw_addr + tx_ring->tdt); > + /* we need this if more than one processor can write to our tail > + * at a time, it syncronizes IO on IA64/Altix systems */ > + mmiowb(); > +} > + > static void > e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, > int tx_flags, int count) > @@ -3138,13 +3149,7 @@ e1000_tx_queue(struct e1000_adapter *ada > * know there are new descriptors to fetch. (Only > * applicable for weak-ordered memory model archs, > * such as IA-64). 
*/ > - wmb(); > - > tx_ring->next_to_use = i; > - writel(i, adapter->hw.hw_addr + tx_ring->tdt); > - /* we need this if more than one processor can write to our tail > - * at a time, it syncronizes IO on IA64/Altix systems */ > - mmiowb(); > } > > /** > @@ -3251,22 +3256,23 @@ static int e1000_maybe_stop_tx(struct ne > } > > #define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) > + > +#define NETDEV_TX_DROPPED -5 > + > static int > -e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) > +e1000_prep_queue_frame(struct sk_buff *skb, struct net_device *netdev) > { > struct e1000_adapter *adapter = netdev_priv(netdev); > struct e1000_tx_ring *tx_ring; > unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD; > unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; > unsigned int tx_flags = 0; > - unsigned int len = skb->len; > - unsigned long flags; > - unsigned int nr_frags = 0; > - unsigned int mss = 0; > + unsigned int len = skb->len - skb->data_len; > + unsigned int nr_frags; > + unsigned int mss; > int count = 0; > int tso; > unsigned int f; > - len -= skb->data_len; > > /* This goes back to the question of how to logically map a tx queue > * to a flow. 
Right now, performance is impacted slightly negatively > @@ -3276,7 +3282,7 @@ e1000_xmit_frame(struct sk_buff *skb, st > > if (unlikely(skb->len <= 0)) { > dev_kfree_skb_any(skb); > - return NETDEV_TX_OK; > + return NETDEV_TX_DROPPED; > } > > /* 82571 and newer doesn't need the workaround that limited descriptor > @@ -3322,7 +3328,7 @@ e1000_xmit_frame(struct sk_buff *skb, st > DPRINTK(DRV, ERR, > "__pskb_pull_tail failed.\n"); > dev_kfree_skb_any(skb); > - return NETDEV_TX_OK; > + return NETDEV_TX_DROPPED; > } > len = skb->len - skb->data_len; > break; > @@ -3366,22 +3372,15 @@ e1000_xmit_frame(struct sk_buff *skb, st > (adapter->hw.mac_type == e1000_82573)) > e1000_transfer_dhcp_info(adapter, skb); > > - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) > - /* Collision - tell upper layer to requeue */ > - return NETDEV_TX_LOCKED; > - > /* need: count + 2 desc gap to keep tail from touching > * head, otherwise try next time */ > - if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2))) { > - spin_unlock_irqrestore(&tx_ring->tx_lock, flags); > + if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2))) > return NETDEV_TX_BUSY; > - } > > if (unlikely(adapter->hw.mac_type == e1000_82547)) { > if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { > netif_stop_queue(netdev); > mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1); > - spin_unlock_irqrestore(&tx_ring->tx_lock, flags); > return NETDEV_TX_BUSY; > } > } > @@ -3396,8 +3395,7 @@ e1000_xmit_frame(struct sk_buff *skb, st > tso = e1000_tso(adapter, tx_ring, skb); > if (tso < 0) { > dev_kfree_skb_any(skb); > - spin_unlock_irqrestore(&tx_ring->tx_lock, flags); > - return NETDEV_TX_OK; > + return NETDEV_TX_DROPPED; > } > > if (likely(tso)) { > @@ -3416,13 +3414,61 @@ e1000_xmit_frame(struct sk_buff *skb, st > e1000_tx_map(adapter, tx_ring, skb, first, > max_per_txd, nr_frags, mss)); > > - netdev->trans_start = jiffies; > + return NETDEV_TX_OK; > +} > + > +static int e1000_xmit_frame(struct 
sk_buff *skb, struct net_device *netdev) > +{ > + struct e1000_adapter *adapter = netdev_priv(netdev); > + struct e1000_tx_ring *tx_ring = adapter->tx_ring; > + struct sk_buff_head *blist; > + int ret, skbs_done = 0; > + unsigned long flags; > + > + if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) { > + /* Collision - tell upper layer to requeue */ > + return NETDEV_TX_LOCKED; > + } > > - /* Make sure there is space in the ring for the next send. */ > - e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); > + blist = netdev->skb_blist; > + > + if (!skb || (blist && skb_queue_len(blist))) { > + /* > + * Either batching xmit call, or single skb case but there are > + * skbs already in the batch list from previous failure to > + * xmit - send the earlier skbs first to avoid out of order. > + */ > + if (skb) > + __skb_queue_tail(blist, skb); > + skb = __skb_dequeue(blist); > + } else { > + blist = NULL; > + } > + > + do { > + ret = e1000_prep_queue_frame(skb, netdev); > + if (likely(ret == NETDEV_TX_OK)) > + skbs_done++; > + else { > + if (ret == NETDEV_TX_BUSY) { > + if (blist) > + __skb_queue_head(blist, skb); > + break; > + } > + /* skb dropped, not a TX error */ > + ret = NETDEV_TX_OK; > + } > + } while (blist && (skb = __skb_dequeue(blist)) != NULL); > + > + if (skbs_done) { > + e1000_kick_DMA(adapter, tx_ring, adapter->tx_ring->next_to_use); > + netdev->trans_start = jiffies; > + /* Make sure there is space in the ring for the next send. 
*/ > + e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); > + } > > spin_unlock_irqrestore(&tx_ring->tx_lock, flags); > - return NETDEV_TX_OK; > + return ret; > } > > /** > - > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo at vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html From julia at diku.dk Tue Nov 13 13:35:14 2007 From: julia at diku.dk (Julia Lawall) Date: Tue, 13 Nov 2007 22:35:14 +0100 (CET) Subject: [ofa-general] [PATCH 3/4] drivers/infiniband: Drop redundant includes of moduleparam.h Message-ID: From: Julia Lawall Drop #include in files that also include #include . module.h includes moduleparam.h already. The semantic patch implementing this change is as follows: @ includesmodule @ @@ #include @ depends on includesmodule @ @@ - #include Signed-off-by: Julia Lawall --- diff -u -p -b -B a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c --- a/drivers/infiniband/hw/amso1100/c2.c 2007-10-22 11:25:09.000000000 +0200 +++ b/drivers/infiniband/hw/amso1100/c2.c 2007-11-13 17:50:03.000000000 +0100 @@ -31,7 +31,6 @@ * SOFTWARE. 
*/ #include -#include #include #include #include diff -u -p -b -B a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c --- a/drivers/infiniband/hw/amso1100/c2_provider.c 2007-10-22 11:25:09.000000000 +0200 +++ b/drivers/infiniband/hw/amso1100/c2_provider.c 2007-11-13 17:50:05.000000000 +0100 @@ -33,7 +33,6 @@ */ #include -#include #include #include #include diff -u -p -b -B a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c --- a/drivers/infiniband/hw/amso1100/c2_rnic.c 2007-02-09 17:34:09.000000000 +0100 +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c 2007-11-13 17:50:06.000000000 +0100 @@ -34,7 +34,6 @@ #include -#include #include #include #include diff -u -p -b -B a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h --- a/drivers/infiniband/hw/ehca/ehca_tools.h 2007-10-22 11:25:09.000000000 +0200 +++ b/drivers/infiniband/hw/ehca/ehca_tools.h 2007-11-13 17:50:06.000000000 +0100 @@ -52,7 +52,6 @@ #include #include #include -#include #include #include #include diff -u -p -b -B a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c --- a/drivers/infiniband/hw/mthca/mthca_profile.c 2007-06-02 22:32:12.000000000 +0200 +++ b/drivers/infiniband/hw/mthca/mthca_profile.c 2007-11-13 17:50:23.000000000 +0100 @@ -34,7 +34,6 @@ */ #include -#include #include #include diff -u -p -b -B a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c --- a/drivers/infiniband/hw/cxgb3/iwch.c 2007-06-02 22:32:12.000000000 +0200 +++ b/drivers/infiniband/hw/cxgb3/iwch.c 2007-11-13 17:50:25.000000000 +0100 @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ #include -#include #include diff -u -p -b -B a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c 2007-07-20 17:45:46.000000000 +0200 +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c 2007-11-13 17:50:27.000000000 +0100 @@ -30,7 +30,6 @@ * SOFTWARE. */ #include -#include #include #include #include From rdreier at cisco.com Tue Nov 13 14:50:05 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 13 Nov 2007 14:50:05 -0800 Subject: [ofa-general] Re: [PATCH] libibverbs - return valid bad_wr from ibv_cmd_post_send() In-Reply-To: <1194909302.3612.74.camel@brick.pathscale.com> (Ralph Campbell's message of "Mon, 12 Nov 2007 15:15:02 -0800") References: <1194909302.3612.74.camel@brick.pathscale.com> Message-ID: > + } else if (ret) > + *bad_wr = wr; Hmm... it seems a little strange to pick the first WR for no reason when the kernel didn't point at a particular work request. Maybe it would make more sense to set bad_wr to NULL in this case? From gsadasiv7 at gmail.com Tue Nov 13 15:58:23 2007 From: gsadasiv7 at gmail.com (Ganesh Sadasivan) Date: Tue, 13 Nov 2007 15:58:23 -0800 Subject: [ofa-general] CQ destory Message-ID: <532b813a0711131558o230cf6efq8c2f732a2aae79ad@mail.gmail.com> Hi, Is there any way to figure out that a cq does not have any pending completion entries and thus is safe to call ibv_destroy_cq. Or is it ok to call destroy regardless of whether there are completion entries? I am seeing a seg fault in ibv_poll_cq if I adopt the latter approach. 
Thanks Ganesh From ralph.campbell at qlogic.com Tue Nov 13 16:02:36 2007 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Tue, 13 Nov 2007 16:02:36 -0800 Subject: [ofa-general] Re: [PATCH] libibverbs - return valid bad_wr from ibv_cmd_post_send() In-Reply-To: References: <1194909302.3612.74.camel@brick.pathscale.com> Message-ID: <1194998556.3612.95.camel@brick.pathscale.com> On Tue, 2007-11-13 at 14:50 -0800, Roland Dreier wrote: > > + } else if (ret) > > + *bad_wr = wr; > > Hmm... it seems a little strange to pick the first WR for no reason > when the kernel didn't point at a particular work request. Maybe it > would make more sense to set bad_wr to NULL in this case? Well, ibv_cmd_post_send() isn't going to know if the EINVAL is due to the size of the write being different from what the kernel expects (i.e., unrelated to WRs) or if the ID of the address handle is invalid and thus it is related to the WR. Now that I look at ib_uverbs_post_send() again, it seems that this later case should set resp.bad_wr and copy out the index of the WR that had the error. But even if there is an error not related to any WR directly, it is still the first WR that is not sent. I guess NULL could be used to give slightly more information to the caller but I don't really expect most application error recovery code to make the distinction. From weiny2 at llnl.gov Tue Nov 13 16:07:54 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:07:54 -0800 Subject: [ofa-general] [PATCH 0/7] "Clean up" some of the config files and the way they are configured. 
In-Reply-To: <20071111082002.GE8073@sashak.voltaire.com> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> <20071107191603.490b3121.weiny2@llnl.gov> <20071109114642.GV6153@sashak.voltaire.com> <20071109082151.6efb0256.weiny2@llnl.gov> <20071111082002.GE8073@sashak.voltaire.com> Message-ID: <20071113160754.666fdb06.weiny2@llnl.gov> Allong the lines of this email I have changed and added the place and way to change the config files being fed to opensm. This patch series adds the following options to the configure: --with-opensm-conf-sub-dir=dir define a directory name for opensm's conf files / (default "opensm") --with-partitions-conf=file define a partitions config file (default partitions.conf) --with-qos-policy-conf=file define a QOS policy config file (default qos-policy.conf) As well this cleans up some other parts of the configure. Ira On Sun, 11 Nov 2007 10:20:02 +0200 Sasha Khapyorsky wrote: > On 08:21 Fri 09 Nov , Ira Weiny wrote: > > Perhaps I should change the other config files to use > > sysconfdir? > > Yes, that what I thought. But clearly it is general issue and not > related to node name map. > > > I have not done so because of legacy reasons, but I think it would > > be more correct, no? > > I think so. Or maybe OpenSM own parameter (which will be defaulted to > $sysconfdir/opensm or $syconfdir/ofa) - didn't think about it a lot yet. > > Sasha From weiny2 at llnl.gov Tue Nov 13 16:08:04 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:08:04 -0800 Subject: [ofa-general] [PATCH 1/7] Break out a "CONF_DIR" variable in configure to base other config files on. Message-ID: <20071113160804.35df4baf.weiny2@llnl.gov> >From fcbc593bf2cf1cdb41aadd8153901e5a10bc62ac Mon Sep 17 00:00:00 2001 From: Ira K. 
Weiny Date: Sun, 11 Nov 2007 09:04:47 -0800 Subject: [PATCH] Break out a "CONF_DIR" variable in configure to base other config files on. Signed-off-by: Ira K. Weiny --- opensm/configure.in | 11 ++++++----- 1 files changed, 6 insertions(+), 5 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index 2d5d72c..1a637fa 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -70,6 +70,11 @@ OPENIB_OSM_CONSOLE_SOCKET_SEL dnl select performance manager or not OPENIB_OSM_PERF_MGR_SEL +dnl Set up /opensm config dir. +CONF_DIR_TMP1="`eval echo ${sysconfdir}/opensm`" +CONF_DIR_TMP2="`echo $CONF_DIR_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" +CONF_DIR="`eval echo $CONF_DIR_TMP2`" + dnl Check for a different default node name map file dnl default {sysconfdir}/ib-node-name-map NODENAMEMAPFILE=ib-node-name-map @@ -87,12 +92,8 @@ AC_ARG_WITH(node-name-map, esac ] ) AC_MSG_RESULT(${withnodenamemap=no}) - -NODENAMEMAP_TMP1="`eval echo ${sysconfdir}/$NODENAMEMAPFILE`" -NODENAMEMAP_TMP2="`echo $NODENAMEMAP_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" -NODENAMEMAP="`eval echo $NODENAMEMAP_TMP2`" AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, - ["$NODENAMEMAP"], + ["$CONF_DIR/$NODENAMEMAPFILE"], [Define a default node name map file]) dnl select example event plugin or not -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-Break-out-a-CONF_DIR-variable-in-configure-to-base.patch Type: application/octet-stream Size: 1459 bytes Desc: not available URL: From weiny2 at llnl.gov Tue Nov 13 16:08:36 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:08:36 -0800 Subject: [ofa-general] [PATCH 2/7] Add option to change the default "opensm" dir under sysconfdir as the config dir Message-ID: <20071113160836.49310444.weiny2@llnl.gov> >From 126ea6a37634d93ea3a91b33f3e308cb931210fa Mon Sep 17 00:00:00 2001 From: Ira K. 
Weiny Date: Sun, 11 Nov 2007 09:22:30 -0800 Subject: [PATCH] Add option to change the default "opensm" dir under sysconfdir as the config dir. Signed-off-by: Ira K. Weiny --- opensm/configure.in | 19 ++++++++++++++++++- 1 files changed, 18 insertions(+), 1 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index 1a637fa..dcec910 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -70,8 +70,25 @@ OPENIB_OSM_CONSOLE_SOCKET_SEL dnl select performance manager or not OPENIB_OSM_PERF_MGR_SEL +dnl Check for a different subdir for the config files. +OPENSM_CONF_SUB_DIR=opensm dnl define a default +AC_MSG_CHECKING(for --with-opensm-conf-sub-dir) +AC_ARG_WITH(opensm-conf-sub-dir, + AC_HELP_STRING([--with-opensm-conf-sub-dir=dir], + [define a directory name for opensm's conf files / (default "opensm")]), + [ case "$withval" in + no) + ;; + *) + withopensmconfsubdir=yes + OPENSM_CONF_SUB_DIR=$withval + ;; + esac ] +) +AC_MSG_RESULT(${withopensmconfsubdir=no}) + dnl Set up /opensm config dir. -CONF_DIR_TMP1="`eval echo ${sysconfdir}/opensm`" +CONF_DIR_TMP1="`eval echo ${sysconfdir}/$OPENSM_CONF_SUB_DIR`" CONF_DIR_TMP2="`echo $CONF_DIR_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" CONF_DIR="`eval echo $CONF_DIR_TMP2`" -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0002-Add-option-to-change-the-default-opensm-dir-under.patch Type: application/octet-stream Size: 1484 bytes Desc: not available URL: From weiny2 at llnl.gov Tue Nov 13 16:08:41 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:08:41 -0800 Subject: [ofa-general] [PATCH 3/7] Add the default in the with-node-name-map help string Message-ID: <20071113160841.42d495b1.weiny2@llnl.gov> >From 6ff358984eb65d369c557db522b7b9443c764283 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Mon, 12 Nov 2007 09:18:00 -0800 Subject: [PATCH] Add the default in the with-node-name-map help string Signed-off-by: Ira K. 
Weiny --- opensm/configure.in | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index dcec910..ca6e659 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -98,7 +98,7 @@ NODENAMEMAPFILE=ib-node-name-map AC_MSG_CHECKING(for --with-node-name-map ) AC_ARG_WITH(node-name-map, AC_HELP_STRING([--with-node-name-map=file], - [define a default node name map file]), + [define a default node name map file (default ib-node-name-map)]), [ case "$withval" in no) ;; -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0003-Add-the-default-in-the-with-node-name-map-help-strin.patch Type: application/octet-stream Size: 864 bytes Desc: not available URL: From weiny2 at llnl.gov Tue Nov 13 16:08:44 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:08:44 -0800 Subject: [ofa-general] [PATCH 4/7] opensm/configure.in: remove unecessary comment Message-ID: <20071113160844.5bd501d1.weiny2@llnl.gov> >From f2c77266bd9beb19a7b9b980e32f45fbff775529 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Sun, 11 Nov 2007 16:39:02 -0800 Subject: [PATCH] opensm/configure.in: remove unecessary comment. Signed-off-by: Ira K. Weiny --- opensm/configure.in | 1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index ca6e659..f6b4dd1 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -93,7 +93,6 @@ CONF_DIR_TMP2="`echo $CONF_DIR_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" CONF_DIR="`eval echo $CONF_DIR_TMP2`" dnl Check for a different default node name map file -dnl default {sysconfdir}/ib-node-name-map NODENAMEMAPFILE=ib-node-name-map AC_MSG_CHECKING(for --with-node-name-map ) AC_ARG_WITH(node-name-map, -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0004-opensm-configure.in-remove-unecessary-comment.patch Type: application/octet-stream Size: 827 bytes Desc: not available URL: From weiny2 at llnl.gov Tue Nov 13 16:08:48 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:08:48 -0800 Subject: [ofa-general] [PATCH 5/7] Add --with-qos-policy-conf to configure Message-ID: <20071113160848.5a1d5cff.weiny2@llnl.gov> >From 6df9c989499df81d87eec4251770e8b84c8dd4d3 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Sun, 11 Nov 2007 17:41:41 -0800 Subject: [PATCH] Add --with-qos-policy-conf to configure. As well as adding this option, change the default location/name for this file to be // to be consistent with other config files. Signed-off-by: Ira K. Weiny --- opensm/configure.in | 20 ++++++++++++++++++++ opensm/include/opensm/osm_base.h | 14 +++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index f6b4dd1..14fd60a 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -112,6 +112,26 @@ AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, ["$CONF_DIR/$NODENAMEMAPFILE"], [Define a default node name map file]) +dnl Check for a different QOS policy file +QOS_POLICY_FILE=qos-policy.conf +AC_MSG_CHECKING(for --with-qos-policy-conf) +AC_ARG_WITH(qos-policy-conf, + AC_HELP_STRING([--with-qos-policy-conf=file], + [define a QOS policy config file (default qos-policy.conf)]), + [ case "$withval" in + no) + ;; + *) + withqospolicyconf=yes + QOS_POLICY_FILE=$withval + ;; + esac ] +) +AC_MSG_RESULT(${withqospolicyconf=no}) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_QOS_POLICY_FILE, + ["$CONF_DIR/$QOS_POLICY_FILE"], + [Define a QOS policy config file]) + dnl select example event plugin or not OPENIB_OSM_DEFAULT_EVENT_PLUGIN_SEL diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h index 26ef067..c8695a0 100644 --- a/opensm/include/opensm/osm_base.h +++ b/opensm/include/opensm/osm_base.h @@ -48,6 +48,10 @@ #ifndef _OSM_BASE_H_ 
#define _OSM_BASE_H_ +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + #ifdef __WIN__ #include #define OSM_CDECL __cdecl @@ -236,9 +240,13 @@ BEGIN_C_DECLS */ #ifdef __WIN__ #define OSM_DEFAULT_QOS_POLICY_FILE strcat(GetOsmCachePath(), "osm-qos-policy.conf") -#else -#define OSM_DEFAULT_QOS_POLICY_FILE "/etc/ofa/opensm-qos-policy.conf" -#endif +#else /* !__WIN__ */ +# ifdef HAVE_DEFAULT_QOS_POLICY_FILE +# define OSM_DEFAULT_QOS_POLICY_FILE HAVE_DEFAULT_QOS_POLICY_FILE +# else /* !HAVE_DEFAULT_QOS_POLICY_FILE */ +# define OSM_DEFAULT_QOS_POLICY_FILE "/etc/ofa/opensm-qos-policy.conf" +# endif /* HAVE_DEFAULT_QOS_POLICY_FILE */ +#endif /* __WIN__ */ /***********/ /****d* OpenSM: Base/OSM_DEFAULT_SWEEP_INTERVAL_SECS -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0005-Add-with-qos-policy-conf-to-configure.patch Type: application/octet-stream Size: 2598 bytes Desc: not available URL: From weiny2 at llnl.gov Tue Nov 13 16:09:00 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:09:00 -0800 Subject: [ofa-general] [PATCH 6/7] Add --with-partitions-conf to configure Message-ID: <20071113160900.6a009766.weiny2@llnl.gov> >From 2d1d28ca45cc47e3de2bf4c23a5321f15cabd804 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Sun, 11 Nov 2007 18:12:35 -0800 Subject: [PATCH] Add --with-partitions-conf to configure. As well as adding this option, change the default location/name for this file to be // to be consistent with other config files. Signed-off-by: Ira K. 
Weiny --- opensm/configure.in | 20 ++++++++++++++++++++ opensm/include/opensm/osm_base.h | 10 +++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/opensm/configure.in b/opensm/configure.in index 14fd60a..8e56cc7 100644 --- a/opensm/configure.in +++ b/opensm/configure.in @@ -112,6 +112,26 @@ AC_DEFINE_UNQUOTED(HAVE_DEFAULT_NODENAME_MAP, ["$CONF_DIR/$NODENAMEMAPFILE"], [Define a default node name map file]) +dnl Check for a different partition conf file +PARTITION_CONFIG_FILE=partitions.conf +AC_MSG_CHECKING(for --with-partitions-conf) +AC_ARG_WITH(partitions-conf, + AC_HELP_STRING([--with-partitions-conf=file], + [define a partitions config file (default partitions.conf)]), + [ case "$withval" in + no) + ;; + *) + withpartitionsconf=yes + PARTITION_CONFIG_FILE=$withval + ;; + esac ] +) +AC_MSG_RESULT(${withpartitionsconf=no}) +AC_DEFINE_UNQUOTED(HAVE_DEFAULT_PARTITION_CONFIG_FILE, + ["$CONF_DIR/$PARTITION_CONFIG_FILE"], + [Define a QOS policy config file]) + dnl Check for a different QOS policy file QOS_POLICY_FILE=qos-policy.conf AC_MSG_CHECKING(for --with-qos-policy-conf) diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h index c8695a0..aa8d378 100644 --- a/opensm/include/opensm/osm_base.h +++ b/opensm/include/opensm/osm_base.h @@ -224,9 +224,13 @@ BEGIN_C_DECLS */ #ifdef __WIN__ #define OSM_DEFAULT_PARTITION_CONFIG_FILE strcat(GetOsmCachePath(), "osm-partitions.conf") -#else -#define OSM_DEFAULT_PARTITION_CONFIG_FILE "/etc/ofa/opensm-partitions.conf" -#endif +#else /* !__WIN__ */ +# ifdef HAVE_DEFAULT_PARTITION_CONFIG_FILE +# define OSM_DEFAULT_PARTITION_CONFIG_FILE HAVE_DEFAULT_PARTITION_CONFIG_FILE +# else /* !HAVE_DEFAULT_PARTITION_CONFIG_FILE */ +# define OSM_DEFAULT_PARTITION_CONFIG_FILE "/etc/ofa/opensm-partitions.conf" +# endif /* HAVE_DEFAULT_PARTITION_CONFIG_FILE */ +#endif /* __WIN__ */ /***********/ /****d* OpenSM: Base/OSM_DEFAULT_QOS_POLICY_FILE -- 1.5.1 -------------- next part -------------- A 
non-text attachment was scrubbed... Name: 0006-Add-with-partitions-conf-to-configure.patch Type: application/octet-stream Size: 2492 bytes Desc: not available URL: From weiny2 at llnl.gov Tue Nov 13 16:09:08 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Tue, 13 Nov 2007 16:09:08 -0800 Subject: [ofa-general] [PATCH 7/7] opensm/config/osmvsel.m4: Convert help strings to AC_HELP_STRING Message-ID: <20071113160908.1115d762.weiny2@llnl.gov> >From bc6cded821a597dec2818470448240417e88fe85 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Mon, 12 Nov 2007 09:35:45 -0800 Subject: [PATCH] opensm/config/osmvsel.m4: Convert help strings to AC_HELP_STRING Signed-off-by: Ira K. Weiny --- opensm/config/osmvsel.m4 | 15 ++++++++++----- 1 files changed, 10 insertions(+), 5 deletions(-) diff --git a/opensm/config/osmvsel.m4 b/opensm/config/osmvsel.m4 index 36c5ddf..aa20c2f 100644 --- a/opensm/config/osmvsel.m4 +++ b/opensm/config/osmvsel.m4 @@ -13,19 +13,22 @@ AC_DEFUN([OPENIB_APP_OSMV_SEL], [ dnl Define a way for the user to provide the osm vendor type AC_ARG_WITH(osmv, -[ --with-osmv= define the osm vendor type to build], + AC_HELP_STRING([--with-osmv=], + [define the osm vendor type to build]), AC_MSG_NOTICE(Using OSM Vendor Type:$with_osmv), with_osmv="openib") dnl Define a way for the user to provide the path to the ibumad installation AC_ARG_WITH(umad-prefix, -[ --with-umad-prefix= define the dir used as prefix for ibumad installation], + AC_HELP_STRING([--with-umad-prefix=], + [define the dir used as prefix for ibumad installation]), AC_MSG_NOTICE(Using ibumad installation prefix:$with_umad_prefix), with_umad_prefix="") dnl Define a way for the user to provide the path to the ibumad includes AC_ARG_WITH(umad-includes, -[ --with-umad-includes= define the dir where ibumad includes are installed], + AC_HELP_STRING([--with-umad-includes=], + [define the dir where ibumad includes are installed]), AC_MSG_NOTICE(Using ibumad includes from:$with_umad_includes), 
with_umad_includes="") @@ -37,7 +40,8 @@ fi dnl Define a way for the user to provide the path to the ibumad libs AC_ARG_WITH(umad-libs, -[ --with-umad-libs= define the dir where ibumad libs are installed], + AC_HELP_STRING([--with-umad-libs=], + [define the dir where ibumad libs are installed]), AC_MSG_NOTICE(Using ibumad libs from:$with_umad_libs), with_umad_libs="") @@ -54,7 +58,8 @@ fi dnl Define a way for the user to provide the path to the simulator installation AC_ARG_WITH(sim, -[ --with-sim= define the simulator prefix for building sim vendor (/usr)], + AC_HELP_STRING([--with-sim=], + [define the simulator prefix for building sim vendor (default /usr)]), AC_MSG_NOTICE(Using Simulator from:$with_sim), with_sim="/usr") -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0007-opensm-config-osmvsel.m4-Convert-help-strings-to-AC.patch Type: application/octet-stream Size: 2520 bytes Desc: not available URL: From keshetti85-student at yahoo.co.in Tue Nov 13 21:22:00 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Wed, 14 Nov 2007 10:52:00 +0530 Subject: [ofa-general] Credit loops in infiniband networks and ibdm utility Message-ID: <829ded920711132122y6482df8bn4ade2ae8eee20ae1@mail.gmail.com> I have some basic questions about credit loops in infiniband networks. Can anyone of you tell me in brief what are credit loops, when they occur and how does IBDM utility verify the network for credit loops? (I didn't find anything about credit loops in the IB spec.s) n2------S2\ | \ | \ | S1------ n1 | / | / n3-----S3 In the above topology (S-switch and n-node ), I have loaded all the possible paths using the openSM's file based ucast manager. When I ran an MPI job which sends data from both n3 and n1 to n2 I have observed a dead lock due to credit loop. Why is it happening here ? 
-Mahesh From moshek at voltaire.com Tue Nov 13 22:39:18 2007 From: moshek at voltaire.com (Moshe Kazir) Date: Wed, 14 Nov 2007 08:39:18 +0200 Subject: [ofa-general] OFED 1.3 beta status on kernel 2.6.24 rebase In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E28C@mtlexch01.mtl.com> Message-ID: <39C75744D164D948A170E9792AF8E7CA4D2C3B@exil.voltaire.com> More info / patch / etc. ? And, Is this bug related only to 1.3 or also to previous versions 1.2.5.X ? Moshe ____________________________________________________________ Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) Voltaire - The Grid Backbone www.voltaire.com -----Original Message----- From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Tziporet Koren Sent: Tuesday, November 13, 2007 10:26 PM To: ewg at lists.openfabrics.org Cc: Vladimir Sokolovsky; general at lists.openfabrics.org Subject: [ofa-general] OFED 1.3 beta status on kernel 2.6.24 rebase Hi All, We just fixed a critical bug in IPoIB and we start to run our internal regression tests on the new ofed_2_6_24 branch to get some confidence level with it. If the regression will pass we will start the daily build of OFED 1.3 based on the new kernel tomorrow. Vlad will send an email updating on this Tziporet -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From lion_stars_girl at yahoo.com Tue Nov 13 22:37:30 2007 From: lion_stars_girl at yahoo.com (Global Mandiri) Date: Wed, 14 Nov 2007 13:37:30 +0700 Subject: [ofa-general] International Air and Sea Freight Forwarders Quotation Message-ID: <050501c82688$f2444fb0$0d00000a@nancy> Jakarta; November 01, 2007 To: Logistic/ Procurement/ Purchasing/ Import Departement Re : Rates Door To Door Services Direct Dengan hormat, Bersama ini kami Global Mandiri Indonesia (GMI) ingin memperkenalkan dan menawarkan jasa import "Door to Door Service Singapore / Worldwide to Jakarta" (Harga sudah all in) dengan tarif terbaik dan system delivery dengan cepat. Harga yang kami tawarkan sudah termasuk semua biaya-biaya hingga barang diterima di tempat. Berikut tarif yang dimaksud, efektif November 2007: Airfreight Sing-Jkt (General Cargo) - Regular Rp. 40.000,-/Kg 6-8 Hari kerja Min 5 Kg - Ekspress Rp. 50.000,-/Kg 3-4 Hari kerja Min 10 Kg - Handcarry Rp. 100.000,-/Kg 1-3 Hari kerja Min 30 Kg Seafreight Sing-Jkt (General Cargo) - Kg Rp. 9.000,-/Kg + 30 Hari kerja Min 100 Kg / 1 M3 - Kubikasi Rp. 2.900.000,-/M3 + 30 Hari kerja Additional Charge : 1. S A C ( Singapore Airport Charge ) 0 - 100 kg SGD 120.00 + 100 kg will Add. SGD 0. 5/kg * Untuk setiap shipment Air freight dari luar Singapore menggunakan Cargo agent / Shipping line etc 2. S S C ( Singapore Seaport Charge ): Untuk shipment dari luar Singapore via laut menggunakanShipping line/Cargo agent etc *Akan di charge Cost to cost sesuai dengan invoice agen kami di Singapore 3. 
Permitt Fee ( PF ) SGD 35.00/ shipment SGD 50.00/ shipment ( by Sing.Post ) * untuk Shipment dari luar Singapore menggunakan Courier agen ( FEDEX/UPS/DHL/ etc) Harga (General Cargo) Worldwide by air to Jakarta minimum 45 kg: ORIGIN +45 +100 +250 +500 +1000 1 CANADA Calgary 13.50 12.00 10.50 10.00 9.50 2 CHINA Beijing 13.50 11.00 10.25 10.00 9.50 Shanghai 14.50 11.50 10.50 10.00 9.75 3 EUROPE Amsterdam 19.00 14.00 11.50 11.00 10.75 Italia 14.00 11.00 10.00 9.50 9.00 London 14.00 12.00 10.00 9.50 9.00 German 14.00 12.00 10.00 9.50 9.25 4 HONGKONG Hongkong 13.00 10.50 10.00 9.50 9.25 5 JAPAN Nagoya 15.00 12.00 11.00 10.25 10.00 Tokyo 15.00 11.75 10.50 10.00 10.00 6 KOREA Seoul/Inchon 13.00 10.50 10.00 9.50 9.25 7 MALAYSIA Kuala Lumpur 12.00 10.00 9.00 8.50 8.25 8 PHILIPHINES Manila 13.50 12.00 11.00 10.50 10.00 9 TAIWAN Taipeh 12.00 10.50 9.50 9.00 8.75 10 THAILAND Bangkok 14.00 10.50 9.50 9.00 8.50 11 USA Chichago 14.00 11.00 10.00 9.50 9.00 Dallas 14.00 11.00 10.75 10.50 10.25 Huston 15.00 11.50 10.00 9.50 9.00 Lax 14.00 10.50 10.00 9.50 9.25 Memphis 15.00 13.50 11.50 11.00 10.50 Miami 14.00 11.50 10.50 10.00 9.50 Mineapolis 15.00 11.50 10.50 9.50 9.00 New York 14.50 11.00 10.00 9.50 9.00 12 AUSTRALIA Sydney 14.00 11.50 10.50 10.00 9.50 Brisbane 15.00 11.50 10.50 9.50 9.00 Melbourne 14.50 11.00 10.00 9.50 9.00 PT. GLOBAL MANDIRI INTERNUSA GLOBAL MANDIRI (S) PTE LTD Jl.Swasembada Timur XVII No.2 70, Alps Avenue Unit # 01-06 Tanjung Priok Jakarta Utara 14320 Singapore 498801 Indonesia Tel. +65 6546 1419 Tel. +62 21.4374007 Fax.+65 6546 1481 Fax. +62 21.4374007 Pic. Ms.Malini Wahab Pic. Nancy Manurung E-Mail: nancy at global-mandiri.com Catatan: a.. Setiap pengiriman harap diinformasikan kepada kami terlebih dahulu (packing list / invoice / PO) b.. Kami tidak bertanggung jawab atas kehilangan barang yang tidak sesuai dengan packing list c.. Tagihan akan dihitung berdasarkan berat atau kubikasi, diambil mana yang lebih besar d.. 
Pembayaran Freight Collect akan dikenakan biaya 5% dari nilai freight collect ditambah biaya bank charges sebesar Rp. 50,000. e.. Setiap barang transhipment Singapore harap dialamatkan ke kantor kami yang di airport dan akan dikenakan biaya tambahan yaitu SAC (Singapore airport charges) / SSC (Singapore seaport charges) / PF (Permit fee). f.. Jika kondisi RED LINE waktu pengiriman untuk airfreight menjadi 10 hari kerja dan seafreight jadi 30 hari kerja. g.. Jangka waktu pembayaran untuk shipment pertama COD (Cash on Delivery) dan shipment selanjutnya 1-2 minggu setelah barang diterima jika customer berlokasi di Jakarta h.. Dapat menangani pengiriman barang selain General cargo, seperti: Textile/Garment, medicine, chemical, electronic, T.V. plasma, Unit machine, dll. i.. Untuk pick up barang di Singapore, jika berat barang <20Kg dikenakan charges SGD. 30 / SHIPMENT dan jika >20Kg = FREE CHARGES. j.. Kami tidak bertanggung jawab atas kehilangan barang yang tidak sesuai dengan packing Selain dari layanan tersebut, kami juga memberikan jasa layanan untuk export/import dari berbagai negara lainnya dan layanan untuk IMPORT RESMI di pelabuhan Tanjung Priok Jakarta dan bandara Int'n Soekarno Hatta dengan perincian sebagai berikut : Under Name Fee Import Licence fee : API-U /Angka Pengenal Importir -Umum NPWP/Nomer Pokok Wajib Pajak TDP/Tanda Daftar Perusahaan SIUP/Surat Ijin Usaha Perdagangan NPIK/Nomer Pokok Importir Khusus ELEKTRONIC NPIK/Nomer Pokok Importir Khusus SEPATU NPIK/Nomer Pokok Importir Khusus MAINAN ANAK-ANAK No.Register Kepabeanan Handling Customs Resmi : LCL 20ft 40ft Costum Clearance/min1-3cbm :Rp. 750,000,-/1-3 cbm Rp. 450,000,-/contr Rp. 450,000,-/contr Additional/cbm/cotnr :Rp. 500,000,-/cbm Rp. 75,000,-/contr Rp. 75,000,-/contr Jalur Merah/min 1-3 cbm :Rp.1,000,000,-/1-3 cbm Rp. 750,000,-/contr Rp.1,000,000,/-contr Additonal/cbm/contr :Rp. 500,000,-/cbm Rp. 500,000,-/contr Rp. 750,000,-/contr Trucking Jkt area/min1-3 cbm :Rp. 
400,000,-/trip Rp.1.000,000,-/contr Rp.1,200,000,-/contr Additional / cbm :Rp. 50,000,-/cbm Catatan: a.. Setiap pengiriman harap diinformasikan kepada kami terlebih dahulu (packing list / invoice / PO) sebelum cargo tiba di pelabuhan bongkar. b.. BM/Bea masuk,PPn/Pajak Pertambahan Nilai,PPh/Pajak Penghasilan,adm bank di tanggung actual consignee. c.. Biaya pelayaran (shipping charge)THC,DO,ADM fee,Agency Fee di tanggung actual consignee d.. Biaya penumpukan di gudang(storage & warehouse) di tanggung actual consignee e.. Untuk sistem Resmi & undername maka invoice & packing list kami dapat dari actual consignee. f.. Notul / Tambah Bayar di tanggung actual consignee. g.. Biaya kawalan untuk pengiriman barang/trucking di tanggung kami. h.. Kami tidak bertanggung jawab atas kehilangan barang yang tidak sesuai dengan packing i.. Sistem pembayaran dll ,bisa di bicarakan dengan pihak kami . Demikianlah surat penawaran harga yang dapat kami sampaikan,jika anda membutuhkan informasi lebih lengkap,silahkan menghubungi kami. Atas perhatian dan kerjasamanya kami ucapkan terimakasih. Hormat kami, Nancy Manurung PT GLOBAL MANDIRI INTERNUSA JL.SWASEMBADA TIMUR XVII no.2 TANJUNG PRIOK JAKARTA UTARA Telp.+6221.4374007 Hunting / Fax.+6221.4374004 Mobile :+6281383793069 Email : nancy at global-mandiri.com Website : www.global-mandiri.com Catatan: a.. Setiap pengiriman harap diinformasikan kepada kami terlebih dahulu (packing list / invoice / PO) b.. Kami tidak bertanggung jawab atas kehilangan barang yang tidak sesuai dengan packing list c.. Tagihan akan dihitung berdasarkan berat atau kubikasi, diambil mana yang lebih besar d.. Pembayaran Freight Collect akan dikenakan biaya 5% dari nilai freight collect ditambah biaya bank charges sebesar Rp. 50,000. e.. Setiap barang transhipment Singapore harap dialamatkan ke kantor kami yang di airport dan akan dikenakan biaya tambahan yaitu SAC (Singapore airport charges) / SSC (Singapore seaport charges) / PF (Permit fee). f.. 
Jika kondisi RED LINE waktu pengiriman untuk airfreight menjadi 10 hari kerja dan seafreight jadi 30 hari kerja. g.. Jangka waktu pembayaran untuk shipment pertama COD (Cash on Delivery) dan shipment selanjutnya 1-2 minggu setelah barang diterima jika customer berlokasi di Jakarta h.. Dapat menangani pengiriman barang selain General cargo, seperti: Textile/Garment, medicine, chemical, electronic, T.V. plasma, Unit machine, dll. i.. Untuk pick up barang di Singapore, jika berat barang <20Kg dikenakan charges SGD. 30 / SHIPMENT dan jika >20Kg = FREE CHARGES. -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: Logo Baru Global.BMP Type: imgage/bmp Size: 51254 bytes Desc: not available URL: From dotanb at dev.mellanox.co.il Tue Nov 13 23:48:54 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 14 Nov 2007 09:48:54 +0200 Subject: [ofa-general] CQ destory In-Reply-To: <532b813a0711131558o230cf6efq8c2f732a2aae79ad@mail.gmail.com> References: <532b813a0711131558o230cf6efq8c2f732a2aae79ad@mail.gmail.com> Message-ID: <473AA866.6040306@dev.mellanox.co.il> Hi. There isn't any way to know if there are any CQEs in the CQ. Destroying a CQ which has completions should create seg fault (unless you have another thread that try to use this CQ ...) Dotan Ganesh Sadasivan wrote: > Hi, > > Is there any way to figure out that a cq does not have any pending > completion entries and thus > is safe to call ibv_destroy_cq. Or is it ok to call destroy > regardless of whether there are completion > entries? I am seeing a seg fault in ibv_poll_cq if I adopt the > latter approach. 
> > Thanks > Ganesh > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From monil at voltaire.com Wed Nov 14 00:23:09 2007 From: monil at voltaire.com (Moni Levy) Date: Wed, 14 Nov 2007 10:23:09 +0200 Subject: [ewg] RE: [ofa-general] OFED 1.3 beta status on kernel 2.6.24 rebase In-Reply-To: <39C75744D164D948A170E9792AF8E7CA4D2C3B@exil.voltaire.com> References: <6C2C79E72C305246B504CBA17B5500C90282E28C@mtlexch01.mtl.com> <39C75744D164D948A170E9792AF8E7CA4D2C3B@exil.voltaire.com> Message-ID: <6a122cc00711140023h2b07d578l3c3b1dde3f44cd05@mail.gmail.com> On Nov 14, 2007 8:39 AM, Moshe Kazir wrote: > > > More info / patch / etc. ? I checked at: http://www.openfabrics.org/git/?p=ofed_1_3/linux-2.6.git;a=shortlog;h=ofed_kernel_2_6_24_rc1 I guess that the one Tziporet is talking about is: "IPoIB: Use shinfo->gso_size to indicate tcp mss instead of using link MTU" Tziporet, am I right ? -- Moni From krkumar2 at in.ibm.com Wed Nov 14 00:30:15 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Wed, 14 Nov 2007 14:00:15 +0530 Subject: [ofa-general] Re: [PATCH 10/10 REV5] [E1000] Implement batching In-Reply-To: <473A16EE.6080101@intel.com> Message-ID: Hi Auke, "Kok, Auke" wrote on 11/14/2007 02:58:14 AM: > this doesn't apply anymore and it would help if you could re-spin this for e1000e. > I don't know what the status for merging of the batched xmit patches is right now > but it would help if you could rewrite them against e1000e, which I assume is what > most people want to test with. There are also significant changes upstream right > now in jgarzik/netdev-2.6 #upstream... > > I'm still very interested in these patches BTW. I will make a latest version and test it out for some numbers and try to send it this week. 
Thanks, - KK From kliteyn at dev.mellanox.co.il Wed Nov 14 01:11:42 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 14 Nov 2007 11:11:42 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: References: <473822FD.20208@Voltaire.COM> Message-ID: <473ABBCE.8010109@dev.mellanox.co.il> Hal Rosenstock wrote: > Or, > > On 11/13/07, Or Gerlitz wrote: >> Yevgeny, >> >> iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port >> space as does RDS. The RDMA-CM signature is something which I am sure >> exists, you can look on the RDMA-CM IB spec Annex to see if such thing >> indeed exist or I am wrong. > > Did you really look at the annex for this ? > >> The TCP port is the 16 bit port portion of >> the >> ip:port address provided by a ULP that uses the RDMA-CM to >> rdma_resolve_addr(), again the annex explained how the port is embedded >> into the SID, I don't remember the location within the 64 bit string. > > It's in the low 16 bits (bytes 6-7) of the SID as the annex indicates. > >> Or. >> >> -------- Original Message -------- >> Subject: >> Re: QoS for iSER >> Date: >> Mon, 12 Nov 2007 11:41:43 +0200 >> From: Yevgeny Kliteynik >> >> Hi Erez, >> >> Erez Zilber wrote: >>> to create the SID, the rdma cm combines >>> >>> 1) the port space >> What is the port space for iSER? >> For SDP it's 0x10000 - 0x1FFFF. >> For RDS it's 0x1060000 - 0x106FFFF >> For iSER it's ...? > > These numbers are too large for just "port space". > > iSER SID is 0x000000000106035c > > in your nomenclature, I guess 0x106035c > > 01 says RDMA aware ULP service ID range > 06 says IP protocol is TCP > 0x035c (port 860) is the well known TCP port for iSCSI Thanks, that is just what I needed. I'm preparing a (very) simplified interface for defining QoS policy. I'm adding an additional section in QoS policy file, where an admin will be able to configure QoS per ULP or per application w/o going into too many details. 
Here's the example of what I have in mind: qos-ulps default : 0 #default SL sdp, port 10000-20000 : 2 sdp : 0 #default SL for SDP rds, port 25000 : 2 #SL for RDS when destination port is 25000 rds, : 0 #default SL for RDS iser *??????* : 4 #SL for iSER ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 ipoib : 6 #default IPoIB partition - pkey=0x7FFF ... end-qos-ulps This syntax is possible only if there are well known facts such as SDP service ID, in which case admin will be able to just state "sdp: ", and OpenSM will (internally) generate relevant matching rule and QoS level based on this known service ID. So back to iSER: Can I assume that the target port for iSER will always be 860, hence the iSER service ID will always be 0x000000000106035c? Or perhaps I can do it similar to SDP, where there is an option to specify the port ranges along with the ULP name (SDP): - if administrator only specifies "iser", I can assume that the service ID is default 0x000000000106035c - if administrator only specifies "iser" and ports, OpenSM will build service ID based on a well known prefix (0x000000000106pppp) where the last 4 hex digits are target port number Keep in mind that if this doesn't look too flexible and doesn't cover all the cases, there's always the rest of the QoS policy file with all the advanced configuration. -- Yevgeny > -- Hal > >>> 2) the rdma cm signature >> Do you mean something iSER-specific, or just the way the cm >> builds the service ID out of port space and tcp port? >> Can you give an example? >> >>> 3) the destination tcp port provided to rdma_resolve_addr >> I guess that tcp port is in the lower 4 nibs of the service ID, >> similar to SDP. Right? 
>> -- Yevgeny >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >> > From vlad at lists.openfabrics.org Wed Nov 14 03:06:14 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 14 Nov 2007 03:06:14 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071114-0200 daily build status Message-ID: <20071114110614.BFDB9E60A19@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.18 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 
Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ia64 with linux-2.6.14 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-8.el5 Failed: From histotherapy at wishpromo.com Wed Nov 14 05:03:20 2007 From: histotherapy at wishpromo.com (Shatter Burke) Date: Wed, 14 Nov 2007 18:03:20 +0500 Subject: [ofa-general] Adobe Font Folio 11 MAC/XP/Vista for 189, Retails @ 2599 (You save 2409) Message-ID: <000001c826b9$4ea06980$0100007f@localhost> steinberg nuendo 3.1 - 99 2003 microsoft office professional with business contact manager for outlook - 69 acronis true image workstation 9.1.3887 - 29 ulead videostudio 11.0 plus - 39 microsoft expression studio 1.0 - 79 borland developer studio 2006 - 149 cakewalk project 5 - 59 ms windows 2003 enterprise server - 69 type cheapxpsoft5. com in Internet Explorer From declivitiesu6 at auckland.ac.nz Wed Nov 14 04:37:28 2007 From: declivitiesu6 at auckland.ac.nz (Kristine Salas) Date: Wed, 14 Nov 2007 20:37:28 +0800 Subject: [ofa-general] Drugs for Openib Message-ID: <730577064.28610931673409@auckland.ac.nz> An HTML attachment was scrubbed... 
URL: From hrosenstock at xsigo.com Wed Nov 14 05:18:27 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Wed, 14 Nov 2007 05:18:27 -0800 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <473ABBCE.8010109@dev.mellanox.co.il> References: <473822FD.20208@Voltaire.COM> <473ABBCE.8010109@dev.mellanox.co.il> Message-ID: <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: > Hal Rosenstock wrote: > > Or, > > > > On 11/13/07, Or Gerlitz wrote: > >> Yevgeny, > >> > >> iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port > >> space as does RDS. The RDMA-CM signature is something which I am sure > >> exists, you can look on the RDMA-CM IB spec Annex to see if such thing > >> indeed exist or I am wrong. > > > > Did you really look at the annex for this ? > > > >> The TCP port is the 16 bit port portion of > >> the > >> ip:port address provided by a ULP that uses the RDMA-CM to > >> rdma_resolve_addr(), again the annex explained how the port is embedded > >> into the SID, I don't remember the location within the 64 bit string. > > > > It's in the low 16 bits (bytes 6-7) of the SID as the annex indicates. > > > >> Or. > >> > >> -------- Original Message -------- > >> Subject: > >> Re: QoS for iSER > >> Date: > >> Mon, 12 Nov 2007 11:41:43 +0200 > >> From: Yevgeny Kliteynik > >> > >> Hi Erez, > >> > >> Erez Zilber wrote: > >>> to create the SID, the rdma cm combines > >>> > >>> 1) the port space > >> What is the port space for iSER? > >> For SDP it's 0x10000 - 0x1FFFF. > >> For RDS it's 0x1060000 - 0x106FFFF I presume this is just saying RDS uses IP protocol TCP and there is no well known port (e.g. uses dynamic ports). So how do you know ahead of time which port ? > >> For iSER it's ...? > > > > These numbers are too large for just "port space". 
> > > > iSER SID is 0x000000000106035c > > > > in your nomenclature, I guess 0x106035c > > > > 01 says RDMA aware ULP service ID range > > 06 says IP protocol is TCP > > 0x035c (port 860) is the well known TCP port for iSCSI > > Thanks, that is just what I needed. > I'm preparing a (very) simplified interface for defining QoS policy. > I'm adding an additional section in QoS policy file, where an admin > will be able to configure QoS per ULP or per application w/o going > into too many details. > Here's the example of what I have in mind: > > qos-ulps > default : 0 #default SL > sdp, port 10000-20000 : 2 > sdp : 0 #default SL for SDP > rds, port 25000 : 2 #SL for RDS when destination port is 25000 Isn't there a chicken and egg problem here with this ? How do you know port 25000 will be assigned "in advance" ? > rds, : 0 #default SL for RDS I don't see how RDS can work separate from other CMA based protocols which use dynamic ports. > iser *??????* : 4 #SL for iSER > ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 > ipoib : 6 #default IPoIB partition - pkey=0x7FFF ... > end-qos-ulps > > This syntax is possible only if there are well known facts > such as SDP service ID, in which case admin will be able to > just state "sdp: ", and OpenSM will (internally) generate > relevant matching rule and QoS level based on this known > service ID. > > So back to iSER: > > Can I assume that the target port for iSER will always be 860, > hence the iSER service ID will always be 0x000000000106035c? In terms of iSER, I was only commenting on what the spec says. I did not verify its operation in terms of the code. Does the code follow the spec ? 
-- Hal > Or perhaps I can do it similar to SDP, where there is an option > to specify the port ranges along with the ULP name (SDP): > - if administrator only specifies "iser", I can assume that > the service ID is default 0x000000000106035c > - if administrator only specifies "iser" and ports, OpenSM > will build service ID based on a well known prefix > (0x000000000106pppp) where the last 4 hex digits are target > port number > > Keep in mind that if this doesn't look too flexible and doesn't > cover all the cases, there's always the rest of the QoS policy > file with all the advanced configuration. > > -- Yevgeny > > > -- Hal > > > >>> 2) the rdma cm signature > >> Do you mean something iSER-specific, or just the way the cm > >> builds the service ID out of port space and tcp port? > >> Can you give an example? > >> > >>> 3) the destination tcp port provided to rdma_resolve_addr > >> I guess that tcp port is in the lower 4 nibs of the service ID, > >> similar to SDP. Right? > >> -- Yevgeny > >> > >> _______________________________________________ > >> general mailing list > >> general at lists.openfabrics.org > >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >> > >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >> > > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From kliteyn at mellanox.co.il Tue Nov 13 21:11:54 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 14 Nov 2007 07:11:54 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-14:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-13 OpenSM git rev = Tue_Nov_13_04:41:11_2007 [ba3e8be134171e4d649fe5b660aae83659f8fd66] ibutils git rev = 
Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From jackm at dev.mellanox.co.il Wed Nov 14 06:33:27 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 14 Nov 2007 16:33:27 +0200 Subject: [ofa-general] [PATCH] mlx4: fix thinko in qp destroy (incorrect bitmap_free) Message-ID: <200711141633.28064.jackm@dev.mellanox.co.il> mlx4: fix thinko in commit eaf559bf566f76887533c077d425adce847f06c8. Need to call mlx4_bitmap_free if the qp is not a special QP, not if it is a special QP. Found by Dotan Barak of Mellanox. Signed-off-by: Jack Morgenstein --- Roland, This one needs to be applied immediately to the 2.6.24 tree -- bug fix. 
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index cc4b1be..42b4763 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -240,7 +240,7 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); - if (qp->qpn < dev->caps.sqp_start + 8) + if (qp->qpn >= dev->caps.sqp_start + 8) mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); } EXPORT_SYMBOL_GPL(mlx4_qp_free); From kliteyn at dev.mellanox.co.il Wed Nov 14 07:30:43 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 14 Nov 2007 17:30:43 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> References: <473822FD.20208@Voltaire.COM> <473ABBCE.8010109@dev.mellanox.co.il> <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> Message-ID: <473B14A3.5090703@dev.mellanox.co.il> Hal Rosenstock wrote: > On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: >> Hal Rosenstock wrote: >>> Or, >>> >>> On 11/13/07, Or Gerlitz wrote: >>>> Yevgeny, >>>> >>>> iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port >>>> space as does RDS. The RDMA-CM signature is something which I am sure >>>> exists, you can look on the RDMA-CM IB spec Annex to see if such thing >>>> indeed exist or I am wrong. >>> Did you really look at the annex for this ? >>> >>>> The TCP port is the 16 bit port portion of >>>> the >>>> ip:port address provided by a ULP that uses the RDMA-CM to >>>> rdma_resolve_addr(), again the annex explained how the port is embedded >>>> into the SID, I don't remember the location within the 64 bit string. >>> It's in the low 16 bits (bytes 6-7) of the SID as the annex indicates. >>> >>>> Or. 
>>>> >>>> -------- Original Message -------- >>>> Subject: >>>> Re: QoS for iSER >>>> Date: >>>> Mon, 12 Nov 2007 11:41:43 +0200 >>>> From: Yevgeny Kliteynik >>>> >>>> Hi Erez, >>>> >>>> Erez Zilber wrote: >>>>> to create the SID, the rdma cm combines >>>>> >>>>> 1) the port space >>>> What is the port space for iSER? >>>> For SDP it's 0x10000 - 0x1FFFF. >>>> For RDS it's 0x1060000 - 0x106FFFF > > I presume this is just saying RDS uses IP protocol TCP and there is no > well known port (e.g. uses dynamic ports). So how do you know ahead of > time which port ? See below. >>>> For iSER it's ...? >>> These numbers are too large for just "port space". >>> >>> iSER SID is 0x000000000106035c >>> >>> in your nomenclature, I guess 0x106035c >>> >>> 01 says RDMA aware ULP service ID range >>> 06 says IP protocol is TCP >>> 0x035c (port 860) is the well known TCP port for iSCSI >> Thanks, that is just what I needed. >> I'm preparing a (very) simplified interface for defining QoS policy. >> I'm adding an additional section in QoS policy file, where an admin >> will be able to configure QoS per ULP or per application w/o going >> into too many details. >> Here's the example of what I have in mind: >> >> qos-ulps >> default : 0 #default SL >> sdp, port 10000-20000 : 2 >> sdp : 0 #default SL for SDP >> rds, port 25000 : 2 #SL for RDS when destination port is 25000 > > Isn't there a chicken and egg problem here with this ? How do you know > port 25000 will be assigned "in advance" ? See below. >> rds, : 0 #default SL for RDS > > I don't see how RDS can work separate from other CMA based protocols > which use dynamic ports. You're right, it can't. Moreover, as you know, OpenSM is not aware of the term "protocol" at all - it just sees Service IDs in PathRecord request, so the only differentiation it can create is by assigning certain SL to a certain service ID. What I'm trying to do here is to provide a simple way to configure QoS that will work in *many* cases, not in *all* cases. 
I'm giving the following options to differentiate traffic: - All the SDP traffic - Selected ports of the SDP traffic - SRP traffic by providing SRP target guids - iSER traffic (to a well known TCP port) - All the RDS traffic - Selected ports of the RDS traffic - IPoIB by pkey - Specific Service IDs - Specific pkeys - Specific destination port guids And as you've mentioned, some rules may overlap. For instance, if the rule for all the RDS traffic appears before the iSER rule, then iSER requests will be caught by the RDS rule. There's always an option to use the rest of the policy file, and then you can create more complex rules to differentiate applications and protocols (e.g. by using service ID range AND/OR source port groups AND/OR destination port groups AND/OR QoS class). -- Yevgeny >> iser *??????* : 4 #SL for iSER >> ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 >> ipoib : 6 #default IPoIB partition - pkey=0x7FFF > ... >> end-qos-ulps >> >> This syntax is possible only if there are well known facts >> such as SDP service ID, in which case admin will be able to >> just state "sdp: ", and OpenSM will (internally) generate >> relevant matching rule and QoS level based on this known >> service ID. >> >> So back to iSER: >> >> Can I assume that the target port for iSER will always be 860, >> hence the iSER service ID will always be 0x000000000106035c? > > In terms of iSER, I was only commenting on what the spec says. I did not > verify its operation in terms of the code. Does the code follow the > spec ?
> > -- Hal > >> Or perhaps I can do it similar to SDP, where there is an option >> to specify the port ranges along with the ULP name (SDP): >> - if administrator only specifies "iser", I can assume that >> the service ID is default 0x000000000106035c >> - if administrator only specifies "iser" and ports, OpenSM >> will build service ID based on a well known prefix >> (0x000000000106pppp) where the last 4 hex digits are target >> port number >> >> Keep in mind that if this doesn't look too flexible and doesn't >> cover all the cases, there's always the rest of the QoS policy >> file with all the advanced configuration. >> >> -- Yevgeny >> >>> -- Hal >>> >>>>> 2) the rdma cm signature >>>> Do you mean something iSER-specific, or just the way the cm >>>> builds the service ID out of port space and tcp port? >>>> Can you give an example? >>>> >>>>> 3) the destination tcp port provided to rdma_resolve_addr >>>> I guess that tcp port is in the lower 4 nibs of the service ID, >>>> similar to SDP. Right? 
>>>> -- Yevgeny >>>> >>>> _______________________________________________ >>>> general mailing list >>>> general at lists.openfabrics.org >>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>>> >>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >>>> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From hrosenstock at xsigo.com Wed Nov 14 08:14:43 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Wed, 14 Nov 2007 08:14:43 -0800 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <473B14A3.5090703@dev.mellanox.co.il> References: <473822FD.20208@Voltaire.COM> <473ABBCE.8010109@dev.mellanox.co.il> <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> <473B14A3.5090703@dev.mellanox.co.il> Message-ID: <1195056883.14106.90.camel@hrosenstock-ws.xsigo.com> On Wed, 2007-11-14 at 17:30 +0200, Yevgeny Kliteynik wrote: > Hal Rosenstock wrote: > > On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: > >> Hal Rosenstock wrote: > >>> Or, > >>> > >>> On 11/13/07, Or Gerlitz wrote: > >>>> Yevgeny, > >>>> > >>>> iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port > >>>> space as does RDS. The RDMA-CM signature is something which I am sure > >>>> exists, you can look on the RDMA-CM IB spec Annex to see if such thing > >>>> indeed exist or I am wrong. > >>> Did you really look at the annex for this ? > >>> > >>>> The TCP port is the 16 bit port portion of > >>>> the > >>>> ip:port address provided by a ULP that uses the RDMA-CM to > >>>> rdma_resolve_addr(), again the annex explained how the port is embedded > >>>> into the SID, I don't remember the location within the 64 bit string. > >>> It's in the low 16 bits (bytes 6-7) of the SID as the annex indicates. > >>> > >>>> Or. 
> >>>> > >>>> -------- Original Message -------- > >>>> Subject: > >>>> Re: QoS for iSER > >>>> Date: > >>>> Mon, 12 Nov 2007 11:41:43 +0200 > >>>> From: Yevgeny Kliteynik > >>>> > >>>> Hi Erez, > >>>> > >>>> Erez Zilber wrote: > >>>>> to create the SID, the rdma cm combines > >>>>> > >>>>> 1) the port space > >>>> What is the port space for iSER? > >>>> For SDP it's 0x10000 - 0x1FFFF. > >>>> For RDS it's 0x1060000 - 0x106FFFF > > > > I presume this is just saying RDS uses IP protocol TCP and there is no > > well known port (e.g. uses dynamic ports). So how do you know ahead of > > time which port ? > > See below. > > >>>> For iSER it's ...? > >>> These numbers are too large for just "port space". > >>> > >>> iSER SID is 0x000000000106035c > >>> > >>> in your nomenclature, I guess 0x106035c > >>> > >>> 01 says RDMA aware ULP service ID range > >>> 06 says IP protocol is TCP > >>> 0x035c (port 860) is the well known TCP port for iSCSI > >> Thanks, that is just what I needed. > >> I'm preparing a (very) simplified interface for defining QoS policy. > >> I'm adding an additional section in QoS policy file, where an admin > >> will be able to configure QoS per ULP or per application w/o going > >> into too many details. > >> Here's the example of what I have in mind: > >> > >> qos-ulps > >> default : 0 #default SL > >> sdp, port 10000-20000 : 2 > >> sdp : 0 #default SL for SDP > >> rds, port 25000 : 2 #SL for RDS when destination port is 25000 > > > > Isn't there a chicken and egg problem here with this ? How do you know > > port 25000 will be assigned "in advance" ? > > See below. > > >> rds, : 0 #default SL for RDS > > > > I don't see how RDS can work separate from other CMA based protocols > > which use dynamic ports. > > You're right, it can't. > Moreover, as you know, OpenSM is not aware of the term "protocol" > at all - it just sees Service IDs in PathRecord request, so the > only differentiation it can create is by assigning certain SL to > a certain service ID. 
Yes, I could see that you are trying to use protocol as an identifier to simplify configuration for a ServiceID range. > What I'm trying to do here is to provide a simple way to configure > QoS that will work in *many* cases, not in *all* cases. > > I'm giving the following options to differentiate traffic: > - All the SDP traffic > - Selected ports of the SDP traffic > - SRP traffic by providing SRT target guids > - iSER traffic (to a well known TCP port) > - All the RDS traffic > - Selected ports of the RDS traffic > - IPoIB by pkey > - Specific Service IDs > - Specific pkeys > - Specific destination port guids Understood. > And as you've mentioned, some rules may overlap. For instance, > if the rule for all the RDS traffic will appear before the iSER > rule, then iSER requests will be caught by the RDS rule. That doesn't sound so good but I don't see a good alternative here other than for this case to put the iSER rule first. The other fallback is the more detailed configuration but RDS falls into the generic range category which is problematic in terms of this (and can't be differentiated by ServiceID unlike the other ULPs). -- Hal > There's alway an option to use the rest of the policy file, and > then you can create more complex rules to differentiate applications > and protocols (i.e. by using service ID range AND/OR source port > groups AND/OR destination port groups AND/OR by QoS class. > > -- Yevgeny > > >> iser *??????* : 4 #SL for iSER > >> ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 > >> ipoib : 6 #default IPoIB partition - pkey=0x7FFF > > ... > >> end-qos-ulps > >> > >> This syntax is possible only if there are well known facts > >> such as SDP service ID, in which case admin will be able to > >> just state "sdp: ", and OpenSM will (internally) generate > >> relevant matching rule and QoS level based on this known > >> service ID. 
> >> > >> So back to iSER: > >> > >> Can I assume that the target port for iSER will always be 860, > >> hence the iSER service ID will always be 0x000000000106035c? > > > > In terms of iSER, I was only commenting on what the spec says. I did not > > verify its operation in terms of the code. Does the code follow the > > spec ? > > > > -- Hal > > > >> Or perhaps I can do it similar to SDP, where there is an option > >> to specify the port ranges along with the ULP name (SDP): > >> - if administrator only specifies "iser", I can assume that > >> the service ID is default 0x000000000106035c > >> - if administrator only specifies "iser" and ports, OpenSM > >> will build service ID based on a well known prefix > >> (0x000000000106pppp) where the last 4 hex digits are target > >> port number > >> > >> Keep in mind that if this doesn't look too flexible and doesn't > >> cover all the cases, there's always the rest of the QoS policy > >> file with all the advanced configuration. > >> > >> -- Yevgeny > >> > >>> -- Hal > >>> > >>>>> 2) the rdma cm signature > >>>> Do you mean something iSER-specific, or just the way the cm > >>>> builds the service ID out of port space and tcp port? > >>>> Can you give an example? > >>>> > >>>>> 3) the destination tcp port provided to rdma_resolve_addr > >>>> I guess that tcp port is in the lower 4 nibs of the service ID, > >>>> similar to SDP. Right? 
> >>>> -- Yevgeny > >>>> > >>>> _______________________________________________ > >>>> general mailing list > >>>> general at lists.openfabrics.org > >>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >>>> > >>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > >>>> > >> _______________________________________________ > >> general mailing list > >> general at lists.openfabrics.org > >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >> > >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > From rdreier at cisco.com Wed Nov 14 08:23:07 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 14 Nov 2007 08:23:07 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will pull some low-level driver fixes for 2.6.24: Ali Ayoub (1): mlx4_core: Fix possible bad free in mlx4_buf_free() Jack Morgenstein (1): mlx4_core: Fix thinko in QP destroy (incorrect bitmap_free) Joachim Fenkes (2): IB/ehca: Return physical link information in query_port() IB/ehca: Fix static rate calculation Ralph Campbell (2): IB/ipath: Fix memory leak in ipath_resize_cq() if copy_to_user() fails IB/ipath: Fix race with ACK retry timeout list management Steve Wise (1): RDMA/cxgb3: Set the max_qp_init_rd_atom attribute in query_device drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + drivers/infiniband/hw/ehca/ehca_av.c | 48 ++++++++++++++++++++++----- drivers/infiniband/hw/ehca/ehca_classes.h | 1 - drivers/infiniband/hw/ehca/ehca_hca.c | 20 ++++++++--- drivers/infiniband/hw/ehca/ehca_iverbs.h | 3 ++ drivers/infiniband/hw/ehca/ehca_main.c | 3 -- drivers/infiniband/hw/ehca/ehca_qp.c | 29 +++++++--------- drivers/infiniband/hw/ehca/hipz_hw.h | 
6 +++- drivers/infiniband/hw/ipath/ipath_cq.c | 11 ++++-- drivers/infiniband/hw/ipath/ipath_rc.c | 5 ++- drivers/net/mlx4/alloc.c | 7 ++-- drivers/net/mlx4/qp.c | 2 +- 12 files changed, 89 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index f0c7775..b5436ca 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1000,6 +1000,7 @@ static int iwch_query_device(struct ib_device *ibdev, props->max_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_cq = dev->attr.max_cqs; props->max_cqe = dev->attr.max_cqes_per_cq; props->max_mr = dev->attr.max_mem_regs; diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 97d1086..453eb99 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -50,6 +50,38 @@ static struct kmem_cache *av_cache; +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! 
path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { int ret; @@ -69,15 +101,13 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) av->av.slid_path_bits = ah_attr->src_path_bits; if (ehca_static_rate < 0) { - int ah_mult = ib_rate_to_mult(ah_attr->static_rate); - int ehca_mult = - ib_rate_to_mult(shca->sport[ah_attr->port_num].rate ); - - if (ah_mult >= ehca_mult) - av->av.ipd = 0; - else - av->av.ipd = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; } else av->av.ipd = ehca_static_rate; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 2d660ae..87f12d4 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -95,7 +95,6 @@ struct ehca_sma_attr { struct ehca_sport { struct ib_cq *ibcq_aqp1; struct ib_qp *ibqp_aqp1; - enum ib_rate rate; enum ib_port_state port_state; struct ehca_sma_attr saved_attr; }; diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 15806d1..5bd7b59 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -151,7 +151,6 @@ int ehca_query_port(struct ib_device *ibdev, } memset(props, 0, sizeof(struct ib_port_attr)); - props->state = rblock->state; switch (rblock->max_mtu) { case 0x1: @@ -188,11 +187,20 @@ int ehca_query_port(struct ib_device *ibdev, props->subnet_timeout = 
rblock->subnet_timeout; props->init_type_reply = rblock->init_type_reply; - props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; - - /* at the moment (logical) link state is always LINK_UP */ - props->phys_state = 0x5; + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = 0x1; + } query_port1: ehca_free_fw_ctrlblock(rblock); diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index dce503b..5485799 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -189,6 +189,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void ehca_poll_eqs(unsigned long data); +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c6cd38c..90d4334 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -327,9 +327,6 @@ static int ehca_sense_attributes(struct ehca_shca *shca) shca->hw_level = ehca_hw_level; ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - shca->sport[0].rate = IB_RATE_30_GBPS; - shca->sport[1].rate = IB_RATE_30_GBPS; - shca->hca_cap = rblock->hca_cap_indicators; ehca_gen_dbg(" ... 
HCA capabilities:"); for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index de18264..2e3e654 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1196,10 +1196,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); } if (attr_mask & IB_QP_AV) { - int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult(shca->sport[my_qp-> - init_attr.port_num].rate); - mqpcb->dlid = attr->ah_attr.dlid; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); mqpcb->source_path_bits = attr->ah_attr.src_path_bits; @@ -1207,11 +1203,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ah_mult < ehca_mult) - mqpcb->max_static_rate = (ah_mult > 0) ? - ((ehca_mult - 1) / ah_mult) : 0; - else - mqpcb->max_static_rate = 0; + if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); /* @@ -1280,10 +1277,6 @@ static int internal_modify_qp(struct ib_qp *ibqp, (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); } if (attr_mask & IB_QP_ALT_PATH) { - int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate); - int ehca_mult = ib_rate_to_mult( - shca->sport[my_qp->init_attr.port_num].rate); - if (attr->alt_port_num < 1 || attr->alt_port_num > shca->num_ports) { ret = -EINVAL; @@ -1309,10 +1302,12 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; mqpcb->service_level_al = attr->alt_ah_attr.sl; - if (ah_mult > 0 && ah_mult < ehca_mult) - mqpcb->max_static_rate_al = (ehca_mult - 1) / ah_mult; - else - mqpcb->max_static_rate_al = 0; + if (ehca_calc_ipd(shca, 
my_qp->init_attr.port_num, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } /* OpenIB doesn't support alternate retry counts - copy them */ mqpcb->retry_count_al = mqpcb->retry_count; diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h index d9739e5..485b840 100644 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ b/drivers/infiniband/hw/ehca/hipz_hw.h @@ -402,7 +402,11 @@ struct hipz_query_port { u64 max_msg_sz; u32 max_mtu; u32 vl_cap; - u8 reserved2[1900]; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; u64 guid_entries[255]; } __attribute__ ((packed)); diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 645ed71..08d8ae1 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -404,7 +404,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (ret) - goto bail; + goto bail_free; } spin_lock_irq(&cq->lock); @@ -424,10 +424,8 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) else n = head - tail; if (unlikely((u32)cqe < n)) { - spin_unlock_irq(&cq->lock); - vfree(wc); ret = -EOVERFLOW; - goto bail; + goto bail_unlock; } for (n = 0; tail != head; n++) { if (cq->ip) @@ -459,7 +457,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) } ret = 0; + goto bail; +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); bail: return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5c29b2b..120a61b 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -959,8 +959,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode, /* If this is a partial ACK, reset the retransmit timer. 
*/ if (qp->s_last != qp->s_tail) { spin_lock(&dev->pending_lock); - list_add_tail(&qp->timerwait, - &dev->pending[dev->pending_index]); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); /* * If we get a partial ACK for a resent operation, diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index f8d63d3..b226e01 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -171,9 +171,10 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) buf->u.direct.map); else { for (i = 0; i < buf->nbufs; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - buf->u.page_list[i].buf, - buf->u.page_list[i].map); + if (buf->u.page_list[i].buf) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->u.page_list[i].buf, + buf->u.page_list[i].map); kfree(buf->u.page_list); } } diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index cc4b1be..42b4763 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -240,7 +240,7 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn); mlx4_table_put(dev, &qp_table->qp_table, qp->qpn); - if (qp->qpn < dev->caps.sqp_start + 8) + if (qp->qpn >= dev->caps.sqp_start + 8) mlx4_bitmap_free(&qp_table->bitmap, qp->qpn); } EXPORT_SYMBOL_GPL(mlx4_qp_free); From rdreier at cisco.com Wed Nov 14 08:23:45 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 14 Nov 2007 08:23:45 -0800 Subject: [ofa-general] Re: [PATCH] mlx4: fix thinko in qp destroy (incorrect bitmap_free) In-Reply-To: <200711141633.28064.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Wed, 14 Nov 2007 16:33:27 +0200") References: <200711141633.28064.jackm@dev.mellanox.co.il> Message-ID: > This one needs to be applied immediately to the 2.6.24 tree -- bug fix. Yes, that was a stupid mistake... applied.
From umaxx at oleco.net Wed Nov 14 08:26:00 2007 From: umaxx at oleco.net (Joerg Zinke) Date: Wed, 14 Nov 2007 17:26:00 +0100 Subject: [ofa-general] Any doc update on the fork() support ? In-Reply-To: References: Message-ID: <20071114172600.0e2af3b6@marvin.local> On Mon, 29 Oct 2007 16:27:34 +0000 "Tang, Changqing" wrote: > Here is a statement from OFED 1.3 alpha 2 release notes, it has not > been changed for a few releases. is there any update ? Thanks. > > 3. Fork support from kernel 2.6.12 and above is available provided > that applications do not use threads. The fork() is supported as > long as the parent process does not run before the child exits or > calls exec(). The former can be achieved by calling wait(childpid), > and the latter can be achieved by application specific means. The > Posix system() call is supported. I'm interested in that too. Any news? Regards, Joerg From rdreier at cisco.com Wed Nov 14 08:28:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 14 Nov 2007 08:28:04 -0800 Subject: [ofa-general] CQ destory In-Reply-To: <532b813a0711131558o230cf6efq8c2f732a2aae79ad@mail.gmail.com> (Ganesh Sadasivan's message of "Tue, 13 Nov 2007 15:58:23 -0800") References: <532b813a0711131558o230cf6efq8c2f732a2aae79ad@mail.gmail.com> Message-ID: > Is there any way to figure out that a cq does not have any pending > completion entries and thus > is safe to call ibv_destroy_cq. Or is it ok to call destroy > regardless of whether there are completion > entries? I am seeing a seg fault in ibv_poll_cq if I adopt the > latter approach. The only way to know that a CQ has no entries is to poll the CQ and not see any entries. However, it is fine to destroy a CQ if there are entries present. I don't understand how destroying a CQ could be connected to a seg fault in ibv_poll_cq, unless you are polling the CQ after calling ibv_destroy_cq() for that same CQ, which is a use-after-free bug in your code. - R. 
From or.gerlitz at gmail.com Wed Nov 14 08:29:42 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 14 Nov 2007 08:29:42 -0800 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: References: <473822FD.20208@Voltaire.COM> Message-ID: <15ddcffd0711140829v51c411am50836846fd30630@mail.gmail.com> On Nov 13, 2007 4:07 AM, Hal Rosenstock wrote: > On 11/13/07, Or Gerlitz wrote: > > iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port > > space as does RDS. The RDMA-CM signature is something which I am not > sure > > exists, you can look on the RDMA-CM IB spec Annex to see if such thing > > indeed exist or I am wrong. > > Did you really look at the annex for this ? > Generally speaking I believe people should not be fed with a teaspoon but rather do some digging and work through which they will understand better the materials. Specifically here, being on the road without the annex in my laptop, I provided to the individual that asked me the question 95% of the answer, so what's the problem? Or. -------------- next part -------------- An HTML attachment was scrubbed... URL: From eli at dev.mellanox.co.il Wed Nov 14 08:29:43 2007 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Wed, 14 Nov 2007 18:29:43 +0200 Subject: [ewg] RE: [ofa-general] OFED 1.3 beta status on kernel 2.6.24 rebase In-Reply-To: <6a122cc00711140023h2b07d578l3c3b1dde3f44cd05@mail.gmail.com> References: <6C2C79E72C305246B504CBA17B5500C90282E28C@mtlexch01.mtl.com> <39C75744D164D948A170E9792AF8E7CA4D2C3B@exil.voltaire.com> <6a122cc00711140023h2b07d578l3c3b1dde3f44cd05@mail.gmail.com> Message-ID: <1195057783.24476.60.camel@mtls03> On Wed, 2007-11-14 at 10:23 +0200, Moni Levy wrote: > I guess that the one Tziporet is talking about is: > > "IPoIB: Use shinfo->gso_size to indicate tcp mss instead of using link MTU" > > Tziporet, am I right ? > No, this is not the critical bug Tziporet was referring to.
The one she was referring to was a backport patch for ipoib from kernel 2.6.24 to older kernels. The previous version of the backport did not return a correct return value from the poll function. Here is the new backport for 2.6.23: Backport IPOIB to kernel 2.6.23 Signed-off-by: Eli Cohen --- Index: ofed_kernel-2.6.23/drivers/infiniband/ulp/ipoib/ipoib.h =================================================================== --- ofed_kernel-2.6.23.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-11-13 11:56:43.000000000 +0200 +++ ofed_kernel-2.6.23/drivers/infiniband/ulp/ipoib/ipoib.h 2007-11-13 11:56:48.000000000 +0200 @@ -324,8 +324,6 @@ struct ipoib_dev_priv { struct net_device *dev; - struct napi_struct napi; - unsigned long flags; struct mutex mcast_mutex; @@ -451,7 +449,7 @@ extern struct workqueue_struct *ipoib_wo /* functions */ -int ipoib_poll(struct napi_struct *napi, int budget); +int ipoib_poll(struct net_device *dev, int *budget); void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, Index: ofed_kernel-2.6.23/drivers/infiniband/ulp/ipoib/ipoib_ib.c =================================================================== --- ofed_kernel-2.6.23.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-11-13 11:56:43.000000000 +0200 +++ ofed_kernel-2.6.23/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-11-13 12:02:29.000000000 +0200 @@ -295,20 +295,19 @@ static void ipoib_ib_handle_tx_wc(struct wc->status, wr_id, wc->vendor_err); } -int ipoib_poll(struct napi_struct *napi, int budget) +int ipoib_poll(struct net_device *dev, int *budget) { - struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); - struct net_device *dev = priv->dev; + struct ipoib_dev_priv *priv = netdev_priv(dev); + int max = min(*budget, dev->quota); int done; int t; int n, i; + int ret; done = 0; poll_more: - while (done < budget) { - int max = (budget - done); - + while (max) { t = min(IPOIB_NUM_WC, max); n = 
ib_poll_cq(priv->cq, t, priv->ibwc); @@ -317,6 +316,7 @@ poll_more: if (wc->wr_id & IPOIB_OP_RECV) { ++done; + --max; if (wc->wr_id & IPOIB_OP_CM) ipoib_cm_handle_rx_wc(dev, wc); else @@ -333,24 +333,26 @@ poll_more: break; } - if (done < budget) { - netif_rx_complete(dev, napi); + if (max) { + netif_rx_complete(dev); if (unlikely(ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && - netif_rx_reschedule(dev, napi)) + netif_rx_reschedule(dev, 0)) goto poll_more; - } + ret = 0; + } else + ret = 1; + + dev->quota -= done; + *budget -= done; - return done; + return ret; } void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) { - struct net_device *dev = dev_ptr; - struct ipoib_dev_priv *priv = netdev_priv(dev); - - netif_rx_schedule(dev, &priv->napi); + netif_rx_schedule(dev_ptr); } static inline int post_send(struct ipoib_dev_priv *priv, Index: ofed_kernel-2.6.23/drivers/infiniband/ulp/ipoib/ipoib_main.c =================================================================== --- ofed_kernel-2.6.23.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-13 11:56:43.000000000 +0200 +++ ofed_kernel-2.6.23/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-13 11:56:48.000000000 +0200 @@ -101,20 +101,16 @@ int ipoib_open(struct net_device *dev) ipoib_dbg(priv, "bringing up interface\n"); - napi_enable(&priv->napi); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(dev)) return 0; - if (ipoib_ib_dev_open(dev)) { - napi_disable(&priv->napi); + if (ipoib_ib_dev_open(dev)) return -EINVAL; - } if (ipoib_ib_dev_up(dev)) { ipoib_ib_dev_stop(dev, 1); - napi_disable(&priv->napi); return -EINVAL; } @@ -147,7 +143,6 @@ static int ipoib_stop(struct net_device ipoib_dbg(priv, "stopping interface\n"); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - napi_disable(&priv->napi); netif_stop_queue(dev); @@ -805,7 +800,7 @@ static void ipoib_timeout(struct net_dev static int ipoib_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned 
short type, - const void *daddr, const void *saddr, unsigned len) + void *daddr, void *saddr, unsigned len) { struct ipoib_header *header; struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -980,10 +975,6 @@ void ipoib_dev_cleanup(struct net_device priv->tx_ring = NULL; } -static const struct header_ops ipoib_header_ops = { - .create = ipoib_hard_header, -}; - static void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -993,13 +984,13 @@ static void ipoib_setup(struct net_devic dev->change_mtu = ipoib_change_mtu; dev->hard_start_xmit = ipoib_start_xmit; dev->tx_timeout = ipoib_timeout; - dev->header_ops = &ipoib_header_ops; + dev->hard_header = ipoib_hard_header; dev->set_multicast_list = ipoib_set_mcast_list; dev->neigh_setup = ipoib_neigh_setup_dev; - ipoib_set_ethtool_ops(dev); + dev->poll = ipoib_poll; + dev->weight = 100; - netif_napi_add(dev, &priv->napi, ipoib_poll, 100); dev->watchdog_timeo = HZ; From sashak at voltaire.com Wed Nov 14 08:47:27 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 14 Nov 2007 18:47:27 +0200 Subject: [ofa-general] Re: [PATCH] libibmad/dump.c: Support link speed and width vendor extensions In-Reply-To: <1194968323.6542.213.camel@hrosenstock-ws.xsigo.com> References: <1194968323.6542.213.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071114164727.GC17237@sashak.voltaire.com> On 07:38 Tue 13 Nov , Hal Rosenstock wrote: > libibmad/dump.c: Support link speed and width vendor extensions > > When decoding values, handle vendor extensions to link speed and width > including accommodating a "documentation" change between IBA 1.2 and > 1.2.1 > > Signed-off-by: Hal Rosenstock Applied. Thanks. 
Sasha From umaxx at oleco.net Wed Nov 14 08:40:49 2007 From: umaxx at oleco.net (Joerg Zinke) Date: Wed, 14 Nov 2007 17:40:49 +0100 Subject: [ofa-general] OFED install check-buildroot failure Message-ID: <20071114174049.534f68b7@marvin.local> Hi, i tried several OFED version (1.2.5.2, 1.3-alpha2 and 1.3-daily-builds) on fedora core and they all have the same in common: the install.{pl|sh} fails with the following error: + /usr/lib/rpm/check-buildroot /var/tmp/OFED/usr/src/ofa_kernel/configure.mk.kernel:CWD=/var/tmp/OFED_topdir/BUILD/ofa_kernel-1.3 [...] Binary file /var/tmp/OFED/lib/modules/2.6.23.1-49.fc8/updates/kernel/drivers/infiniband/core/ib_umad.ko matches Found '/var/tmp/OFED' in installed files; aborting Fehler: Fehler-Status beim Beenden von /var/tmp/rpm-tmp.71974 (%install) one strange thing i can see is, the *.ko files contain strings like: /var/tmp/OFED_topdir/BUILD/ofa_kernel-1.3/drivers/infiniband/core this is why check-buildroot fails. Normally *.ko should not contain any hardcoded path like this, right? Why does this happen? Wrong configure/make arguments? This is on Fedora Core 8 with distribution kernel 2.6.23.1-49.fc8. Regards, Joerg From Arkady.Kanevsky at netapp.com Wed Nov 14 09:22:01 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 14 Nov 2007 12:22:01 -0500 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il> <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> Message-ID: what happens when multiple apps runs on the same server? Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. 
Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Hal Rosenstock [mailto:hrosenstock at xsigo.com] > Sent: Wednesday, November 14, 2007 8:18 AM > To: Yevgeny Kliteynik > Cc: gdror at mellanox.co.il; general at lists.openfabrics.org > Subject: Re: [ofa-general] RE: QoS for iSER > > On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: > > Hal Rosenstock wrote: > > > Or, > > > > > > On 11/13/07, Or Gerlitz wrote: > > >> Yevgeny, > > >> > > >> iSER (as you can learn from doing a grep) is using the > RDMA-CM TCP > > >> port space as does RDS. The RDMA-CM signature is > something which I > > >> am sure exists, you can look on the RDMA-CM IB spec > Annex to see if > > >> such thing indeed exist or I am wrong. > > > > > > Did you really look at the annex for this ? > > > > > >> The TCP port is the 16 bit port portion of the ip:port address > > >> provided by a ULP that uses the RDMA-CM to rdma_resolve_addr(), > > >> again the annex explained how the port is embedded into > the SID, I > > >> don't remember the location within the 64 bit string. > > > > > > It's in the low 16 bits (bytes 6-7) of the SID as the > annex indicates. > > > > > >> Or. > > >> > > >> -------- Original Message -------- > > >> Subject: > > >> Re: QoS for iSER > > >> Date: > > >> Mon, 12 Nov 2007 11:41:43 +0200 > > >> From: Yevgeny Kliteynik > > >> > > >> Hi Erez, > > >> > > >> Erez Zilber wrote: > > >>> to create the SID, the rdma cm combines > > >>> > > >>> 1) the port space > > >> What is the port space for iSER? > > >> For SDP it's 0x10000 - 0x1FFFF. > > >> For RDS it's 0x1060000 - 0x106FFFF > > I presume this is just saying RDS uses IP protocol TCP and > there is no well known port (e.g. uses dynamic ports). So how > do you know ahead of time which port ? > > > >> For iSER it's ...? > > > > > > These numbers are too large for just "port space". 
> > > > > > iSER SID is 0x000000000106035c > > > > > > in your nomenclature, I guess 0x106035c > > > > > > 01 says RDMA aware ULP service ID range > > > 06 says IP protocol is TCP > > > 0x035c (port 860) is the well known TCP port for iSCSI > > > > Thanks, that is just what I needed. > > I'm preparing a (very) simplified interface for defining QoS policy. > > I'm adding an additional section in QoS policy file, where an admin > > will be able to configure QoS per ULP or per application w/o going > > into too many details. > > Here's the example of what I have in mind: > > > > qos-ulps > > default : 0 #default SL > > sdp, port 10000-20000 : 2 > > sdp : 0 #default SL for SDP > > rds, port 25000 : 2 #SL for RDS when > destination port is 25000 > > Isn't there a chicken and egg problem here with this ? How do > you know port 25000 will be assigned "in advance" ? > > > rds, : 0 #default SL for RDS > > I don't see how RDS can work separate from other CMA based > protocols which use dynamic ports. > > > iser *??????* : 4 #SL for iSER > > ipoib, pkey 0x0001 : 5 #SL for IPoIB on > partition with pkey 0x0001 > > ipoib : 6 #default IPoIB > partition - pkey=0x7FFF > ... > > end-qos-ulps > > > > This syntax is possible only if there are well known facts > such as SDP > > service ID, in which case admin will be able to just state "sdp: > > ", and OpenSM will (internally) generate relevant matching rule > > and QoS level based on this known service ID. > > > > So back to iSER: > > > > Can I assume that the target port for iSER will always be > 860, hence > > the iSER service ID will always be 0x000000000106035c? > > In terms of iSER, I was only commenting on what the spec > says. I did not verify its operation in terms of the code. > Does the code follow the spec ? 
> > -- Hal > > > Or perhaps I can do it similar to SDP, where there is an option to > > specify the port ranges along with the ULP name (SDP): > > - if administrator only specifies "iser", I can assume that > > the service ID is default 0x000000000106035c > > - if administrator only specifies "iser" and ports, OpenSM > > will build service ID based on a well known prefix > > (0x000000000106pppp) where the last 4 hex digits are target > > port number > > > > Keep in mind that if this doesn't look too flexible and > doesn't cover > > all the cases, there's always the rest of the QoS policy > file with all > > the advanced configuration. > > > > -- Yevgeny > > > > > -- Hal > > > > > >>> 2) the rdma cm signature > > >> Do you mean something iSER-specific, or just the way the > cm builds > > >> the service ID out of port space and tcp port? > > >> Can you give an example? > > >> > > >>> 3) the destination tcp port provided to rdma_resolve_addr > > >> I guess that tcp port is in the lower 4 nibs of the service ID, > > >> similar to SDP. Right? 
> > >> -- Yevgeny > > >> > > >> _______________________________________________ > > >> general mailing list > > >> general at lists.openfabrics.org > > >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > >> > > >> To unsubscribe, please visit > > >> http://openib.org/mailman/listinfo/openib-general > > >> > > > > > > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Wed Nov 14 09:43:33 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 14 Nov 2007 19:43:33 +0200 Subject: [ofa-general] Re: [PATCH 2/7] Add option to change the default "opensm" dir under sysconfdir as the config dir In-Reply-To: <20071113160836.49310444.weiny2@llnl.gov> References: <20071113160836.49310444.weiny2@llnl.gov> Message-ID: <20071114174333.GF17237@sashak.voltaire.com> Hi Ira, On 16:08 Tue 13 Nov , Ira Weiny wrote: > From 126ea6a37634d93ea3a91b33f3e308cb931210fa Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Sun, 11 Nov 2007 09:22:30 -0800 > Subject: [PATCH] Add option to change the default "opensm" dir under sysconfdir as the config dir. > > Signed-off-by: Ira K. 
Weiny > --- > opensm/configure.in | 19 ++++++++++++++++++- > 1 files changed, 18 insertions(+), 1 deletions(-) > > diff --git a/opensm/configure.in b/opensm/configure.in > index 1a637fa..dcec910 100644 > --- a/opensm/configure.in > +++ b/opensm/configure.in > @@ -70,8 +70,25 @@ OPENIB_OSM_CONSOLE_SOCKET_SEL > dnl select performance manager or not > OPENIB_OSM_PERF_MGR_SEL > > +dnl Check for a different subdir for the config files. > +OPENSM_CONF_SUB_DIR=opensm dnl define a default For some reason my autoconf doesn't like this comment after blank (and generates broken script). I will move it, something like: --- a/opensm/configure.in +++ b/opensm/configure.in @@ -71,7 +71,7 @@ dnl select performance manager or not OPENIB_OSM_PERF_MGR_SEL dnl Check for a different subdir for the config files. -OPENSM_CONF_SUB_DIR=opensm dnl define a default +OPENSM_CONF_SUB_DIR=opensm AC_MSG_CHECKING(for --with-opensm-conf-sub-dir) AC_ARG_WITH(opensm-conf-sub-dir, AC_HELP_STRING([--with-opensm-conf-sub-dir=dir], Sasha > +AC_MSG_CHECKING(for --with-opensm-conf-sub-dir) > +AC_ARG_WITH(opensm-conf-sub-dir, > + AC_HELP_STRING([--with-opensm-conf-sub-dir=dir], > + [define a directory name for opensm's conf files / (default "opensm")]), > + [ case "$withval" in > + no) > + ;; > + *) > + withopensmconfsubdir=yes > + OPENSM_CONF_SUB_DIR=$withval > + ;; > + esac ] > +) > +AC_MSG_RESULT(${withopensmconfsubdir=no}) > + > dnl Set up /opensm config dir. 
> -CONF_DIR_TMP1="`eval echo ${sysconfdir}/opensm`" > +CONF_DIR_TMP1="`eval echo ${sysconfdir}/$OPENSM_CONF_SUB_DIR`" > CONF_DIR_TMP2="`echo $CONF_DIR_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" > CONF_DIR="`eval echo $CONF_DIR_TMP2`" > > -- > 1.5.1 > From weiny2 at llnl.gov Wed Nov 14 09:34:35 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Wed, 14 Nov 2007 09:34:35 -0800 Subject: [ofa-general] Re: [PATCH 2/7] Add option to change the default "opensm" dir under sysconfdir as the config dir In-Reply-To: <20071114174333.GF17237@sashak.voltaire.com> References: <20071113160836.49310444.weiny2@llnl.gov> <20071114174333.GF17237@sashak.voltaire.com> Message-ID: <20071114093435.093a0cb5.weiny2@llnl.gov> I thought that might not work but it did for me... That is fine. Sorry about the trouble, Ira On Wed, 14 Nov 2007 19:43:33 +0200 Sasha Khapyorsky wrote: > Hi Ira, > > On 16:08 Tue 13 Nov , Ira Weiny wrote: > > From 126ea6a37634d93ea3a91b33f3e308cb931210fa Mon Sep 17 00:00:00 2001 > > From: Ira K. Weiny > > Date: Sun, 11 Nov 2007 09:22:30 -0800 > > Subject: [PATCH] Add option to change the default "opensm" dir under sysconfdir as the config dir. > > > > Signed-off-by: Ira K. Weiny > > --- > > opensm/configure.in | 19 ++++++++++++++++++- > > 1 files changed, 18 insertions(+), 1 deletions(-) > > > > diff --git a/opensm/configure.in b/opensm/configure.in > > index 1a637fa..dcec910 100644 > > --- a/opensm/configure.in > > +++ b/opensm/configure.in > > @@ -70,8 +70,25 @@ OPENIB_OSM_CONSOLE_SOCKET_SEL > > dnl select performance manager or not > > OPENIB_OSM_PERF_MGR_SEL > > > > +dnl Check for a different subdir for the config files. > > +OPENSM_CONF_SUB_DIR=opensm dnl define a default > > For some reason my autoconf doesn't like this comment after blank (and > generates broken script). 
I will move it, something like: > > > --- a/opensm/configure.in > +++ b/opensm/configure.in > @@ -71,7 +71,7 @@ dnl select performance manager or not > OPENIB_OSM_PERF_MGR_SEL > > dnl Check for a different subdir for the config files. > -OPENSM_CONF_SUB_DIR=opensm dnl define a default > +OPENSM_CONF_SUB_DIR=opensm > AC_MSG_CHECKING(for --with-opensm-conf-sub-dir) > AC_ARG_WITH(opensm-conf-sub-dir, > AC_HELP_STRING([--with-opensm-conf-sub-dir=dir], > > Sasha > > > +AC_MSG_CHECKING(for --with-opensm-conf-sub-dir) > > +AC_ARG_WITH(opensm-conf-sub-dir, > > + AC_HELP_STRING([--with-opensm-conf-sub-dir=dir], > > + [define a directory name for opensm's conf files / (default "opensm")]), > > + [ case "$withval" in > > + no) > > + ;; > > + *) > > + withopensmconfsubdir=yes > > + OPENSM_CONF_SUB_DIR=$withval > > + ;; > > + esac ] > > +) > > +AC_MSG_RESULT(${withopensmconfsubdir=no}) > > + > > dnl Set up /opensm config dir. > > -CONF_DIR_TMP1="`eval echo ${sysconfdir}/opensm`" > > +CONF_DIR_TMP1="`eval echo ${sysconfdir}/$OPENSM_CONF_SUB_DIR`" > > CONF_DIR_TMP2="`echo $CONF_DIR_TMP1 | sed 's/^NONE/$ac_default_prefix/'`" > > CONF_DIR="`eval echo $CONF_DIR_TMP2`" > > > > -- > > 1.5.1 > > From gsadasiv7 at gmail.com Wed Nov 14 09:50:20 2007 From: gsadasiv7 at gmail.com (Ganesh Sadasivan) Date: Wed, 14 Nov 2007 09:50:20 -0800 Subject: [ofa-general] CQ destory In-Reply-To: References: <532b813a0711131558o230cf6efq8c2f732a2aae79ad@mail.gmail.com> Message-ID: <532b813a0711140950t32533a7ao1ada8bd6a04fce36@mail.gmail.com> Here is what I think is happening: There are a couple of cq-s registerd with the same completion channel. ibv_cq_get_event is done in a thread. When this thread is woken up by one or more completion events, ibv_poll_cq is done to extract each of the completion entry. If the completion status is bad the cq is destroyed as part of processing this completion entry. The question is will destroy of cq flush all the completion events for this cq? 
If not what is the way to get around this problem? Thanks Ganesh On Nov 14, 2007 8:28 AM, Roland Dreier wrote: > > > Is there any way to figure out that a cq does not have any pending > > completion entries and thus > > is safe to call ibv_destroy_cq. Or is it ok to call destroy > > regardless of whether there are completion > > entries? I am seeing a seg fault in ibv_poll_cq if I adopt the > > latter approach. > > The only way to know that a CQ has no entries is to poll the CQ and > not see any entries. However, it is fine to destroy a CQ if there are > entries present. I don't understand how destroying a CQ could be > connected to a seg fault in ibv_poll_cq, unless you are polling the CQ > after calling ibv_destroy_cq() for that same CQ, which is a > use-after-free bug in your code. > > - R. > From sashak at voltaire.com Wed Nov 14 10:29:39 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 14 Nov 2007 20:29:39 +0200 Subject: [ofa-general] [PATCH] opensm: update default config names in man pages and doc In-Reply-To: <20071113160900.6a009766.weiny2@llnl.gov> References: <20071113160900.6a009766.weiny2@llnl.gov> Message-ID: <20071114182939.GG17237@sashak.voltaire.com> Update default config names in man pages and docs: /etc/ofa -> /etc/opensm. Signed-off-by: Sasha Khapyorsky --- opensm/doc/partition-config.txt | 4 ++-- opensm/man/opensm.8 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/opensm/doc/partition-config.txt b/opensm/doc/partition-config.txt index 1904c33..5afc1bb 100644 --- a/opensm/doc/partition-config.txt +++ b/opensm/doc/partition-config.txt @@ -2,8 +2,8 @@ OpenSM Partition configuration =============================== The default name of OpenSM partitions configuration file is -'/etc/ofa/opensm-partitions.conf'. The default may be changed by using ---Pconfig (-P) option with OpenSM. +'/etc/opensm/opensm-partitions.conf'. The default may be changed by +using --Pconfig (-P) option with OpenSM. 
The default partition will be created by OpenSM unconditionally even when partition configuration file does not exist or cannot be accessed. diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 index 2bdea8e..9a5bb71 100644 --- a/opensm/man/opensm.8 +++ b/opensm/man/opensm.8 @@ -184,7 +184,7 @@ is accumulative. .TP \fB\-P\fR, \fB\-\-Pconfig\fR This option defines the optional partition configuration file. -The default name is \'/etc/ofa/opensm-partitions.conf\'. +The default name is \'/etc/opensm/opensm-partitions.conf\'. .TP \fB\-Q\fR, \fB\-\-qos\fR This option enables QoS setup. It is disabled by default. -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Wed Nov 14 10:34:32 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 14 Nov 2007 20:34:32 +0200 Subject: [ofa-general] Re: [PATCH 0/7] "Clean up" some of the config files and the way they are configured. In-Reply-To: <20071113160754.666fdb06.weiny2@llnl.gov> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> <20071107191603.490b3121.weiny2@llnl.gov> <20071109114642.GV6153@sashak.voltaire.com> <20071109082151.6efb0256.weiny2@llnl.gov> <20071111082002.GE8073@sashak.voltaire.com> <20071113160754.666fdb06.weiny2@llnl.gov> Message-ID: <20071114183432.GH17237@sashak.voltaire.com> On 16:07 Tue 13 Nov , Ira Weiny wrote: > Allong the lines of this email I have changed and added the place and way to > change the config files being fed to opensm. > > This patch series adds the following options to the configure: > > --with-opensm-conf-sub-dir=dir > define a directory name for opensm's conf files > / (default "opensm") > --with-partitions-conf=file > define a partitions config file (default > partitions.conf) > --with-qos-policy-conf=file > define a QOS policy config file (default > qos-policy.conf) > > As well this cleans up some other parts of the configure. 
All patches are applied. The only temporary downside I can see is changing a default config files location from /etc/ofa to /etc/opensm, but it is configurable, so OFED can change it if necessary. Sasha From sashak at voltaire.com Wed Nov 14 10:35:45 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 14 Nov 2007 20:35:45 +0200 Subject: [ofa-general] Re: [PATCH 0/7] "Clean up" some of the config files and the way they are configured. In-Reply-To: <20071114183432.GH17237@sashak.voltaire.com> References: <20071101201508.51b5e363.weiny2@llnl.gov> <20071104160743.GX6945@sashak.voltaire.com> <20071105103229.32e41a31.weiny2@llnl.gov> <20071105193358.GM8766@sashak.voltaire.com> <20071107191603.490b3121.weiny2@llnl.gov> <20071109114642.GV6153@sashak.voltaire.com> <20071109082151.6efb0256.weiny2@llnl.gov> <20071111082002.GE8073@sashak.voltaire.com> <20071113160754.666fdb06.weiny2@llnl.gov> <20071114183432.GH17237@sashak.voltaire.com> Message-ID: <20071114183545.GI17237@sashak.voltaire.com> On 20:34 Wed 14 Nov , Sasha Khapyorsky wrote: > On 16:07 Tue 13 Nov , Ira Weiny wrote: > > Along the lines of this email I have changed and added the place and way to > > change the config files being fed to opensm. > > > > This patch series adds the following options to the configure: > > > > --with-opensm-conf-sub-dir=dir > > define a directory name for opensm's conf files > > / (default "opensm") > > --with-partitions-conf=file > > define a partitions config file (default > > partitions.conf) > > --with-qos-policy-conf=file > > define a QOS policy config file (default > > qos-policy.conf) > > > > As well this cleans up some other parts of the configure. > > All patches are applied. Thanks!
Sasha From disaccorded at siestatradewinds.com Wed Nov 14 10:42:55 2007 From: disaccorded at siestatradewinds.com (Edmond Carter) Date: Wed, 14 Nov 2007 12:42:55 -0600 Subject: [ofa-general] Microsoft Office 2007 Enterprise for 79, Retails @ 899 (You Save 819) Message-ID: <000001c826ed$ab1d7c00$0100007f@localhost> autodesk autocad 2008 - 129 autodesk 3ds max 9.0 - 149 adobe acrobat 8.0 professional - 79 coreldraw graphics suite x3 - 59 virtual pc 7.0 for mac - 49 autodesk autocad lt 2008 - 69 zend studio - 49 masterwriter 1.0 - 49 type cheapxpsoft6. com in Internet Explorer From kliteyn at dev.mellanox.co.il Wed Nov 14 12:14:20 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 14 Nov 2007 22:14:20 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <1195056883.14106.90.camel@hrosenstock-ws.xsigo.com> References: <473822FD.20208@Voltaire.COM> <473ABBCE.8010109@dev.mellanox.co.il> <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> <473B14A3.5090703@dev.mellanox.co.il> <1195056883.14106.90.camel@hrosenstock-ws.xsigo.com> Message-ID: <473B571C.4080605@dev.mellanox.co.il> Hal Rosenstock wrote: > On Wed, 2007-11-14 at 17:30 +0200, Yevgeny Kliteynik wrote: >> Hal Rosenstock wrote: >>> On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: >>>> Hal Rosenstock wrote: >>>>> Or, >>>>> >>>>> On 11/13/07, Or Gerlitz wrote: >>>>>> Yevgeny, >>>>>> >>>>>> iSER (as you can learn from doing a grep) is using the RDMA-CM TCP port >>>>>> space as does RDS. The RDMA-CM signature is something which I am sure >>>>>> exists, you can look on the RDMA-CM IB spec Annex to see if such thing >>>>>> indeed exist or I am wrong. >>>>> Did you really look at the annex for this ? >>>>> >>>>>> The TCP port is the 16 bit port portion of >>>>>> the >>>>>> ip:port address provided by a ULP that uses the RDMA-CM to >>>>>> rdma_resolve_addr(), again the annex explained how the port is embedded >>>>>> into the SID, I don't remember the location within the 64 bit string. 
>>>>> It's in the low 16 bits (bytes 6-7) of the SID as the annex indicates. >>>>> >>>>>> Or. >>>>>> >>>>>> -------- Original Message -------- >>>>>> Subject: >>>>>> Re: QoS for iSER >>>>>> Date: >>>>>> Mon, 12 Nov 2007 11:41:43 +0200 >>>>>> From: Yevgeny Kliteynik >>>>>> >>>>>> Hi Erez, >>>>>> >>>>>> Erez Zilber wrote: >>>>>>> to create the SID, the rdma cm combines >>>>>>> >>>>>>> 1) the port space >>>>>> What is the port space for iSER? >>>>>> For SDP it's 0x10000 - 0x1FFFF. >>>>>> For RDS it's 0x1060000 - 0x106FFFF >>> I presume this is just saying RDS uses IP protocol TCP and there is no >>> well known port (e.g. uses dynamic ports). So how do you know ahead of >>> time which port ? >> See below. >> >>>>>> For iSER it's ...? >>>>> These numbers are too large for just "port space". >>>>> >>>>> iSER SID is 0x000000000106035c >>>>> >>>>> in your nomenclature, I guess 0x106035c >>>>> >>>>> 01 says RDMA aware ULP service ID range >>>>> 06 says IP protocol is TCP >>>>> 0x035c (port 860) is the well known TCP port for iSCSI >>>> Thanks, that is just what I needed. >>>> I'm preparing a (very) simplified interface for defining QoS policy. >>>> I'm adding an additional section in QoS policy file, where an admin >>>> will be able to configure QoS per ULP or per application w/o going >>>> into too many details. >>>> Here's the example of what I have in mind: >>>> >>>> qos-ulps >>>> default : 0 #default SL >>>> sdp, port 10000-20000 : 2 >>>> sdp : 0 #default SL for SDP >>>> rds, port 25000 : 2 #SL for RDS when destination port is 25000 >>> Isn't there a chicken and egg problem here with this ? How do you know >>> port 25000 will be assigned "in advance" ? >> See below. >> >>>> rds, : 0 #default SL for RDS >>> I don't see how RDS can work separate from other CMA based protocols >>> which use dynamic ports. >> You're right, it can't. 
>> Moreover, as you know, OpenSM is not aware of the term "protocol" >> at all - it just sees Service IDs in PathRecord request, so the >> only differentiation it can create is by assigning certain SL to >> a certain service ID. > > Yes, I could see that you are trying to use protocol as an identifier to > simplify configuration for a ServiceID range. > >> What I'm trying to do here is to provide a simple way to configure >> QoS that will work in *many* cases, not in *all* cases. >> >> I'm giving the following options to differentiate traffic: >> - All the SDP traffic >> - Selected ports of the SDP traffic >> - SRP traffic by providing SRT target guids >> - iSER traffic (to a well known TCP port) >> - All the RDS traffic >> - Selected ports of the RDS traffic >> - IPoIB by pkey >> - Specific Service IDs >> - Specific pkeys >> - Specific destination port guids > > Understood. > >> And as you've mentioned, some rules may overlap. For instance, >> if the rule for all the RDS traffic will appear before the iSER >> rule, then iSER requests will be caught by the RDS rule. > > That doesn't sound so good but I don't see a good alternative here other > than for this case to put the iSER rule first. Right > The other fallback is the > more detailed configuration but RDS falls into the generic range > category which is problematic in terms of this (and can't be > differentiated by ServiceID unlike the other ULPs). Right again. If in addition to RDS there are other ULPs in the fabric that fall in the same range of service ID, then there's no escape from the rest of the policy file. For instance, RDS traffic might be differentiated by TCP port range (service ID range) and target port guids. But of course, you might find real life examples where nothing helps - RDS and some other ULP traffic are using same TCP port range, using the same port guids as source, and using the same guids as target. In this case I give up - they would end up using the same SL. 
-- Yevgeny > -- Hal > >> There's alway an option to use the rest of the policy file, and >> then you can create more complex rules to differentiate applications >> and protocols (i.e. by using service ID range AND/OR source port >> groups AND/OR destination port groups AND/OR by QoS class. >> >> -- Yevgeny >> >>>> iser *??????* : 4 #SL for iSER >>>> ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 >>>> ipoib : 6 #default IPoIB partition - pkey=0x7FFF >>> ... >>>> end-qos-ulps >>>> >>>> This syntax is possible only if there are well known facts >>>> such as SDP service ID, in which case admin will be able to >>>> just state "sdp: ", and OpenSM will (internally) generate >>>> relevant matching rule and QoS level based on this known >>>> service ID. >>>> >>>> So back to iSER: >>>> >>>> Can I assume that the target port for iSER will always be 860, >>>> hence the iSER service ID will always be 0x000000000106035c? >>> In terms of iSER, I was only commenting on what the spec says. I did not >>> verify its operation in terms of the code. Does the code follow the >>> spec ? >>> >>> -- Hal >>> >>>> Or perhaps I can do it similar to SDP, where there is an option >>>> to specify the port ranges along with the ULP name (SDP): >>>> - if administrator only specifies "iser", I can assume that >>>> the service ID is default 0x000000000106035c >>>> - if administrator only specifies "iser" and ports, OpenSM >>>> will build service ID based on a well known prefix >>>> (0x000000000106pppp) where the last 4 hex digits are target >>>> port number >>>> >>>> Keep in mind that if this doesn't look too flexible and doesn't >>>> cover all the cases, there's always the rest of the QoS policy >>>> file with all the advanced configuration. >>>> >>>> -- Yevgeny >>>> >>>>> -- Hal >>>>> >>>>>>> 2) the rdma cm signature >>>>>> Do you mean something iSER-specific, or just the way the cm >>>>>> builds the service ID out of port space and tcp port? >>>>>> Can you give an example? 
>>>>>> >>>>>>> 3) the destination tcp port provided to rdma_resolve_addr >>>>>> I guess that tcp port is in the lower 4 nibs of the service ID, >>>>>> similar to SDP. Right? >>>>>> -- Yevgeny >>>>>> >>>>>> _______________________________________________ >>>>>> general mailing list >>>>>> general at lists.openfabrics.org >>>>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>>>>> >>>>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >>>>>> >>>> _______________________________________________ >>>> general mailing list >>>> general at lists.openfabrics.org >>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>>> >>>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From kliteyn at dev.mellanox.co.il Wed Nov 14 12:21:23 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Wed, 14 Nov 2007 22:21:23 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il> <1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> Message-ID: <473B58C3.40708@dev.mellanox.co.il> Kanevsky, Arkady wrote: > what happens when multiple apps runs on the same server? I guess that when you say "server" you mean "host" and not the server from server-client terminology. This is what the whole point of QoS is: if the applications are using the same ULP, they probably would get the same Service Level, unless they were differentiated by the administrator is some other way, e.g. they all use SDP, but connect to different TCP port of the server application. If the applications are using different ULPs, they will get Service Level accordingly to the ULPs that they are using. -- Yevgeny > Arkady Kanevsky email: arkady at netapp.com > Network Appliance Inc. phone: 781-768-5395 > 1601 Trapelo Rd. - Suite 16. 
Fax: 781-895-1195 > Waltham, MA 02451 central phone: 781-768-5300 > > >> -----Original Message----- >> From: Hal Rosenstock [mailto:hrosenstock at xsigo.com] >> Sent: Wednesday, November 14, 2007 8:18 AM >> To: Yevgeny Kliteynik >> Cc: gdror at mellanox.co.il; general at lists.openfabrics.org >> Subject: Re: [ofa-general] RE: QoS for iSER >> >> On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: >>> Hal Rosenstock wrote: >>>> Or, >>>> >>>> On 11/13/07, Or Gerlitz wrote: >>>>> Yevgeny, >>>>> >>>>> iSER (as you can learn from doing a grep) is using the >> RDMA-CM TCP >>>>> port space as does RDS. The RDMA-CM signature is >> something which I >>>>> am sure exists, you can look on the RDMA-CM IB spec >> Annex to see if >>>>> such thing indeed exist or I am wrong. >>>> Did you really look at the annex for this ? >>>> >>>>> The TCP port is the 16 bit port portion of the ip:port address >>>>> provided by a ULP that uses the RDMA-CM to rdma_resolve_addr(), >>>>> again the annex explained how the port is embedded into >> the SID, I >>>>> don't remember the location within the 64 bit string. >>>> It's in the low 16 bits (bytes 6-7) of the SID as the >> annex indicates. >>>>> Or. >>>>> >>>>> -------- Original Message -------- >>>>> Subject: >>>>> Re: QoS for iSER >>>>> Date: >>>>> Mon, 12 Nov 2007 11:41:43 +0200 >>>>> From: Yevgeny Kliteynik >>>>> >>>>> Hi Erez, >>>>> >>>>> Erez Zilber wrote: >>>>>> to create the SID, the rdma cm combines >>>>>> >>>>>> 1) the port space >>>>> What is the port space for iSER? >>>>> For SDP it's 0x10000 - 0x1FFFF. >>>>> For RDS it's 0x1060000 - 0x106FFFF >> I presume this is just saying RDS uses IP protocol TCP and >> there is no well known port (e.g. uses dynamic ports). So how >> do you know ahead of time which port ? >> >>>>> For iSER it's ...? >>>> These numbers are too large for just "port space". 
>>>> >>>> iSER SID is 0x000000000106035c >>>> >>>> in your nomenclature, I guess 0x106035c >>>> >>>> 01 says RDMA aware ULP service ID range >>>> 06 says IP protocol is TCP >>>> 0x035c (port 860) is the well known TCP port for iSCSI >>> Thanks, that is just what I needed. >>> I'm preparing a (very) simplified interface for defining QoS policy. >>> I'm adding an additional section in QoS policy file, where an admin >>> will be able to configure QoS per ULP or per application w/o going >>> into too many details. >>> Here's the example of what I have in mind: >>> >>> qos-ulps >>> default : 0 #default SL >>> sdp, port 10000-20000 : 2 >>> sdp : 0 #default SL for SDP >>> rds, port 25000 : 2 #SL for RDS when >> destination port is 25000 >> >> Isn't there a chicken and egg problem here with this ? How do >> you know port 25000 will be assigned "in advance" ? >> >>> rds, : 0 #default SL for RDS >> I don't see how RDS can work separate from other CMA based >> protocols which use dynamic ports. >> >>> iser *??????* : 4 #SL for iSER >>> ipoib, pkey 0x0001 : 5 #SL for IPoIB on >> partition with pkey 0x0001 >>> ipoib : 6 #default IPoIB >> partition - pkey=0x7FFF >> ... >>> end-qos-ulps >>> >>> This syntax is possible only if there are well known facts >> such as SDP >>> service ID, in which case admin will be able to just state "sdp: >>> ", and OpenSM will (internally) generate relevant matching rule >>> and QoS level based on this known service ID. >>> >>> So back to iSER: >>> >>> Can I assume that the target port for iSER will always be >> 860, hence >>> the iSER service ID will always be 0x000000000106035c? >> In terms of iSER, I was only commenting on what the spec >> says. I did not verify its operation in terms of the code. >> Does the code follow the spec ? 
>> >> -- Hal >> >>> Or perhaps I can do it similar to SDP, where there is an option to >>> specify the port ranges along with the ULP name (SDP): >>> - if administrator only specifies "iser", I can assume that >>> the service ID is default 0x000000000106035c >>> - if administrator only specifies "iser" and ports, OpenSM >>> will build service ID based on a well known prefix >>> (0x000000000106pppp) where the last 4 hex digits are target >>> port number >>> >>> Keep in mind that if this doesn't look too flexible and >> doesn't cover >>> all the cases, there's always the rest of the QoS policy >> file with all >>> the advanced configuration. >>> >>> -- Yevgeny >>> >>>> -- Hal >>>> >>>>>> 2) the rdma cm signature >>>>> Do you mean something iSER-specific, or just the way the >> cm builds >>>>> the service ID out of port space and tcp port? >>>>> Can you give an example? >>>>> >>>>>> 3) the destination tcp port provided to rdma_resolve_addr >>>>> I guess that tcp port is in the lower 4 nibs of the service ID, >>>>> similar to SDP. Right? 
>>>>> -- Yevgeny >>>>> >>>>> _______________________________________________ >>>>> general mailing list >>>>> general at lists.openfabrics.org >>>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>>>> >>>>> To unsubscribe, please visit >>>>> http://openib.org/mailman/listinfo/openib-general >>>>> >>> _______________________________________________ >>> general mailing list >>> general at lists.openfabrics.org >>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>> >>> To unsubscribe, please visit >>> http://openib.org/mailman/listinfo/openib-general >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general >> > From rdreier at cisco.com Wed Nov 14 12:52:30 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 14 Nov 2007 12:52:30 -0800 Subject: [ofa-general] CQ destory In-Reply-To: <532b813a0711140950t32533a7ao1ada8bd6a04fce36@mail.gmail.com> (Ganesh Sadasivan's message of "Wed, 14 Nov 2007 09:50:20 -0800") References: <532b813a0711131558o230cf6efq8c2f732a2aae79ad@mail.gmail.com> <532b813a0711140950t32533a7ao1ada8bd6a04fce36@mail.gmail.com> Message-ID: > There are a couple of cq-s registerd with the same completion channel. > > ibv_cq_get_event is done in a thread. When this thread is woken up by > one or more completion events, ibv_poll_cq is done to extract each of > the completion entry. > If the completion status is bad the cq is destroyed as part of processing this > completion entry. The question is will destroy of cq flush all the > completion events > for this cq? If not what is the way to get around this problem? I think the current code is OK. Destroying a CQ does remove all the events associated with that CQ from its completion channel. 
However there is an unavoidable race if you call ibv_destroy_cq() from one thread but then retrieve a completion event for the same CQ from another thread before the destroy operation actually takes place. To deal with this, the destroy CQ operation will not actually return until you have acked all the completion events returned with ibv_ack_cq_events(). - R. From rdreier at cisco.com Wed Nov 14 12:54:52 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 14 Nov 2007 12:54:52 -0800 Subject: [ofa-general] Re: [PATCH 3/4] drivers/infiniband: Drop redundant includes of moduleparam.h In-Reply-To: (Julia Lawall's message of "Tue, 13 Nov 2007 22:35:14 +0100 (CET)") References: Message-ID: > Drop #include <linux/moduleparam.h> in files that also include #include > <linux/module.h>. module.h includes moduleparam.h already. Do we want to make this sort of source code change? I thought that the consensus about the kernel was that we wanted to avoid relying on implicit includes of <linux/moduleparam.h> by <linux/module.h> -- in this case a better change would actually seem to be to explicitly include moduleparam.h in files using module parameters and then remove the include from module.h. - R. From rdreier at cisco.com Wed Nov 14 12:56:12 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 14 Nov 2007 12:56:12 -0800 Subject: [ofa-general] Re: [PATCH] QLogic InfiniPath: convert ipath_eep_sem to mutex In-Reply-To: <20071113184503.GE30483@traven> (Matthias Kaehlcke's message of "Tue, 13 Nov 2007 19:45:03 +0100") References: <20071113184503.GE30483@traven> Message-ID: [ Forwarding to ipath entry from MAINTAINERS file...
if someone from Qlogic will ACK this, I'll add it to my tree for 2.6.25 - Roland ] QLogic InfiniPath: convert the semaphore ipath_eep_sem to the mutex API Signed-off-by: Matthias Kaehlcke -- diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index e7c25db..a5b6299 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -510,10 +510,10 @@ int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -524,10 +524,10 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, { int ret; - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (!ret) { ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); } return ret; @@ -616,9 +616,9 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) goto bail; } - down(&dd->ipath_eep_sem); + mutex_lock(&dd->ipath_eep_lock); eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (eep_stat) { ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); @@ -764,14 +764,14 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) /* Grab semaphore and read current EEPROM. If we get an * error, let go, but if not, keep it until we finish write. 
*/ - ret = down_interruptible(&dd->ipath_eep_sem); + ret = mutex_lock_interruptible(&dd->ipath_eep_lock); if (ret) { ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); goto free_bail; } ret = ipath_eeprom_internal_read(dd, 0, buf, len); if (ret) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "Unable read EEPROM for logging\n"); goto free_bail; } @@ -779,7 +779,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 0); if (csum != ifp->if_csum) { - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", csum, ifp->if_csum); ret = 1; @@ -849,7 +849,7 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd) csum = flash_csum(ifp, 1); ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); } - up(&dd->ipath_eep_sem); + mutex_unlock(&dd->ipath_eep_lock); if (ret) ipath_dev_err(dd, "Failed updating EEPROM\n"); diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 9dd0bac..9e9d6fa 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -348,7 +348,7 @@ static int init_chip_first(struct ipath_devdata *dd, spin_lock_init(&dd->ipath_gpio_lock); spin_lock_init(&dd->ipath_eep_st_lock); - sema_init(&dd->ipath_eep_sem, 1); + mutex_init(&dd->ipath_eep_lock); done: *pdp = pd; diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 8786dd7..a6e7a60 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -616,7 +617,7 @@ struct ipath_devdata { /* control access to actual counters, timer */ spinlock_t ipath_eep_st_lock; /* control high-level access to EEPROM */ - struct semaphore ipath_eep_sem; + struct mutex ipath_eep_lock; /* Below inc'd by 
ipath_snap_cntrs(), locked by ipath_eep_st_lock */ uint64_t ipath_traffic_wds; /* active time is kept in seconds, but logged in hours */ -- Matthias Kaehlcke Linux Application Developer Barcelona La libertad es como la mañana. Hay quienes esperan dormidos a que llegue, pero hay quienes desvelan y caminan la noche para alcanzarla (Subcomandante Marcos) .''`. using free software / Debian GNU/Linux | http://debian.org : :' : `. `'` gpg --keyserver pgp.mit.edu --recv-keys 47D8E5D4 `- From rdreier at cisco.com Wed Nov 14 12:56:43 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 14 Nov 2007 12:56:43 -0800 Subject: [ofa-general] Re: [PATCH] libibverbs - return valid bad_wr from ibv_cmd_post_send() In-Reply-To: <1194998556.3612.95.camel@brick.pathscale.com> (Ralph Campbell's message of "Tue, 13 Nov 2007 16:02:36 -0800") References: <1194909302.3612.74.camel@brick.pathscale.com> <1194998556.3612.95.camel@brick.pathscale.com> Message-ID: > But even if there is an error not related to any WR directly, > it is still the first WR that is not sent. I guess NULL could > be used to give slightly more information to the caller but > I don't really expect most application error recovery code to > make the distinction. Makes sense. I'll apply this and also fix the other spots that Dotan pointed out. Thanks, Roland From rdunlap at xenotime.net Wed Nov 14 13:00:52 2007 From: rdunlap at xenotime.net (Randy Dunlap) Date: Wed, 14 Nov 2007 13:00:52 -0800 Subject: [ofa-general] Re: [PATCH 3/4] drivers/infiniband: Drop redundant includes of moduleparam.h In-Reply-To: References: Message-ID: <20071114130052.8118c397.rdunlap@xenotime.net> On Wed, 14 Nov 2007 12:54:52 -0800 Roland Dreier wrote: > > Drop #include in files that also include #include > > . module.h includes moduleparam.h already. > > Do we want to make this sort of source code change? 
I thought that > the consensus about the kernel was that we wanted to avoid relying on > implicit includes of <linux/moduleparam.h> by <linux/module.h> -- in this case a better change > would actually seem to be to explicitly include moduleparam.h in files > using module parameters and then remove the include from module.h. That's correct AFAIK (what Roland said). --- ~Randy From julia at diku.dk Wed Nov 14 13:18:07 2007 From: julia at diku.dk (Julia Lawall) Date: Wed, 14 Nov 2007 22:18:07 +0100 (CET) Subject: [ofa-general] Re: [PATCH 3/4] drivers/infiniband: Drop redundant includes of moduleparam.h In-Reply-To: References: Message-ID: On Wed, 14 Nov 2007, Roland Dreier wrote: > > Drop #include <linux/moduleparam.h> in files that also include #include > > <linux/module.h>. module.h includes moduleparam.h already. > > Do we want to make this sort of source code change? I thought that > the consensus about the kernel was that we wanted to avoid relying on > implicit includes of <linux/moduleparam.h> by <linux/module.h> -- in this case a better change > would actually seem to be to explicitly include moduleparam.h in files > using module parameters and then remove the include from module.h. Someone else made a similar comment, so it seems reasonable to forget about the patches. I could easily make a patch to go the other way if there is an interest in that. On the other hand, perhaps the consensus is to just leave things as they are. julia From glenn at lists.openfabrics.org Wed Nov 14 14:14:53 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Wed, 14 Nov 2007 14:14:53 -0800 (PST) Subject: [ofa-general] [PATCH 0/6] nes: Cosmetic changes; support virtual WQs and PPC Message-ID: <20071114221453.3ADD5E609F0@openfabrics.org> Updated code for the NetEffect NE020 adapter. Updates include: - Support for userspace/virtual WQs.
- PowerPC - Support for multiple debugging levels - Many, many cosmetic changes inline with kernel.org standards The kernel code can be found in: git://git.openfabrics.org/~glenn/linux-2.6.git The userspace code can be found in: git://git.openfabrics.org/~glenn/libnes.git Signed-off-by: Glenn Grundstrom --- drivers/infiniband/hw/nes/Makefile | 4 - drivers/infiniband/hw/nes/nes.c | 103 ++-- drivers/infiniband/hw/nes/nes.h | 404 +++++-------- drivers/infiniband/hw/nes/nes_cm.c | 130 ++--- drivers/infiniband/hw/nes/nes_cm.h | 184 +++--- drivers/infiniband/hw/nes/nes_hw.c | 510 ++++++++++++----- drivers/infiniband/hw/nes/nes_hw.h | 288 ++++++---- drivers/infiniband/hw/nes/nes_nic.c | 511 ++++++++++++----- drivers/infiniband/hw/nes/nes_user.h | 17 +- drivers/infiniband/hw/nes/nes_utils.c | 461 ++++++++------- drivers/infiniband/hw/nes/nes_verbs.c | 944 ++++++++++++++++++------------- drivers/infiniband/hw/nes/nes_verbs.h | 146 +++--- 12 files changed, 2130 insertions(+), 1572 deletions(-) From glenn at lists.openfabrics.org Wed Nov 14 14:19:26 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Wed, 14 Nov 2007 14:19:26 -0800 (PST) Subject: [ofa-general] [PATCH 1/6] nes: Cosmetic changes; support virtual WQs and PPC Message-ID: <20071114221926.AA5E5E60A24@openfabrics.org> Updated code for the NetEffect NE020 adapter. Updates include: - Support for userspace/virtual WQs. 
- PowerPC - Support for multiple debugging levels - Many, many cosmetic changes inline with kernel.org standards Diffs for Makefile, nes.c and nes.h Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile index 6e94d56..3514851 100644 --- a/drivers/infiniband/hw/nes/Makefile +++ b/drivers/infiniband/hw/nes/Makefile @@ -1,7 +1,3 @@ - -EXTRA_CFLAGS += -DNES_MINICM - obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o - diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index ecf60a6..4f7ae5c 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -70,12 +71,8 @@ MODULE_VERSION(DRV_VERSION); int max_mtu = 9000; int nics_per_function = 1; - -#ifdef NES_INT_MODERATE -int interrupt_mod_interval = 128; -#else int interrupt_mod_interval = 0; -#endif + /* Interoperability */ int mpa_version = 1; @@ -96,9 +93,9 @@ unsigned int nes_drv_opt = 0; module_param(nes_drv_opt, int, 0); MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); -unsigned int nes_debug_level = 0xffffffff; -module_param(nes_debug_level, uint, 0644); -MODULE_PARM_DESC(nes_debug_level, "Enable debug output level"); +unsigned int nes_debug_level = 0x0; +module_param_named(debug_level, nes_debug_level, uint, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug output level"); LIST_HEAD(nes_adapter_list); LIST_HEAD(nes_dev_list); @@ -127,7 +124,7 @@ MODULE_DEVICE_TABLE(pci, nes_pci_table); static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); static int nes_net_event(struct notifier_block *, unsigned long, void *); -static int notifiers_registered = 0; +static int nes_notifiers_registered = 0; static struct notifier_block 
nes_inetaddr_notifier = { @@ -165,7 +162,7 @@ static int nes_inetaddr_event(struct notifier_block *notifier, netdev = nesdev->netdev[0]; nesvnic = netdev_priv(netdev); if (netdev == event_netdev) { - if (0 == nesvnic->rdma_enabled) { + if (nesvnic->rdma_enabled == 0) { nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since" " RDMA is not enabled.\n", netdev->name); @@ -224,7 +221,7 @@ static int nes_net_event(struct notifier_block *notifier, netdev = nesdev->netdev[0]; nesvnic = netdev_priv(netdev); if (netdev == neigh->dev) { - if (0 == nesvnic->rdma_enabled) { + if (nesvnic->rdma_enabled == 0) { nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n", netdev->name); } else { @@ -268,6 +265,7 @@ void nes_add_ref(struct ib_qp *ibqp) */ void nes_rem_ref(struct ib_qp *ibqp) { + unsigned long flags; u64 u64temp; struct nes_qp *nesqp; struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); @@ -288,15 +286,28 @@ void nes_rem_ref(struct ib_qp *ibqp) atomic_inc(&qps_destroyed); /* Free the control structures */ - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.sq_vbase, - nesqp->hwqp.sq_pbase); + + if (nesqp->pbl_vbase) { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); + nesqp->pbl_vbase = NULL; + kunmap(nesqp->page); + + } else { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + } nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); /* Destroy the QP */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if 
(cqp_request == NULL) { nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); return; } @@ -307,7 +318,7 @@ void nes_rem_ref(struct ib_qp *ibqp) cpu_to_le32(NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP); if (nesqp->hte_added) { - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_QP_DEL_HTE); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_QP_DEL_HTE); nesqp->hte_added = 0; } @@ -323,8 +334,7 @@ void nes_rem_ref(struct ib_qp *ibqp) cqp_wqe->wqe_words[NES_CQP_QP_WQE_CONTEXT_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); kfree(nesqp->allocated_buffer); } @@ -397,7 +407,7 @@ static irqreturn_t nes_interrupt(int irq, void *dev_id) } } if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) && - (0 == handled)) { + (handled == 0)) { intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); if ((intf_int_stat & nesdev->intf_int_req) != 0) { handled = 1; @@ -420,7 +430,7 @@ static irqreturn_t nes_interrupt(int irq, void *dev_id) if (handled) { #ifdef NES_NAPI - if (0 == nes_napi_isr(nesdev)) { + if (nes_napi_isr(nesdev) == 0) { #endif tasklet_schedule(&nesdev->dpc_tasklet); #ifdef NES_NAPI @@ -506,14 +516,13 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i pci_set_master(pcidev); /* Allocate hardware structure */ - nesdev = kmalloc(sizeof(struct nes_device), GFP_KERNEL); + nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL); if (!nesdev) { printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev)); ret = -ENOMEM; goto bail2; } - memset(nesdev, 0, sizeof(struct nes_device)); nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev); nesdev->pcidev = pcidev; pci_set_drvdata(pcidev, nesdev); @@ -536,7 +545,6 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct 
pci_device_i /* Ensure interrupts are disabled */ nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); -#ifdef CONFIG_PCI_MSI if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) { if (!pci_enable_msi(nesdev->pcidev)) { nesdev->msi_enabled = 1; @@ -550,24 +558,25 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n", pci_name(pcidev)); } -#else - nes_debug(NES_DBG_INIT, "MSI not supported by this kernel for device %s\n", - pci_name(pcidev)); -#endif - nesdev->et_rx_coalesce_usecs_irq = interrupt_mod_interval; nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0); - nesdev->doorbell_start = pci_resource_start(nesdev->pcidev, BAR_1); + nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1); /* Init the adapter */ nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev); + nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; if (!nesdev->nesadapter) { printk(KERN_ERR PFX "Unable to initialize adapter.\n" ); ret = -ENOMEM; goto bail5; } - nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn)%nesdev->nesadapter->port_count; + /* nesdev->base_doorbell_index = + nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ + nesdev->base_doorbell_index = 1; + nesdev->doorbell_start = nesdev->nesadapter->doorbell_start; + nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count; + tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev); /* bring up the Control QP */ @@ -589,7 +598,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i } /* TODO: This really should be the first driver to load, not function 0 */ - if (0 == PCI_FUNC(nesdev->pcidev->devfn)) { + if (PCI_FUNC(nesdev->pcidev->devfn) == 0) { /* pick up PCI and critical errors if the first driver to load */ nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR; nesdev->int_req |= NES_INT_INTF; 
@@ -616,11 +625,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i list_add_tail(&nesdev->list, &nes_dev_list); /* Request an interrupt line for the driver */ -#ifdef IRQF_SHARED ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev); -#else - ret = request_irq(pcidev->irq, nes_interrupt, SA_SHIRQ, DRV_NAME, nesdev); -#endif if (ret) { printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", pci_name(pcidev), pcidev->irq); @@ -629,11 +634,11 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); - if (!notifiers_registered) { + if (nes_notifiers_registered == 0) { register_inetaddr_notifier(&nes_inetaddr_notifier); register_netevent_notifier(&nes_net_notifier); - notifiers_registered = 1; } + nes_notifiers_registered++; /* Initialize network devices */ if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) { @@ -673,10 +678,10 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", nesdev->netdev_count, nesdev->nesadapter->netdev_count); - if (notifiers_registered) { + nes_notifiers_registered--; + if (nes_notifiers_registered == 0) { unregister_netevent_notifier(&nes_net_notifier); unregister_inetaddr_notifier(&nes_inetaddr_notifier); - notifiers_registered = 0; } list_del(&nesdev->list); @@ -685,11 +690,9 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i bail65: printk(KERN_ERR PFX "bail65\n"); free_irq(pcidev->irq, nesdev); -#ifdef CONFIG_PCI_MSI if (nesdev->msi_enabled) { pci_disable_msi(pcidev); } -#endif bail6: printk(KERN_ERR PFX "bail6\n"); tasklet_kill(&nesdev->dpc_tasklet); @@ -722,9 +725,7 @@ static void __devexit nes_remove(struct pci_dev *pcidev) { struct nes_device *nesdev = pci_get_drvdata(pcidev); struct net_device *netdev; - int netdev_index=0; - - 
nes_debug(NES_DBG_SHUTDOWN, "called.\n"); + int netdev_index = 0; if (nesdev->netdev_count) { netdev = nesdev->netdev[netdev_index]; @@ -738,10 +739,11 @@ static void __devexit nes_remove(struct pci_dev *pcidev) nesdev->nesadapter->netdev_count--; } } - if (notifiers_registered) { + + nes_notifiers_registered--; + if (nes_notifiers_registered == 0) { unregister_netevent_notifier(&nes_net_notifier); unregister_inetaddr_notifier(&nes_inetaddr_notifier); - notifiers_registered = 0; } list_del(&nesdev->list); @@ -753,11 +755,9 @@ static void __devexit nes_remove(struct pci_dev *pcidev) free_irq(pcidev->irq, nesdev); -#ifdef CONFIG_PCI_MSI if (nesdev->msi_enabled) { pci_disable_msi(pcidev); } -#endif iounmap(nesdev->regs); kfree(nesdev); @@ -788,11 +788,7 @@ static int __init nes_init_module(void) printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n"); return retval; } -#ifdef OFED_1_2 - return(pci_module_init(&nes_pci_driver)); -#else - return(pci_register_driver(&nes_pci_driver)); -#endif + return pci_register_driver(&nes_pci_driver); } @@ -808,4 +804,3 @@ static void __exit nes_exit_module(void) module_init(nes_init_module); module_exit(nes_exit_module); - diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 96dfadd..48082ed 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -53,106 +54,109 @@ #include #include -#define TBIRD -#define NES_TWO_PORT -#define NES_ENABLE_CQE_READ #define NES_SEND_FIRST_WRITE #define QUEUE_DISCONNECTS -#define DRV_BUILD "1" +#define DRV_BUILD "1" #define DRV_NAME "iw_nes" -#define DRV_VERSION "0.5 Build " DRV_BUILD +#define DRV_VERSION "0.6 Build " DRV_BUILD #define PFX DRV_NAME ": " /* * NetEffect PCI vendor id and NE010 PCI device id. 
*/ #ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ -#define PCI_VENDOR_ID_NETEFFECT 0x1678 +#define PCI_VENDOR_ID_NETEFFECT 0x1678 #define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 #endif -#define NE020_REV 4 -#define NE020_REV1 5 +#define NE020_REV 4 +#define NE020_REV1 5 -#define BAR_0 0 -#define BAR_1 2 +#define BAR_0 0 +#define BAR_1 2 -#define RX_BUF_SIZE (1536 + 8) +#define RX_BUF_SIZE (1536 + 8) +#define NES_REG0_SIZE (4 * 1024) +#define NES_TX_TIMEOUT (6*HZ) +#define NES_FIRST_QPN 64 +#define NES_SW_CONTEXT_ALIGN 1024 -#define NES_REG0_SIZE (4 * 1024) -#define NES_TX_TIMEOUT (6*HZ) -#define NES_FIRST_QPN 64 -#define NES_SW_CONTEXT_ALIGN 1024 +#define NES_NIC_MAX_NICS 16 +#define NES_MAX_ARP_TABLE_SIZE 4096 -#define NES_NIC_MAX_NICS 16 -#define NES_MAX_ARP_TABLE_SIZE 4096 +#define NES_NIC_CEQ_SIZE 8 +/* NICs will be on a separate CQ */ +#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32) -#define MAX_DPC_ITERATIONS 128 +#define NES_MAX_PORT_COUNT 4 -#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 -#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 -#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 -#define NES_DRV_OPT_DISABLE_INTF 0x00000008 -#define NES_DRV_OPT_ENABLE_MSI 0x00000010 -#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020 -#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040 -#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 +#define MAX_DPC_ITERATIONS 128 -#define NES_AEQ_EVENT_TIMEOUT 2500 -#define NES_DISCONNECT_EVENT_TIMEOUT 2000 +#define NES_CQP_REQUEST_NO_DOORBELL_RING 0 +#define NES_CQP_REQUEST_RING_DOORBELL 1 + +#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001 +#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002 +#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004 +#define NES_DRV_OPT_DISABLE_INTF 0x00000008 +#define NES_DRV_OPT_ENABLE_MSI 0x00000010 +#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020 +#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040 +#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 +#define NES_DRV_OPT_DISABLE_INT_MOD 
0x00000100 +#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 + +#define NES_AEQ_EVENT_TIMEOUT 2500 +#define NES_DISCONNECT_EVENT_TIMEOUT 2000 /* debug levels */ -#define NES_DBG_HW 0x00000001 -#define NES_DBG_INIT 0x00000002 -#define NES_DBG_ISR 0x00000004 -#define NES_DBG_PHY 0x00000008 -#define NES_DBG_NETDEV 0x00000010 -#define NES_DBG_CM 0x00000020 +/* must match userspace */ +#define NES_DBG_HW 0x00000001 +#define NES_DBG_INIT 0x00000002 +#define NES_DBG_ISR 0x00000004 +#define NES_DBG_PHY 0x00000008 +#define NES_DBG_NETDEV 0x00000010 +#define NES_DBG_CM 0x00000020 #define NES_DBG_CM1 0x00000040 -#define NES_DBG_NIC_RX 0x00000080 -#define NES_DBG_NIC_TX 0x00000100 -#define NES_DBG_CQP 0x00000200 -#define NES_DBG_MMAP 0x00000400 -#define NES_DBG_MR 0x00000800 -#define NES_DBG_PD 0x00001000 -#define NES_DBG_CQ 0x00002000 -#define NES_DBG_QP 0x00004000 -#define NES_DBG_MOD_QP 0x00008000 -#define NES_DBG_AEQ 0x00010000 -#define NES_DBG_IW_RX 0x00020000 -#define NES_DBG_IW_TX 0x00040000 -#define NES_DBG_SHUTDOWN 0x00080000 -#define NES_DBG_RSVD1 0x10000000 -#define NES_DBG_RSVD2 0x20000000 -#define NES_DBG_RSVD3 0x40000000 -#define NES_DBG_RSVD4 0x80000000 -#define NES_DBG_ALL 0xffffffff +#define NES_DBG_NIC_RX 0x00000080 +#define NES_DBG_NIC_TX 0x00000100 +#define NES_DBG_CQP 0x00000200 +#define NES_DBG_MMAP 0x00000400 +#define NES_DBG_MR 0x00000800 +#define NES_DBG_PD 0x00001000 +#define NES_DBG_CQ 0x00002000 +#define NES_DBG_QP 0x00004000 +#define NES_DBG_MOD_QP 0x00008000 +#define NES_DBG_AEQ 0x00010000 +#define NES_DBG_IW_RX 0x00020000 +#define NES_DBG_IW_TX 0x00040000 +#define NES_DBG_SHUTDOWN 0x00080000 +#define NES_DBG_RSVD1 0x10000000 +#define NES_DBG_RSVD2 0x20000000 +#define NES_DBG_RSVD3 0x40000000 +#define NES_DBG_RSVD4 0x80000000 +#define NES_DBG_ALL 0xffffffff #ifdef CONFIG_INFINIBAND_NES_DEBUG -#define assert(expr) \ -if(!(expr)) { \ - printk(KERN_ERR PFX "Assertion failed! 
%s, %s, %s, line %d\n", \ - #expr, __FILE__, __FUNCTION__, __LINE__); \ -} - #define nes_debug(level, fmt, args...) \ if (level & nes_debug_level) \ printk(KERN_ERR PFX "%s[%u]: " fmt, __FUNCTION__, __LINE__, ##args) -#ifndef dprintk -#define dprintk(fmt, args...) do { printk(KERN_ERR PFX fmt, ##args); } while (0) -#endif -#define NES_EVENT_TIMEOUT 1200000 -/* #define NES_EVENT_TIMEOUT 1200 */ +#define assert(expr) \ +if(!(expr)) { \ + printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ + #expr, __FILE__, __FUNCTION__, __LINE__); \ +} + +#define NES_EVENT_TIMEOUT 1200000 #else -#define assert(expr) do {} while (0) #define nes_debug(level, fmt, args...) -#define dprintk(fmt, args...) do {} while (0) +#define assert(expr) do {} while (0) -#define NES_EVENT_TIMEOUT 100000 +#define NES_EVENT_TIMEOUT 100000 #endif #include "nes_hw.h" @@ -205,6 +209,14 @@ extern atomic_t cm_accel_dropped_pkts; extern atomic_t cm_resets_recvd; extern u32 crit_err_count; +extern u32 int_mod_timer_init; +extern u32 int_mod_cq_depth_256; +extern u32 int_mod_cq_depth_128; +extern u32 int_mod_cq_depth_32; +extern u32 int_mod_cq_depth_24; +extern u32 int_mod_cq_depth_16; +extern u32 int_mod_cq_depth_4; +extern u32 int_mod_cq_depth_1; extern u32 mh_detected; extern u32 mh_pauses_sent; @@ -217,67 +229,60 @@ extern atomic_t cqp_reqs_redriven; struct nes_device { - struct nes_adapter *nesadapter; - void __iomem *regs; - void __iomem *index_reg; - struct pci_dev *pcidev; - struct net_device *netdev[NES_NIC_MAX_NICS]; - u64 link_status_interrupts; - struct tasklet_struct dpc_tasklet; - spinlock_t indexed_regs_lock; - unsigned long doorbell_start; - unsigned long csr_start; - unsigned long mac_tx_errors; - unsigned long mac_pause_frames_sent; - unsigned long mac_pause_frames_received; - unsigned long mac_rx_errors; - unsigned long mac_rx_crc_errors; - unsigned long mac_rx_symbol_err_frames; - unsigned long mac_rx_jabber_frames; - unsigned long mac_rx_oversized_frames; - unsigned long 
mac_rx_short_frames; - unsigned int mac_index; - unsigned int nes_stack_start; + struct nes_adapter *nesadapter; + void __iomem *regs; + void __iomem *index_reg; + struct pci_dev *pcidev; + struct net_device *netdev[NES_NIC_MAX_NICS]; + u64 link_status_interrupts; + struct tasklet_struct dpc_tasklet; + spinlock_t indexed_regs_lock; + unsigned long csr_start; + unsigned long doorbell_region; + unsigned long doorbell_start; + unsigned long mac_tx_errors; + unsigned long mac_pause_frames_sent; + unsigned long mac_pause_frames_received; + unsigned long mac_rx_errors; + unsigned long mac_rx_crc_errors; + unsigned long mac_rx_symbol_err_frames; + unsigned long mac_rx_jabber_frames; + unsigned long mac_rx_oversized_frames; + unsigned long mac_rx_short_frames; + unsigned int mac_index; + unsigned int nes_stack_start; /* Control Structures */ - void *cqp_vbase; - dma_addr_t cqp_pbase; - u32 cqp_mem_size; - u8 ceq_index; - u8 nic_ceq_index; - struct nes_hw_cqp cqp; - struct nes_hw_cq ccq; - struct list_head cqp_avail_reqs; - struct list_head cqp_pending_reqs; + void *cqp_vbase; + dma_addr_t cqp_pbase; + u32 cqp_mem_size; + u8 ceq_index; + u8 nic_ceq_index; + struct nes_hw_cqp cqp; + struct nes_hw_cq ccq; + struct list_head cqp_avail_reqs; + struct list_head cqp_pending_reqs; struct nes_cqp_request *nes_cqp_requests; - u32 int_req; - u32 int_stat; - u32 timer_int_req; - u32 timer_only_int_count; - u32 intf_int_req; - u32 et_rx_coalesce_usecs_irq; - u32 last_mac_tx_pauses; - u32 last_used_chunks_tx; - struct list_head list; - - u16 base_doorbell_index; - u8 msi_enabled; - u8 netdev_count; - u8 napi_isr_ran; - u8 disable_rx_flow_control; - u8 disable_tx_flow_control; + u32 int_req; + u32 int_stat; + u32 timer_int_req; + u32 timer_only_int_count; + u32 intf_int_req; + u32 last_mac_tx_pauses; + u32 last_used_chunks_tx; + struct list_head list; + + u16 base_doorbell_index; + u16 deepcq_count; + u8 msi_enabled; + u8 netdev_count; + u8 napi_isr_ran; + u8 disable_rx_flow_control; + 
u8 disable_tx_flow_control; }; -static inline int nes_skb_is_gso(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->gso_size; -} - -#define nes_skb_linearize(_skb) skb_linearize(_skb) - - /* Read from memory-mapped device */ static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index) { @@ -358,8 +363,6 @@ static inline int nes_alloc_resource(struct nes_adapter *nesadapter, return -EMFILE; } } - nes_debug(NES_DBG_HW, "find_next_zero_bit returned = %u (max = %u).\n", - resource_num, max_resources); set_bit(resource_num, resource_array); *next = resource_num+1; if (*next == max_resources) { @@ -397,151 +400,52 @@ static inline void nes_free_resource(struct nes_adapter *nesadapter, spin_unlock_irqrestore(&nesadapter->resource_lock, flags); } -static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev) { - return(container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic); -} - -static inline struct nes_pd *to_nespd(struct ib_pd *ibpd) { - return(container_of(ibpd, struct nes_pd, ibpd)); -} - -static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext) { - return(container_of(ibucontext, struct nes_ucontext, ibucontext)); +static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev) +{ + return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic; } -static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr) { - return(container_of(ibmr, struct nes_mr, ibmr)); +static inline struct nes_pd *to_nespd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct nes_pd, ibpd); } -static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr) { - return(container_of(ibfmr, struct nes_mr, ibfmr)); +static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct nes_ucontext, ibucontext); } -static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw) { - return(container_of(ibmw, struct nes_mr, ibmw)); +static inline struct nes_mr *to_nesmr(struct ib_mr 
*ibmr) +{ + return container_of(ibmr, struct nes_mr, ibmr); } -static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr) { - return(container_of(nesmr, struct nes_fmr, nesmr)); +static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct nes_mr, ibfmr); } -static inline struct nes_cq *to_nescq(struct ib_cq *ibcq) { - return(container_of(ibcq, struct nes_cq, ibcq)); +static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct nes_mr, ibmw); } -static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp) { - return(container_of(ibqp, struct nes_qp, ibqp)); +static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr) +{ + return container_of(nesmr, struct nes_fmr, nesmr); } - -#define NES_CQP_REQUEST_NOT_HOLDING_LOCK 0 -#define NES_CQP_REQUEST_HOLDING_LOCK 1 -#define NES_CQP_REQUEST_NO_DOORBELL_RING 0 -#define NES_CQP_REQUEST_RING_DOORBELL 1 - -static inline struct nes_cqp_request - *nes_get_cqp_request(struct nes_device *nesdev, int holding_lock) { - unsigned long flags; - struct nes_cqp_request *cqp_request = NULL; - - if (!holding_lock) { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - } - if (!list_empty(&nesdev->cqp_avail_reqs)) { - cqp_request = list_entry(nesdev->cqp_avail_reqs.next, - struct nes_cqp_request, list); - atomic_inc(&cqp_reqs_allocated); - list_del_init(&cqp_request->list); - } else if (!holding_lock) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - cqp_request = kzalloc(sizeof(struct nes_cqp_request), - GFP_KERNEL); - if (cqp_request) { - cqp_request->dynamic = 1; - INIT_LIST_HEAD(&cqp_request->list); - atomic_inc(&cqp_reqs_dynallocated); - } - spin_lock_irqsave(&nesdev->cqp.lock, flags); - } - if (!holding_lock) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - - if (cqp_request) { - init_waitqueue_head(&cqp_request->waitq); - cqp_request->waiting = 0; - cqp_request->request_done = 0; - init_waitqueue_head(&cqp_request->waitq); - 
nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n", - cqp_request); - } else - printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n", - __FUNCTION__); - - return cqp_request; +static inline struct nes_cq *to_nescq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct nes_cq, ibcq); } -static inline void nes_post_cqp_request(struct nes_device *nesdev, - struct nes_cqp_request *cqp_request, int holding_lock, int ring_doorbell) +static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp) { - /* caller must be holding CQP lock */ - struct nes_hw_cqp_wqe *cqp_wqe; - unsigned long flags; - u32 cqp_head; - - if (!holding_lock) { - spin_lock_irqsave(&nesdev->cqp.lock, flags); - } - - if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) & - (nesdev->cqp.sq_size - 1)) != 1) - && (list_empty(&nesdev->cqp_pending_reqs))) { - cqp_head = nesdev->cqp.sq_head++; - nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; - cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); - barrier(); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = cpu_to_le32((u32)((u64)(cqp_request))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = cpu_to_le32((u32)(((u64)(cqp_request))>>32)); - nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," - " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," - " waiting = %d, refcount = %d.\n", - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, - nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, - cqp_request->waiting, atomic_read(&cqp_request->refcount)); - barrier(); - if (ring_doorbell) { - /* Ring doorbell (1 WQEs) */ - nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); - } - - barrier(); - } else { - atomic_inc(&cqp_reqs_queued); - nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 
0x%08X" - " put on the pending queue.\n", - cqp_request, - cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]&0x3f, - cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX]); - list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs); - } - - if (!holding_lock) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } - - return; + return container_of(ibqp, struct nes_qp, ibqp); } -/* Utils */ -#define CRC32C_POLY 0x1EDC6F41 -#define ORDER 32 -#define REFIN 1 -#define REFOUT 1 -#define NES_HASH_CRC_INITAL_VALUE 0xFFFFFFFF -#define NES_HASH_CRC_FINAL_XOR 0xFFFFFFFF /* nes.c */ void nes_add_ref(struct ib_qp *); @@ -550,6 +454,7 @@ struct ib_qp *nes_get_qp(struct ib_device *, int); /* nes_hw.c */ struct nes_adapter *nes_init_adapter(struct nes_device *, u8); +void nes_nic_init_timer_defaults(struct nes_device *, u8 ); unsigned int nes_reset_adapter_ne020(struct nes_device *, u8 *); int nes_init_serdes(struct nes_device *, u8, u8, u8); void nes_init_csr_ne020(struct nes_device *, u8, u8); @@ -572,6 +477,7 @@ int nes_destroy_cqp(struct nes_device *); int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); /* nes_nic.c */ +void nes_netdev_set_multicast_list(struct net_device *); void nes_netdev_exit(struct nes_vnic *); struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); void nes_netdev_destroy(struct net_device *); @@ -584,10 +490,8 @@ void nes_update_arp(unsigned char *, u32, u32, u16, u16); void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32); void nes_sock_release(struct nes_qp *, unsigned long *); struct nes_cm_core *nes_cm_alloc_core(void); -void nes_disconnect_worker(void *); void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32); int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32); - int nes_cm_disconn(struct nes_qp *); void nes_cm_disconn_worker(void *); @@ -605,6 +509,8 @@ void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16); void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *); 
void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16); void nes_read_10G_phy_reg(struct nes_device *, u16, u8); +struct nes_cqp_request *nes_get_cqp_request(struct nes_device *); +void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int); int nes_arp_table(struct nes_device *, u32, u8 *, u32); void nes_mh_fix(unsigned long); void nes_dump_mem(unsigned int, void *, int); From glenn at lists.openfabrics.org Wed Nov 14 14:21:35 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Wed, 14 Nov 2007 14:21:35 -0800 (PST) Subject: [ofa-general] [PATCH 2/6] nes: Cosmetic changes; support virtual WQs and PPC Message-ID: <20071114222135.D127AE60A30@openfabrics.org> Updated code for the NetEffect NE020 adapter. Updates include: - Support for userspace/virtual WQs. - PowerPC - Support for multiple debugging levels - Many, many cosmetic changes inline with kernel.org standards Diffs for nes_cm.c and nes_cm.h Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 561dcf3..4023a2c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -120,7 +120,7 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, return NULL; /* allocate an empty event */ - event = (struct nes_cm_event *)kzalloc(sizeof(*event), GFP_ATOMIC); + event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) return NULL; @@ -211,11 +211,7 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret = 0; -#ifdef OFED_1_2 - struct tcphdr *tcph = skb->h.th; -#else struct tcphdr *tcph = tcp_hdr(skb); -#endif /* first check to see if this a FIN pkt */ if (tcph->fin) { @@ -265,19 +261,12 @@ struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, ethh = (struct ethhdr *) buf; buf += ETH_HLEN; -#ifdef OFED_1_2 - iph = 
skb->nh.iph = (struct iphdr *)buf; - buf += sizeof(*iph); - tcph = skb->h.th = (struct tcphdr *)buf; - skb->mac.raw = skb->data; -#else iph = (struct iphdr *)buf; buf += sizeof(*iph); tcph = (struct tcphdr *)buf; skb_reset_mac_header(skb); skb_set_network_header(skb, ETH_HLEN); skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph)); -#endif buf += sizeof(*tcph); skb->ip_summed = CHECKSUM_PARTIAL; @@ -404,11 +393,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, } if (type == NES_TIMER_TYPE_SEND) { -#ifdef OFED_1_2 - new_send->seq_num = htonl(skb->h.th->seq); -#else new_send->seq_num = htonl(tcp_hdr(skb)->seq); -#endif atomic_inc(&new_send->skb->users); ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); @@ -433,11 +418,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } if (type == NES_TIMER_TYPE_RECV) { -#ifdef OFED_1_2 - new_send->seq_num = htonl(skb->h.th->seq); -#else new_send->seq_num = htonl(tcp_hdr(skb)->seq); -#endif new_send->timetosend = jiffies; spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_add_tail(&new_send->list, &cm_node->recv_list); @@ -687,7 +668,7 @@ int send_syn(struct nes_cm_node *cm_node, u32 sendack) options = (union all_known_options *)&optionsbuffer[optionssize]; options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE; options->as_windowscale.length = sizeof(struct option_windowscale); - options->as_windowscale.shiftcount = NES_CM_DEFAULT_RCV_WND_SCALE; + options->as_windowscale.shiftcount = cm_node->tcp_cntxt.snd_wscale; optionssize += sizeof(struct option_windowscale); if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) @@ -1054,11 +1035,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, struct nes_adapter *nesadapter; /* create an hte and cm_node for this instance */ - cm_node = (struct nes_cm_node *)kzalloc(sizeof(*cm_node), GFP_ATOMIC); + cm_node = 
kzalloc(sizeof(*cm_node), GFP_ATOMIC); if (!cm_node) return NULL; - memset(cm_node, 0, sizeof(struct nes_cm_node)); /* set our node specific transport info */ cm_node->loc_addr = cm_info->loc_addr; cm_node->rem_addr = cm_info->rem_addr; @@ -1072,6 +1052,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", + cm_node->listener, cm_node->cm_id); + INIT_LIST_HEAD(&cm_node->retrans_list); spin_lock_init(&cm_node->retrans_list_lock); INIT_LIST_HEAD(&cm_node->recv_list); @@ -1281,14 +1264,10 @@ int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, int optionsize; int datasize; int ret = 0; -#ifdef OFED_1_2 - struct tcphdr *tcph = skb->h.th; -#else struct tcphdr *tcph = tcp_hdr(skb); -#endif u32 inc_sequence; - if ((!tcph) || (NES_CM_STATE_TSA == cm_node->state)) { + if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) { BUG_ON(!tcph); atomic_inc(&cm_accel_dropped_pkts); return -1; @@ -1301,6 +1280,7 @@ int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, switch (cm_node->state) { case NES_CM_STATE_LISTENING: rem_ref_cm_node(cm_core, cm_node); + break; case NES_CM_STATE_TSA: case NES_CM_STATE_CLOSED: break; @@ -1333,11 +1313,7 @@ int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); -#ifdef OFED_1_2 - skb_pull(skb, skb->nh.iph->ihl << 2); -#else skb_pull(skb, ip_hdr(skb)->ihl << 2); -#endif skb_pull(skb, tcph->doff << 2); datasize = skb->len; @@ -1361,6 +1337,12 @@ int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + + if (optionsize) { + u8 *optionsloc = (u8 *)&tcph[1]; + process_options(cm_node, optionsloc, optionsize); + } + cm_node->tcp_cntxt.snd_wnd = htons(tcph->window) << cm_node->tcp_cntxt.snd_wscale; @@ -1368,11 +1350,6 @@ int 
process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; } - if (optionsize) { - u8 *optionsloc = (u8 *)&tcph[1]; - process_options(cm_node, optionsloc, optionsize); - } - if (tcph->ack) { cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); switch (cm_node->state) { @@ -1566,8 +1543,11 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, struct nes_cm_listener *listener; unsigned long flags; + nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", + htonl(cm_info->loc_addr), htons(cm_info->loc_port)); + /* cannot have multiple matching listeners */ - listener = find_listener( cm_core, htonl(cm_info->loc_addr), + listener = find_listener(cm_core, htonl(cm_info->loc_addr), htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { /* find automatically incs ref count ??? */ @@ -1578,7 +1558,7 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, if (!listener) { /* create a CM listen node (1/2 node to compare incoming traffic to) */ - listener = (struct nes_cm_listener *)kzalloc(sizeof(*listener), GFP_ATOMIC); + listener = kzalloc(sizeof(*listener), GFP_ATOMIC); if (!listener) { nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n"); return NULL; @@ -1656,7 +1636,7 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, if (cm_info->loc_addr == cm_info->rem_addr) { loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr, cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE); - if (NULL == loopbackremotelistener) { + if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { u16 temp; @@ -1672,9 +1652,10 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, loopbackremotenode->mpa_frame_size = mpa_frame_size - sizeof(struct ietf_mpa_frame); - create_event(loopbackremotenode, 
NES_CM_EVENT_MPA_REQ); // we are done handling this state, set node to a TSA state cm_node->state = NES_CM_STATE_TSA; + + create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); } return cm_node; } @@ -1684,6 +1665,7 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, /* init our MPA frame ptr */ memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size); cm_node->mpa_frame_size = mpa_frame_size; + cm_node->tcp_cntxt.snd_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; /* send a syn and goto syn sent state */ cm_node->state = NES_CM_STATE_SYN_SENT; @@ -1809,13 +1791,8 @@ int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, iph = (struct iphdr *)skb->data; tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); -#ifdef OFED_1_2 - skb->nh.iph = iph; - skb->h.th = tcph; -#else skb_reset_network_header(skb); skb_set_transport_header(skb, sizeof(*tcph)); -#endif skb->len = htons(iph->tot_len); nfo.loc_addr = ntohl(iph->daddr); @@ -1823,6 +1800,9 @@ int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, nfo.rem_addr = ntohl(iph->saddr); nfo.rem_port = ntohs(tcph->source); + nes_debug(NES_DBG_CM, "Received packet: dest=0x%08X:0x%04X src=0x%08X:0x%04X\n", + iph->daddr, tcph->dest, iph->saddr, tcph->source); + /* note: this call is going to increment cm_node ref count */ cm_node = find_node(cm_core, nfo.rem_port, nfo.rem_addr, @@ -2071,12 +2051,11 @@ int nes_cm_disconn(struct nes_qp *nesqp) unsigned long flags; spin_lock_irqsave(&nesqp->lock, flags); - if (0==nesqp->disconn_pending) { + if (nesqp->disconn_pending == 0) { nesqp->disconn_pending++; spin_unlock_irqrestore(&nesqp->lock, flags); /* nes_add_ref(&nesqp->ibqp); */ /* init our disconnect work element, to */ - /* NES_INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker, (void *)nesqp); */ INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker); queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work); @@ -2092,9 +2071,8 @@ int nes_cm_disconn(struct nes_qp *nesqp) /** 
* nes_disconnect_worker */ -void nes_disconnect_worker(void *parm) +void nes_disconnect_worker(struct work_struct *work) { - struct work_struct *work = parm; struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work); nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", @@ -2113,6 +2091,7 @@ int nes_cm_disconn_true(struct nes_qp *nesqp) struct iw_cm_id *cm_id; struct iw_cm_event cm_event; struct nes_vnic *nesvnic; + struct nes_cm_node *cm_node = NULL; u16 last_ae; u8 original_hw_tcp_state; u8 original_ibqp_state; @@ -2184,7 +2163,7 @@ int nes_cm_disconn_true(struct nes_qp *nesqp) original_ibqp_state = nesqp->ibqp_state; last_ae = nesqp->last_aeq; - if ((0 == issued_disconnect_reset) && (nesqp->cm_id) && + if ((issued_disconnect_reset == 0) && (nesqp->cm_id) && ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) || (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) || (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) || @@ -2213,7 +2192,7 @@ int nes_cm_disconn_true(struct nes_qp *nesqp) cm_id->rem_ref(cm_id); spin_lock_irqsave(&nesqp->lock, flags); - if (0 == nesqp->flush_issued) { + if (nesqp->flush_issued == 0) { nesqp->flush_issued = 1; spin_unlock_irqrestore(&nesqp->lock, flags); flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1); @@ -2305,6 +2284,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iw_cm_event cm_event; struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; + struct iw_cm_id *lb_cm_id; int ret; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); @@ -2317,6 +2297,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesdev = nesvnic->nesdev; adapter = nesdev->nesadapter; + nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", + nesvnic, nesvnic->netdev, nesvnic->netdev->name); + /* since this is from a listen, we were able to put node handle into cm_id */ cm_node = (struct nes_cm_node *)cm_id->provider_data; @@ -2407,17 +2390,14 @@ int 
nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); memset(&nes_quad, 0, sizeof(nes_quad)); - - nes_quad.DstIpAdrIndex = (u32)PCI_FUNC(nesdev->pcidev->devfn) << 27; + nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; /* Produce hash key */ - nesqp->hte_index = nes_crc32(1, NES_HASH_CRC_INITAL_VALUE, - NES_HASH_CRC_FINAL_XOR, sizeof(nes_quad), - (u8 *)&nes_quad, ORDER, REFIN, REFOUT); - + nesqp->hte_index = cpu_to_be32( + crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); @@ -2602,9 +2582,14 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) if (!nesvnic) return -EINVAL; adapter = nesvnic->nesdev->nesadapter; + nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", + nesvnic, nesvnic->netdev, nesvnic->netdev->name); + + nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n", + nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr); /* setup listen params in our api call struct */ - cm_info.loc_addr = cm_id->local_addr.sin_addr.s_addr; + cm_info.loc_addr = nesvnic->local_ipaddr; cm_info.loc_port = cm_id->local_addr.sin_port; cm_info.backlog = backlog; cm_info.cm_id = cm_id; @@ -2789,18 +2774,16 @@ void cm_event_connected(struct nes_cm_event *event) memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = (u32)PCI_FUNC(nesdev->pcidev->devfn) << 27; + nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; - nesqp->hte_index = 
nes_crc32( 1, NES_HASH_CRC_INITAL_VALUE, - NES_HASH_CRC_FINAL_XOR, sizeof(nes_quad), (u8 *)&nes_quad, - ORDER, REFIN, REFOUT); - - nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X, TcpPorts = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask, - le32_to_cpu(nes_quad.TcpPorts)); + /* Produce hash key */ + nesqp->hte_index = cpu_to_be32( + crc32c(~0, (void *)&nes_quad, sizeof(nes_quad)) ^ 0xffffffff); + nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); nesqp->hte_index &= nesadapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); @@ -2907,6 +2890,9 @@ void cm_event_reset(struct nes_cm_event *event) if (!event->cm_node) return; + if (!event->cm_node->cm_id) + return; + cm_id = event->cm_node->cm_id; nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id); @@ -2925,6 +2911,7 @@ void cm_event_reset(struct nes_cm_event *event) ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); + /* notify OF layer about this connection error event */ cm_id->rem_ref(cm_id); @@ -2975,7 +2962,7 @@ void cm_event_mpa_req(struct nes_cm_event *event) } -static void nes_cm_event_handler(void *parm); +static void nes_cm_event_handler(struct work_struct *); /** * nes_cm_post_event @@ -2986,7 +2973,6 @@ int nes_cm_post_event(struct nes_cm_event *event) atomic_inc(&event->cm_node->cm_core->events_posted); add_ref_cm_node(event->cm_node); event->cm_info.cm_id->add_ref(event->cm_info.cm_id); - /* NES_INIT_WORK(&event->event_work, nes_cm_event_handler, (void *)event); */ INIT_WORK(&event->event_work, nes_cm_event_handler); nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event); @@ -3002,9 +2988,8 @@ int nes_cm_post_event(struct nes_cm_event *event) * worker function to handle cm events * will free instance of nes_cm_event */ -static void nes_cm_event_handler(void *parm) 
+static void nes_cm_event_handler(struct work_struct *work) { - struct work_struct *work = parm; struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work); struct nes_cm_core *cm_core; @@ -3025,7 +3010,8 @@ static void nes_cm_event_handler(void *parm) cm_event_reset(event); break; case NES_CM_EVENT_CONNECTED: - if ((!event->cm_node->cm_id) || (event->cm_node->state != NES_CM_STATE_TSA)) { + if ((!event->cm_node->cm_id) || + (event->cm_node->state != NES_CM_STATE_TSA)) { break; } cm_event_connected(event); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 8956b32..00eaeb1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -43,7 +43,7 @@ #define IEFT_MPA_KEY_REQ "MPA ID Req Frame" #define IEFT_MPA_KEY_REP "MPA ID Rep Frame" #define IETF_MPA_KEY_SIZE 16 -#define IETF_MPA_VERSION 1 +#define IETF_MPA_VERSION 1 enum ietf_mpa_flags { IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ @@ -131,32 +131,32 @@ struct nes_timer_entry { struct net_device *netdev; }; -#define NES_DEFAULT_RETRYS 64 +#define NES_DEFAULT_RETRYS 64 #define NES_DEFAULT_RETRANS 8 #ifdef CONFIG_INFINIBAND_NES_DEBUG -#define NES_RETRY_TIMEOUT (1000*HZ/1000) +#define NES_RETRY_TIMEOUT (1000*HZ/1000) #else -#define NES_RETRY_TIMEOUT (1000*HZ/10000) +#define NES_RETRY_TIMEOUT (1000*HZ/10000) #endif -#define NES_SHORT_TIME (10) -#define NES_LONG_TIME (2000*HZ/1000) +#define NES_SHORT_TIME (10) +#define NES_LONG_TIME (2000*HZ/1000) -#define NES_CM_HASHTABLE_SIZE 1024 -#define NES_CM_TCP_TIMER_INTERVAL 3000 -#define NES_CM_DEFAULT_MTU 1540 +#define NES_CM_HASHTABLE_SIZE 1024 +#define NES_CM_TCP_TIMER_INTERVAL 3000 +#define NES_CM_DEFAULT_MTU 1540 #define NES_CM_DEFAULT_FRAME_CNT 10 -#define NES_CM_THREAD_STACK_SIZE 256 -#define NES_CM_DEFAULT_RCV_WND 64240 // before we know that window scaling is allowed -#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 // after we know that window scaling is allowed -#define 
NES_CM_DEFAULT_RCV_WND_SCALE 2 -#define NES_CM_DEFAULT_FREE_PKTS 0x000A -#define NES_CM_FREE_PKT_LO_WATERMARK 2 +#define NES_CM_THREAD_STACK_SIZE 256 +#define NES_CM_DEFAULT_RCV_WND 64240 // before we know that window scaling is allowed +#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 // after we know that window scaling is allowed +#define NES_CM_DEFAULT_RCV_WND_SCALE 2 +#define NES_CM_DEFAULT_FREE_PKTS 0x000A +#define NES_CM_FREE_PKT_LO_WATERMARK 2 -#define NES_CM_DEF_SEQ 0x159bf75f -#define NES_CM_DEF_LOCAL_ID 0x3b47 +#define NES_CM_DEF_SEQ 0x159bf75f +#define NES_CM_DEF_LOCAL_ID 0x3b47 -#define NES_CM_DEF_SEQ2 0x18ed5740 -#define NES_CM_DEF_LOCAL_ID2 0xb807 +#define NES_CM_DEF_SEQ2 0x18ed5740 +#define NES_CM_DEF_LOCAL_ID2 0xb807 typedef u32 nes_addr_t; @@ -192,15 +192,15 @@ enum nes_cm_conn_type { /* CM context params */ struct nes_cm_tcp_context { - u8 client; + u8 client; - u32 loc_seq_num; - u32 loc_ack_num; - u32 rem_ack_num; - u32 rcv_nxt; + u32 loc_seq_num; + u32 loc_ack_num; + u32 rem_ack_num; + u32 rcv_nxt; - u32 loc_id; - u32 rem_id; + u32 loc_id; + u32 rem_id; u32 snd_wnd; u32 max_snd_wnd; @@ -211,7 +211,7 @@ struct nes_cm_tcp_context { u8 rcv_wscale; struct nes_cm_tsa_context tsa_cntxt; - struct timeval sent_ts; + struct timeval sent_ts; }; @@ -222,68 +222,67 @@ enum nes_cm_listener_state { }; struct nes_cm_listener { - struct list_head list; - u64 session_id; - struct nes_cm_core *cm_core; - u8 loc_mac[ETH_ALEN]; - nes_addr_t loc_addr; - u16 loc_port; - struct iw_cm_id *cm_id; - enum nes_cm_conn_type conn_type; - atomic_t ref_count; - struct nes_vnic *nesvnic; - atomic_t pend_accepts_cnt; - int backlog; + struct list_head list; + u64 session_id; + struct nes_cm_core *cm_core; + u8 loc_mac[ETH_ALEN]; + nes_addr_t loc_addr; + u16 loc_port; + struct iw_cm_id *cm_id; + enum nes_cm_conn_type conn_type; + atomic_t ref_count; + struct nes_vnic *nesvnic; + atomic_t pend_accepts_cnt; + int backlog; enum nes_cm_listener_state listener_state; - u32 reused_node; + 
u32 reused_node; }; /* per connection node and node state information */ struct nes_cm_node { - u64 session_id; - u32 hashkey; - - nes_addr_t loc_addr, rem_addr; - u16 loc_port, rem_port; + u64 session_id; + u32 hashkey; + nes_addr_t loc_addr, rem_addr; + u16 loc_port, rem_port; - u8 loc_mac[ETH_ALEN]; - u8 rem_mac[ETH_ALEN]; + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; - enum nes_cm_node_state state; + enum nes_cm_node_state state; struct nes_cm_tcp_context tcp_cntxt; - struct nes_cm_core *cm_core; - struct sk_buff_head resend_list; - atomic_t ref_count; - struct net_device *netdev; - - struct nes_cm_node *loopbackpartner ; - struct list_head retrans_list; - spinlock_t retrans_list_lock; - struct list_head recv_list; - spinlock_t recv_list_lock; + struct nes_cm_core *cm_core; + struct sk_buff_head resend_list; + atomic_t ref_count; + struct net_device *netdev; + + struct nes_cm_node *loopbackpartner; + struct list_head retrans_list; + spinlock_t retrans_list_lock; + struct list_head recv_list; + spinlock_t recv_list_lock; - int send_write0; + int send_write0; union { struct ietf_mpa_frame mpa_frame; - u8 mpa_frame_buf[NES_CM_DEFAULT_MTU]; + u8 mpa_frame_buf[NES_CM_DEFAULT_MTU]; }; - u16 mpa_frame_size; - struct iw_cm_id *cm_id; - struct list_head list; - int accelerated; - struct nes_cm_listener *listener; - enum nes_cm_conn_type conn_type; - struct nes_vnic *nesvnic; - int apbvt_set; - int accept_pend; + u16 mpa_frame_size; + struct iw_cm_id *cm_id; + struct list_head list; + int accelerated; + struct nes_cm_listener *listener; + enum nes_cm_conn_type conn_type; + struct nes_vnic *nesvnic; + int apbvt_set; + int accept_pend; }; /* structure for client or CM to fill when making CM api calls. */ /* - only need to set relevant data, based on op. 
*/ struct nes_cm_info { union { - struct iw_cm_id *cm_id; + struct iw_cm_id *cm_id; struct net_device *netdev; }; @@ -325,36 +324,36 @@ struct nes_cm_event { }; struct nes_cm_core { - enum nes_cm_node_state state; - atomic_t session_id; - - atomic_t listen_node_cnt; - struct nes_cm_node listen_list; - spinlock_t listen_list_lock; - - u32 mtu; - u32 free_tx_pkt_max; - u32 rx_pkt_posted; - struct sk_buff_head tx_free_list; - atomic_t ht_node_cnt; - struct list_head connected_nodes; + enum nes_cm_node_state state; + atomic_t session_id; + + atomic_t listen_node_cnt; + struct nes_cm_node listen_list; + spinlock_t listen_list_lock; + + u32 mtu; + u32 free_tx_pkt_max; + u32 rx_pkt_posted; + struct sk_buff_head tx_free_list; + atomic_t ht_node_cnt; + struct list_head connected_nodes; /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */ - spinlock_t ht_lock; + spinlock_t ht_lock; - struct timer_list tcp_timer; + struct timer_list tcp_timer; - struct nes_cm_ops *api; + struct nes_cm_ops *api; int (*post_event)(struct nes_cm_event *event); - atomic_t events_posted; + atomic_t events_posted; struct workqueue_struct *event_wq; struct workqueue_struct *disconn_wq; - atomic_t node_cnt; - u64 aborted_connects; - u32 options; + atomic_t node_cnt; + u64 aborted_connects; + u32 options; - struct nes_cm_node *current_listen_node; + struct nes_cm_node *current_listen_node; }; @@ -408,7 +407,7 @@ int mini_cm_get(struct nes_cm_core *); int mini_cm_set(struct nes_cm_core *, u32, u32); int nes_cm_disconn(struct nes_qp *); -void nes_disconnect_worker(void *); +void nes_disconnect_worker(struct work_struct *); int nes_cm_disconn_true(struct nes_qp *); int nes_disconnect(struct nes_qp *, int); @@ -430,4 +429,3 @@ void cm_event_mpa_req(struct nes_cm_event *); int nes_cm_post_event(struct nes_cm_event *); #endif /* NES_CM_H */ - diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h From glenn at lists.openfabrics.org Wed Nov 14 14:25:33 2007 From: 
glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Wed, 14 Nov 2007 14:25:33 -0800 (PST) Subject: [ofa-general] [PATCH 3/6] nes: Cosmetic changes; support virtual WQs and PPC Message-ID: <20071114222533.8396AE28076@openfabrics.org> Updated code for the NetEffect NE020 adapter. Updates include: - Support for userspace/virtual WQs. - PowerPC - Support for multiple debugging levels - Many, many cosmetic changes inline with kernel.org standards Diffs for nes_hw.c and nes_hw.h Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 09d89a9..d2ab5a7 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -42,6 +42,14 @@ #include "nes.h" u32 crit_err_count = 0; +u32 int_mod_timer_init; +u32 int_mod_cq_depth_256; +u32 int_mod_cq_depth_128; +u32 int_mod_cq_depth_32; +u32 int_mod_cq_depth_24; +u32 int_mod_cq_depth_16; +u32 int_mod_cq_depth_4; +u32 int_mod_cq_depth_1; #include "nes_cm.h" @@ -80,6 +88,125 @@ static unsigned char *nes_tcp_state_str[] = { /** + * nes_nic_init_timer_defaults + */ +void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + shared_timer->timer_in_use_min = NES_NIC_FAST_TIMER_LOW; + shared_timer->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH; + if (jumbomode) { + shared_timer->threshold_low = DEFAULT_JUMBO_NES_QL_LOW; + shared_timer->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET; + shared_timer->threshold_high = DEFAULT_JUMBO_NES_QL_HIGH; + } else { + shared_timer->threshold_low = DEFAULT_NES_QL_LOW; + shared_timer->threshold_target = DEFAULT_NES_QL_TARGET; + shared_timer->threshold_high = DEFAULT_NES_QL_HIGH; + } + + /* todo use netdev->mtu to set thresholds */ + 
spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_nic_init_timer + */ +static void nes_nic_init_timer(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + if (shared_timer->timer_in_use_old == 0) { + nesdev->deepcq_count = 0; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + shared_timer->timer_in_use = NES_NIC_FAST_TIMER; + shared_timer->timer_in_use_old = 0; + + } + if (shared_timer->timer_in_use != shared_timer->timer_in_use_old) { + shared_timer->timer_in_use_old = shared_timer->timer_in_use; + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, + 0x80000000 | ((u32)(shared_timer->timer_in_use*8))); + } + /* todo use netdev->mtu to set thresholds */ + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** + * nes_nic_tune_timer + */ +static void nes_nic_tune_timer(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + + if (shared_timer->cq_count>1) { + nesdev->deepcq_count += shared_timer->cq_count; + if (shared_timer->cq_count <= shared_timer->threshold_low ) { /* increase timer gently */ + shared_timer->timer_direction_upward++; + shared_timer->timer_direction_downward = 0; + } + else if (shared_timer->cq_count <= shared_timer->threshold_target ) { /* balanced */ + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + } + else if (shared_timer->cq_count <= shared_timer->threshold_high ) { /* decrease timer gently */ + shared_timer->timer_direction_downward++; + shared_timer->timer_direction_upward = 0; + } + else if (shared_timer->cq_count <= 
(shared_timer->threshold_high)*2) { + shared_timer->timer_in_use -= 2; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward++; + } + else { + shared_timer->timer_in_use -= 4; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward++; + } + + if (shared_timer->timer_direction_upward > 3 ) { /* using history */ + shared_timer->timer_in_use += 3; + shared_timer->timer_direction_upward = 0; + shared_timer->timer_direction_downward = 0; + } + if (shared_timer->timer_direction_downward > 5) { /* using history */ + shared_timer->timer_in_use -= 4 ; + shared_timer->timer_direction_downward = 0; + shared_timer->timer_direction_upward = 0; + } + } + + /* boundary checking */ + if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH) { + shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH; + } + else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) { + shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW; + } + + shared_timer->cq_count = 0; + + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); +} + + +/** * nes_init_adapter - initialize adapter */ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { @@ -139,17 +266,8 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { } nes_init_csr_ne020(nesdev, hw_rev, port_count); - /* Setup and enable the periodic timer */ - nesdev->et_rx_coalesce_usecs_irq = interrupt_mod_interval; - if (nesdev->et_rx_coalesce_usecs_irq) { - nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 | - ((u32)(nesdev->et_rx_coalesce_usecs_irq * 8))); - } else { - nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000); - } - max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE); - nes_debug(NES_DBG_INIT, "%s: QP_CTX_SIZE=%u\n", __FUNCTION__, max_qp); + nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp); u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE); if (max_qp > ((u32)1 << (u32temp & 0x001f))) { 
@@ -197,11 +315,11 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { adapter_size += sizeof(struct nes_qp **) * max_qp; /* allocate a new adapter struct */ - nesadapter = kmalloc(adapter_size, GFP_KERNEL); + nesadapter = kzalloc(adapter_size, GFP_KERNEL); if (nesadapter == NULL) { return NULL; } - memset(nesadapter, 0, adapter_size); + nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); @@ -211,6 +329,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->ref_count = 1; nesadapter->timer_int_req = 0xffff0000; nesadapter->OneG_Mode = OneG_Mode; + nesadapter->doorbell_start = nesdev->doorbell_region; /* nesadapter->tick_delta = clk_divisor; */ nesadapter->hw_rev = hw_rev; @@ -227,6 +346,26 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->free_4kpbl = max_4kpbl - 1; nesadapter->max_pd = num_pds; nesadapter->arp_table_size = arp_table_size; + + nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT; + if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) { + nesadapter->et_use_adaptive_rx_coalesce = 0; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + } else { + nesadapter->et_use_adaptive_rx_coalesce = 1; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->et_rx_coalesce_usecs_irq = 0; + printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __FUNCTION__); + } + /* Setup and enable the periodic timer */ + if (nesadapter->et_rx_coalesce_usecs_irq) { + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 | + ((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8))); + } else { + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000); + } + nesadapter->base_pd = 1; nesadapter->device_cap_flags = @@ -320,6 +459,8 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, 
u8 hw_rev) { spin_lock_init(&nesadapter->resource_lock); spin_lock_init(&nesadapter->phy_lock); + spin_lock_init(&nesadapter->pbl_lock); + spin_lock_init(&nesadapter->periodic_timer_lock); INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]); INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]); @@ -473,8 +614,6 @@ int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, u8 One if (port_count > 1) { /* init serdes 1 */ - // nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F008); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); if (!OneG_Mode) { @@ -540,6 +679,8 @@ void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) { u32 u32temp; + nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count); + nes_write_indexed(nesdev, 0x000001E4, 0x00000007); /* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */ nes_write_indexed(nesdev, 0x000001E8, 0x00020844); @@ -563,6 +704,7 @@ void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) nes_write_indexed(nesdev, 0x0000220C, 0x00000001); nes_write_indexed(nesdev, 0x00002210, 0x000003c1); nes_write_indexed(nesdev, 0x0000221C, 0x75345678); + nes_write_indexed(nesdev, 0x00000908, 0x20000001); } if (port_count > 2) { nes_write_indexed(nesdev, 0x00002400, 0x00000001); @@ -571,6 +713,7 @@ void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) nes_write_indexed(nesdev, 0x0000240C, 0x00000001); nes_write_indexed(nesdev, 0x00002410, 0x000003c1); nes_write_indexed(nesdev, 0x0000241C, 0x75345678); + nes_write_indexed(nesdev, 0x00000910, 0x20000001); nes_write_indexed(nesdev, 0x00002600, 0x00000001); nes_write_indexed(nesdev, 0x00002604, 0x00000001); @@ -578,6 +721,7 @@ void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) nes_write_indexed(nesdev, 0x0000260C, 0x00000001); nes_write_indexed(nesdev, 0x00002610, 0x000003c1); nes_write_indexed(nesdev, 0x0000261C, 0x75345678); + nes_write_indexed(nesdev, 0x00000918, 
0x20000001); } nes_write_indexed(nesdev, 0x00005000, 0x00018000); @@ -593,7 +737,7 @@ void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) nes_write_indexed(nesdev, 0x00000900, 0x20000001); nes_write_indexed(nesdev, 0x000060C0, 0x0000028e); nes_write_indexed(nesdev, 0x000060C8, 0x00000020); - + // nes_write_indexed(nesdev, 0x000001EC, 0x5b2625a0); /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */ @@ -647,10 +791,6 @@ int nes_init_cqp(struct nes_device *nesdev) u64 u64temp; u32 u32temp; -#define NES_NIC_CEQ_SIZE 8 -/* NICs will be on a separate CQ */ -#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32) - /* allocate CQP memory */ /* Need to add max_cq to the aeq size once cq overflow checking is added back */ /* SQ is 512 byte aligned, others are 256 byte aligned */ @@ -671,16 +811,15 @@ int nes_init_cqp(struct nes_device *nesdev) memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size); /* Allocate a twice the number of CQP requests as the SQ size */ - nesdev->nes_cqp_requests = kmalloc(sizeof(struct nes_cqp_request) * + nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) * 2 * NES_CQP_SQ_SIZE, GFP_KERNEL); - if (NULL == nesdev->nes_cqp_requests) { + if (nesdev->nes_cqp_requests == NULL) { nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n"); pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase, nesdev->cqp.sq_pbase); return -ENOMEM; } - memset(nesdev->nes_cqp_requests, 0, sizeof(struct nes_cqp_request) * - 2 * NES_CQP_SQ_SIZE); + nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n", nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size); @@ -744,7 +883,8 @@ int nes_init_cqp(struct nes_device *nesdev) pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size); cqp_qp_context = vmem; - cqp_qp_context->context_words[0] = cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10)); + 
cqp_qp_context->context_words[0] = + cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10)); cqp_qp_context->context_words[1] = 0; cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase); cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32); @@ -763,12 +903,10 @@ int nes_init_cqp(struct nes_device *nesdev) NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8), (u32)pmem); - nes_debug(NES_DBG_INIT, "Address of CQP SQ = %p.\n", nesdev->cqp.sq_vbase); - INIT_LIST_HEAD(&nesdev->cqp_avail_reqs); INIT_LIST_HEAD(&nesdev->cqp_pending_reqs); - for (count=0; count<2*NES_CQP_SQ_SIZE; count++) { + for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) { init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq); list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs); } @@ -776,12 +914,15 @@ int nes_init_cqp(struct nes_device *nesdev) /* Write Create CCQ WQE */ cqp_head = nesdev->cqp.sq_head++; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = + cpu_to_le32(NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16)); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number | - ((u32)nesdev->ceq_index<<16)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + ((u32)nesdev->ceq_index << 16)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; u64temp = (u64)nesdev->ccq.cq_pbase; @@ -789,13 
+930,10 @@ int nes_init_cqp(struct nes_device *nesdev) cqp_wqe->wqe_words[NES_CQP_CQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; u64temp = (u64)&nesdev->ccq; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp>>1)); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = cpu_to_le32(((u32)((u64temp)>>33))&0x7FFFFFFF); - nes_debug(NES_DBG_INIT, "CQ%u context = 0x%08X:0x%08X.\n", - nesdev->ccq.cq_number, - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX]), - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX])); - + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = + cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; /* Write Create CEQ WQE */ @@ -804,8 +942,10 @@ int nes_init_cqp(struct nes_device *nesdev) cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8)); cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX] = cpu_to_le32(ceq->ceq_size); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; u64temp = (u64)ceq->ceq_pbase; @@ -818,22 +958,27 @@ int nes_init_cqp(struct nes_device *nesdev) cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)); 
cqp_wqe->wqe_words[NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX] = cpu_to_le32(aeq->aeq_size); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; u64temp = (u64)aeq->aeq_pbase; cqp_wqe->wqe_words[NES_CQP_AEQ_WQE_PBL_LOW_IDX] = cpu_to_le32((u32)u64temp); - cqp_wqe->wqe_words[NES_CQP_AEQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + cqp_wqe->wqe_words[NES_CQP_AEQ_WQE_PBL_HIGH_IDX] = + cpu_to_le32((u32)(u64temp >> 32)); - /* Write Create CEQ WQE */ + /* Write Create NIC CEQ WQE */ cqp_head = nesdev->cqp.sq_head++; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8)); cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX] = cpu_to_le32(nic_ceq->ceq_size); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; u64temp = (u64)nic_ceq->ceq_pbase; @@ -870,7 +1015,7 @@ int nes_init_cqp(struct nes_device *nesdev) } udelay(10); } while (((nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8))); + 
NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8))); /* dump the QP status value */ nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev, @@ -888,13 +1033,13 @@ int nes_init_cqp(struct nes_device *nesdev) int nes_destroy_cqp(struct nes_device *nesdev) { struct nes_hw_cqp_wqe *cqp_wqe; - u32 count=0; + u32 count = 0; u32 cqp_head; unsigned long flags; - nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP work to complete.\n"); do { - if (count++ > 1000) break; + if (count++ > 1000) + break; udelay(10); } while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail)); @@ -904,46 +1049,51 @@ int nes_destroy_cqp(struct nes_device *nesdev) /* Disable device interrupts */ nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff); - /* Destroy the AEQ */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy the AEQ */ cqp_head = nesdev->cqp.sq_head++; nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ | - ((u32)PCI_FUNC(nesdev->pcidev->devfn)<<8)); + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; + /* Destroy the NIC CEQ */ cqp_head = nesdev->cqp.sq_head++; nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | - ((u32)nesdev->nic_ceq_index<<8)); + ((u32)nesdev->nic_ceq_index << 8)); + /* Destroy the CEQ */ cqp_head = nesdev->cqp.sq_head++; nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ | - (nesdev->ceq_index<<8)); + (nesdev->ceq_index << 8)); + /* Destroy the CCQ */ cqp_head = nesdev->cqp.sq_head++; nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = 
cpu_to_le32(NES_CQP_DESTROY_CQ); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( nesdev->ccq.cq_number || - ((u32)nesdev->ceq_index<<16)); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number | + ((u32)nesdev->ceq_index << 16)); + /* Destroy CQP */ cqp_head = nesdev->cqp.sq_head++; nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_CQP); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id); barrier(); - /* Ring doorbell (4 WQEs) */ + /* Ring doorbell (5 WQEs) */ nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id); - /* Wait for the destroy to complete */ spin_unlock_irqrestore(&nesdev->cqp.lock, flags); /* wait for the CCQ, CEQ, and AEQ to get destroyed */ @@ -956,7 +1106,7 @@ int nes_destroy_cqp(struct nes_device *nesdev) } udelay(10); } while (((nes_read_indexed(nesdev, - NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != 0)); + NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0)); /* dump the QP status value */ nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n", @@ -982,14 +1132,21 @@ int nes_init_phy(struct nes_device *nesdev) struct nes_adapter *nesadapter = nesdev->nesadapter; u32 counter = 0; u32 mac_index = nesdev->mac_index; + u32 tx_config; u16 phy_data; if (nesadapter->OneG_Mode) { nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index); + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { + printk(PFX "%s: Programming mdc config for 1G\n", __FUNCTION__); + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config |= 0x04; + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + } + 
nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data); nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n", nesadapter->phy_index[mac_index], phy_data); - nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); /* Reset the PHY */ @@ -1043,6 +1200,13 @@ int nes_init_phy(struct nes_device *nesdev) nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300); + } else { + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { + /* setup 10G MDIO operation */ + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config |= 0x14; + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + } } return 0; } @@ -1085,7 +1249,7 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) nesnic->rq_head &= nesnic->rq_size - 1; atomic_dec(&nesvnic->rx_skbs_needed); barrier(); - if (++rx_wqes_posted==255) { + if (++rx_wqes_posted == 255) { nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); rx_wqes_posted = 0; } @@ -1094,7 +1258,7 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) __FUNCTION__, __LINE__, atomic_read(&nesvnic->rx_skbs_needed)); if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) && - (0 == atomic_read(&nesvnic->rx_skb_timer_running))) { + (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) { printk("%s[%u] Starting Timer.\n", __FUNCTION__, __LINE__); atomic_set(&nesvnic->rx_skb_timer_running, 1); nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */ @@ -1143,6 +1307,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) u32 cqp_head; u32 counter; u32 wqe_count; + u8 jumbomode=0; /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */ nesvnic->nic_mem_size = 256 + @@ -1227,9 +1392,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) /* 
Send CreateCQ request to CQP */ spin_lock_irqsave(&nesdev->cqp.lock, flags); cqp_head = nesdev->cqp.sq_head; - nes_debug(NES_DBG_INIT, "Before filling out cqp_wqe, cqp=%p, sq_head=%u," - " sq_tail=%u, cqp_head=%u\n", - &nesdev->cqp, nesdev->cqp.sq_head, nesdev->cqp.sq_tail, cqp_head); cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; @@ -1238,8 +1400,8 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) ((u32)nesvnic->nic_cq.cq_size << 16)); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; u64temp = (u64)nesvnic->nic_cq.cq_pbase; @@ -1247,9 +1409,9 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) cqp_wqe->wqe_words[NES_CQP_CQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; u64temp = (u64)&nesvnic->nic_cq; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp>>1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = - cpu_to_le32(((u32)((u64temp)>>33))&0x7FFFFFFF); + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; if (++cqp_head >= nesdev->cqp.sq_size) cqp_head = 0; @@ -1263,7 +1425,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) nes_debug(NES_DBG_INIT, 
"RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); - if (0!= nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)) { + if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) { nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE); } @@ -1346,13 +1508,19 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) init_timer(&nesvnic->rq_wqes_timer); nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout; nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic; -#ifdef NES_INT_MODERATE - nes_debug(NES_DBG_INIT, "Default Interrupt Moderation Enabled\n"); -#endif #ifdef NES_NAPI nes_debug(NES_DBG_INIT, "NAPI support Enabled\n"); #endif + if (nesdev->nesadapter->et_use_adaptive_rx_coalesce) + { + nes_nic_init_timer(nesdev); + if (netdev->mtu > 1500) { + jumbomode = 1; + } + nes_nic_init_timer_defaults(nesdev, jumbomode); + } + return 0; } @@ -1381,29 +1549,36 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); } - /* Destroy NIC QP */ spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy NIC QP */ cqp_head = nesdev->cqp.sq_head; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC); - cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic_cq.cq_number); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = + cpu_to_le32(NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + 
cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; if (++cqp_head >= nesdev->cqp.sq_size) cqp_head = 0; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; /* Destroy NIC CQ */ - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ | - ((u32)nesvnic->nic_cq.cq_size << 16)); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = + cpu_to_le32(NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16)); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; @@ -1437,13 +1612,13 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) nesvnic->nic_pbase); } - #ifdef NES_NAPI /** * nes_napi_isr */ int nes_napi_isr(struct nes_device *nesdev) { + struct nes_adapter *nesadapter = nesdev->nesadapter; u32 int_stat; if (nesdev->napi_isr_ran) { @@ -1467,7 +1642,10 @@ int nes_napi_isr(struct nes_device *nesdev) /* Process the CEQs */ nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]); - if (nesdev->et_rx_coalesce_usecs_irq) { + if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) && + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesadapter->et_use_adaptive_rx_coalesce) && + (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) { if ((nesdev->int_req & NES_INT_TIMER) == 
0) { /* Enable Periodic timer interrupts */ nesdev->int_req |= NES_INT_TIMER; @@ -1478,6 +1656,11 @@ int nes_napi_isr(struct nes_device *nesdev) nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER)); } + + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + nes_nic_init_timer(nesdev); + } /* Enable interrupts, except CEQs */ nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req)); } else { @@ -1485,8 +1668,9 @@ int nes_napi_isr(struct nes_device *nesdev) nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + nesadapter->tune_timer.timer_in_use_old = 0; } - + nesdev->deepcq_count = 0; return 1; } else { return 0; @@ -1524,12 +1708,12 @@ void nes_dpc(unsigned long param) int_stat = nesdev->int_stat; } else int_stat = nes_read32(nesdev->regs+NES_INT_STAT); - if (0 != processed_intf_int) { + if (processed_intf_int != 0) { int_stat &= nesdev->int_req & ~NES_INT_INTF; } else { int_stat &= nesdev->int_req; } - if (0 == processed_timer_int) { + if (processed_timer_int == 0) { processed_timer_int = 1; if (int_stat & NES_INT_TIMER) { timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT); @@ -1616,19 +1800,24 @@ void nes_dpc(unsigned long param) NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3; } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS)); - if (1 == timer_ints) { - if (nesdev->et_rx_coalesce_usecs_irq) { - if (0 == completion_ints) { + if (timer_ints == 1) { + if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) { + if (completion_ints == 0) { nesdev->timer_only_int_count++; - if (nesdev->timer_only_int_count>=NES_TIMER_INT_LIMIT) { + if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) { nesdev->timer_only_int_count = 0; nesdev->int_req &= ~NES_INT_TIMER; nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req)); 
nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); + nesdev->nesadapter->tune_timer.timer_in_use_old = 0; } else { nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); } } else { + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + nes_nic_init_timer(nesdev); + } nesdev->timer_only_int_count = 0; nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req)); } @@ -1641,7 +1830,11 @@ void nes_dpc(unsigned long param) nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); } } else { - if ((1 == completion_ints) && (nesdev->et_rx_coalesce_usecs_irq)) { + if ( (completion_ints == 1) && + (((nesadapter->et_rx_coalesce_usecs_irq) && + (!nesadapter->et_use_adaptive_rx_coalesce)) || + ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) && + (nesadapter->et_use_adaptive_rx_coalesce) )) ) { /* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */ nesdev->timer_only_int_count = 0; nesdev->int_req |= NES_INT_TIMER; @@ -1654,6 +1847,7 @@ void nes_dpc(unsigned long param) nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req); } } + nesdev->deepcq_count = 0; } @@ -1690,6 +1884,7 @@ void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq) } else { break; } + } while (1); ceq->ceq_head = head; @@ -1827,6 +2022,22 @@ void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) phy_data = 0; nes_debug(NES_DBG_PHY, "PCS says the link is down\n"); } + } else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) { + nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); + temp_phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]); + phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + 
__FUNCTION__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); + } else { phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0; } @@ -1878,9 +2089,9 @@ void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq } #endif -// The MAX_RQES_TO_PROCESS defines how many max read requests to complete before -// getting out of nic_ce_handler -// +/* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before +* getting out of nic_ce_handler +*/ #define MAX_RQES_TO_PROCESS 384 /** @@ -1892,6 +2103,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) dma_addr_t bus_address; struct nes_hw_nic *nesnic; struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); + struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_hw_nic_rq_wqe *nic_rqe; struct nes_hw_nic_sq_wqe *nic_sqe; struct sk_buff *skb; @@ -1931,7 +2143,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]); u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32; bus_address = (dma_addr_t)u64temp; - if ((skb) && (skb_headlen(skb) > NES_FIRST_FRAG_SIZE)) { + if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) { pci_unmap_single(nesdev->pcidev, bus_address, le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]), @@ -1985,6 +2197,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) { nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); + nesadapter->tune_timer.cq_count += cqe_count; cqe_count = 0; nes_replenish_nic_rq(nesvnic); } @@ -1994,12 +2207,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) || 
(NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) { - if (0 == (cqe_errv & - (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | - NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR | - NES_NIC_ERRV_BITS_IPH_ERR | - NES_NIC_ERRV_BITS_WQE_OVERRUN))) { - if (0 == nesvnic->rx_checksum_disabled) { + if ((cqe_errv & + (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR | + NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { + if (nesvnic->rx_checksum_disabled == 0) { rx_skb->ip_summed = CHECKSUM_UNNECESSARY; } } else { @@ -2007,12 +2218,11 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) " errv = 0x%X, pkt_type = 0x%X.\n", nesvnic->netdev->name, cqe_errv, pkt_type); } - } else if (NES_PKT_TYPE_IPV4_BITS == (pkt_type & NES_PKT_TYPE_IPV4_MASK)) { - if (0 == (cqe_errv & - (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | - NES_NIC_ERRV_BITS_IPH_ERR | - NES_NIC_ERRV_BITS_WQE_OVERRUN))) { - if (0 == nesvnic->rx_checksum_disabled) { + } else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) { + if ((cqe_errv & + (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR | + NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { + if (nesvnic->rx_checksum_disabled == 0) { rx_skb->ip_summed = CHECKSUM_UNNECESSARY; /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n", nesvnic->netdev->name); */ @@ -2026,8 +2236,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) /* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n", pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */ - if (NES_PKT_TYPE_APBVT_BITS == (pkt_type & NES_PKT_TYPE_APBVT_MASK)) { - /* nes_debug(NES_DBG_CQ, "APBVT bit set; Send up NES; nesif_rx\n"); */ + if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) { nes_cm_recv(rx_skb, nesvnic->netdev); } else { if (cqe_misc & NES_NIC_CQE_TAG_VALID) { @@ -2065,6 +2274,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) /* Replenish Nic CQ 
*/ nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); + nesdev->nesadapter->tune_timer.cq_count += cqe_count; cqe_count = 0; } #ifdef NES_NAPI @@ -2075,9 +2285,11 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) nesvnic->cqes_pending = 0; break; } +#ifndef NES_NAPI if (rqes_processed > MAX_RQES_TO_PROCESS) { break; } +#endif } while (1); cq->cq_head = head; @@ -2091,6 +2303,11 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) cq->cq_number | (cqe_count << 16)); nes_read32(nesdev->regs+NES_CQE_ALLOC); #endif + if (unlikely(nesadapter->et_use_adaptive_rx_coalesce)) + { + nesdev->nesadapter->tune_timer.cq_count += cqe_count; + nes_nic_tune_timer(nesdev); + } if (atomic_read(&nesvnic->rx_skbs_needed)) { nes_replenish_nic_rq(nesvnic); } @@ -2153,10 +2370,8 @@ void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) cqp_request, le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -2168,9 +2383,7 @@ void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f); if (cqp_request->dynamic) { kfree(cqp_request); - atomic_inc(&cqp_reqs_dynfreed); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -2199,7 +2412,6 @@ void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) while ((!list_empty(&nesdev->cqp_pending_reqs)) && ((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) & (nesdev->cqp.sq_size - 1)) != 1)) { - 
atomic_inc(&cqp_reqs_redriven); cqp_request = list_entry(nesdev->cqp_pending_reqs.next, struct nes_cqp_request, list); list_del_init(&cqp_request->list); @@ -2261,18 +2473,16 @@ void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe) async_event_id = (u16)aeq_info; tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; - nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p, Tcp state = %d, iWARP state = %d\n", + nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p, Tcp state = %s, iWARP state = %s\n", async_event_id, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, - tcp_state, iwarp_state); - /* nes_tcp_state_str[tcp_state], - nes_iwarp_state_str[iwarp_state]); */ + nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); switch (async_event_id) { case NES_AEQE_AEID_LLP_FIN_RECEIVED: nesqp = *((struct nes_qp **)&context); - if (atomic_inc_return(&nesqp->close_timer_started)==1) { + if (atomic_inc_return(&nesqp->close_timer_started) == 1) { nesqp->cm_id->add_ref(nesqp->cm_id); nes_add_ref(&nesqp->ibqp); schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, @@ -2317,7 +2527,7 @@ void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe) if ((nesqp->ibqp_state == IB_QPS_RTS) && ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || - (async_event_id==NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { switch (nesqp->hw_iwarp_state) { case NES_AEQE_IWARP_STATE_RTS: next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; @@ -2352,7 +2562,7 @@ void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe) return; } spin_unlock_irqrestore(&nesqp->lock, flags); - if (async_event_id==NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { + if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { next_iwarp_state = 
NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000; nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X," @@ -2381,7 +2591,7 @@ void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe) } if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || ((nesqp->ibqp_state == IB_QPS_RTS)&& - (async_event_id==NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { nes_add_ref(&nesqp->ibqp); nes_cm_disconn(nesqp); } else { @@ -2572,6 +2782,7 @@ void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq) return; } + /** * nes_manage_apbvt() */ @@ -2586,8 +2797,8 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, u16 major_code; /* Send manage APBVT request to CQP */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); return -ENOMEM; } @@ -2599,32 +2810,31 @@ int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port, accel_local_port, accel_local_port, nic_index); cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_MANAGE_APBVT | - ((add_port==NES_MANAGE_APBVT_ADD) ? NES_CQP_APBVT_ADD : 0)); + ((add_port == NES_MANAGE_APBVT_ADD) ? 
NES_CQP_APBVT_ADD : 0)); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n"); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); - if (add_port==NES_MANAGE_APBVT_ADD) - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + if (add_port == NES_MANAGE_APBVT_ADD) + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); major_code = cqp_request->major_code; if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -2654,15 +2864,12 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, nesdev = nesvnic->nesdev; arp_index = nes_arp_table(nesdev, ip_addr, mac_addr, action); if (arp_index == -1) { - /* nes_debug(NES_DBG_NETDEV, "nes_arp_table call returned -1\n"); */ return; 
} - /* nes_debug(NES_DBG_NETDEV, "Update the ARP entry, arp_index=%d\n", arp_index); */ - /* update the ARP entry */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n"); return; } @@ -2674,8 +2881,10 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32( (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(arp_index); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; @@ -2695,8 +2904,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); atomic_set(&cqp_request->refcount, 1); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); } @@ -2711,8 +2919,8 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, struct nes_hw_cqp_wqe *cqp_wqe; int ret; - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n"); return; } @@ -2732,22 +2940,19 @@ void flush_wqes(struct nes_device *nesdev, 
struct nes_qp *nesqp, cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); if (wait_completion) { /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u," " CQP Major:Minor codes = 0x%04X:0x%04X\n", ret, cqp_request->major_code, cqp_request->minor_code); if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -2755,4 +2960,3 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, } } } - diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index d4b04f5..67fd2f3 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -33,6 +33,11 @@ #ifndef __NES_HW_H #define __NES_HW_H +#define NES_PHY_TYPE_1G 2 +#define NES_PHY_TYPE_IRIS 3 + +#define NES_MULTICAST_PF_MAX 8 + enum pci_regs { NES_INT_STAT = 0x0000, NES_INT_MASK = 0x0004, @@ -165,7 +170,7 @@ enum indexed_regs { NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154, }; -#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1 +#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1 #define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17) enum nes_cqp_opcodes { @@ -572,8 +577,8 @@ enum nes_nic_cqe_word_idx { #define NES_PKT_TYPE_UDPV4_BITS 0x0210 #define NES_PKT_TYPE_UDPV4_MASK 0x3f30 -#define NES_PKT_TYPE_IPV4_BITS 0x0010 -#define NES_PKT_TYPE_IPV4_MASK 0x3f30 
+#define NES_PKT_TYPE_IPV4_BITS 0x0010 +#define NES_PKT_TYPE_IPV4_MASK 0x3f30 #define NES_PKT_TYPE_OTHER_BITS 0x0000 #define NES_PKT_TYPE_OTHER_MASK 0x0030 @@ -640,8 +645,6 @@ enum nes_aeqe_tcp_state { NES_AEQE_TCP_STATE_TIME_WAIT = 11 }; -#define NES_TIMER_INT_LIMIT 2 - enum nes_aeqe_aeid { NES_AEQE_AEID_AMP_UNALLOCATED_STAG = 0x0102, NES_AEQE_AEID_AMP_INVALID_STAG = 0x0103, @@ -731,7 +734,7 @@ enum nes_iwarp_sq_wqe_bits { }; #define NES_EEPROM_READ_REQUEST (1<<16) -#define NES_MAC_ADDR_VALID (1<<20) +#define NES_MAC_ADDR_VALID (1<<20) /* * NES index registers init values. @@ -739,7 +742,7 @@ enum nes_iwarp_sq_wqe_bits { struct nes_init_values { u32 index; u32 data; - u8 wrt; + u8 wrt; }; /* @@ -754,10 +757,10 @@ struct nes_pci_regs { u32 other_regs[59]; /* pad out to 256 bytes for now */ }; -#define NES_CQP_SQ_SIZE 128 -#define NES_CCQ_SIZE 128 -#define NES_NIC_WQ_SIZE 512 -#define NES_NIC_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512)) +#define NES_CQP_SQ_SIZE 128 +#define NES_CCQ_SIZE 128 +#define NES_NIC_WQ_SIZE 512 +#define NES_NIC_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512)) #define NES_NIC_BACK_STORE 0x00038000 struct nes_device; @@ -803,27 +806,27 @@ struct nes_hw_aeqe { }; struct nes_cqp_request { - wait_queue_head_t waitq; + wait_queue_head_t waitq; struct nes_hw_cqp_wqe cqp_wqe; - struct list_head list; - atomic_t refcount; - u16 major_code; - u16 minor_code; - u8 waiting; - u8 request_done; - u8 dynamic; - u8 padding[1]; + struct list_head list; + atomic_t refcount; + u16 major_code; + u16 minor_code; + u8 waiting; + u8 request_done; + u8 dynamic; + u8 padding[1]; }; struct nes_hw_cqp { struct nes_hw_cqp_wqe *sq_vbase; - dma_addr_t sq_pbase; - spinlock_t lock; - wait_queue_head_t waitq; - u16 qp_id; - u16 sq_head; - u16 sq_tail; - u16 sq_size; + dma_addr_t sq_pbase; + spinlock_t lock; + wait_queue_head_t waitq; + u16 qp_id; + u16 sq_head; + u16 sq_tail; + u16 sq_size; }; #define NES_FIRST_FRAG_SIZE 128 @@ -832,12 
+835,13 @@ struct nes_first_frag { }; struct nes_hw_nic { - struct nes_first_frag *first_frag_vbase; /* virtual address of first frags */ + struct nes_first_frag *first_frag_vbase; /* virtual address of first frags */ struct nes_hw_nic_sq_wqe *sq_vbase; /* virtual address of sq */ struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */ - struct sk_buff *tx_skb[NES_NIC_WQ_SIZE]; - struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; + struct sk_buff *tx_skb[NES_NIC_WQ_SIZE]; + struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; dma_addr_t frag_paddr[NES_NIC_WQ_SIZE]; + unsigned long first_frag_overflow[BITS_TO_LONGS(NES_NIC_WQ_SIZE)]; dma_addr_t sq_pbase; /* PCI memory for host rings */ dma_addr_t rq_pbase; /* PCI memory for host rings */ @@ -865,7 +869,7 @@ struct nes_hw_nic_cq { struct nes_hw_qp { struct nes_hw_qp_wqe *sq_vbase; /* PCI memory for host rings */ struct nes_hw_qp_wqe *rq_vbase; /* PCI memory for host rings */ - void *q2_vbase; /* PCI memory for host rings */ + void *q2_vbase; /* PCI memory for host rings */ dma_addr_t sq_pbase; /* PCI memory for host rings */ dma_addr_t rq_pbase; /* PCI memory for host rings */ dma_addr_t q2_pbase; /* PCI memory for host rings */ @@ -876,8 +880,8 @@ struct nes_hw_qp { u16 rq_head; u16 rq_tail; u16 rq_size; - u8 rq_encoded_size; - u8 sq_encoded_size; + u8 rq_encoded_size; + u8 sq_encoded_size; }; struct nes_hw_cq { @@ -910,55 +914,93 @@ struct nic_qp_map { u8 is_hnic; }; -#define NES_CQP_ARP_AEQ_INDEX_MASK 0x000f0000 +#define NES_CQP_ARP_AEQ_INDEX_MASK 0x000f0000 #define NES_CQP_ARP_AEQ_INDEX_SHIFT 16 #define NES_CQP_APBVT_ADD 0x00008000 #define NES_CQP_APBVT_NIC_SHIFT 16 -#define NES_ARP_ADD 1 -#define NES_ARP_DELETE 2 +#define NES_ARP_ADD 1 +#define NES_ARP_DELETE 2 #define NES_ARP_RESOLVE 3 -#define NES_MAC_SW_IDLE 0 +#define NES_MAC_SW_IDLE 0 #define NES_MAC_SW_INTERRUPT 1 -#define NES_MAC_SW_MH 2 +#define NES_MAC_SW_MH 2 struct nes_arp_entry { u32 ip_addr; u8 mac_addr[ETH_ALEN]; }; +#define NES_NIC_FAST_TIMER 96 +#define 
NES_NIC_FAST_TIMER_LOW 40 +#define NES_NIC_FAST_TIMER_HIGH 1000 +#define DEFAULT_NES_QL_HIGH 256 +#define DEFAULT_NES_QL_LOW 16 +#define DEFAULT_NES_QL_TARGET 64 +#define DEFAULT_JUMBO_NES_QL_LOW 12 +#define DEFAULT_JUMBO_NES_QL_TARGET 40 +#define DEFAULT_JUMBO_NES_QL_HIGH 128 + +struct nes_hw_tune_timer { + u16 cq_count; + u16 threshold_low; + u16 threshold_target; + u16 threshold_high; + u16 timer_in_use; + u16 timer_in_use_old; + u16 timer_in_use_min; + u16 timer_in_use_max; + u8 timer_direction_upward; + u8 timer_direction_downward; +}; + +#define NES_TIMER_INT_LIMIT 2 +#define NES_TIMER_INT_LIMIT_DYNAMIC 10 +#define NES_TIMER_ENABLE_LIMIT 4 + struct nes_adapter { - u32 hw_rev; - u64 fw_ver; + u64 fw_ver; + unsigned long *allocated_qps; + unsigned long *allocated_cqs; + unsigned long *allocated_mrs; + unsigned long *allocated_pds; + unsigned long *allocated_arps; + struct nes_qp **qp_table; + struct workqueue_struct *work_q; + struct list_head list; + struct list_head active_listeners; + /* list of the netdev's associated with each logical port */ + struct list_head nesvnic_list[4]; + + struct timer_list mh_timer; + struct work_struct work; + spinlock_t resource_lock; + spinlock_t phy_lock; + spinlock_t pbl_lock; + spinlock_t periodic_timer_lock; + + struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE]; + + /* Adapter CEQ and AEQs */ + struct nes_hw_ceq ceq[16]; + struct nes_hw_aeq aeq[8]; + + struct nes_hw_tune_timer tune_timer; + + unsigned long doorbell_start; + + u32 hw_rev; u32 vendor_id; u32 vendor_part_id; - struct nes_qp **qp_table; u32 device_cap_flags; u32 tick_delta; u32 timer_int_req; - /* RNIC Resource Lists */ - unsigned long *allocated_qps; - unsigned long *allocated_cqs; - unsigned long *allocated_mrs; - unsigned long *allocated_pds; - struct list_head active_listeners; - spinlock_t resource_lock; - spinlock_t phy_lock; - - /* arp table */ - unsigned long *allocated_arps; - struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE]; - u32 
arp_table_size; - u32 next_arp_index; + u32 arp_table_size; + u32 next_arp_index; - /* Adapter CEQ and AEQs */ - struct nes_hw_ceq ceq[16]; - struct nes_hw_aeq aeq[8]; - - /* RNIC Limits */ u32 max_mr; u32 max_256pbl; u32 max_4kpbl; @@ -993,61 +1035,69 @@ struct nes_adapter { u32 nic_rx_eth_route_err; + u32 et_rx_coalesce_usecs; + u32 et_rx_max_coalesced_frames; + u32 et_rx_coalesce_usecs_irq; + u32 et_rx_max_coalesced_frames_irq; + u32 et_pkt_rate_low; + u32 et_rx_coalesce_usecs_low; + u32 et_rx_max_coalesced_frames_low; + u32 et_pkt_rate_high; + u32 et_rx_coalesce_usecs_high; + u32 et_rx_max_coalesced_frames_high; + u32 et_rate_sample_interval; + u32 timer_int_limit; + /* Adapter base MAC address */ - u16 mac_addr_high; u32 mac_addr_low; + u16 mac_addr_high; u16 firmware_eeprom_offset; u16 software_eeprom_offset; u16 max_irrq_wr; + /* pd config for each port */ + u16 pd_config_size[4]; + u16 pd_config_base[4]; + + /* the phy index for each port */ + u8 phy_index[4]; + u8 mac_sw_state[4]; + u8 mac_link_down[4]; + u8 phy_type[4]; + /* PCI information */ - unsigned int devfn; + unsigned int devfn; unsigned char bus_number; unsigned char OneG_Mode; - struct list_head list; - /* list of the netdev's associated with each logical port */ - struct list_head nesvnic_list[4]; - unsigned char ref_count; - u8 netdev_count; - u8 netdev_max; /* from host nic address count in EEPROM */ - u8 port_count; - - /* the phy index for each port */ - u8 phy_index[4]; - u8 mac_sw_state[4]; - u8 mac_link_down[4]; - u8 ports[4]; - - struct timer_list mh_timer; - /* wait_queue_head_t wait_q; */ - struct work_struct work; - struct workqueue_struct *work_q; - /* u32 worker_quit; */ - - void *cm_context; + u8 netdev_count; + u8 netdev_max; /* from host nic address count in EEPROM */ + u8 port_count; + u8 virtwq; + u8 et_use_adaptive_rx_coalesce; }; struct nes_pbl { - u64 *pbl_vbase; - dma_addr_t pbl_pbase; - unsigned long user_base; - u32 pbl_size; + u64 *pbl_vbase; + dma_addr_t pbl_pbase; + 
struct page *page; + unsigned long user_base; + u32 pbl_size; struct list_head list; /* TODO: need to add list for two level tables */ }; struct nes_listener { - struct work_struct work; + struct work_struct work; struct workqueue_struct *wq; - struct nes_vnic *nesvnic; - struct iw_cm_id *cm_id; - struct list_head list; - unsigned long socket; - u8 accept_failed; + struct nes_vnic *nesvnic; + struct iw_cm_id *cm_id; + struct list_head list; + unsigned long socket; + u8 accept_failed; }; struct nes_ib_device; @@ -1070,39 +1120,39 @@ struct nes_vnic { struct nes_device *nesdev; struct net_device *netdev; struct vlan_group *vlan_grp; - atomic_t rx_skbs_needed; - atomic_t rx_skb_timer_running; - int budget; - int rx_cqes_completed; - int cqe_allocs_pending; - u32 msg_enable; + atomic_t rx_skbs_needed; + atomic_t rx_skb_timer_running; + int budget; + int rx_cqes_completed; + int cqe_allocs_pending; + u32 msg_enable; /* u32 tx_avail; */ - __be32 local_ipaddr; - - spinlock_t tx_lock; /* could use netdev tx lock? */ - struct timer_list rq_wqes_timer; - u32 nic_mem_size; - void *nic_vbase; - dma_addr_t nic_pbase; - struct nes_hw_nic nic; + __be32 local_ipaddr; + + spinlock_t tx_lock; /* could use netdev tx lock? 
*/ + struct timer_list rq_wqes_timer; + u32 nic_mem_size; + void *nic_vbase; + dma_addr_t nic_pbase; + struct nes_hw_nic nic; struct nes_hw_nic_cq nic_cq; struct net_device_stats netstats; /* used to put the netdev on the adapters logical port list */ struct list_head list; u16 max_frame_size; - u8 netdev_open; - u8 linkup; - u8 logical_port; - u8 netdev_index; /* might not be needed, indexes nesdev->netdev */ - u8 perfect_filter_index; - u8 nic_index; - u8 qp_nic_index[4]; - u8 next_qp_nic_index; - u8 of_device_registered; - u8 rdma_enabled; - u8 cqes_pending; - u8 rx_checksum_disabled; + u8 netdev_open; + u8 linkup; + u8 logical_port; + u8 netdev_index; /* might not be needed, indexes nesdev->netdev */ + u8 perfect_filter_index; + u8 nic_index; + u8 qp_nic_index[4]; + u8 next_qp_nic_index; + u8 of_device_registered; + u8 rdma_enabled; + u8 cqes_pending; + u8 rx_checksum_disabled; }; struct nes_ib_device { From glenn at lists.openfabrics.org Wed Nov 14 14:37:25 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Wed, 14 Nov 2007 14:37:25 -0800 (PST) Subject: [ofa-general] [PATCH 4/6] nes: Cosmetic changes; support virtual WQs and PPC Message-ID: <20071114223725.762B7E2807D@openfabrics.org> Updated code for the NetEffect NE020 adapter. Updates include: - Support for userspace/virtual WQs. 
- PowerPC support - Support for multiple debugging levels - Many, many cosmetic changes in line with kernel.org standards Diffs for nes_nic.c Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 2d759c4..d75b327 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -92,7 +92,40 @@ static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; static int debug = -1; -static int rdma_enabled = 0; +extern atomic_t cm_connects; +extern atomic_t cm_accepts; +extern atomic_t cm_disconnects; +extern atomic_t cm_closes; +extern atomic_t cm_connecteds; +extern atomic_t cm_connect_reqs; +extern atomic_t cm_rejects; +extern atomic_t mod_qp_timouts; +extern atomic_t qps_created; +extern atomic_t qps_destroyed; +extern atomic_t sw_qps_destroyed; +extern u32 mh_detected; +extern u32 mh_pauses_sent; +extern u32 cm_packets_sent; +extern u32 cm_packets_bounced; +extern u32 cm_packets_created; +extern u32 cm_packets_received; +extern u32 cm_packets_dropped; +extern u32 cm_packets_retrans; +extern u32 cm_listens_created; +extern u32 cm_listens_destroyed; +extern u32 cm_backlog_drops; +extern atomic_t cm_nodes_created; +extern atomic_t cm_nodes_destroyed; +extern atomic_t cm_accel_dropped_pkts; +extern atomic_t cm_resets_recvd; +extern u32 int_mod_timer_init; +extern u32 int_mod_cq_depth_256; +extern u32 int_mod_cq_depth_128; +extern u32 int_mod_cq_depth_32; +extern u32 int_mod_cq_depth_24; +extern u32 int_mod_cq_depth_16; +extern u32 int_mod_cq_depth_4; +extern u32 int_mod_cq_depth_1; static int nes_netdev_open(struct net_device *); static int nes_netdev_stop(struct net_device *); @@ -122,7 +155,7 @@ static int nes_netdev_poll(struct net_device* netdev, int* budget) netdev->quota -= nesvnic->rx_cqes_completed; *budget -= nesvnic->rx_cqes_completed; - if (0 == nesvnic->cqes_pending) { + if (nesvnic->cqes_pending == 0) {
netif_rx_complete(netdev); /* clear out completed cqes and arm */ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | @@ -136,7 +169,7 @@ static int nes_netdev_poll(struct net_device* netdev, int* budget) nesvnic->netdev->name); } - return((0 == nesvnic->cqes_pending) ? 0 : 1); + return (nesvnic->cqes_pending == 0) ? 0 : 1; } #endif @@ -190,9 +223,9 @@ static int nes_netdev_open(struct net_device *netdev) nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE); nic_active |= nic_active_bit; nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); nic_active |= nic_active_bit; nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); @@ -204,7 +237,6 @@ static int nes_netdev_open(struct net_device *netdev) macaddr_low += ((u32)netdev->dev_addr[4]) << 8; macaddr_low += (u32)netdev->dev_addr[5]; -#define NES_MAX_PORT_COUNT 4 /* Program the various MAC regs */ for (i = 0; i < NES_MAX_PORT_COUNT; i++) { if (nesvnic->qp_nic_index[i] == 0xf) { @@ -264,8 +296,9 @@ static int nes_netdev_stop(struct net_device *netdev) u32 nic_active_mask; u32 nic_active; - nes_debug(NES_DBG_SHUTDOWN, "\n"); - if (0 == nesvnic->netdev_open) + nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", + nesvnic, nesdev, netdev, netdev->name); + if (nesvnic->netdev_open == 0) return 0; if (netif_msg_ifdown(nesvnic)) @@ -273,7 +306,7 @@ static int nes_netdev_stop(struct net_device *netdev) /* Disable network packets */ netif_stop_queue(netdev); - if ((nesdev->netdev[0] == netdev)&(nesvnic->logical_port == nesdev->mac_index)) { + if ((nesdev->netdev[0] == netdev) & (nesvnic->logical_port == 
nesdev->mac_index)) { nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); } @@ -287,6 +320,12 @@ static int nes_netdev_stop(struct net_device *netdev) nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); nic_active &= nic_active_mask; nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= nic_active_mask; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON); nic_active &= nic_active_mask; nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); @@ -296,7 +335,6 @@ static int nes_netdev_stop(struct net_device *netdev) nes_destroy_ofa_device(nesvnic->nesibdev); nesvnic->nesibdev = NULL; nesvnic->of_device_registered = 0; - rdma_enabled = 0; } nes_destroy_nic_qp(nesvnic); @@ -317,9 +355,7 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) struct nes_hw_nic_sq_wqe *nic_sqe; #ifdef NETIF_F_TSO struct tcphdr *tcph; - /* struct udphdr *udph; */ #endif -// u64 *wqe_fragment_address; u16 *wqe_fragment_length; u32 wqe_misc; u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */ @@ -343,18 +379,14 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */ if (skb->ip_summed == CHECKSUM_PARTIAL) { -#ifdef OFED_1_2 - tcph = skb->h.th; -#else tcph = tcp_hdr(skb); -#endif if (1) { #ifdef NETIF_F_TSO - if (nes_skb_is_gso(skb)) { + if (skb_is_gso(skb)) { /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... 
seg size = %u\n", - netdev->name, nes_skb_is_gso(skb)); */ + netdev->name, skb_is_gso(skb)); */ wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | - NES_NIC_SQ_WQE_COMPLETION | (u16)nes_skb_is_gso(skb); + NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); nic_sqe->wqe_words[NES_NIC_SQ_WQE_LSO_INFO_IDX] = cpu_to_le32(((u32)tcph->doff) | (((u32)(((unsigned char *)tcph) - skb->data)) << 4)); @@ -383,6 +415,7 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) nesvnic->tx_sw_dropped++; return NETDEV_TX_LOCKED; } + set_bit(nesnic->sq_head, nesnic->first_frag_overflow); bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE, skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE); wqe_fragment_length[wqe_fragment_index++] = @@ -444,6 +477,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) #define NES_MAX_TSO_FRAGS 18 /* 64K segment plus overflow on each side */ dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS]; + dma_addr_t bus_address; u32 tso_frag_index; u32 tso_frag_count; u32 tso_wqe_length; @@ -454,6 +488,8 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) struct iphdr *iph; unsigned long flags; u16 *wqe_fragment_length; + u32 nr_frags; + u32 original_first_length; // u64 *wqe_fragment_address; /* first fragment (0) is used by copy buffer */ u16 wqe_fragment_index=1; @@ -466,12 +502,11 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 old_head; u32 wqe_misc; - if (nes_debug_level & NES_DBG_NIC_TX) { - nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," - " (%u frags), tso_size=%u\n", - netdev->name, skb->len, skb_headlen(skb), - skb_shinfo(skb)->nr_frags, nes_skb_is_gso(skb)); - } + /* nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," + " (%u frags), tso_size=%u\n", + netdev->name, skb->len, skb_headlen(skb), + skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + */ 
local_irq_save(flags); if (!spin_trylock(&nesnic->sq_lock)) { local_irq_restore(flags); @@ -487,16 +522,20 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } + nr_frags = skb_shinfo(skb)->nr_frags; + if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { + nr_frags++; + } /* Check if too many fragments */ - if (unlikely((skb_shinfo(skb)->nr_frags) > 4)) { + if (unlikely((nr_frags > 4))) { #ifdef NETIF_F_TSO - if (nes_skb_is_gso(skb) && (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE)) { + if (skb_is_gso(skb)) { nesvnic->segmented_tso_requests++; nesvnic->tso_requests++; old_head = nesnic->sq_head; /* Basically 4 fragments available per WQE with extended fragments */ - wqes_needed = skb_shinfo(skb)->nr_frags >> 2; - wqes_needed += (skb_shinfo(skb)->nr_frags&3)?1:0; + wqes_needed = nr_frags >> 2; + wqes_needed += (nr_frags&3)?1:0; wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) & (nesnic->sq_size - 1); @@ -519,18 +558,10 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) } tso_frag_index = 0; -#ifdef OFED_1_2 - curr_tcp_seq = ntohl(skb->h.th->seq); -#else curr_tcp_seq = ntohl(tcp_hdr(skb)->seq); -#endif -#ifdef OFED_1_2 - hoffset = skb->h.raw - skb->data; - nhoffset = skb->nh.raw - skb->data; -#else hoffset = skb_transport_header(skb) - skb->data; nhoffset = skb_network_header(skb) - skb->data; -#endif + original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2); for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) { tso_wqe_length = 0; @@ -548,22 +579,20 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) /* bump past the vlan tag */ wqe_fragment_length++; -// wqe_fragment_address = -// (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; /* Assumes header totally fits in allocated buffer and is in first fragment */ - if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) { - 
nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, skb_headlen=%u, FIRST_FRAG_SIZE=%u\n", - skb_headlen(skb), NES_FIRST_FRAG_SIZE); + if (original_first_length > NES_FIRST_FRAG_SIZE) { + nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n", + original_first_length, NES_FIRST_FRAG_SIZE); nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," " (%u frags), tso_size=%u\n", netdev->name, skb->len, skb_headlen(skb), - skb_shinfo(skb)->nr_frags, nes_skb_is_gso(skb)); + skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); } memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), - skb_headlen(skb))); + original_first_length)); iph = (struct iphdr *) (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]); tcph = (struct tcphdr *) @@ -579,9 +608,21 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) } tcph->seq = htonl(curr_tcp_seq); wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE), - skb_headlen(skb))); - - for (wqe_fragment_index = 1; wqe_fragment_index < 5;) { + original_first_length)); + + wqe_fragment_index = 1; + if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) { + set_bit(nesnic->sq_head, nesnic->first_frag_overflow); + bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length, + skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE); + wqe_fragment_length[wqe_fragment_index++] = + cpu_to_le16(skb_headlen(skb) - original_first_length); + wqe_fragment_length[wqe_fragment_index] = 0; + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG1_LOW_IDX] = cpu_to_le32((u32)((u64)(bus_address))); + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG1_HIGH_IDX] = cpu_to_le32((u32)(((u64)(bus_address))>>32)); + tso_wqe_length += skb_headlen(skb) - original_first_length; + } + while (wqe_fragment_index < 5) { wqe_fragment_length[wqe_fragment_index] = 
cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size); nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index)] = @@ -600,19 +641,19 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) } else { nesnic->tx_skb[nesnic->sq_head] = NULL; } - wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)nes_skb_is_gso(skb); - if ((tso_wqe_length + skb_headlen(skb)) > nes_skb_is_gso(skb)) { - wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; - } else { - iph->tot_len = htons(tso_wqe_length + skb_headlen(skb) - nhoffset); - } + wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); + if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) { + wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; + } else { + iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset); + } nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc); nic_sqe->wqe_words[NES_NIC_SQ_WQE_LSO_INFO_IDX] = cpu_to_le32(((u32)tcph->doff) | (((u32)hoffset) << 4)); nic_sqe->wqe_words[NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX] = - cpu_to_le32(tso_wqe_length+skb_headlen(skb)); + cpu_to_le32(tso_wqe_length+original_first_length); curr_tcp_seq += tso_wqe_length; nesnic->sq_head++; nesnic->sq_head &= nesnic->sq_size-1; @@ -620,21 +661,11 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) } else { #endif nesvnic->linearized_skbs++; -#ifdef OFED_1_2 - hoffset = skb->h.raw - skb->data; - nhoffset = skb->nh.raw - skb->data; -#else hoffset = skb_transport_header(skb) - skb->data; nhoffset = skb_network_header(skb) - skb->data; -#endif - nes_skb_linearize(skb); -#ifdef OFED_1_2 - skb->h.raw = skb->data + hoffset; - skb->nh.raw = skb->data + nhoffset; -#else + skb_linearize(skb); skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); -#endif send_rc = nes_nic_send(skb, netdev); if (send_rc != NETDEV_TX_OK) { spin_unlock_irqrestore(&nesnic->sq_lock, flags); @@ -767,28 +798,158 @@ static void nes_netdev_tx_timeout(struct 
net_device *netdev) } +#ifdef HAVE_SET_MAC_ADDR /** * nes_netdev_set_mac_address */ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p) { - return -1; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct sockaddr *mac_addr = p; + int i; + u32 macaddr_low; + u16 macaddr_high; + + if (!is_valid_ether_addr(mac_addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len); + printk(PFX "%s: Address length = %d, Address = %02X%02X%02X%02X%02X%02X..\n", + __FUNCTION__, netdev->addr_len, + mac_addr->sa_data[0], mac_addr->sa_data[1], + mac_addr->sa_data[2], mac_addr->sa_data[3], + mac_addr->sa_data[4], mac_addr->sa_data[5]); + macaddr_high = ((u16)netdev->dev_addr[0]) << 8; + macaddr_high += (u16)netdev->dev_addr[1]; + macaddr_low = ((u32)netdev->dev_addr[2]) << 24; + macaddr_low += ((u32)netdev->dev_addr[3]) << 16; + macaddr_low += ((u32)netdev->dev_addr[4]) << 8; + macaddr_low += (u32)netdev->dev_addr[5]; + + for (i = 0; i < NES_MAX_PORT_COUNT; i++) { + if (nesvnic->qp_nic_index[i] == 0xf) { + break; + } + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8), + macaddr_low); + nes_write_indexed(nesdev, + NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)nesvnic->nic_index) << 16))); + } + return 0; } +#endif +#ifdef HAVE_MULTICAST /** - * nes_netdev_change_mtu + * nes_netdev_set_multicast_list */ -static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) +void nes_netdev_set_multicast_list(struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); - int ret = 0; + struct nes_device *nesdev = nesvnic->nesdev; + struct dev_mc_list *multicast_addr; + u32 nic_active_bit; + u32 nic_active; + u32 perfect_filter_register_address; + u32 macaddr_low; + u16 macaddr_high; + u8 mc_all_on = 0; + u8 mc_index; + + nic_active_bit = 1 << 
nesvnic->nic_index; + + if (netdev->flags & IFF_PROMISC) { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + mc_all_on = 1; + } else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) || + (nesvnic->nic_index > 3)) { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + mc_all_on = 1; + } else { + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + } + + nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n", + netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0, + (netdev->flags & IFF_ALLMULTI)?1:0); + if (!mc_all_on) { + multicast_addr = netdev->mc_list; + perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80; + perfect_filter_register_address += nesvnic->nic_index*0x40; + for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) { + if (multicast_addr) { + nes_debug(NES_DBG_NIC_RX, "Assigning MC Address = %02X%02X%02X%02X%02X%02X to register 0x%04X\n", + multicast_addr->dmi_addr[0], multicast_addr->dmi_addr[1], + multicast_addr->dmi_addr[2], multicast_addr->dmi_addr[3], + multicast_addr->dmi_addr[4], multicast_addr->dmi_addr[5], + 
perfect_filter_register_address+(mc_index * 8)); + macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8; + macaddr_high += (u16)multicast_addr->dmi_addr[1]; + macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24; + macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16; + macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8; + macaddr_low += (u32)multicast_addr->dmi_addr[5]; + nes_write_indexed(nesdev, + perfect_filter_register_address+(mc_index * 8), + macaddr_low); + nes_write_indexed(nesdev, + perfect_filter_register_address+4+(mc_index * 8), + (u32)macaddr_high | NES_MAC_ADDR_VALID | + ((((u32)(1<nic_index)) << 16))); + multicast_addr = multicast_addr->next; + } else { + nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n", + perfect_filter_register_address+(mc_index * 8)); + nes_write_indexed(nesdev, + perfect_filter_register_address+4+(mc_index * 8), + 0); + } + } + } +} +#endif - if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) + +/** + * nes_netdev_change_mtu + */ +static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + int ret = 0; + u8 jumbomode=0; + + if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) return -EINVAL; - netdev->mtu = new_mtu; - nesvnic->max_frame_size = new_mtu+ETH_HLEN; + netdev->mtu = new_mtu; + nesvnic->max_frame_size = new_mtu+ETH_HLEN; + + if (netdev->mtu > 1500) { + jumbomode=1; + } + nes_nic_init_timer_defaults(nesdev, jumbomode); if (netif_running(netdev)) { nes_netdev_stop(netdev); @@ -813,7 +974,6 @@ void nes_netdev_exit(struct nes_vnic *nesvnic) if ((nesvnic->rdma_enabled)&&(nesvnic->of_device_registered)) { nes_destroy_ofa_device( nesibdev ); nesvnic->of_device_registered = 0; - rdma_enabled = 0; nesvnic->nesibdev = NULL; } unregister_netdev(netdev); @@ -821,7 +981,7 @@ void nes_netdev_exit(struct nes_vnic *nesvnic) } -#define NES_ETHTOOL_STAT_COUNT 52 +#define NES_ETHTOOL_STAT_COUNT 54 
static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = { "Link Change Interrupts", "Linearized SKBs", @@ -869,12 +1029,14 @@ static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] "CM Nodes Destroyed", "CM Accel Drops", "CM Resets Received", - "CQP Req Allocs", - "CQP Req Deallocs", - "CQP Req Dynamic Allocs", - "CQP Req Dynamic Deallocs", - "CQP Req Queues", - "CQP Req Redrives", + "Timer Inits", + "CQ Depth 1", + "CQ Depth 4", + "CQ Depth 16", + "CQ Depth 24", + "CQ Depth 32", + "CQ Depth 128", + "CQ Depth 256", }; @@ -1052,12 +1214,14 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, target_stat_values[43] = atomic_read(&cm_nodes_destroyed); target_stat_values[44] = atomic_read(&cm_accel_dropped_pkts); target_stat_values[45] = atomic_read(&cm_resets_recvd); - target_stat_values[46] = atomic_read(&cqp_reqs_allocated); - target_stat_values[47] = atomic_read(&cqp_reqs_freed); - target_stat_values[48] = atomic_read(&cqp_reqs_dynallocated); - target_stat_values[49] = atomic_read(&cqp_reqs_dynfreed); - target_stat_values[50] = atomic_read(&cqp_reqs_queued); - target_stat_values[51] = atomic_read(&cqp_reqs_redriven); + target_stat_values[46] = int_mod_timer_init; + target_stat_values[47] = int_mod_cq_depth_1; + target_stat_values[48] = int_mod_cq_depth_4; + target_stat_values[49] = int_mod_cq_depth_16; + target_stat_values[50] = int_mod_cq_depth_24; + target_stat_values[51] = int_mod_cq_depth_32; + target_stat_values[52] = int_mod_cq_depth_128; + target_stat_values[53] = int_mod_cq_depth_256; } @@ -1085,16 +1249,48 @@ static void nes_netdev_get_drvinfo(struct net_device *netdev, * nes_netdev_set_coalesce */ static int nes_netdev_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *et_coalesce) + struct ethtool_coalesce *et_coalesce) { - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; + struct nes_vnic *nesvnic = netdev_priv(netdev); + 
struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + unsigned long flags; + + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + if (et_coalesce->rx_max_coalesced_frames_low) { + shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low; + } + if (et_coalesce->rx_max_coalesced_frames_irq) { + shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq; + } + if (et_coalesce->rx_max_coalesced_frames_high) { + shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high; + } + if (et_coalesce->rx_coalesce_usecs_low) { + shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low; + } + if (et_coalesce->rx_coalesce_usecs_high) { + shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high; + } + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); /* using this to drive total interrupt moderation */ - nesvnic->nesdev->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq; - if (nesdev->et_rx_coalesce_usecs_irq) { - nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, - 0x80000000 | ((u32)(nesdev->et_rx_coalesce_usecs_irq*8))); + nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq; + if (et_coalesce->use_adaptive_rx_coalesce) { + nesadapter->et_use_adaptive_rx_coalesce = 1; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC; + nesadapter->et_rx_coalesce_usecs_irq = 0; + if (et_coalesce->pkt_rate_low) { + nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low; + } + } else { + nesadapter->et_use_adaptive_rx_coalesce = 0; + nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT; + if (nesadapter->et_rx_coalesce_usecs_irq) { + nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, + 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8))); + } } return 0; } @@ -1104,18 +1300,33 @@ static int nes_netdev_set_coalesce(struct net_device *netdev, * 
nes_netdev_get_coalesce */ static int nes_netdev_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *et_coalesce) + struct ethtool_coalesce *et_coalesce) { - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct ethtool_coalesce temp_et_coalesce; + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct ethtool_coalesce temp_et_coalesce; + struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer; + unsigned long flags; memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce)); - temp_et_coalesce.rx_coalesce_usecs_irq = nesvnic->nesdev->et_rx_coalesce_usecs_irq; - memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); + temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq; + temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce; + temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval; + temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low; + spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags); + temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low; + temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target; + temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high; + temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; + temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max; + spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); + memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); return 0; } + /** * nes_netdev_get_pauseparam */ @@ -1125,8 +1336,8 @@ static void nes_netdev_get_pauseparam(struct net_device *netdev, struct nes_vnic *nesvnic = netdev_priv(netdev); et_pauseparam->autoneg = 0; - et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control==0)?1:0; - et_pauseparam->tx_pause = 
(nesvnic->nesdev->disable_tx_flow_control==0)?1:0; + et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control == 0) ? 1:0; + et_pauseparam->tx_pause = (nesvnic->nesdev->disable_tx_flow_control == 0) ? 1:0; } @@ -1144,14 +1355,14 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev, /* TODO: should return unsupported */ return 0; } - if ((et_pauseparam->tx_pause==1) && (nesdev->disable_tx_flow_control==1)) { + if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) { u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 0; - } else if ((et_pauseparam->tx_pause==0) && (nesdev->disable_tx_flow_control==0)) { + } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) { u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; @@ -1159,14 +1370,14 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev, NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 1; } - if ((et_pauseparam->rx_pause==1) && (nesdev->disable_rx_flow_control==1)) { + if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) { u32temp = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp); nesdev->disable_rx_flow_control = 0; - } else if ((et_pauseparam->rx_pause==0) && (nesdev->disable_rx_flow_control==0)) { + } else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) { u32temp = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40)); u32temp |= 
NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE; @@ -1190,6 +1401,7 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd u16 phy_data; et_cmd->duplex = DUPLEX_FULL; + et_cmd->port = PORT_MII; if (nesadapter->OneG_Mode) { et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg; et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg; @@ -1204,14 +1416,21 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd et_cmd->transceiver = XCVR_EXTERNAL; et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; } else { - et_cmd->supported = SUPPORTED_10000baseT_Full; - et_cmd->advertising = ADVERTISED_10000baseT_Full; + if (nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->port = PORT_FIBRE; + et_cmd->supported = SUPPORTED_FIBRE; + et_cmd->advertising = ADVERTISED_FIBRE; + et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; + } else { + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->supported = SUPPORTED_10000baseT_Full; + et_cmd->advertising = ADVERTISED_10000baseT_Full; + et_cmd->phy_address = nesdev->mac_index; + } et_cmd->speed = SPEED_10000; et_cmd->autoneg = AUTONEG_DISABLE; - et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->phy_address = nesdev->mac_index; } - et_cmd->port = PORT_MII; et_cmd->maxtxpkt = 511; et_cmd->maxrxpkt = 511; return 0; @@ -1246,25 +1465,6 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd } -/** - * nes_netdev_get_msglevel - */ -static u32 nes_netdev_get_msglevel(struct net_device *netdev) -{ - return nes_debug_level; -} - - -/** - * nes_netdev_set_msglevel - */ -static void nes_netdev_set_msglevel(struct net_device *netdev, u32 level) -{ - nes_debug(NES_DBG_NETDEV, "Setting message level to: %u\n", level); - nes_debug_level = level; -} - - static struct ethtool_ops nes_ethtool_ops = { .get_link = ethtool_op_get_link, .get_settings = nes_netdev_get_settings, @@ 
-1280,8 +1480,6 @@ static struct ethtool_ops nes_ethtool_ops = { .set_coalesce = nes_netdev_set_coalesce, .get_pauseparam = nes_netdev_get_pauseparam, .set_pauseparam = nes_netdev_set_pauseparam, - .get_msglevel = nes_netdev_get_msglevel, - .set_msglevel = nes_netdev_set_msglevel, .set_tx_csum = ethtool_op_set_tx_csum, .set_rx_csum = nes_netdev_set_rx_csum, .set_sg = ethtool_op_set_sg, @@ -1324,6 +1522,8 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, struct net_device *netdev; struct nic_qp_map *curr_qp_map; u32 u32temp; + u16 phy_data; + u16 temp_phy_data; netdev = alloc_etherdev(sizeof(struct nes_vnic)); if (!netdev) { @@ -1331,7 +1531,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, return NULL; } - nes_debug(NES_DBG_INIT, "netdev = %p.\n", netdev); + nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name); SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev); @@ -1341,6 +1541,9 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->get_stats = nes_netdev_get_stats; netdev->tx_timeout = nes_netdev_tx_timeout; netdev->set_mac_address = nes_netdev_set_mac_address; +#ifdef HAVE_MULTICAST + netdev->set_multicast_list = nes_netdev_set_multicast_list; +#endif netdev->change_mtu = nes_netdev_change_mtu; netdev->watchdog_timeo = NES_TX_TIMEOUT; netdev->irq = nesdev->pcidev->irq; @@ -1389,16 +1592,17 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->dev_addr[3] = (u8)(u64temp>>16); netdev->dev_addr[4] = (u8)(u64temp>>8); netdev->dev_addr[5] = (u8)u64temp; + memcpy(netdev->perm_addr, netdev->dev_addr, 6); if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) { #ifdef NETIF_F_TSO - netdev->features |= NETIF_F_TSO | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM; #endif #ifdef NETIF_F_GSO netdev->features |= NETIF_F_GSO | NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM; #endif } else { - netdev->features |= 
NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; } nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," @@ -1431,11 +1635,8 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, } nesvnic->next_qp_nic_index = 0; - if (0 == nesdev->netdev_count) { - if (rdma_enabled == 0) { - rdma_enabled = 1; - nesvnic->rdma_enabled = 1; - } + if (nesdev->netdev_count == 0) { + nesvnic->rdma_enabled = 1; } else { nesvnic->rdma_enabled = 0; } @@ -1447,7 +1648,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic, nesdev->mac_index); list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]); - if ((0 == nesdev->netdev_count) && + if ((nesdev->netdev_count == 0) && (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) { nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n", NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1))); @@ -1458,9 +1659,32 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, (0x200*(nesvnic->logical_port&1)), u32temp); u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200*(nesvnic->logical_port&1)) ); - if (0x0f0f0000 == (u32temp&0x0f1f0000)) { - nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); - nesvnic->linkup = 1; + if ((u32temp&0x0f1f0000) == 0x0f0f0000) { + if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + nes_read_10G_phy_reg(nesdev, 1, + nesdev->nesadapter->phy_index[nesvnic->logical_port]); + temp_phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + u32temp = 20; + do { + nes_read_10G_phy_reg(nesdev, 1, + nesdev->nesadapter->phy_index[nesvnic->logical_port]); + phy_data = (u16)nes_read_indexed(nesdev, + NES_IDX_MAC_MDIO_CONTROL); + if ((phy_data == temp_phy_data) || (!(--u32temp))) + break; + temp_phy_data = phy_data; + } while (1); + if (phy_data & 4) { + nes_debug(NES_DBG_INIT, "The Link 
is UP!!.\n"); + nesvnic->linkup = 1; + } else { + nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n"); + } + } else { + nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + nesvnic->linkup = 1; + } } nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n"); /* clear the MAC interrupt status, assumes direct logical to physical mapping */ @@ -1513,4 +1737,3 @@ int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev) return ret; } - From glenn at lists.openfabrics.org Wed Nov 14 14:38:48 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Wed, 14 Nov 2007 14:38:48 -0800 (PST) Subject: [ofa-general] [PATCH 5/6] nes: Cosmetic changes; support virtual WQs and PPC Message-ID: <20071114223848.F0DAFE281AA@openfabrics.org> Updated code for the NetEffect NE020 adapter. Updates include: - Support for userspace/virtual WQs. - PowerPC - Support for multiple debugging levels - Many, many cosmetic changes in line with kernel.org standards Diffs for nes_user.h and nes_utils.c Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index a170399..6ab2357 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -39,6 +39,9 @@ #include +#define NES_ABI_USERSPACE_VER 1 +#define NES_ABI_KERNEL_VER 1 + /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to @@ -47,11 +50,19 @@ * instead.
*/ +struct nes_alloc_ucontext_req { + __u32 reserved32; + __u8 userspace_ver; + __u8 reserved8[3]; +}; + struct nes_alloc_ucontext_resp { __u32 max_pds; /* maximum pds allowed for this user process */ __u32 max_qps; /* maximum qps allowed for this user process */ __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */ - __u32 reserved; + __u8 virtwq; /* flag to indicate if virtual WQ are to be used or not */ + __u8 kernel_ver; + __u8 reserved[2]; }; struct nes_alloc_pd_resp { @@ -63,6 +74,10 @@ struct nes_create_cq_req { __u64 user_cq_buffer; }; +struct nes_create_qp_req { + __u64 user_wqe_buffers; +}; + enum iwnes_memreg_type { IWNES_MEMREG_TYPE_MEM = 0x0000, IWNES_MEMREG_TYPE_QP = 0x0001, diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 1d478e0..b6aa6d3 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -50,19 +50,8 @@ #include "nes.h" -#define BITMASK(X) (1L << (X)) -#define NES_CRC_WID 32 - static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); -static u32 nesCRCTable[256]; -static u32 nesCRCInitialized = 0; - -static u32 nesCRCWidMask(u32); -static u32 nes_crc_table_gen(u32 *, u32, u32, u32); -static u32 reflect(u32, u32); -static u32 byte_swap(u32, u32); - u32 mh_detected; u32 mh_pauses_sent; @@ -76,7 +65,9 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada u16 eeprom_data; u16 eeprom_offset; u16 next_section_address; - u32 index; + u16 sw_section_ver; + u8 major_ver = 0; + u8 minor_ver = 0; /* TODO: deal with EEPROM endian issues */ if (nesadapter->firmware_eeprom_offset == 0) { @@ -104,6 +95,9 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada printk("Not a valid Software Image = 0x%04X\n", eeprom_data); return -1; } + sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset + 6); + nes_debug(NES_DBG_HW, "Software section version number = 
0x%04X\n", + sw_section_ver); eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2); nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n", @@ -179,7 +173,14 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada } eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8); printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); + major_ver = (u8)(eeprom_data >> 8); + minor_ver = (u8)(eeprom_data); + if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) { + nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n"); + } else if (((major_ver == 2) && (minor_ver > 21)) || (major_ver > 2)) { + nesadapter->virtwq = 1; + } nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + (u32)((u8)eeprom_data); @@ -205,22 +206,60 @@ no_fw_rev: /* Read the Phy Type array */ eeprom_offset += 10; eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); - nes_debug(NES_DBG_HW, "PhyType: 0x%04x\n", eeprom_data); + nesadapter->phy_type[0] = (u8)(eeprom_data >> 8); + nesadapter->phy_type[1] = (u8)eeprom_data; /* Read the port array */ eeprom_offset += 2; eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_type[2] = (u8)(eeprom_data >> 8); + nesadapter->phy_type[3] = (u8)eeprom_data; /* port_count is set by soft reset reg */ - for (index = 0; index < 4; index++) { - nesadapter->ports[index] = eeprom_data & 0x000f; - eeprom_data >>= 4; - } - nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u, port 2 -> %u, port 3 -> %u\n", + nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u," + " port 2 -> %u, port 3 -> %u\n", nesadapter->port_count, - nesadapter->ports[0], nesadapter->ports[1], - nesadapter->ports[2], nesadapter->ports[3]); + nesadapter->phy_type[0], nesadapter->phy_type[1], + nesadapter->phy_type[2], nesadapter->phy_type[3]); + + /* Read PD config array */ + eeprom_offset += 10; + eeprom_data = nes_read16_eeprom(nesdev->regs, 
eeprom_offset); + nesadapter->pd_config_size[0] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[0] = eeprom_data; + nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]); - eeprom_offset += 46; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[1] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[1] = eeprom_data; + nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[2] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[2] = eeprom_data; + nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]); + + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_size[3] = eeprom_data; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->pd_config_base[3] = eeprom_data; + nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n", + nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]); + + /* Read Rx Pool Size */ + eeprom_offset += 22; /* 46 */ eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); eeprom_offset += 2; nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) + @@ -290,15 +329,26 @@ no_fw_rev: nesadapter->core_clock = (((u32)eeprom_data) << 16) + nes_read16_eeprom(nesdev->regs, eeprom_offset); nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock); - } - - nesadapter->phy_index[0] = 4; - 
nesadapter->phy_index[1] = 5; - nesadapter->phy_index[2] = 6; - nesadapter->phy_index[3] = 7; - /* TODO: get this from EEPROM */ - nesdev->base_doorbell_index = 1; + if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) { + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8; + nesadapter->phy_index[1] = eeprom_data & 0x00ff; + eeprom_offset += 2; + eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset); + nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8; + nesadapter->phy_index[3] = eeprom_data & 0x00ff; + } else { + nesadapter->phy_index[0] = 4; + nesadapter->phy_index[1] = 5; + nesadapter->phy_index[2] = 6; + nesadapter->phy_index[3] = 7; + } + nes_debug(NES_DBG_HW, "Phy address map = 0 > %u, 1 > %u, 2 > %u, 3 > %u\n", + nesadapter->phy_index[0],nesadapter->phy_index[1], + nesadapter->phy_index[2],nesadapter->phy_index[3]); + } return 0; } @@ -316,7 +366,7 @@ static u16 nes_read16_eeprom(void __iomem *addr, u16 offset) } while (readl((void __iomem *)addr + NES_EEPROM_COMMAND) & NES_EEPROM_READ_REQUEST); - return(readw((void __iomem *)addr + NES_EEPROM_DATA)); + return readw((void __iomem *)addr + NES_EEPROM_DATA); } @@ -363,8 +413,8 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 u32 counter; unsigned long flags; - /* nes_debug(NES_DBG_PHY, "%s: phy addr = %d, mac_index = %d\n", - __FUNCTION__, phy_addr, nesdev->mac_index); */ + /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n", + phy_addr, nesdev->mac_index); */ spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, @@ -400,12 +450,12 @@ void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u32 u32temp; u32 counter; - dev_addr = 5; - port_addr = 0; + dev_addr = 1; + port_addr = phy_addr; /* set address */ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x00020000 | phy_reg | (dev_addr << 18) | 
(port_addr << 23)); + 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); for (counter = 0; counter < 100 ; counter++) { udelay(30); u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); @@ -420,7 +470,7 @@ void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, /* set data */ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x10020000 | data | (dev_addr << 18) | (port_addr << 23)); + 0x10020000 | (u32)data | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); for (counter = 0; counter < 100 ; counter++) { udelay(30); u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); @@ -447,12 +497,12 @@ void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr) u32 u32temp; u32 counter; - dev_addr = 5; - port_addr = 0; + dev_addr = 1; + port_addr = phy_addr; /* set address */ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x00020000 | phy_reg | (dev_addr << 18) | (port_addr << 23)); + 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); for (counter = 0; counter < 100 ; counter++) { udelay(30); u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); @@ -467,7 +517,7 @@ void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr) /* issue read */ nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, - 0x30020000 | (dev_addr << 18) | (port_addr << 23)); + 0x30020000 | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23)); for (counter = 0; counter < 100 ; counter++) { udelay(30); u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS); @@ -483,6 +533,96 @@ void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr) /** + * nes_get_cqp_request + */ +struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) +{ + unsigned long flags; + struct nes_cqp_request *cqp_request = NULL; + + if (!list_empty(&nesdev->cqp_avail_reqs)) { + spin_lock_irqsave(&nesdev->cqp.lock, flags); + cqp_request = 
list_entry(nesdev->cqp_avail_reqs.next, + struct nes_cqp_request, list); + list_del_init(&cqp_request->list); + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + } else { + cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL); + if (cqp_request) { + cqp_request->dynamic = 1; + INIT_LIST_HEAD(&cqp_request->list); + } + } + + if (cqp_request) { + init_waitqueue_head(&cqp_request->waitq); + cqp_request->waiting = 0; + cqp_request->request_done = 0; + init_waitqueue_head(&cqp_request->waitq); + nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n", + cqp_request); + } else + printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n", + __FUNCTION__); + + return cqp_request; +} + + +/** + * nes_post_cqp_request + */ +void nes_post_cqp_request(struct nes_device *nesdev, + struct nes_cqp_request *cqp_request, int ring_doorbell) +{ + struct nes_hw_cqp_wqe *cqp_wqe; + unsigned long flags; + u32 cqp_head; + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) & + (nesdev->cqp.sq_size - 1)) != 1) + && (list_empty(&nesdev->cqp_pending_reqs))) { + cqp_head = nesdev->cqp.sq_head++; + nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); + barrier(); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = cpu_to_le32((u32)((u64)(cqp_request))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = cpu_to_le32((u32)(((u64)(cqp_request))>>32)); + nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," + " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," + " waiting = %d, refcount = %d.\n", + le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, + le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, + nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, + cqp_request->waiting, 
atomic_read(&cqp_request->refcount)); + barrier(); + if (ring_doorbell) { + /* Ring doorbell (1 WQEs) */ + nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id); + } + + barrier(); + } else { + nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X" + " put on the pending queue.\n", + cqp_request, + cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]&0x3f, + cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX]); + list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs); + } + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + return; +} + + + + +/** * nes_arp_table */ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action) @@ -503,7 +643,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti arp_index = 0; err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps, - nesadapter->arp_table_size, &arp_index, &nesadapter->next_arp_index); + nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index); if (err) { nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err); return err; @@ -680,194 +820,75 @@ no_mh_work: } -/* -"Everything you wanted to know about CRC algorithms, but were afraid to ask - for fear that errors in your understanding might be detected." Version : 3. -Date : 19 August 1993. -Author : Ross N. Williams. -Net : ross at guest.adelaide.edu.au. -FTP : ftp.adelaide.edu.au/pub/rocksoft/crc_v3.txt -Company : Rocksoft� Pty Ltd. -Snail : 16 Lerwick Avenue, Hazelwood Park 5066, Australia. -Fax : +61 8 373-4911 (c/- Internode Systems Pty Ltd). -Phone : +61 8 379-9217 (10am to 10pm Adelaide Australia time). -Note : "Rocksoft" is a trademark of Rocksoft Pty Ltd, Australia. -Status : Copyright (C) Ross Williams, 1993. However, permission is granted to - make and distribute verbatim copies of this document provided that this information - block and copyright notice is included. 
Also, the C code modules included in this - document are fully public domain. - -Thanks : Thanks to Jean-loup Gailly (jloup at chorus.fr) and Mark Adler - (me at quest.jpl.nasa.gov) who both proof read this document and picked - out lots of nits as well as some big fat bugs. - -The current web page for this seems to be http://www.ross.net/crc/crcpaper.html. - -*/ - -/****************************************************************************/ -/* Generate width mask */ -/****************************************************************************/ -/* */ -/* Returns a longword whose value is (2^p_cm->cm_width)-1. */ -/* The trick is to do this portably (e.g. without doing <<32). */ -/* */ -/* Author: Tristan Gross */ -/* Source: "A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS" */ -/* Ross N. Williams */ -/* http://www.rocksoft.com */ -/* */ -/****************************************************************************/ - -static u32 nesCRCWidMask (u32 width) -{ - return(((1L<<(((u32)width)-1))-1L)<<1)|1L; -} - - -/****************************************************************************/ -/* Generate CRC table */ -/****************************************************************************/ -/* */ -/* Source: "A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS" */ -/* Ross N. Williams */ -/* http://www.rocksoft.com */ -/* */ -/****************************************************************************/ -static u32 nes_crc_table_gen ( u32 *pCRCTable, - u32 poly, - u32 order, - u32 reflectIn) -{ - u32 i; - u32 reg; - u32 byte; - u32 topbit = BITMASK(NES_CRC_WID-1); - u32 tmp; - - for (byte=0;byte<256;byte++) { - - // If we need to creat a reflected table we must reflect the index (byte) and - // reflect the final reg - tmp = (reflectIn) ? reflect(byte,8): byte; - - reg = tmp << (NES_CRC_WID-8); - - for (i=0; i<8; i++) { - if (reg & topbit) { - reg = (reg << 1) ^ poly; - } else { - reg <<= 1; - } - } - - reg = (reflectIn) ? 
reflect(reg,order): reg; - pCRCTable[byte] = reg & nesCRCWidMask(NES_CRC_WID); - } - - return 0; -} - - -/****************************************************************************/ -/* Perform 32 bit based CRC calculation */ -/****************************************************************************/ -/* */ -/* Source: "A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS" */ -/* Ross N. Williams */ -/* http://www.rocksoft.com */ -/* */ -/* This performs a standard 32 bit crc on an array of arbitrary length */ -/* with an arbitrary initial value and passed generator polynomial */ -/* in the form of a crc table. */ -/* */ -/****************************************************************************/ -static u32 reflect (u32 data, u32 num) -{ - /* Reflects the lower num bits in 'data' around their center point. */ - u32 i; - u32 j = 1; - u32 result = 0; - - for (i=(u32)1<<(num-1); i; i>>=1) { - if (data & i) result|=j; - j <<= 1; - } - return result; -} - - /** - * byte_swap + * nes_dump_mem */ -static u32 byte_swap (u32 data, u32 num) +void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length) { - u32 i; - u32 result = 0; - - if (num%16) { - dprintk("\nbyte_swap: ERROR: num is not an even number of bytes\n"); - /* ASSERT(0); */ + char xlate[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f'}; + char *ptr; + char hex_buf[80]; + char ascii_buf[20]; + int num_char; + int num_ascii; + int num_hex; + + if (!(nes_debug_level & dump_debug_level)) { + return; } - for (i = 0; i < num; i += 8) { - result |= (0xFF & (data >> i)) << (num-8-i); - } - - return result; -} - - -/** - * nes_crc32 - - * This is a reflected table algorithm. ReflectIn basically - * means to reflect each incomming byte of the data. But to make - * things more complicated, we can instead reflect the initial - * value, the final crc, and shift data to the right using a - * reflected pCRCTable. CRC is FUN!! 
- */ -u32 nes_crc32 ( u32 reverse, - u32 initialValue, - u32 finalXOR, - u32 messageLength, - u8 *pMessage, - u32 order, - u32 reflectIn, - u32 reflectOut) - -{ - u8 *pBlockAddr = pMessage; - u32 mlen = messageLength; - u32 crc; - - if (0 == nesCRCInitialized) { - nes_crc_table_gen( &nesCRCTable[0], CRC32C_POLY, ORDER, REFIN); - nesCRCInitialized = 1; + ptr = addr; + if (length > 0x100) { + nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100); + length = 0x100; } + nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", ptr, length, length); + + memset(ascii_buf, 0, 20); + memset(hex_buf, 0, 80); + + num_ascii = 0; + num_hex = 0; + for (num_char = 0; num_char < length; num_char++) { + if (num_ascii == 8) { + ascii_buf[num_ascii++] = ' '; + hex_buf[num_hex++] = '-'; + hex_buf[num_hex++] = ' '; + } - crc = (reflectIn) ? reflect(initialValue,order): initialValue; - - while (mlen--) { - /* printf("byte = %x, index = %u, crctable[index] = %x\n", - *pBlockAddr, (crc & 0xffL) ^ *pBlockAddr, - nesCRCTable[(crc & 0xffL) ^ *pBlockAddr]); - */ - if (reflectIn) { - crc = nesCRCTable[(crc & 0xffL ) ^ *pBlockAddr++] ^ (crc >> 8); - } else { - crc = nesCRCTable[((crc>>24) ^ *pBlockAddr++) & 0xFFL] ^ (crc << 8); + if (*ptr < 0x20 || *ptr > 0x7e) + ascii_buf[num_ascii++] = '.'; + else + ascii_buf[num_ascii++] = *ptr; + hex_buf[num_hex++] = xlate[((*ptr & 0xf0) >> 4)]; + hex_buf[num_hex++] = xlate[*ptr & 0x0f]; + hex_buf[num_hex++] = ' '; + ptr++; + + if (num_ascii >= 17) { + /* output line and reset */ + nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); + memset(ascii_buf, 0, 20); + memset(hex_buf, 0, 80); + num_ascii = 0; + num_hex = 0; } } - /* if reflectOut and reflectIn are both set, we don't */ - /* do anything since reflecting twice effectively does nothing. */ - crc = ((reflectIn)^(reflectOut)) ? 
reflect(crc,order): crc; - - crc = crc^finalXOR; - - /* We don't really use this, but it is here for completeness */ - crc = (reverse) ? byte_swap(crc,32): crc; + /* output the rest */ + if (num_ascii) { + while (num_ascii < 17) { + if (num_ascii == 8) { + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + } + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + hex_buf[num_hex++] = ' '; + num_ascii++; + } - return crc; + nes_debug(dump_debug_level, " %s | %s\n", hex_buf, ascii_buf); + } } - From glenn at lists.openfabrics.org Wed Nov 14 14:40:19 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Wed, 14 Nov 2007 14:40:19 -0800 (PST) Subject: [ofa-general] [PATCH 6/6] nes: Cosmetic changes; support virtual WQs and PPC Message-ID: <20071114224019.79ACFE2807D@openfabrics.org> Updated code for the NetEffect NE020 adapter. Updates include: - Support for userspace/virtual WQs. - PowerPC - Support for multiple debugging levels - Many, many cosmetic changes in line with kernel.org standards Diffs for nes_verbs.c and nes_verbs.h Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 7a0aee7..311127e 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -34,15 +34,16 @@ #include #include #include +#include #include #include #include #include + #include "nes.h" -#ifndef OFED_1_2 + #include -#endif atomic_t mod_qp_timouts; atomic_t qps_created; @@ -83,7 +84,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { return ERR_PTR(ret); } - nesmr = kmalloc(sizeof(*nesmr), GFP_KERNEL); + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); if (!nesmr) { nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); return ERR_PTR(-ENOMEM); @@ -97,12 +98,13 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { stag, stag_index); /* Register the region with the adapter */ - cqp_request = nes_get_cqp_request(nesdev, 
NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { kfree(nesmr); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); return ERR_PTR(-ENOMEM); } + cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; @@ -120,7 +122,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = - cpu_to_le32(nespd->pd_id&0x00007fff); + cpu_to_le32(nespd->pd_id & 0x00007fff); cqp_wqe->wqe_words[NES_CQP_STAG_WQE_STAG_IDX] = cpu_to_le32(stag); cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_LOW_IDX] = 0; @@ -129,11 +131,10 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0; atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," " CQP Major:Minor codes = 0x%04X:0x%04X.\n", @@ -141,10 +142,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { if ((!ret) || (cqp_request->major_code)) { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -160,10 +159,8 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { } else { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { 
- atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -197,8 +194,8 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) int ret; /* Deallocate the window with the adapter */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); return -ENOMEM; } @@ -217,8 +214,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) cqp_wqe->wqe_words[NES_CQP_STAG_WQE_STAG_IDX] = cpu_to_le32(ibmw->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n", @@ -231,10 +227,8 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) if ((!ret) || (cqp_request->major_code)) { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -248,10 +242,8 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) } else { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -260,7 +252,7 @@ static int nes_dealloc_mw(struct ib_mw *ibmw) } 
nes_free_resource(nesadapter, nesadapter->allocated_mrs, - (ibmw->rkey&0x0fffff00) >> 8); + (ibmw->rkey & 0x0fffff00) >> 8); kfree(nesmr); return err; @@ -294,6 +286,7 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, /* Check for SQ overflow */ if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { + spin_unlock_irqrestore(&nesqp->lock, flags); return -EINVAL; } @@ -301,8 +294,10 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */ u64temp = (u64)ibmw_bind->wr_id; wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX] = cpu_to_le32((u32)u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX] = cpu_to_le32((u32)((u64temp)>>32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)nesqp)>>32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX] = + cpu_to_le32((u32)((u64temp)>>32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)nesqp)>>32)); wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] = (u32)((u64)nesqp); wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] |= head; wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] = @@ -329,7 +324,8 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0; u64temp = (u64)ibmw_bind->addr; wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX] = cpu_to_le32((u32)u64temp); - wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX] = cpu_to_le32((u32)(u64temp>>32)); + wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX] = + cpu_to_le32((u32)(u64temp >> 32)); head++; if (head >= qsize) @@ -338,7 +334,7 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, nesqp->hwqp.sq_head = head; barrier(); - nes_write32(nesdev->regs + NES_WQE_ALLOC, + nes_write32(nesdev->regs+NES_WQE_ALLOC, (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); 
spin_unlock_irqrestore(&nesqp->lock, flags); @@ -385,7 +381,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, goto failed_resource_alloc; } - nesfmr = kmalloc(sizeof(*nesfmr), GFP_KERNEL); + nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL); if (!nesfmr) { ret = -ENOMEM; goto failed_fmr_alloc; @@ -408,17 +404,17 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, /* use two level 4K PBLs */ /* add support for two level 256B PBLs */ nesfmr->nesmr.pbl_4k = 1; - nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages>>9) + - ((ibfmr_attr->max_pages&511)?1:0); + nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) + + ((ibfmr_attr->max_pages & 511) ? 1 : 0); } /* Register the region with the adapter */ - spin_lock_irqsave(&nesdev->cqp.lock, flags); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); /* track PBL resources */ if (nesfmr->nesmr.pbls_used != 0) { if (nesfmr->nesmr.pbl_4k) { if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); ret = -ENOMEM; goto failed_vpbl_alloc; } else { @@ -426,7 +422,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, } } else { if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); ret = -ENOMEM; goto failed_vpbl_alloc; } else { @@ -444,7 +440,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, &nesfmr->root_vpbl.pbl_pbase); if (!nesfmr->root_vpbl.pbl_vbase) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); ret = -ENOMEM; goto failed_vpbl_alloc; } @@ -457,14 +453,14 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, &nesfmr->root_vpbl.pbl_pbase); if 
(!nesfmr->root_vpbl.pbl_vbase) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); ret = -ENOMEM; goto failed_vpbl_alloc; } - nesfmr->root_vpbl.leaf_vpbl = kmalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); + nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); if (!nesfmr->root_vpbl.leaf_vpbl) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); ret = -ENOMEM; goto failed_leaf_vpbl_alloc; } @@ -503,9 +499,9 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, stag |= driver_key; stag += (u32)stag_key; - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); ret = -ENOMEM; goto failed_leaf_vpbl_pages_alloc; @@ -517,9 +513,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, stag, stag_index); cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( - NES_CQP_ALLOCATE_STAG | - NES_CQP_STAG_VA_TO | - NES_CQP_STAG_MR); + NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR); if (nesfmr->nesmr.pbl_4k == 1) cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); @@ -535,7 +529,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) { cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= - cpu_to_le32( NES_CQP_STAG_RIGHTS_REMOTE_READ | + cpu_to_le32(NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN); nesfmr->access_rights |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ | @@ -545,11 +539,11 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, 
cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = - cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = cpu_to_le32(nespd->pd_id & 0x00007fff); cqp_wqe->wqe_words[NES_CQP_STAG_WQE_STAG_IDX] = cpu_to_le32(stag); @@ -562,11 +556,10 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0; atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," " CQP Major:Minor codes = 0x%04X:0x%04X.\n", @@ -575,10 +568,8 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, if ((!ret) || (cqp_request->major_code)) { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -589,10 +580,8 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, } else { if 
(atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -605,6 +594,7 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, nesfmr->attr = *ibfmr_attr; return &nesfmr->nesmr.ibfmr; + failed_leaf_vpbl_pages_alloc: /* unroll all allocated pages */ for (i=0; ileaf_pbl_cnt; i++) { @@ -614,7 +604,8 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, } } if (nesfmr->root_vpbl.leaf_vpbl) - kfree( nesfmr->root_vpbl.leaf_vpbl ); + kfree(nesfmr->root_vpbl.leaf_vpbl); + failed_leaf_vpbl_alloc: if (nesfmr->leaf_pbl_cnt == 0) { if (nesfmr->root_vpbl.pbl_vbase) @@ -623,10 +614,13 @@ static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, } else pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase, nesfmr->root_vpbl.pbl_pbase); + failed_vpbl_alloc: kfree(nesfmr); + failed_fmr_alloc: nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + failed_resource_alloc: return ERR_PTR(ret); } @@ -656,7 +650,7 @@ static int nes_dealloc_fmr(struct ib_fmr *ibfmr) pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase, nesfmr->root_vpbl.pbl_pbase); } else { - for (i=0; ileaf_pbl_cnt; i++) { + for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) { pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase, nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase); } @@ -781,7 +775,7 @@ static int nes_modify_port(struct ib_device *ibdev, u8 port, /** * nes_query_pkey */ -static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey) +static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { *pkey = 0; return 0; @@ -812,34 +806,51 @@ static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev, { struct nes_vnic 
*nesvnic = to_nesvnic(ibdev); struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_alloc_ucontext_req req; struct nes_alloc_ucontext_resp uresp; struct nes_ucontext *nes_ucontext; struct nes_ib_device *nesibdev = nesvnic->nesibdev; + + if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) { + printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n"); + return ERR_PTR(-EINVAL); + } + + if (req.userspace_ver != NES_ABI_USERSPACE_VER) { + printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n", + req.userspace_ver, NES_ABI_USERSPACE_VER); + return ERR_PTR(-EINVAL); + } + + memset(&uresp, 0, sizeof uresp); uresp.max_qps = nesibdev->max_qp; uresp.max_pds = nesibdev->max_pd; - uresp.wq_size = nesdev->nesadapter->max_qp_wr*2; + uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2; + uresp.virtwq = nesadapter->virtwq; + uresp.kernel_ver = NES_ABI_KERNEL_VER; - nes_ucontext = kmalloc(sizeof *nes_ucontext, GFP_KERNEL); + nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL); if (!nes_ucontext) return ERR_PTR(-ENOMEM); - memset(nes_ucontext, 0, sizeof(struct nes_ucontext)); - nes_ucontext->nesdev = nesdev; nes_ucontext->mmap_wq_offset = ((uresp.max_pds * 4096) + PAGE_SIZE-1) / PAGE_SIZE; nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset + ((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) / PAGE_SIZE; + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { kfree(nes_ucontext); return ERR_PTR(-EFAULT); } INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list); + INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list); return &nes_ucontext->ibucontext; } @@ -882,7 +893,7 @@ static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return -EFAULT; } nesqp = nes_ucontext->mmap_nesqp[index]; - if (NULL == nesqp) { + if (nesqp == NULL) { nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index); return -EFAULT; 
} @@ -903,7 +914,7 @@ static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, (nesdev->doorbell_start + - ((nes_ucontext->mmap_db_index[index]-nesdev->base_doorbell_index) * 4096)) + ((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096)) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; vma->vm_private_data = nes_ucontext; @@ -929,7 +940,8 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, u32 pd_num = 0; int err; - nes_debug(NES_DBG_PD, "netdev refcnt=%u\n", + nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", + nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context, atomic_read(&nesvnic->netdev->refcnt)); err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, @@ -938,12 +950,12 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, return ERR_PTR(err); } - nespd = kmalloc(sizeof (struct nes_pd), GFP_KERNEL); + nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL); if (!nespd) { nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num); return ERR_PTR(-ENOMEM); } - memset(nespd, 0, sizeof(struct nes_pd)); + nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", nespd, nesvnic->nesibdev->ibdev.name); @@ -1031,6 +1043,221 @@ static int nes_destroy_ah(struct ib_ah *ah) /** + * nes_get_encoded_size + */ +static inline u8 nes_get_encoded_size(u32 *size) +{ + u8 encoded_size = 0; + if (*size <= 32) { + *size = 32; + encoded_size = 1; + } else if (*size <= 128) { + *size = 128; + encoded_size = 2; + } else if (*size <= 512) { + *size = 512; + encoded_size = 3; + } + return (encoded_size); +} + + + +/** + * nes_setup_virt_qp + */ +static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl, + struct nes_vnic *nesvnic, int sq_size, int rq_size) +{ + unsigned long flags; + void *mem; + u64 *pbl = NULL; + u64 *tpbl; 
+ u64 *pblbuffer; + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 pbl_entries; + u8 rq_pbl_entries; + u8 sq_pbl_entries; + + pbl_entries = nespbl->pbl_size >> 3; + nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%p\n", + nespbl->pbl_size, pbl_entries, + (void *)nespbl->pbl_vbase, + (void *)nespbl->pbl_pbase); + pbl = nespbl->pbl_vbase; /* points to first pbl entry */ + /* now lets set the sq_vbase as well as rq_vbase addrs we will assign */ + /* the first pbl to be fro the rq_vbase... */ + rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> PAGE_SHIFT; + sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> PAGE_SHIFT; + nesqp->hwqp.sq_pbase = (le32_to_cpu (((u32 *)pbl)[0]) ) | ((u64)((le32_to_cpu (((u32 *)pbl)[1]))) << 32); + if (!nespbl->page) { + nes_debug(NES_DBG_QP, "QP nespbl->page is NULL \n"); + kfree(nespbl); + return -ENOMEM; + } + + nesqp->hwqp.sq_vbase = kmap(nespbl->page); + nesqp->page = nespbl->page; + + nesqp->hwqp.sq_vbase = ioremap(nesqp->hwqp.sq_pbase, PAGE_SIZE); + if (!nesqp->hwqp.sq_vbase) { + nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n"); + kfree(nespbl); + return -ENOMEM; + } + + /* Now to get to sq.. we need to calculate how many */ + /* PBL entries were used by the rq.. 
*/ + pbl += sq_pbl_entries; + nesqp->hwqp.rq_pbase = (le32_to_cpu (((u32 *)pbl)[0]) ) | ((u64)((le32_to_cpu (((u32 *)pbl)[1]))) << 32); + /* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */ + /*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */ + + nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%p rq_vbase=%p rq_pbase=%p\n", + nesqp->hwqp.sq_vbase, (void *)nesqp->hwqp.sq_pbase, + nesqp->hwqp.rq_vbase, (void *)nesqp->hwqp.rq_pbase); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (!nesadapter->free_256pbl) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + kfree(nespbl); + return -ENOMEM; + } + nesadapter->free_256pbl--; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase); + pblbuffer = nesqp->pbl_vbase; + if (!nesqp->pbl_vbase) { + /* memory allocated during nes_reg_user_mr() */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + return -ENOMEM; + } + memset(nesqp->pbl_vbase, 0, 256); + /* fill in the page address in the pbl buffer.. 
*/ + tpbl = pblbuffer + 16; + pbl = nespbl->pbl_vbase; + while (sq_pbl_entries--) + *tpbl++ = *pbl++; + tpbl = pblbuffer; + while (rq_pbl_entries--) + *tpbl++ = *pbl++; + + /* done with memory allocated during nes_reg_user_mr() */ + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + + nesqp->qp_mem_size = + max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256; /* this is Q2 */ + /* Round up to a multiple of a page */ + nesqp->qp_mem_size += PAGE_SIZE - 1; + nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); + + mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, + &nesqp->hwqp.q2_pbase); + + if (!mem) { + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase); + nesqp->pbl_vbase = NULL; + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + kunmap(nesqp->page); + return -ENOMEM; + } + nesqp->hwqp.q2_vbase = mem; + mem += 256; + memset(nesqp->hwqp.q2_vbase, 0, 256); + nesqp->nesqp_context = mem; + memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); + nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; + + return 0; +} + + +/** + * nes_setup_mmap_qp + */ +static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic, + int sq_size, int rq_size) +{ + void *mem; + struct nes_device *nesdev = nesvnic->nesdev; + + nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) + + (sizeof(struct nes_hw_qp_wqe) * rq_size) + + max((u32)sizeof(struct nes_qp_context), ((u32)256)) + + 256; /* this is Q2 */ + /* Round up to a multiple of a page */ + nesqp->qp_mem_size += PAGE_SIZE - 1; + nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); + + mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, + &nesqp->hwqp.sq_pbase); + if (!mem) + return -ENOMEM; + nes_debug(NES_DBG_QP, "PCI consistent memory for " + "host descriptor rings located @ %p (pa = 0x%08lX.) 
size = %u.\n", + mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size); + + memset(mem, 0, nesqp->qp_mem_size); + + nesqp->hwqp.sq_vbase = mem; + mem += sizeof(struct nes_hw_qp_wqe) * sq_size; + + nesqp->hwqp.rq_vbase = mem; + nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase + + sizeof(struct nes_hw_qp_wqe) * sq_size; + mem += sizeof(struct nes_hw_qp_wqe) * rq_size; + + nesqp->hwqp.q2_vbase = mem; + nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase + + sizeof(struct nes_hw_qp_wqe) * rq_size; + mem += 256; + memset(nesqp->hwqp.q2_vbase, 0, 256); + + nesqp->nesqp_context = mem; + nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; + memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); + return 0; +} + + +/** + * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory. + */ +static inline void nes_free_qp_mem(struct nes_device *nesdev, + struct nes_qp *nesqp, int virt_wqs) +{ + unsigned long flags; + struct nes_adapter *nesadapter = nesdev->nesadapter; + if (!virt_wqs) { + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, + nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + }else { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); + pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase ); + nesqp->pbl_vbase = NULL; + kunmap(nesqp->page); + } +} + + +/** * nes_create_qp */ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, @@ -1047,12 +1274,16 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, struct nes_ucontext *nes_ucontext; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; + struct nes_create_qp_req req; struct nes_create_qp_resp uresp; + struct nes_pbl *nespbl = NULL; u32 qp_num = 0; /* u32 counter = 0; */ void *mem; unsigned long flags; int ret; + int err; + int virt_wqs = 0; int sq_size; int 
rq_size; u8 sq_encoded_size; @@ -1067,36 +1298,20 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } else { init_attr->cap.max_inline_data = 64; } + sq_size = init_attr->cap.max_send_wr; + rq_size = init_attr->cap.max_recv_wr; - if (init_attr->cap.max_send_wr < 32) { - sq_size = 32; - sq_encoded_size = 1; - } else if (init_attr->cap.max_send_wr < 128) { - sq_size = 128; - sq_encoded_size = 2; - } else if (init_attr->cap.max_send_wr < 512) { - sq_size = 512; - sq_encoded_size = 3; - } else { - printk(KERN_ERR PFX "%s: SQ size (%u) too large.\n", - __FUNCTION__, init_attr->cap.max_send_wr); - return ERR_PTR(-EINVAL); - } - init_attr->cap.max_send_wr = sq_size - 2; - if (init_attr->cap.max_recv_wr < 32) { - rq_size = 32; - rq_encoded_size = 1; - } else if (init_attr->cap.max_recv_wr < 128) { - rq_size = 128; - rq_encoded_size = 2; - } else if (init_attr->cap.max_recv_wr < 512) { - rq_size = 512; - rq_encoded_size = 3; - } else { - printk(KERN_ERR PFX "%s: RQ size (%u) too large.\n", - __FUNCTION__, init_attr->cap.max_recv_wr); + // check if the encoded sizes are OK or not... 
+ sq_encoded_size = nes_get_encoded_size(&sq_size); + rq_encoded_size = nes_get_encoded_size(&rq_size); + + if ((!sq_encoded_size) || (!rq_encoded_size)) { + nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n", + rq_size, sq_size); return ERR_PTR(-EINVAL); } + + init_attr->cap.max_send_wr = sq_size -2; init_attr->cap.max_recv_wr = rq_size -1; nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size); @@ -1123,18 +1338,53 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nesqp->allocated_buffer = mem; if (udata) { + if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) { + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); + return NULL; + } + if (req.user_wqe_buffers) { + virt_wqs = 1; + } if ((ibpd->uobject) && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); + if (virt_wqs) { + err = 1; + list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) { + if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) { + list_del(&nespbl->list); + err = 0; + nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n", + nespbl, nespbl->user_base); + break; + } + } + if (err) { + nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. 
address = %llx.\n", + (long long unsigned int)req.user_wqe_buffers); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + kfree(nesqp->allocated_buffer); + return ERR_PTR(-ENOMEM); + } + } + + nes_ucontext = to_nesucontext(ibpd->uobject->context); nesqp->mmap_sq_db_index = - find_next_zero_bit(nes_ucontext->allocated_wqs, - NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); + find_next_zero_bit(nes_ucontext->allocated_wqs, + NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq); /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n", - nespd->mmap_db_index); */ + nespd->mmap_db_index); */ if (nesqp->mmap_sq_db_index > NES_MAX_USER_WQ_REGIONS) { nes_debug(NES_DBG_QP, - "db index > max user regions, failing create QP\n"); + "db index > max user regions, failing create QP\n"); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + if (virt_wqs) { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase, + nespbl->pbl_pbase); + kfree(nespbl); + } kfree(nesqp->allocated_buffer); return ERR_PTR(-ENOMEM); } @@ -1147,53 +1397,21 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, return ERR_PTR(-EFAULT); } } - - nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) + - (sizeof(struct nes_hw_qp_wqe) * rq_size) + - max((u32)sizeof(struct nes_qp_context), ((u32)256)) + - 256; /* this is Q2 */ - /* Round up to a multiple of a page */ - nesqp->qp_mem_size += PAGE_SIZE - 1; - nesqp->qp_mem_size &= ~(PAGE_SIZE - 1); - - mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size, - &nesqp->hwqp.sq_pbase); - if (!mem) { - nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + err = (!virt_wqs) ? 
nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) : + nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size); + if (err) { nes_debug(NES_DBG_QP, - "Unable to allocate memory for host descriptor rings\n"); + "error geting qp mem code = %d\n", err); + nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); return ERR_PTR(-ENOMEM); } - nes_debug(NES_DBG_QP, "PCI consistent memory for " - "host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n", - mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size); - - memset(mem, 0, nesqp->qp_mem_size); - nesqp->hwqp.sq_vbase = mem; nesqp->hwqp.sq_size = sq_size; nesqp->hwqp.sq_encoded_size = sq_encoded_size; nesqp->hwqp.sq_head = 1; - mem += sizeof(struct nes_hw_qp_wqe) * sq_size; - - nesqp->hwqp.rq_vbase = mem; nesqp->hwqp.rq_size = rq_size; nesqp->hwqp.rq_encoded_size = rq_encoded_size; - nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase + - sizeof(struct nes_hw_qp_wqe) * sq_size; - mem += sizeof(struct nes_hw_qp_wqe)*rq_size; - - nesqp->hwqp.q2_vbase = mem; - nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase + - sizeof(struct nes_hw_qp_wqe) * rq_size; - mem += 256; - memset(nesqp->hwqp.q2_vbase, 0, 256); - - nesqp->nesqp_context = mem; - nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256; - memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context)); - /* nes_debug(NES_DBG_QP, "nesqp->nesqp_context_pbase = %p\n", (void *)nesqp->nesqp_context_pbase); */ @@ -1219,13 +1437,25 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, u64temp = (u64)nesqp->hwqp.sq_pbase; nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); - u64temp = (u64)nesqp->hwqp.rq_pbase; - nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); - nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); - spin_lock_irqsave(&nesdev->cqp.lock, flags); + + + if (!virt_wqs) { + u64temp = 
(u64)nesqp->hwqp.sq_pbase; + nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)nesqp->hwqp.rq_pbase; + nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + } else { + u64temp = (u64)nesqp->pbl_pbase; + nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp); + nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32)); + } + /* nes_debug(NES_DBG_QP, "next_qp_nic_index=%u, using nic_index=%d\n", nesvnic->next_qp_nic_index, nesvnic->qp_nic_index[nesvnic->next_qp_nic_index]); */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); nesqp->nesqp_context->misc2 |= cpu_to_le32( (u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] << NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT); @@ -1239,9 +1469,9 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16); u64temp = (u64)nesqp->hwqp.q2_pbase; nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp); - nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp>>32)); + nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32)); nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((u64)(nesqp))); - nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(((u64)(nesqp))>>32)); + nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(((u64)(nesqp)) >> 32)); nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM | ((((u32)nesadapter->max_irrq_wr) << NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK)); @@ -1252,21 +1482,26 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, /* Create the QP */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { 
nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n"); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, - nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); kfree(nesqp->allocated_buffer); return ERR_PTR(-ENOMEM); } cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + if (!virt_wqs) { + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_IWARP_STATE_IDLE); + } else { + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS | + NES_CQP_QP_IWARP_STATE_IDLE); + } cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_QP_CQS_VALID); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = @@ -1281,14 +1516,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, cpu_to_le32((u32)(u64temp >> 32)); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n", nesqp->hwqp.qp_id); ret = wait_event_timeout(cqp_request->waitq, - (0 != cqp_request->request_done), NES_EVENT_TIMEOUT); + (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u," " nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u," " CQP Major:Minor codes = 0x%04X:0x%04X.\n", @@ -1297,18 +1531,15 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, if ((!ret) || (cqp_request->major_code)) { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); 
kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); } } nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, - nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); kfree(nesqp->allocated_buffer); if (!ret) { return ERR_PTR(-ETIME); @@ -1318,10 +1549,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } else { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -1336,9 +1565,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, uresp.qp_id = nesqp->hwqp.qp_id; uresp.nes_drv_opt = nes_drv_opt; if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { - pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, - nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase); nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); + nes_free_qp_mem(nesdev, nesqp,virt_wqs); kfree(nesqp->allocated_buffer); return ERR_PTR(-EFAULT); } @@ -1430,9 +1658,7 @@ static int nes_destroy_qp(struct ib_qp *ibqp) * nes_create_cq */ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, -#ifndef OFED_1_2 int comp_vector, -#endif struct ib_ucontext *context, struct ib_udata *udata) { u64 u64temp; @@ -1459,13 +1685,12 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, return ERR_PTR(err); } - nescq = kmalloc(sizeof(struct nes_cq), GFP_KERNEL); + nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL); if (!nescq) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); nes_debug(NES_DBG_CQ, "Unable to 
allocate nes_cq struct\n"); return ERR_PTR(-ENOMEM); } - memset(nescq, 0, sizeof(struct nes_cq)); nescq->hw_cq.cq_size = max(entries + 1, 5); nescq->hw_cq.cq_number = cq_num; @@ -1523,8 +1748,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, spin_lock_init(&nescq->lock); /* send CreateCQ request to CQP */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, @@ -1540,20 +1765,20 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | NES_CQP_CQ_CHK_OVERFLOW | NES_CQP_CQ_CEQE_MASK |((u32)nescq->hw_cq.cq_size << 16)); - spin_lock_irqsave(&nesdev->cqp.lock, flags); - if (1 != pbl_entries) { + + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + + if (pbl_entries != 1) { if (pbl_entries > 32) { /* use 4k pbl */ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries); - if (0 == nesadapter->free_4kpbl) { + if (nesadapter->free_4kpbl == 0) { if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - atomic_inc(&cqp_reqs_dynfreed); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, @@ -1570,15 +1795,13 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, } else { /* use 256 byte pbl */ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries); - if (0 == nesadapter->free_256pbl) { + if (nesadapter->free_256pbl == 0) { if (cqp_request->dynamic) { - 
spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - atomic_inc(&cqp_reqs_dynfreed); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, @@ -1594,12 +1817,16 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, } } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); if (context) { - if (1 != pbl_entries) + if (pbl_entries != 1) u64temp = (u64)nespbl->pbl_pbase; else u64temp = le64_to_cpu(nespbl->pbl_vbase[0]); @@ -1613,13 +1840,13 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, cqp_wqe->wqe_words[NES_CQP_CQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; u64temp = (u64)&nescq->hw_cq; - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp>>1)); - cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = cpu_to_le32(((u32)((u64temp)>>33))&0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = + cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); atomic_set(&cqp_request->refcount, 2); - 
nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n", @@ -1631,10 +1858,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, if ((!ret) || (cqp_request->major_code)) { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -1652,10 +1877,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, } else { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -1707,8 +1930,8 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number); /* Send DestroyCQ request to CQP */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n"); return -ENOMEM; } @@ -1718,7 +1941,7 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16)); - spin_lock_irqsave(&nesdev->cqp.lock, flags); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); if (nescq->virtual_cq == 1) { nesadapter->free_256pbl++; if 
(nesadapter->free_256pbl > nesadapter->max_256pbl) { @@ -1734,23 +1957,24 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_CQ_4KB_CHUNK); } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; - atomic_set(&cqp_request->refcount, 2); nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + atomic_set(&cqp_request->refcount, 2); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n", nescq->hw_cq.cq_number); - /* cqp_head = (cqp_head+1)&(nesdev->cqp.sq_size-1); */ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u," @@ -1760,10 +1984,8 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) if ((!ret) || (cqp_request->major_code)) { if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); 
list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -1782,10 +2004,8 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) ret = 0; if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -1819,15 +2039,15 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, u16 major_code; /* Register the region with the adapter */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); return -ENOMEM; } cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - spin_lock_irqsave(&nesdev->cqp.lock, flags); + spin_lock_irqsave(&nesadapter->pbl_lock, flags); /* track PBL resources */ if (pbl_count != 0) { if (pbl_count > 1) { @@ -1835,13 +2055,11 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, if ((pbl_count+1) > nesadapter->free_4kpbl) { nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n"); if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - atomic_inc(&cqp_reqs_dynfreed); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } return -ENOMEM; } else { @@ -1851,13 +2069,11 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, if (pbl_count > nesadapter->free_4kpbl) { nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n"); if (cqp_request->dynamic) { - 
spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - atomic_inc(&cqp_reqs_dynfreed); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } return -ENOMEM; } else { @@ -1867,13 +2083,11 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, if (pbl_count > nesadapter->free_256pbl) { nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n"); if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - atomic_inc(&cqp_reqs_dynfreed); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } return -ENOMEM; } else { @@ -1881,7 +2095,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, } } } - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ); @@ -1902,8 +2117,9 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32( NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN); } - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; 
cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_STAG_WQE_VA_LOW_IDX] = cpu_to_le32((u32)*iova_start); @@ -1938,8 +2154,7 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, barrier(); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), @@ -1950,10 +2165,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, major_code = cqp_request->major_code; if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -1975,7 +2188,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, */ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, - u64 * iova_start) { + u64 * iova_start) +{ u64 region_length; struct nes_pd *nespd = to_nespd(ib_pd); struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device); @@ -2021,7 +2235,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, return ERR_PTR(err); } - nesmr = kmalloc(sizeof(*nesmr), GFP_KERNEL); + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); if (!nesmr) { nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); return ERR_PTR(-ENOMEM); @@ -2030,7 +2244,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, for (i = 0; i < num_phys_buf; i++) { if ((i & 0x01FF) == 0) { - if (1 == root_pbl_index) { + if (root_pbl_index == 1) { /* Allocate the root PBL */ root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, 
&root_vpbl.pbl_pbase); @@ -2043,7 +2257,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, kfree(nesmr); return ERR_PTR(-ENOMEM); } - root_vpbl.leaf_vpbl = kmalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); if (!root_vpbl.leaf_vpbl) { pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, root_vpbl.pbl_pbase); @@ -2168,7 +2382,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, /** * nes_get_dma_mr */ -static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) { +static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) +{ struct ib_phys_buf bl; u64 kva = 0; @@ -2183,14 +2398,9 @@ static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc) { /** * nes_reg_user_mr */ -#ifdef OFED_1_2 -static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, - int acc, struct ib_udata *udata) -#else static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) -#endif - { +{ u64 iova_start; u64 *pbl; u64 region_length; @@ -2205,15 +2415,14 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_ucontext *nes_ucontext; struct nes_pbl *nespbl; struct nes_mr *nesmr; -#ifndef OFED_1_2 struct ib_umem *region; -#endif struct nes_mem_reg_req req; struct nes_vpbl vpbl; struct nes_root_vpbl root_vpbl; - int j; + int nmap_index, page_index; int page_count = 0; int err, pbl_depth = 0; + int chunk_pages; int ret; u32 stag; u32 stag_index = 0; @@ -2225,15 +2434,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u8 single_page = 1; u8 stag_key; - - nes_debug(NES_DBG_MR, "\n"); - -#ifdef OFED_1_2 - nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u," - " offset = %u, page size = %u.\n", - region->user_base, region->virt_base, (u32)region->length, - region->offset, region->page_size); -#else region = 
ib_umem_get(pd->uobject->context, start, length, acc); if (IS_ERR(region)) { return (struct ib_mr *)region; @@ -2241,7 +2441,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u\n", (unsigned long int)start, (unsigned long int)virt, (u32)length); -#endif if (ib_copy_from_udata(&req, udata, sizeof(req))) return ERR_PTR(-EFAULT); @@ -2266,33 +2465,25 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, &stag_index, &next_stag_index); if (err) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif return ERR_PTR(err); } - nesmr = kmalloc(sizeof(*nesmr), GFP_KERNEL); + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); if (!nesmr) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); return ERR_PTR(-ENOMEM); } -#ifndef OFED_1_2 nesmr->region = region; -#endif list_for_each_entry(chunk, ®ion->chunk_list, list) { nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n", chunk->nents, chunk->nmap); - for (j = 0; j < chunk->nmap; ++j) { + for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { if ((page_count&0x01FF) == 0) { if (page_count>(1024*512)) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); nes_free_resource(nesadapter, @@ -2300,15 +2491,13 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, kfree(nesmr); return ERR_PTR(-E2BIG); } - if (1 == root_pbl_index) { + if (root_pbl_index == 1) { root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, &root_vpbl.pbl_pbase); nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); if (!root_vpbl.pbl_vbase) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif 
pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); nes_free_resource(nesadapter, nesadapter->allocated_mrs, @@ -2316,12 +2505,10 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, kfree(nesmr); return ERR_PTR(-ENOMEM); } - root_vpbl.leaf_vpbl = kmalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); if (!root_vpbl.leaf_vpbl) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, root_vpbl.pbl_pbase); pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, @@ -2342,9 +2529,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); if (!vpbl.pbl_vbase) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); ibmr = ERR_PTR(-ENOMEM); kfree(nesmr); @@ -2360,22 +2545,18 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, root_pbl_index++; cur_pbl_index = 0; } - if (sg_dma_address(&chunk->page_list[j]) & ~PAGE_MASK) { -#ifndef OFED_1_2 + if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) { ib_umem_release(region); -#endif nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", - (unsigned int) sg_dma_address(&chunk->page_list[j])); + (unsigned int) sg_dma_address(&chunk->page_list[nmap_index])); ibmr = ERR_PTR(-EINVAL); kfree(nesmr); goto reg_user_mr_err; } - if (!sg_dma_len(&chunk->page_list[j])) { -#ifndef OFED_1_2 + if (!sg_dma_len(&chunk->page_list[nmap_index])) { ib_umem_release(region); -#endif nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); @@ -2384,25 +2565,33 @@ static struct ib_mr 
*nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto reg_user_mr_err; } - region_length += sg_dma_len(&chunk->page_list[j]); - if (single_page) { - if (page_count != 0) { - if ((last_dma_addr+PAGE_SIZE) != - sg_dma_address(&chunk->page_list[j])) - single_page = 0; - last_dma_addr = sg_dma_address(&chunk->page_list[j]); - } else { - first_dma_addr = sg_dma_address(&chunk->page_list[j]); - last_dma_addr = first_dma_addr; + region_length += sg_dma_len(&chunk->page_list[nmap_index]); + chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> PAGE_SHIFT; + for (page_index=0; page_index < chunk_pages; page_index++) { + if (single_page) { + if (page_count != 0) { + if ((last_dma_addr+PAGE_SIZE) != + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*PAGE_SIZE))) + single_page = 0; + last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*PAGE_SIZE); + } else { + first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*PAGE_SIZE); + last_dma_addr = first_dma_addr; + } } - } - vpbl.pbl_vbase[cur_pbl_index].pa_low = - cpu_to_le32((u32)sg_dma_address(&chunk->page_list[j])); - vpbl.pbl_vbase[cur_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)sg_dma_address(&chunk->page_list[j]))>>32))); - cur_pbl_index++; - page_count++; + vpbl.pbl_vbase[cur_pbl_index].pa_low = + cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*PAGE_SIZE))); + vpbl.pbl_vbase[cur_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*PAGE_SIZE))) >> 32))); + cur_pbl_index++; + page_count++; + } } } @@ -2416,11 +2605,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, stag = 1; } -#ifdef OFED_1_2 - iova_start = (u64)region->virt_base; -#else iova_start = virt; -#endif nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X," " index = 0x%08X, region->length=0x%08llx\n", stag, (unsigned int)iova_start, @@ -2454,9 
+2639,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nesmr->pbls_used++; } } else { -#ifndef OFED_1_2 ib_umem_release(region); -#endif kfree(nesmr); ibmr = ERR_PTR(-ENOMEM); } @@ -2467,10 +2650,10 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase); } else { - for (j=0; j<root_pbl_index; j++) { + for (page_index=0; page_index<root_pbl_index; page_index++) { pci_free_consistent(nesdev->pcidev, 4096, - root_vpbl.leaf_vpbl[j].pbl_vbase, - root_vpbl.leaf_vpbl[j].pbl_pbase); + root_vpbl.leaf_vpbl[page_index].pbl_vbase, + root_vpbl.leaf_vpbl[page_index].pbl_pbase); } kfree(root_vpbl.leaf_vpbl); pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, @@ -2482,75 +2665,76 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ibmr; break; case IWNES_MEMREG_TYPE_QP: -#ifndef OFED_1_2 - ib_umem_release(region); -#endif - return ERR_PTR(-ENOSYS); - break; case IWNES_MEMREG_TYPE_CQ: - nespbl = kmalloc(sizeof(*nespbl), GFP_KERNEL); + nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); if (!nespbl) { nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); -#ifndef OFED_1_2 ib_umem_release(region); -#endif return ERR_PTR(-ENOMEM); } - memset(nespbl, 0, sizeof(*nespbl)); - nesmr = kmalloc(sizeof(*nesmr), GFP_KERNEL); + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); if (!nesmr) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif kfree(nespbl); nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n"); return ERR_PTR(-ENOMEM); } - memset(nesmr, 0, sizeof(*nesmr)); -#ifndef OFED_1_2 nesmr->region = region; -#endif nes_ucontext = to_nesucontext(pd->uobject->context); pbl_depth = region->length >> PAGE_SHIFT; pbl_depth += (region->length & ~PAGE_MASK) ? 
1 : 0; nespbl->pbl_size = pbl_depth*sizeof(u64); - nes_debug(NES_DBG_MR, "Attempting to allocate CQ PBL memory, %u bytes, %u entries.\n", + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { + nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory"); + } else { + nes_debug(NES_DBG_MR, "Attempting to allocate CP PBL memory"); + } + + nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n", nespbl->pbl_size, pbl_depth); pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size, &nespbl->pbl_pbase); if (!pbl) { -#ifndef OFED_1_2 ib_umem_release(region); -#endif kfree(nesmr); kfree(nespbl); - nes_debug(NES_DBG_MR, "Unable to allocate cq PBL memory\n"); + nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n"); return ERR_PTR(-ENOMEM); } nespbl->pbl_vbase = pbl; -#ifdef OFED_1_2 - nespbl->user_base = region->user_base; -#else nespbl->user_base = start; -#endif - nes_debug(NES_DBG_MR, "Allocated CQ PBL memory, %u bytes, pbl_pbase=%p," + nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%p," " pbl_vbase=%p user_base=0x%lx\n", nespbl->pbl_size, (void *)nespbl->pbl_pbase, (void*)nespbl->pbl_vbase, nespbl->user_base); list_for_each_entry(chunk, &region->chunk_list, list) { - for (j = 0; j < chunk->nmap; ++j) { - ((u32 *)pbl)[0] = cpu_to_le32((u32)sg_dma_address(&chunk->page_list[j])); - ((u32 *)pbl)[1] = cpu_to_le32(((u64)sg_dma_address(&chunk->page_list[j]))>>32); - nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, *pbl, le32_to_cpu(((u32 *)pbl)[1]), le32_to_cpu(((u32 *)pbl)[0])); - pbl++; + for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { + chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> PAGE_SHIFT; + nespbl->page = sg_page(&chunk->page_list[0]); + for (page_index=0; page_index<chunk_pages; page_index++) { + ((u32 *)pbl)[0] = cpu_to_le32((u32) + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*PAGE_SIZE))); + ((u32 *)pbl)[1] = cpu_to_le32(((u64) + (sg_dma_address(&chunk->page_list[nmap_index])+ + (page_index*PAGE_SIZE)))>>32); + nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, + 
(unsigned long long)*pbl, + le32_to_cpu(((u32 *)pbl)[1]), le32_to_cpu(((u32 *)pbl)[0])); + pbl++; + } } } - list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list); + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { + list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); + } else { + list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list); + } nesmr->ibmr.rkey = -1; nesmr->ibmr.lkey = -1; - nesmr->mode = IWNES_MEMREG_TYPE_CQ; + nesmr->mode = req.reg_type; return &nesmr->ibmr; break; } @@ -2575,11 +2759,9 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) u16 major_code; u16 minor_code; -#ifndef OFED_1_2 if (nesmr->region) { ib_umem_release(nesmr->region); } -#endif if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) { kfree(nesmr); return 0; @@ -2587,16 +2769,16 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) /* Deallocate the region with the adapter */ - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); return -ENOMEM; } cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - spin_lock_irqsave(&nesdev->cqp.lock, flags); - if (0 != nesmr->pbls_used) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nesmr->pbls_used != 0) { if (nesmr->pbl_4k) { nesadapter->free_4kpbl += nesmr->pbls_used; if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) { @@ -2612,11 +2794,15 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) } } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(&nesdev->cqp))); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(&nesdev->cqp))>>32)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = + 
cpu_to_le32((u32)((u64)(&nesdev->cqp))); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(&nesdev->cqp)) >> 32)); cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0; @@ -2624,13 +2810,11 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) cqp_wqe->wqe_words[NES_CQP_STAG_WQE_STAG_IDX] = cpu_to_le32(ib_mr->rkey); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ - spin_unlock_irqrestore(&nesdev->cqp.lock, flags); nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey); - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u," " CQP Major:Minor codes = 0x%04X:0x%04X\n", @@ -2645,10 +2829,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) minor_code = cqp_request->minor_code; if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -2767,7 +2949,8 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /** * nes_hw_modify_qp */ -int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, u32 next_iwarp_state, u32 wait_completion) +int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, + u32 next_iwarp_state, u32 wait_completion) { u64 u64temp; struct nes_hw_cqp_wqe *cqp_wqe; @@ -2781,8 
+2964,8 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, u32 next_i nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_NOT_HOLDING_LOCK); - if (NULL == cqp_request) { + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n"); return -ENOMEM; } @@ -2808,14 +2991,13 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, u32 next_i cqp_wqe->wqe_words[NES_CQP_QP_WQE_CONTEXT_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); atomic_set(&cqp_request->refcount, 2); - nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_NOT_HOLDING_LOCK, - NES_CQP_REQUEST_RING_DOORBELL); + nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_RING_DOORBELL); /* Wait for CQP */ if (wait_completion) { /* nes_debug(NES_DBG_MOD_QP, "Waiting for modify iWARP QP%u to complete.\n", nesqp->hwqp.qp_id); */ - ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done), + ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), NES_EVENT_TIMEOUT); nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, " "CQP Major:Minor codes = 0x%04X:0x%04X.\n", @@ -2829,10 +3011,8 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, u32 next_i } if (atomic_dec_and_test(&cqp_request->refcount)) { if (cqp_request->dynamic) { - atomic_inc(&cqp_reqs_dynfreed); kfree(cqp_request); } else { - atomic_inc(&cqp_reqs_freed); spin_lock_irqsave(&nesdev->cqp.lock, flags); list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); spin_unlock_irqrestore(&nesdev->cqp.lock, flags); @@ -2935,7 +3115,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, issue_modify_qp = 1; nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. 
SQ head=%u, SQ tail=%u\n", nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); - if (nesqp->iwarp_state==(u32)NES_CQP_QP_IWARP_STATE_CLOSING) { + if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); nes_rem_ref(&nesqp->ibqp); return 0; @@ -2988,7 +3168,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, break; case IB_QPS_ERR: case IB_QPS_RESET: - if (nesqp->iwarp_state==(u32)NES_CQP_QP_IWARP_STATE_ERROR) { + if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); nes_rem_ref(&nesqp->ibqp); return -EINVAL; @@ -3111,7 +3291,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, spin_lock_irqsave(&nesqp->lock, qplockflags); if (nesqp->cm_id) { /* These two are for the timer thread */ - if (atomic_inc_return(&nesqp->close_timer_started)==1) { + if (atomic_inc_return(&nesqp->close_timer_started) == 1) { nes_add_ref(&nesqp->ibqp); nesqp->cm_id->add_ref(nesqp->cm_id); nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," @@ -3213,7 +3393,6 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_count = 0; total_payload_length = 0; - nes_debug(NES_DBG_IW_TX, "\n"); if (nesqp->ibqp_state > IB_QPS_RTS) return -EINVAL; @@ -3232,10 +3411,12 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, /* nes_debug(NES_DBG_IW_TX, "processing sq wqe for QP%u at %p, head = %u.\n", nesqp->hwqp.qp_id, wqe, head); */ u64temp = (u64)(ib_wr->wr_id); - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX] = cpu_to_le32((u32)u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX] = cpu_to_le32((u32)((u64temp)>>32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(nesqp))); - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(((u64)(nesqp))>>32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX] = 
cpu_to_le32((u32)u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX] = + cpu_to_le32((u32)((u64temp) >> 32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)((u64)(nesqp))); + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX] = + cpu_to_le32((u32)(((u64)(nesqp)) >> 32)); wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] |= cpu_to_le32(head); switch (ib_wr->opcode) { @@ -3253,7 +3434,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; } if ((ib_wr->send_flags & IB_SEND_INLINE) && - (0 == (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA)) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && (ib_wr->sg_list[0].length <= 64)) { memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], (void *)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); @@ -3298,12 +3479,12 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, cpu_to_le32((u32)(ib_wr->wr.rdma.remote_addr >> 32)); if ((ib_wr->send_flags & IB_SEND_INLINE) && - (0 == (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA)) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && (ib_wr->sg_list[0].length <= 64)) { memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], (void *)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = cpu_to_le32( - ib_wr->sg_list[0].length); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(ib_wr->sg_list[0].length); wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; } else { total_payload_length = 0; @@ -3405,7 +3586,6 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, u32 counter; u32 total_payload_length; - nes_debug(NES_DBG_IW_RX, "\n"); if (nesqp->ibqp_state > IB_QPS_RTS) return -EINVAL; @@ -3503,7 +3683,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) head = nescq->hw_cq.cq_head; cq_size = nescq->hw_cq.cq_size; - while 
(cqe_count<num_entries) { + while (cqe_count < num_entries) { if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { cqe = nescq->hw_cq.cq_vbase[head]; @@ -3517,7 +3697,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) ((u64)u32temp); nesqp = *((struct nes_qp **)&u64temp); memset(entry, 0, sizeof *entry); - if (0 == cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]) { + if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) { entry->status = IB_WC_SUCCESS; } else { entry->status = IB_WC_WR_FLUSH_ERR; @@ -3535,8 +3715,10 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) /* Working on a SQ Completion*/ wq_tail = wqe_index; nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1); - wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) | - ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX]))); + wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail]. + wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | + ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail]. + wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX]))); entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. 
wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]); @@ -3575,7 +3757,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) head = 0; cqe_count++; nescq->polled_completions++; - if ((nescq->polled_completions > (cq_size/2)) || + if ((nescq->polled_completions > (cq_size / 2)) || (nescq->polled_completions == 255)) { nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes" " are pending %u of %u.\n", @@ -3608,11 +3790,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) /** * nes_req_notify_cq */ -#ifdef OFED_1_2 -static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) -#else static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) -#endif { struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); struct nes_device *nesdev = nesvnic->nesdev; @@ -3623,17 +3801,10 @@ static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_ nescq->hw_cq.cq_number); cq_arm = nescq->hw_cq.cq_number; -#ifdef OFED_1_2 - if (notify == IB_CQ_NEXT_COMP) - cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT; - else if (notify == IB_CQ_SOLICITED) - cq_arm |= NES_CQE_ALLOC_NOTIFY_SE; -#else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP) cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT; else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) cq_arm |= NES_CQE_ALLOC_NOTIFY_SE; -#endif else return -EINVAL; @@ -3688,9 +3859,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) { (1ull << IB_USER_VERBS_CMD_POST_SEND); nesibdev->ibdev.phys_port_cnt = 1; -#ifndef OFED_1_2 nesibdev->ibdev.num_comp_vectors = 1; -#endif nesibdev->ibdev.dma_device = &nesdev->pcidev->dev; nesibdev->ibdev.class_dev.dev = &nesdev->pcidev->dev; nesibdev->ibdev.query_device = nes_query_device; @@ -3733,7 +3902,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) { nesibdev->ibdev.post_send = nes_post_send; nesibdev->ibdev.post_recv = 
nes_post_recv; - nesibdev->ibdev.iwcm = kmalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); + nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); if (nesibdev->ibdev.iwcm == NULL) { ib_dealloc_device(&nesibdev->ibdev); return NULL; @@ -3756,15 +3925,13 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) { */ void nes_destroy_ofa_device(struct nes_ib_device *nesibdev) { - if (NULL == nesibdev) + if (nesibdev == NULL) return; nes_unregister_ofa_device(nesibdev); kfree(nesibdev->ibdev.iwcm); ib_dealloc_device(&nesibdev->ibdev); - - nes_debug(NES_DBG_SHUTDOWN, "\n"); } @@ -3780,7 +3947,6 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev) ret = ib_register_device(&nesvnic->nesibdev->ibdev); if (ret) { - nes_debug(NES_DBG_INIT, "\n"); return ret; } @@ -3791,7 +3957,6 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev) nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count; for (i = 0; i < ARRAY_SIZE(nes_class_attributes); ++i) { - nes_debug(NES_DBG_INIT, "call class_device_create_file\n"); ret = class_device_create_file(&nesibdev->ibdev.class_dev, nes_class_attributes[i]); if (ret) { while (i > 0) { @@ -3818,7 +3983,7 @@ void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) struct nes_vnic *nesvnic = nesibdev->nesvnic; int i; - if (NULL == nesibdev) + if (nesibdev == NULL) return; for (i = 0; i < ARRAY_SIZE(nes_class_attributes); ++i) { @@ -3826,11 +3991,8 @@ void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) } if (nesvnic->of_device_registered) { - nes_debug(NES_DBG_SHUTDOWN, "call ib_unregister_device()\n"); ib_unregister_device(&nesibdev->ibdev); } nesvnic->of_device_registered = 0; - } - diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index ef358f2..b53e492 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -42,38 +42,37 @@ struct nes_device; struct nes_ucontext { struct ib_ucontext 
ibucontext; - struct nes_device *nesdev; - unsigned long mmap_wq_offset; - unsigned long mmap_cq_offset; /* to be removed */ - int index; /* rnic index (minor) */ - unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)]; - u16 mmap_db_index[NES_MAX_USER_DB_REGIONS]; - u16 first_free_db; - unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)]; - struct nes_qp * mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; - u16 first_free_wq; - struct list_head cq_reg_mem_list; + struct nes_device *nesdev; + unsigned long mmap_wq_offset; + unsigned long mmap_cq_offset; /* to be removed */ + int index; /* rnic index (minor) */ + unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)]; + u16 mmap_db_index[NES_MAX_USER_DB_REGIONS]; + u16 first_free_db; + unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)]; + struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS]; + u16 first_free_wq; + struct list_head cq_reg_mem_list; + struct list_head qp_reg_mem_list; }; struct nes_pd { struct ib_pd ibpd; - u16 pd_id; - atomic_t sqp_count; - u16 mmap_db_index; + u16 pd_id; + atomic_t sqp_count; + u16 mmap_db_index; }; struct nes_mr { union { - struct ib_mr ibmr; - struct ib_mw ibmw; + struct ib_mr ibmr; + struct ib_mw ibmw; struct ib_fmr ibfmr; }; -#ifndef OFED_1_2 - struct ib_umem *region; -#endif - u16 pbls_used; - u8 mode; - u8 pbl_4k; + struct ib_umem *region; + u16 pbls_used; + u8 mode; + u8 pbl_4k; }; struct nes_hw_pb { @@ -82,35 +81,35 @@ struct nes_hw_pb { }; struct nes_vpbl { - dma_addr_t pbl_pbase; + dma_addr_t pbl_pbase; struct nes_hw_pb *pbl_vbase; }; struct nes_root_vpbl { - dma_addr_t pbl_pbase; + dma_addr_t pbl_pbase; struct nes_hw_pb *pbl_vbase; - struct nes_vpbl *leaf_vpbl; + struct nes_vpbl *leaf_vpbl; }; struct nes_fmr { - struct nes_mr nesmr; - u32 leaf_pbl_cnt; + struct nes_mr nesmr; + u32 leaf_pbl_cnt; struct nes_root_vpbl root_vpbl; - struct ib_qp* ib_qp; - int access_rights; - struct ib_fmr_attr attr; + struct ib_qp 
*ib_qp; + int access_rights; + struct ib_fmr_attr attr; }; struct nes_av; struct nes_cq { - struct ib_cq ibcq; + struct ib_cq ibcq; struct nes_hw_cq hw_cq; - u32 polled_completions; - u32 cq_mem_size; - spinlock_t lock; - u8 virtual_cq; - u8 pad[3]; + u32 polled_completions; + u32 cq_mem_size; + spinlock_t lock; + u8 virtual_cq; + u8 pad[3]; }; struct nes_wq { @@ -121,45 +120,48 @@ struct iw_cm_id; struct ietf_mpa_frame; struct nes_qp { - struct ib_qp ibqp; - void * allocated_buffer; - struct iw_cm_id *cm_id; + struct ib_qp ibqp; + void *allocated_buffer; + struct iw_cm_id *cm_id; struct workqueue_struct *wq; - struct work_struct disconn_work; - struct nes_cq *nesscq; - struct nes_cq *nesrcq; - struct nes_pd *nespd; + struct work_struct disconn_work; + struct nes_cq *nesscq; + struct nes_cq *nesrcq; + struct nes_pd *nespd; void *cm_node; /* handle of the node this QP is associated with */ struct ietf_mpa_frame *ietf_frame; - dma_addr_t ietf_frame_pbase; - wait_queue_head_t state_waitq; - unsigned long socket; - struct nes_hw_qp hwqp; - struct work_struct work; - struct work_struct ae_work; - enum ib_qp_state ibqp_state; - u32 iwarp_state; - u32 hte_index; - u32 last_aeq; - u32 qp_mem_size; - atomic_t refcount; - atomic_t close_timer_started; - u32 mmap_sq_db_index; - u32 mmap_rq_db_index; - spinlock_t lock; + dma_addr_t ietf_frame_pbase; + wait_queue_head_t state_waitq; + unsigned long socket; + struct nes_hw_qp hwqp; + struct work_struct work; + struct work_struct ae_work; + enum ib_qp_state ibqp_state; + u32 iwarp_state; + u32 hte_index; + u32 last_aeq; + u32 qp_mem_size; + atomic_t refcount; + atomic_t close_timer_started; + u32 mmap_sq_db_index; + u32 mmap_rq_db_index; + spinlock_t lock; struct nes_qp_context *nesqp_context; - dma_addr_t nesqp_context_pbase; - wait_queue_head_t kick_waitq; - u16 in_disconnect; - u16 private_data_len; - u8 active_conn; - u8 skip_lsmm; - u8 user_mode; - u8 hte_added; - u8 hw_iwarp_state; - u8 flush_issued; - u8 hw_tcp_state; - u8 
disconn_pending; - u8 destroyed; + dma_addr_t nesqp_context_pbase; + void *pbl_vbase; + dma_addr_t pbl_pbase; + struct page *page; + wait_queue_head_t kick_waitq; + u16 in_disconnect; + u16 private_data_len; + u8 active_conn; + u8 skip_lsmm; + u8 user_mode; + u8 hte_added; + u8 hw_iwarp_state; + u8 flush_issued; + u8 hw_tcp_state; + u8 disconn_pending; + u8 destroyed; }; #endif /* NES_VERBS_H */ From Arkady.Kanevsky at netapp.com Wed Nov 14 15:18:08 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 14 Nov 2007 18:18:08 -0500 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <473B58C3.40708@dev.mellanox.co.il> References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il><1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> <473B58C3.40708@dev.mellanox.co.il> Message-ID: Yevgeny, Let's say I want to run another ULP, say NFS-RDMA. Does this mean that by default I will get the SL which is in use by the currently running ULP, say SDP? Or is the SL differentiated based on port? And if I want to run different SL level traffic between the same pair of nodes, will I need to use a different port ID? That looks strange for ULPs that have a well-known port ID... Thanks, Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Yevgeny Kliteynik [mailto:kliteyn at dev.mellanox.co.il] > Sent: Wednesday, November 14, 2007 3:21 PM > To: Kanevsky, Arkady > Cc: Hal Rosenstock; general at lists.openfabrics.org > Subject: Re: [ofa-general] RE: QoS for iSER > > Kanevsky, Arkady wrote: > > what happens when multiple apps runs on the same server? > > I guess that when you say "server" you mean "host" and not > the server from server-client terminology. 
> > This is what the whole point of QoS is: if the applications > are using the same ULP, they probably would get the same > Service Level, unless they were differentiated by the > administrator is some other way, e.g. they all use SDP, but > connect to different TCP port of the server application. > If the applications are using different ULPs, they will get > Service Level accordingly to the ULPs that they are using. > > -- Yevgeny > > > Arkady Kanevsky email: arkady at netapp.com > > Network Appliance Inc. phone: 781-768-5395 > > 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 > > Waltham, MA 02451 central phone: 781-768-5300 > > > > > >> -----Original Message----- > >> From: Hal Rosenstock [mailto:hrosenstock at xsigo.com] > >> Sent: Wednesday, November 14, 2007 8:18 AM > >> To: Yevgeny Kliteynik > >> Cc: gdror at mellanox.co.il; general at lists.openfabrics.org > >> Subject: Re: [ofa-general] RE: QoS for iSER > >> > >> On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: > >>> Hal Rosenstock wrote: > >>>> Or, > >>>> > >>>> On 11/13/07, Or Gerlitz wrote: > >>>>> Yevgeny, > >>>>> > >>>>> iSER (as you can learn from doing a grep) is using the > >> RDMA-CM TCP > >>>>> port space as does RDS. The RDMA-CM signature is > >> something which I > >>>>> am sure exists, you can look on the RDMA-CM IB spec > >> Annex to see if > >>>>> such thing indeed exist or I am wrong. > >>>> Did you really look at the annex for this ? > >>>> > >>>>> The TCP port is the 16 bit port portion of the ip:port address > >>>>> provided by a ULP that uses the RDMA-CM to rdma_resolve_addr(), > >>>>> again the annex explained how the port is embedded into > >> the SID, I > >>>>> don't remember the location within the 64 bit string. > >>>> It's in the low 16 bits (bytes 6-7) of the SID as the > >> annex indicates. > >>>>> Or. 
> >>>>> > >>>>> -------- Original Message -------- > >>>>> Subject: > >>>>> Re: QoS for iSER > >>>>> Date: > >>>>> Mon, 12 Nov 2007 11:41:43 +0200 > >>>>> From: Yevgeny Kliteynik > >>>>> > >>>>> Hi Erez, > >>>>> > >>>>> Erez Zilber wrote: > >>>>>> to create the SID, the rdma cm combines > >>>>>> > >>>>>> 1) the port space > >>>>> What is the port space for iSER? > >>>>> For SDP it's 0x10000 - 0x1FFFF. > >>>>> For RDS it's 0x1060000 - 0x106FFFF > >> I presume this is just saying RDS uses IP protocol TCP and > there is > >> no well known port (e.g. uses dynamic ports). So how do you know > >> ahead of time which port ? > >> > >>>>> For iSER it's ...? > >>>> These numbers are too large for just "port space". > >>>> > >>>> iSER SID is 0x000000000106035c > >>>> > >>>> in your nomenclature, I guess 0x106035c > >>>> > >>>> 01 says RDMA aware ULP service ID range > >>>> 06 says IP protocol is TCP > >>>> 0x035c (port 860) is the well known TCP port for iSCSI > >>> Thanks, that is just what I needed. > >>> I'm preparing a (very) simplified interface for defining > QoS policy. > >>> I'm adding an additional section in QoS policy file, > where an admin > >>> will be able to configure QoS per ULP or per application > w/o going > >>> into too many details. > >>> Here's the example of what I have in mind: > >>> > >>> qos-ulps > >>> default : 0 #default SL > >>> sdp, port 10000-20000 : 2 > >>> sdp : 0 #default SL for SDP > >>> rds, port 25000 : 2 #SL for RDS when > >> destination port is 25000 > >> > >> Isn't there a chicken and egg problem here with this ? How do you > >> know port 25000 will be assigned "in advance" ? > >> > >>> rds, : 0 #default SL for RDS > >> I don't see how RDS can work separate from other CMA based > protocols > >> which use dynamic ports. > >> > >>> iser *??????* : 4 #SL for iSER > >>> ipoib, pkey 0x0001 : 5 #SL for IPoIB on > >> partition with pkey 0x0001 > >>> ipoib : 6 #default IPoIB > >> partition - pkey=0x7FFF > >> ... 
> >>> end-qos-ulps > >>> > >>> This syntax is possible only if there are well known facts > >> such as SDP > >>> service ID, in which case admin will be able to just state "sdp: > >>> ", and OpenSM will (internally) generate relevant > matching rule > >>> and QoS level based on this known service ID. > >>> > >>> So back to iSER: > >>> > >>> Can I assume that the target port for iSER will always be > >> 860, hence > >>> the iSER service ID will always be 0x000000000106035c? > >> In terms of iSER, I was only commenting on what the spec > says. I did > >> not verify its operation in terms of the code. > >> Does the code follow the spec ? > >> > >> -- Hal > >> > >>> Or perhaps I can do it similar to SDP, where there is an > option to > >>> specify the port ranges along with the ULP name (SDP): > >>> - if administrator only specifies "iser", I can assume that > >>> the service ID is default 0x000000000106035c > >>> - if administrator only specifies "iser" and ports, OpenSM > >>> will build service ID based on a well known prefix > >>> (0x000000000106pppp) where the last 4 hex digits are target > >>> port number > >>> > >>> Keep in mind that if this doesn't look too flexible and > >> doesn't cover > >>> all the cases, there's always the rest of the QoS policy > >> file with all > >>> the advanced configuration. > >>> > >>> -- Yevgeny > >>> > >>>> -- Hal > >>>> > >>>>>> 2) the rdma cm signature > >>>>> Do you mean something iSER-specific, or just the way the > >> cm builds > >>>>> the service ID out of port space and tcp port? > >>>>> Can you give an example? > >>>>> > >>>>>> 3) the destination tcp port provided to rdma_resolve_addr > >>>>> I guess that tcp port is in the lower 4 nibs of the service ID, > >>>>> similar to SDP. Right? 
> >>>>> -- Yevgeny > >>>>> > >>>>> _______________________________________________ > >>>>> general mailing list > >>>>> general at lists.openfabrics.org > >>>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >>>>> > >>>>> To unsubscribe, please visit > >>>>> http://openib.org/mailman/listinfo/openib-general > >>>>> > >>> _______________________________________________ > >>> general mailing list > >>> general at lists.openfabrics.org > >>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >>> > >>> To unsubscribe, please visit > >>> http://openib.org/mailman/listinfo/openib-general > >> _______________________________________________ > >> general mailing list > >> general at lists.openfabrics.org > >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >> > >> To unsubscribe, please visit > >> http://openib.org/mailman/listinfo/openib-general > >> > > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From krkumar2 at in.ibm.com Wed Nov 14 21:05:37 2007 From: krkumar2 at in.ibm.com (Krishna Kumar) Date: Thu, 15 Nov 2007 10:35:37 +0530 Subject: [ofa-general] [PATCH] IPoIB: Remove redundant check in xmit handler Message-ID: <20071115050537.7100.93755.sendpatchset@K50wks273871wss.in.ibm.com> qdisc_run() tests for queue_stopped() before calling __qdisc_run(), and the same check is done in every iteration of __qdisc_run(), so another check is not required in the driver xmit. To validate, I put a debug in the TX_BUSY path which never hit with 64 threads running overnight exercising this code a few 100 million times. 
Signed-off-by: Krishna Kumar --- ipoib_main.c | 10 ---------- 1 files changed, 10 deletions(-) diff -ruNp 1/drivers/infiniband/ulp/ipoib/ipoib_main.c 2/drivers/infiniband/ulp/ipoib/ipoib_main.c --- 1/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-14 14:07:06.000000000 +0530 +++ 2/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-14 14:07:35.000000000 +0530 @@ -666,16 +666,6 @@ static int ipoib_start_xmit(struct sk_bu if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) return NETDEV_TX_LOCKED; - /* - * Check if our queue is stopped. Since we have the LLTX bit - * set, we can't rely on netif_stop_queue() preventing our - * xmit function from being called with a full queue. - */ - if (unlikely(netif_queue_stopped(dev))) { - spin_unlock_irqrestore(&priv->tx_lock, flags); - return NETDEV_TX_BUSY; - } - if (likely(skb->dst && skb->dst->neighbour)) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); From or.gerlitz at gmail.com Wed Nov 14 23:58:17 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 14 Nov 2007 23:58:17 -0800 Subject: [ofa-general] disconnect issues/questions In-Reply-To: <15ddcffd0711142341g7b83d917t2fcc4b9a64e54f55@mail.gmail.com> References: <15ddcffd0711142341g7b83d917t2fcc4b9a64e54f55@mail.gmail.com> Message-ID: <15ddcffd0711142358m55192a25qaa2e419045f6d0ea@mail.gmail.com> Sean, I am quite sure we discussed at least part of this in the past, but I fail to find it. Anyway, looking on librdmacm code I am not clear about the following: A) it seems that other than some error handling code, only the flow of rdma_disconnect() and RDMA_CM_EVENT_REJECTED move the QP associated with this ID to the ERROR state. From that I conclude that in order to get flushes on all the WR posted to the QP one must call rdma_disconnect() in both sides of the connection. Am I right, is this what we want? if yes, lets document this. 
B) will the RDMA_CM_EVENT_DISCONNECTED event --always-- be generated, also for the side that called rdma_disconnect()? in both cases (yes and no), we need to document this. thanks (and see you tomorrow...) Or. From vlad at dev.mellanox.co.il Thu Nov 15 00:12:35 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 15 Nov 2007 10:12:35 +0200 Subject: [ofa-general] ANNOUNCE: ofed_1_3/linux-2.6.git branches update Message-ID: <473BFF73.40101@dev.mellanox.co.il> Hi, Following the merge of linux-2.6.24-rc2 into the git://git.openfabrics.org/ofed_1_3/linux-2.6.git The following changes were made: The branch 'ofed_kernel' renamed into 'ofed_kernel_2_6_23' The branch 'ofed_kernel_2_6_24_rc1' renamed into 'ofed_kernel' - and will be used as a base for OFED-1.3-beta. Regards, Vladimir From kliteyn at dev.mellanox.co.il Thu Nov 15 00:21:10 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 10:21:10 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il><1195046307.14106.72.camel@hrosenstock-ws.xsigo.com> <473B58C3.40708@dev.mellanox.co.il> Message-ID: <473C0176.7020001@dev.mellanox.co.il> Kanevsky, Arkady wrote: > Yevgeny, > Lets say I want to run another ULP, say NFS-RDMA. > Does this mean by default I will get the SL which is in use by > currently running ULP, say SDP? > Or the SL is differentiated based on port? > And if I want to run different SL level traffic between the same pair > of nodes I will need to use different port ID? > Look strange for ULPs that have well known port ID... If you run the same application between the same pair of nodes, you will get the same SL. If you run different applications over the same ULP that use different TCP ports, but you have no way of knowing what those TCP ports are, you will get the same SL.
If you run different applications over different ULPs that are using same port space (which produces same service ID range), and you have no way knowing what are the the TCP ports, you will get the same SL. In any other case there is a way to differentiate the traffic. Connection requests (doesn't matter whether it's ULP or not) can be differentiated by source guid, destination guid, pkey, service ID, QoS class, and any combination of all of the above. Please see the QoS RFC: http://lists.openfabrics.org/pipermail/general/2007-July/038488.html Check the QoS Policy File in the RFC. There were many changes since this RFC was issued, but it's still enough to get the general idea. -- Yevgeny > Thanks, > > Arkady Kanevsky email: arkady at netapp.com > Network Appliance Inc. phone: 781-768-5395 > 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 > Waltham, MA 02451 central phone: 781-768-5300 > > >> -----Original Message----- >> From: Yevgeny Kliteynik [mailto:kliteyn at dev.mellanox.co.il] >> Sent: Wednesday, November 14, 2007 3:21 PM >> To: Kanevsky, Arkady >> Cc: Hal Rosenstock; general at lists.openfabrics.org >> Subject: Re: [ofa-general] RE: QoS for iSER >> >> Kanevsky, Arkady wrote: >>> what happens when multiple apps runs on the same server? >> I guess that when you say "server" you mean "host" and not >> the server from server-client terminology. >> >> This is what the whole point of QoS is: if the applications >> are using the same ULP, they probably would get the same >> Service Level, unless they were differentiated by the >> administrator is some other way, e.g. they all use SDP, but >> connect to different TCP port of the server application. >> If the applications are using different ULPs, they will get >> Service Level accordingly to the ULPs that they are using. >> >> -- Yevgeny >> >>> Arkady Kanevsky email: arkady at netapp.com >>> Network Appliance Inc. phone: 781-768-5395 >>> 1601 Trapelo Rd. - Suite 16. 
Fax: 781-895-1195 >>> Waltham, MA 02451 central phone: 781-768-5300 >>> >>> >>>> -----Original Message----- >>>> From: Hal Rosenstock [mailto:hrosenstock at xsigo.com] >>>> Sent: Wednesday, November 14, 2007 8:18 AM >>>> To: Yevgeny Kliteynik >>>> Cc: gdror at mellanox.co.il; general at lists.openfabrics.org >>>> Subject: Re: [ofa-general] RE: QoS for iSER >>>> >>>> On Wed, 2007-11-14 at 11:11 +0200, Yevgeny Kliteynik wrote: >>>>> Hal Rosenstock wrote: >>>>>> Or, >>>>>> >>>>>> On 11/13/07, Or Gerlitz wrote: >>>>>>> Yevgeny, >>>>>>> >>>>>>> iSER (as you can learn from doing a grep) is using the >>>> RDMA-CM TCP >>>>>>> port space as does RDS. The RDMA-CM signature is >>>> something which I >>>>>>> am sure exists, you can look on the RDMA-CM IB spec >>>> Annex to see if >>>>>>> such thing indeed exist or I am wrong. >>>>>> Did you really look at the annex for this ? >>>>>> >>>>>>> The TCP port is the 16 bit port portion of the ip:port address >>>>>>> provided by a ULP that uses the RDMA-CM to rdma_resolve_addr(), >>>>>>> again the annex explained how the port is embedded into >>>> the SID, I >>>>>>> don't remember the location within the 64 bit string. >>>>>> It's in the low 16 bits (bytes 6-7) of the SID as the >>>> annex indicates. >>>>>>> Or. >>>>>>> >>>>>>> -------- Original Message -------- >>>>>>> Subject: >>>>>>> Re: QoS for iSER >>>>>>> Date: >>>>>>> Mon, 12 Nov 2007 11:41:43 +0200 >>>>>>> From: Yevgeny Kliteynik >>>>>>> >>>>>>> Hi Erez, >>>>>>> >>>>>>> Erez Zilber wrote: >>>>>>>> to create the SID, the rdma cm combines >>>>>>>> >>>>>>>> 1) the port space >>>>>>> What is the port space for iSER? >>>>>>> For SDP it's 0x10000 - 0x1FFFF. >>>>>>> For RDS it's 0x1060000 - 0x106FFFF >>>> I presume this is just saying RDS uses IP protocol TCP and >> there is >>>> no well known port (e.g. uses dynamic ports). So how do you know >>>> ahead of time which port ? >>>> >>>>>>> For iSER it's ...? >>>>>> These numbers are too large for just "port space". 
>>>>>> >>>>>> iSER SID is 0x000000000106035c >>>>>> >>>>>> in your nomenclature, I guess 0x106035c >>>>>> >>>>>> 01 says RDMA aware ULP service ID range >>>>>> 06 says IP protocol is TCP >>>>>> 0x035c (port 860) is the well known TCP port for iSCSI >>>>> Thanks, that is just what I needed. >>>>> I'm preparing a (very) simplified interface for defining >> QoS policy. >>>>> I'm adding an additional section in QoS policy file, >> where an admin >>>>> will be able to configure QoS per ULP or per application >> w/o going >>>>> into too many details. >>>>> Here's the example of what I have in mind: >>>>> >>>>> qos-ulps >>>>> default : 0 #default SL >>>>> sdp, port 10000-20000 : 2 >>>>> sdp : 0 #default SL for SDP >>>>> rds, port 25000 : 2 #SL for RDS when >>>> destination port is 25000 >>>> >>>> Isn't there a chicken and egg problem here with this ? How do you >>>> know port 25000 will be assigned "in advance" ? >>>> >>>>> rds, : 0 #default SL for RDS >>>> I don't see how RDS can work separate from other CMA based >> protocols >>>> which use dynamic ports. >>>> >>>>> iser *??????* : 4 #SL for iSER >>>>> ipoib, pkey 0x0001 : 5 #SL for IPoIB on >>>> partition with pkey 0x0001 >>>>> ipoib : 6 #default IPoIB >>>> partition - pkey=0x7FFF >>>> ... >>>>> end-qos-ulps >>>>> >>>>> This syntax is possible only if there are well known facts >>>> such as SDP >>>>> service ID, in which case admin will be able to just state "sdp: >>>>> ", and OpenSM will (internally) generate relevant >> matching rule >>>>> and QoS level based on this known service ID. >>>>> >>>>> So back to iSER: >>>>> >>>>> Can I assume that the target port for iSER will always be >>>> 860, hence >>>>> the iSER service ID will always be 0x000000000106035c? >>>> In terms of iSER, I was only commenting on what the spec >> says. I did >>>> not verify its operation in terms of the code. >>>> Does the code follow the spec ? 
>>>> >>>> -- Hal >>>> >>>>> Or perhaps I can do it similar to SDP, where there is an >> option to >>>>> specify the port ranges along with the ULP name (SDP): >>>>> - if administrator only specifies "iser", I can assume that >>>>> the service ID is default 0x000000000106035c >>>>> - if administrator only specifies "iser" and ports, OpenSM >>>>> will build service ID based on a well known prefix >>>>> (0x000000000106pppp) where the last 4 hex digits are target >>>>> port number >>>>> >>>>> Keep in mind that if this doesn't look too flexible and >>>> doesn't cover >>>>> all the cases, there's always the rest of the QoS policy >>>> file with all >>>>> the advanced configuration. >>>>> >>>>> -- Yevgeny >>>>> >>>>>> -- Hal >>>>>> >>>>>>>> 2) the rdma cm signature >>>>>>> Do you mean something iSER-specific, or just the way the >>>> cm builds >>>>>>> the service ID out of port space and tcp port? >>>>>>> Can you give an example? >>>>>>> >>>>>>>> 3) the destination tcp port provided to rdma_resolve_addr >>>>>>> I guess that tcp port is in the lower 4 nibs of the service ID, >>>>>>> similar to SDP. Right? 
>>>>>>> -- Yevgeny >>>>>>> >>>>>>> _______________________________________________ >>>>>>> general mailing list >>>>>>> general at lists.openfabrics.org >>>>>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>>>>>> >>>>>>> To unsubscribe, please visit >>>>>>> http://openib.org/mailman/listinfo/openib-general >>>>>>> >>>>> _______________________________________________ >>>>> general mailing list >>>>> general at lists.openfabrics.org >>>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>>>> >>>>> To unsubscribe, please visit >>>>> http://openib.org/mailman/listinfo/openib-general >>>> _______________________________________________ >>>> general mailing list >>>> general at lists.openfabrics.org >>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>>> >>>> To unsubscribe, please visit >>>> http://openib.org/mailman/listinfo/openib-general >>>> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general >> > From sashak at voltaire.com Thu Nov 15 00:51:20 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 10:51:20 +0200 Subject: [ofa-general] [PATCH] libibumad: fix NULL pointer referencing Message-ID: <20071115085120.GK17237@sashak.voltaire.com> best_port = NULL is valid parameter value for resolve_ca_name(), so check this. Signed-off-by: Sasha Khapyorsky --- libibumad/src/umad.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/libibumad/src/umad.c b/libibumad/src/umad.c index 1012695..5b7b83e 100644 --- a/libibumad/src/umad.c +++ b/libibumad/src/umad.c @@ -303,7 +303,7 @@ resolve_ca_name(char *ca_name, int *best_port) for (caidx = 0; caidx < n; caidx++) { TRACE("checking ca '%s'", names[caidx]); - port = *best_port; + port = best_port ? 
*best_port : 0; if ((port_type = resolve_ca_port(names[caidx], &port)) < 0) continue; -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 01:03:01 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:03:01 +0200 Subject: [ofa-general] [PATCH 0/3] opensm/vendor: fixes and improvement Message-ID: <11951173841159-git-send-email-sashak@voltaire.com> There are a couple of fixes (mostly memory leaks) and improvements for opensm/libvendor/osm_vendor_ibumad: 1 - opensm/libvendor: remove not used umad_ca field 2 - opensm/libvendor: fix umad_port leak 3 - opensm/vendor: handle guid = 0 with osm_vendor_bind() Sasha From sashak at voltaire.com Thu Nov 15 01:03:02 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:03:02 +0200 Subject: [ofa-general] [PATCH 1/3] opensm/libvendor: remove not used umad_ca field In-Reply-To: <11951173841159-git-send-email-sashak@voltaire.com> References: <11951173841159-git-send-email-sashak@voltaire.com> Message-ID: <11951173841652-git-send-email-sashak@voltaire.com> Remove the not really used (but leaked) umad_ca field from the ibumad vendor structure.
Signed-off-by: Sasha Khapyorsky --- opensm/include/vendor/osm_vendor_ibumad.h | 1 - opensm/libvendor/osm_vendor_ibumad.c | 18 ------------------ 2 files changed, 0 insertions(+), 19 deletions(-) diff --git a/opensm/include/vendor/osm_vendor_ibumad.h b/opensm/include/vendor/osm_vendor_ibumad.h index 743b393..84fd21a 100644 --- a/opensm/include/vendor/osm_vendor_ibumad.h +++ b/opensm/include/vendor/osm_vendor_ibumad.h @@ -158,7 +158,6 @@ typedef struct _osm_vendor { osm_bind_handle_t agents[UMAD_CA_MAX_AGENTS]; char ca_names[OSM_UMAD_MAX_CAS][UMAD_CA_NAME_LEN]; vendor_match_tbl_t mtbl; - umad_ca_t umad_ca; umad_port_t umad_port; pthread_mutex_t cb_mutex; pthread_mutex_t match_tbl_mutex; diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 240a97b..9e186d5 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -703,24 +703,6 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, } /* Port found, try to open it */ - if (umad_get_ca(p_vend->ca_names[ca], &p_vend->umad_ca) < 0) { - osm_log(p_vend->p_log, OSM_LOG_ERROR, - "osm_vendor_open_port: ERR 542A: " - "umad_get_ca() failed\n"); - goto Exit; - } - - /* Validate that node is an IB node type */ - if (p_vend->umad_ca.node_type < 1 || p_vend->umad_ca.node_type > 3) { - osm_log(p_vend->p_log, OSM_LOG_ERROR, - "osm_vendor_open_port: ERR 542D: " - "Node type %d is not an IB node type\n", - p_vend->umad_ca.node_type); - fprintf(stderr, "Node type %d is not an IB node type\n", - p_vend->umad_ca.node_type); - goto Exit; - } - if (umad_get_port(p_vend->ca_names[ca], i, &p_vend->umad_port) < 0) { osm_log(p_vend->p_log, OSM_LOG_ERROR, "osm_vendor_open_port: ERR 542B: " -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 01:03:04 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:03:04 +0200 Subject: [ofa-general] [PATCH 3/3] opensm/vendor: handle guid = 0 with osm_vendor_bind() In-Reply-To: 
<11951173841159-git-send-email-sashak@voltaire.com> References: <11951173841159-git-send-email-sashak@voltaire.com> Message-ID: <11951173842906-git-send-email-sashak@voltaire.com> Handle guid = 0 with osm_vendor_bind() and osm_vendor_open_port() - just open default umad port. Signed-off-by: Sasha Khapyorsky --- opensm/libvendor/osm_vendor_ibumad.c | 43 +++++++++++++++++---------------- 1 files changed, 22 insertions(+), 21 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 5668c5e..1d5f359 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -656,20 +656,25 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, IN const ib_net64_t port_guid) { ib_net64_t portguids[OSM_UMAD_MAX_PORTS_PER_CA + 1]; - int i = 0, umad_port_id = -1, found = 0; + int i = 0, umad_port_id = -1; + char *name; int ca, r; CL_ASSERT(p_vend); OSM_LOG_ENTER(p_vend->p_log, osm_vendor_open_port); - CL_ASSERT(port_guid); - if (p_vend->umad_port_id >= 0) { umad_port_id = p_vend->umad_port_id; goto Exit; } + if (!port_guid) { + name = NULL; + i = 0; + goto _found; + } + for (ca = 0; ca < p_vend->ca_count; ca++) { if ((r = umad_get_ca_portguids(p_vend->ca_names[ca], portguids, @@ -680,37 +685,33 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, p_vend->ca_names[ca], strerror(r)); goto Exit; } - for (i = 0; i < r; i++) if (port_guid == portguids[i]) { - found = 1; - break; + name = p_vend->ca_names[ca]; + goto _found; } - - if (found) - break; } - if (!found) { - /* - * No local CA owns this guid! - */ - osm_log(p_vend->p_log, OSM_LOG_ERROR, - "osm_vendor_open_port: ERR 5422: " - "Unable to find requested CA guid 0x%" PRIx64 "\n", - cl_ntoh64(port_guid)); - goto Exit; - } + /* + * No local CA owns this guid! 
+ */ + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_open_port: ERR 5422: " + "Unable to find requested CA guid 0x%" PRIx64 "\n", + cl_ntoh64(port_guid)); + goto Exit; +_found: /* Port found, try to open it */ - if (umad_get_port(p_vend->ca_names[ca], i, &p_vend->umad_port) < 0) { + if (umad_get_port(name, i, &p_vend->umad_port) < 0) { osm_log(p_vend->p_log, OSM_LOG_ERROR, "osm_vendor_open_port: ERR 542B: " "umad_get_port() failed\n"); goto Exit; } - if ((umad_port_id = umad_open_port(p_vend->ca_names[ca], i)) < 0) { + if ((umad_port_id = umad_open_port(p_vend->umad_port.ca_name, + p_vend->umad_port.portnum)) < 0) { osm_log(p_vend->p_log, OSM_LOG_ERROR, "osm_vendor_open_port: ERR 542C: " "umad_open_port() failed\n"); -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 01:03:03 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:03:03 +0200 Subject: [ofa-general] [PATCH 2/3] opensm/libvendor: fix umad_port leak In-Reply-To: <11951173841159-git-send-email-sashak@voltaire.com> References: <11951173841159-git-send-email-sashak@voltaire.com> Message-ID: <11951173841034-git-send-email-sashak@voltaire.com> Fix memory leaks - release and invalidate allocated umad port resources. 
Signed-off-by: Sasha Khapyorsky --- opensm/libvendor/osm_vendor_ibumad.c | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 9e186d5..5668c5e 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -725,6 +725,8 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, "osm_vendor_open_port: ERR 5423: " "Unable to alloc receiver struct\n"); umad_close_port(umad_port_id); + umad_release_port(&p_vend->umad_port); + p_vend->umad_port.port_guid = 0; p_vend->umad_port_id = umad_port_id = -1; goto Exit; } @@ -733,6 +735,8 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, "osm_vendor_open_port: ERR 5420: " "umad_receiver_init failed\n"); umad_close_port(umad_port_id); + umad_release_port(&p_vend->umad_port); + p_vend->umad_port.port_guid = 0; p_vend->umad_port_id = umad_port_id = -1; } @@ -758,6 +762,8 @@ static void osm_vendor_close_port(osm_vendor_t * const p_vend) if (p_vend->agents[i]) umad_unregister(p_vend->umad_port_id, i); umad_close_port(p_vend->umad_port_id); + umad_release_port(&p_vend->umad_port); + p_vend->umad_port.port_guid = 0; p_vend->umad_port_id = -1; } } -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 01:12:06 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:12:06 +0200 Subject: [ofa-general] [PATCH 0/3] opensm/vendor: fixes and improvement Message-ID: <11951179291471-git-send-email-sashak@voltaire.com> There are a couple of fixes (and actually a rework) in the opensm/libvendor/osm_vendor_ibumad osm_vendor_get_all_port_attr() function. Also the resulting fixes (opensm's get_port_guid() properly detects the number of ports), improvements and adaptations.
1 - libvendor: osm_vendor_get_all_port_attr() rework 2 - opensm/main: merge gen2 code in get_port_guid() 3 - opensm/osmtest: adopt to osm_vendor_get_all_port_attr() changes Sasha From sashak at voltaire.com Thu Nov 15 01:12:07 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:12:07 +0200 Subject: [ofa-general] [PATCH 1/3] libvendor: osm_vendor_get_all_port_attr() rework In-Reply-To: <11951179291471-git-send-email-sashak@voltaire.com> References: <11951179291471-git-send-email-sashak@voltaire.com> Message-ID: <11951179291903-git-send-email-sashak@voltaire.com> It fixes couple of issues with this function: - return only valid guids, don't return duplicated entries as well as valid number of ports - return valid sm_lid (as on ports) - potential local buffers overflow - minor leaks (not released ca) Finally it is much simplified now. Signed-off-by: Sasha Khapyorsky --- opensm/libvendor/osm_vendor_ibumad.c | 100 ++++++++-------------------------- 1 files changed, 24 insertions(+), 76 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 1d5f359..37007cd 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -543,18 +543,10 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, IN ib_port_attr_t * const p_attr_array, IN uint32_t * const p_num_ports) { - ib_net64_t portguids[*p_num_ports]; - ib_net64_t *p_guid = portguids, *e = portguids + *p_num_ports; umad_ca_t ca; - int lids[*p_num_ports]; - int linkstates[*p_num_ports]; - int portnums[*p_num_ports]; - int *p_lid = lids; - int *p_linkstates = linkstates; - int *p_portnum = portnums; - umad_port_t def_port = { "" }; + ib_port_attr_t *attr = p_attr_array; + unsigned done = 0; int r, i, j; - int sm_lid = 0; OSM_LOG_ENTER(p_vend->p_log, osm_vendor_get_all_port_attr); @@ -568,81 +560,37 @@ osm_vendor_get_all_port_attr(IN osm_vendor_t * const p_vend, goto Exit; } - for (i = 0; p_guid < e && i < 
p_vend->ca_count; i++) { + if (!p_attr_array) { + r = IB_INSUFFICIENT_MEMORY; + *p_num_ports = 0; + goto Exit; + } + + for (i = 0; i < p_vend->ca_count && !done; i++) { /* * For each CA, retrieve the port guids */ - if ((r = umad_get_ca_portguids(p_vend->ca_names[i], - p_guid, e - p_guid)) < 0) { - osm_log(p_vend->p_log, OSM_LOG_ERROR, - "osm_vendor_get_all_port_attr: ERR 5419: " - "Unable to get CA %s port guids (%s)\n", - p_vend->ca_names[i], strerror(r)); - goto Exit; - } - - p_guid += r; - - if ((r = umad_get_ca(p_vend->ca_names[i], &ca)) == 0) { + if (umad_get_ca(p_vend->ca_names[i], &ca) == 0) { for (j = 0; j <= ca.numports; j++) { - if (ca.ports[j]) { - *p_lid = ca.ports[j]->base_lid; - *p_linkstates = ca.ports[j]->state; - *p_portnum = ca.ports[j]->portnum; - free(ca.ports[j]); + if (!ca.ports[j]) + continue; + attr->port_guid = ca.ports[j]->port_guid; + attr->lid = ca.ports[j]->base_lid; + attr->port_num = ca.ports[j]->portnum; + attr->sm_lid = ca.ports[j]->sm_lid; + attr->link_state = ca.ports[j]->state; + attr++; + if (attr - p_attr_array > *p_num_ports) { + done = 1; + break; } - p_lid++; - p_linkstates++; - p_portnum++; } + umad_release_ca(&ca); } } - *p_num_ports = p_guid - portguids; - - /* - * If no port 0 - we are on other than switch. - * Get a default 'best' port from the library. 
- */ - if (*p_num_ports && !portguids[0]) { - umad_get_port(0, 0, &def_port); - - portguids[0] = def_port.port_guid; - lids[0] = def_port.base_lid; - linkstates[0] = def_port.state; - portnums[0] = def_port.portnum; - sm_lid = def_port.sm_lid; - - osm_log(p_vend->p_log, OSM_LOG_DEBUG, - "osm_vendor_get_all_port_attr: " - "assign CA %s port %d guid (0x%" PRIx64 - ") as the default port\n", def_port.ca_name, - def_port.portnum, cl_hton64(def_port.port_guid)); - - umad_release_port(&def_port); - } - - j = 0; - if (p_attr_array) { - /* set the port guid, lid, and sm lid in the port attr struct */ - for (i = 0; i < *p_num_ports; i++) { - if (i > 0 && portguids[i] == 0) - continue; - p_attr_array[j].port_guid = portguids[i]; - p_attr_array[j].lid = lids[i]; - p_attr_array[j].port_num = portnums[i]; - if (j == 0) - p_attr_array[j].sm_lid = sm_lid; - else - p_attr_array[j].sm_lid = - p_vend->umad_port.sm_lid; - p_attr_array[j].link_state = linkstates[i]; - j++; - } - r = 0; - *p_num_ports = j; - } else - r = IB_INSUFFICIENT_MEMORY; + *p_num_ports = attr - p_attr_array; + r = 0; Exit: OSM_LOG_EXIT(p_vend->p_log); -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 01:12:08 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:12:08 +0200 Subject: [ofa-general] [PATCH 2/3] opensm/main: merge gen2 code in get_port_guid() In-Reply-To: <11951179291471-git-send-email-sashak@voltaire.com> References: <11951179291471-git-send-email-sashak@voltaire.com> Message-ID: <1195117929869-git-send-email-sashak@voltaire.com> Merge gen2 and gen1 code in get_port_guid(), also improve printing. 
Signed-off-by: Sasha Khapyorsky --- opensm/opensm/main.c | 56 +++++++++++++++---------------------------------- 1 files changed, 17 insertions(+), 39 deletions(-) diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 948af83..0a58a16 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -349,13 +349,12 @@ void show_usage(void) **********************************************************************/ ib_net64_t get_port_guid(IN osm_opensm_t * p_osm, uint64_t port_guid) { - uint32_t i; - uint32_t choice = 0; + ib_port_attr_t attr_array[GUID_ARRAY_SIZE]; + uint32_t num_ports = GUID_ARRAY_SIZE; char junk[128]; + uint32_t i, choice = 0; boolean_t done_flag = FALSE; ib_api_status_t status; - uint32_t num_ports = GUID_ARRAY_SIZE; - ib_port_attr_t attr_array[GUID_ARRAY_SIZE]; /* Call the transport layer for a list of local port @@ -382,58 +381,41 @@ ib_net64_t get_port_guid(IN osm_opensm_t * p_osm, uint64_t port_guid) cl_hton64(attr_array[0].port_guid)); return (attr_array[0].port_guid); } -#if defined ( OSM_VENDOR_INTF_OPENIB ) - /* If port_guid is 0, and this is gen2 - use the default port - * whose info is in attr_array[0] */ + /* If port_guid is 0 - use the first connected port */ if (port_guid == 0) { + for (i = 0; i < num_ports; i++) + if (attr_array[i].link_state > IB_LINK_DOWN) + break; + if (i == num_ports) + i = 0; printf("Using default GUID 0x%" PRIx64 "\n", - cl_hton64(attr_array[0].port_guid)); - return (attr_array[0].port_guid); + cl_hton64(attr_array[i].port_guid)); + return (attr_array[i].port_guid); } -#endif /* OSM_VENDOR_INTF_OPENIB */ /* More than one possible port - list all ports and let the user * to choose. */ while (done_flag == FALSE) { printf("\nChoose a local port number with which to bind:\n\n"); - /* If this is gen2 code - then port 0 has details of the - * default port used, no need to print it. - * If this is not gen2 code - need to print details of - * all ports. 
*/ -#if defined ( OSM_VENDOR_INTF_OPENIB ) - for (i = 1; i < num_ports; i++) - printf("\t%u: GUID 0x%8" PRIx64 - ", lid 0x%04X, state %s\n", i, - cl_ntoh64(attr_array[i].port_guid), - attr_array[i].lid, - ib_get_port_state_str(attr_array[i].link_state)); - printf("\nEnter choice (1-%u): ", i - 1); -# else for (i = 0; i < num_ports; i++) /* Print the index + 1 since by convention, port * numbers start with 1 on host channel adapters. */ - printf("\t%u: GUID 0x%8" PRIx64 - ", lid 0x%04X, state %s\n", i + 1, + printf("\t%u: GUID 0x%" PRIx64 + ", lid %u, state %s\n", i + 1, cl_ntoh64(attr_array[i].port_guid), attr_array[i].lid, ib_get_port_state_str(attr_array[i].link_state)); printf("\nEnter choice (1-%u): ", i); -#endif /* OSM_VENDOR_INTF_OPENIB */ - fflush(stdout); if (scanf("%u", &choice)) { - /* If gen2 code - choice can be between 1 to num_ports-1 - if not gen2 code - choice can be between 1 to num_ports */ -#if defined ( OSM_VENDOR_INTF_OPENIB ) - if (choice >= num_ports) -# else if (choice > num_ports || choice < 1) -#endif /* OSM_VENDOR_INTF_OPENIB */ { printf("\nError: Lame choice!\n"); fflush(stdin); - } else + } else { + choice--; done_flag = TRUE; + } } else { /* get rid of the junk in the selection line */ scanf("%s", junk); @@ -441,13 +423,9 @@ ib_net64_t get_port_guid(IN osm_opensm_t * p_osm, uint64_t port_guid) fflush(stdin); } } -#if defined ( OSM_VENDOR_INTF_OPENIB ) - printf("Choice guid=0x%8" PRIx64 "\n", + printf("Choice guid=0x%" PRIx64 "\n", cl_ntoh64(attr_array[choice].port_guid)); return (attr_array[choice].port_guid); -# else - return (attr_array[choice - 1].port_guid); -#endif /* OSM_VENDOR_INTF_OPENIB */ } /********************************************************************** -- 1.5.3.rc2.29.gc4640f From rdreier at cisco.com Thu Nov 15 01:00:26 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 15 Nov 2007 01:00:26 -0800 Subject: [ofa-general] [PATCH] IPoIB: Remove redundant check in xmit handler In-Reply-To: 
<20071115050537.7100.93755.sendpatchset@K50wks273871wss.in.ibm.com> (Krishna Kumar's message of "Thu, 15 Nov 2007 10:35:37 +0530") References: <20071115050537.7100.93755.sendpatchset@K50wks273871wss.in.ibm.com> Message-ID: > - /* > - * Check if our queue is stopped. Since we have the LLTX bit > - * set, we can't rely on netif_stop_queue() preventing our > - * xmit function from being called with a full queue. > - */ > - if (unlikely(netif_queue_stopped(dev))) { > - spin_unlock_irqrestore(&priv->tx_lock, flags); > - return NETDEV_TX_BUSY; > - } This check was added because of a real problem seen in practice a while ago. Has something changed in the tx queue locking that makes it redundant now? I seem to remember that I could make the problem race trigger pretty fast by making the tx queue very small so that it got stopped a lot. - R. From sashak at voltaire.com Thu Nov 15 01:12:09 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 11:12:09 +0200 Subject: [ofa-general] [PATCH 3/3] opensm/osmtest: adopt to osm_vendor_get_all_port_attr() changes In-Reply-To: <11951179291471-git-send-email-sashak@voltaire.com> References: <11951179291471-git-send-email-sashak@voltaire.com> Message-ID: <11951179291935-git-send-email-sashak@voltaire.com> Count ports from 0 - there are no duplicated records anymore. 
Signed-off-by: Sasha Khapyorsky --- opensm/osmtest/main.c | 3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/opensm/osmtest/main.c b/opensm/osmtest/main.c index d95d5c8..1c9004b 100644 --- a/opensm/osmtest/main.c +++ b/opensm/osmtest/main.c @@ -235,10 +235,9 @@ static void print_all_guids(IN osmtest_t * p_osmt) } printf("\nListing GUIDs:\n"); - for (i = 1; i < num_ports; i++) { /* excluding logical mgmt port */ + for (i = 0; i < num_ports; i++) printf("Port %i: 0x%" PRIx64 "\n", i, cl_hton64(attr_array[i].port_guid)); - } } /********************************************************************** -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 02:10:06 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 12:10:06 +0200 Subject: [ofa-general] [PATCH] opensm: no interactive games in daemon mode In-Reply-To: <11951179291471-git-send-email-sashak@voltaire.com> References: <11951179291471-git-send-email-sashak@voltaire.com> Message-ID: <20071115101006.GL17237@sashak.voltaire.com> When running in daemon mode, don't run the interactive GUID chooser. And if no GUID was selected, do not fail - pass zero to osm_vendor_bind(), which will try to select a port automatically. Signed-off-by: Sasha Khapyorsky --- opensm/opensm/main.c | 9 +++------ 1 files changed, 3 insertions(+), 6 deletions(-) diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 0a58a16..13c9f70 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -393,6 +393,9 @@ ib_net64_t get_port_guid(IN osm_opensm_t * p_osm, uint64_t port_guid) return (attr_array[i].port_guid); } + if (p_osm->subn.opt.daemon) + return 0; + /* More than one possible port - list all ports and let the user * to choose. 
*/ while (done_flag == FALSE) { @@ -956,12 +959,6 @@ int main(int argc, char *argv[]) if (opt.guid == 0 || cl_hton64(opt.guid) == CL_HTON64(INVALID_GUID)) opt.guid = get_port_guid(&osm, opt.guid); - if (opt.guid == 0) { - printf("Error: Could not get port guid\n"); - status = IB_ERROR; - goto Exit; - } - if (cache_options == TRUE && osm_subn_write_conf_file(&opt) != IB_SUCCESS) printf("\nosm_subn_write_conf_file failed!\n"); -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 02:37:54 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 12:37:54 +0200 Subject: [ofa-general] [PATCH] infiniband-diags/ibtracert: print lids in decimal form Message-ID: <20071115103754.GM17237@sashak.voltaire.com> As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) lid output format unification is needed. Print LIDs as decimal in ibtracert. Signed-off-by: Sasha Khapyorsky --- infiniband-diags/src/ibtracert.c | 10 +++++----- 1 files changed, 5 insertions(+), 5 deletions(-) diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c index 010f45f..284ae2a 100644 --- a/infiniband-diags/src/ibtracert.c +++ b/infiniband-diags/src/ibtracert.c @@ -207,7 +207,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); - fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", + fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid %u-%u \"%s\"\n", prompt, (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), node->nodeguid, node->type == IB_NODE_SWITCH ? 
0 : port->portnum, @@ -231,7 +231,7 @@ dump_route(int dump, Node *node, int outport, Port *port) fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", outport, port->portguid, port->portnum); else - fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid 0x%x-0x%x \"%s\"\n", + fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid %u-%u \"%s\"\n", outport, (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), port->portguid, port->portnum, @@ -640,7 +640,7 @@ dump_mcpath(Node *node, int dumplevel) nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); if (!node->dist) { - printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", + printf("From %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), node->nodeguid, node->ports->portnum, node->ports->lid, node->ports->lid + (1 << node->ports->lmc) - 1, @@ -655,7 +655,7 @@ dump_mcpath(Node *node, int dumplevel) (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), node->nodeguid, node->upport); else - printf("[%d] -> %s 0x%" PRIx64 "[%d] lid 0x%x \"%s\"\n", + printf("[%d] -> %s 0x%" PRIx64 "[%d] lid %u \"%s\"\n", node->ports->remoteport->portnum, (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), node->nodeguid, node->upport, @@ -664,7 +664,7 @@ dump_mcpath(Node *node, int dumplevel) if (node->dist < 0) /* target node */ - printf("To %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", + printf("To %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), node->nodeguid, node->ports->portnum, node->ports->lid, node->ports->lid + (1 << node->ports->lmc) - 1, -- 1.5.3.rc2.29.gc4640f From receptorqwk13 at intuit.com Thu Nov 15 02:29:52 2007 From: receptorqwk13 at intuit.com (Byron Ohara) Date: Thu, 15 Nov 2007 19:29:52 +0900 Subject: [ofa-general] Check it out Openib Message-ID: <801867525.77584568065740@intuit.com> An HTML attachment was scrubbed... 
URL: From vlad at lists.openfabrics.org Thu Nov 15 02:53:00 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 15 Nov 2007 02:53:00 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071115-0200 daily build status Message-ID: <20071115105300.37F76E608E7@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.20 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.19 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.13 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.12 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on ia64 
with linux-2.6.22 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In 
function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast 
/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071115-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From mamalido14 at yahoo.de Thu Nov 15 01:46:45 2007 From: mamalido14 at yahoo.de (James Michael) Date: Thu, 15 Nov 2007 05:46:45 -0400 (BOT) Subject: [ofa-general] From Mr. 
James Michael Message-ID: <3922.41.219.202.10.1195120005.squirrel@www.icacruz.org.bo> Dearest Please, I want to introduce myself and this business opportunity to you.My name is Mr James Michael , a bank accountant,I will need your assistance in this business transaction.I would like you to stand as the next of kin to my deceased customer,Mr David Clemetson, who died in a plane crash in January 31st 2000 plane crash.Check the website for the details of the plane crash: http://archives.cnn.com/2000/US/02/01/alaska.airlines.list/ He was a wealthy medical doctor who made a deposit of $17Million with our bank.He died without any registered next of kin and as such the funds now have an open beneficiary mandate. If you are interested please do let me have your private telephone number so that I can give you comprehensive details on what we are to do. Send me the following information: 1. Your Full name : 2. Your private secured telephone number : 3. Your occupation : 4. Your home or office address : I urgently hope to get your response as soon as possible. 
Best Regards, James Michael From kliteyn at dev.mellanox.co.il Thu Nov 15 03:58:46 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 13:58:46 +0200 Subject: [ofa-general] [PATCH] osm: fixing memory leak in QoS policy Message-ID: <473C3476.1050003@dev.mellanox.co.il> Fixing memory leak in QoS policy Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos_policy.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c index 6cdf84c..34f72b0 100644 --- a/opensm/opensm/osm_qos_policy.c +++ b/opensm/opensm/osm_qos_policy.c @@ -400,6 +400,11 @@ void osm_qos_policy_match_rule_destroy(osm_qos_match_rule_t * p) if (p->qos_class_range_arr) free(p->qos_class_range_arr); + for (i = 0; i < p->pkey_range_len; i++) + free(p->pkey_range_arr[i]); + if (p->pkey_range_arr) + free(p->pkey_range_arr); + cl_list_apply_func(&p->source_list, __free_single_element, NULL); cl_list_remove_all(&p->source_list); cl_list_destroy(&p->source_list); -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Thu Nov 15 04:03:17 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 14:03:17 +0200 Subject: [ofa-general] [PATCH] osm: handle first syntax error in policy file Message-ID: <473C3585.5010108@dev.mellanox.co.il> When parsing QoS policy file for the first time, print first syntax error to stdout and to the osm log. 
Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos_parser.y | 11 ++++++++++- 1 files changed, 10 insertions(+), 1 deletions(-) diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index 4738831..71f41fe 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() void __qos_parser_error (char *s) { + static boolean_t first_time = TRUE; + char * last_text_read = __parser_strip_white(__qos_parser_text); OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, "__qos_parser_error: ERR AC05: " "Syntax error (line %d:%d): %s. " "Last text read: \"%s\"\n", - line_num, column_num, s, __parser_strip_white(__qos_parser_text)); + line_num, column_num, s, last_text_read); + if (first_time) + { + first_time = FALSE; + printf("Error parsing QoS Policy File (line %d:%d): \"%s\". " + "Last text read: \"%s\".\n", + line_num, column_num, s, last_text_read); + } OSM_LOG_EXIT(p_qos_parser_osm_log); } -- 1.5.1.4 From jkabelitz at sysgen.de Thu Nov 15 05:06:37 2007 From: jkabelitz at sysgen.de (=?iso-8859-1?Q?J=FCrgen_Kabelitz?=) Date: Thu, 15 Nov 2007 14:06:37 +0100 Subject: [ofa-general] OFED 1.2 and lustreFS Message-ID: Hello I have problems to build the ofed 1.2 software against the Lustre Linux kernel. Linux Kernel: linux-2.6.16-46-0.14_lustre.1.6.3 OFED software: OFED-1.2 When I build the software I got the following error: Building ofa_user RPMs. Please wait... 
Running rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-dapl --with-ipoibtools --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibmad --with-libibumad --with-libibverbs --with-libmthca --with-opensm --with-librdmacm --with-libsdp --with-openib-diags --with-sdpnetstat --with-srptools --with-mstflint --with-perftest --with-tvflash --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-dapl --with-ipoibtools --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibmad --with-libibumad --with-libibverbs --with-libmthca --with-opensm --with-librdmacm --with-libsdp --with-openib-diags --with-sdpnetstat --with-srptools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /usr/src/OFED-1.2/SRPMS/ofa_user-1.2-0.src.rpm Building ofa_kernel RPMs. Please wait... Running rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-cxgb3-mod --with-ipoib-mod --with-mthca-mod --with-sdp-mod --with-srp-mod --with-core-mod --with-user_mad-mod --with-user_access-mod --with-addr_trans-mod --with-rds-mod ' --define 'KVERSION 2.6.16-46-0.14_lustre.1.6.3smp' --define 'KSRC /lib/modules/2.6.16-46-0.14_lustre.1.6.3smp/build' --define 'build_kernel_ib 1' --define 'build_kernel_ib_devel 1' --define 'NETWORK_CONF_DIR /etc/sysconfig/network' --define 'modprobe_update 1' --define 'include_ipoib_conf 1' /usr/src/OFED-1.2/SRPMS/ofa_kernel-1.2-0.src.rpm \ ERROR: Failed executing "rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-cxgb3-mod --with-ipoib-mod --with-mthca-mod --with-sdp-mod --with-srp-mod --with-core-mod --with-user_mad-mod --with-user_access-mod --with-addr_trans-mod --with-rds-mod ' --define 
'KVERSION 2.6.16-46-0.14_lustre.1.6.3smp' --define 'KS RC /lib/modules/2.6.16-46-0.14_lustre.1.6.3smp/build' --define 'build_kernel_ib 1' --define 'build_kernel_ib_devel 1' --define 'NETWORK_CONF_DIR /etc/sysconfig/network' --define 'modprobe_update 1' --defin e 'include_ipoib_conf 1' /usr/src/OFED-1.2/SRPMS/ofa_kernel-1.2-0.src.rpm" See log file: /tmp/OFED.build.19811.log See the attachment OFED.build.19811.log.gz J. Kabelitz sysGen GmbH Support und Technik Clustersysteme Am Hallacker 48 28327 Bremen Tel.: +49 421 40966-28 Fax: +49 421 40966-33 jkabelitz at sysgen.de www.sysgen.de Geschaeftsfuehrerin Gabriele Nikisch Eingetragen beim Amtsgericht Walsrode HRB 121943 -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: OFED.build.19811.log.gz Type: application/x-gzip Size: 140893 bytes Desc: OFED.build.19811.log.gz URL: From kliteyn at dev.mellanox.co.il Thu Nov 15 05:22:29 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 15:22:29 +0200 Subject: [ofa-general] [PATCH] osm: fixing memory leak in node description Message-ID: <473C4815.20904@dev.mellanox.co.il> Fix memory leak in node description Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_node_desc_rcv.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/opensm/osm_node_desc_rcv.c b/opensm/opensm/osm_node_desc_rcv.c index 2a2d3c8..e6a41a5 100644 --- a/opensm/opensm/osm_node_desc_rcv.c +++ b/opensm/opensm/osm_node_desc_rcv.c @@ -85,7 +85,7 @@ __osm_nd_rcv_process_nd(IN const osm_nd_rcv_t * const p_rcv, /* make a copy for this node to "own" */ if (p_node->print_desc) free(p_node->print_desc); - p_node->print_desc = strdup(tmp_desc); + p_node->print_desc = tmp_desc; if (osm_log_is_active(p_rcv->p_log, OSM_LOG_VERBOSE)) { osm_log(p_rcv->p_log, OSM_LOG_VERBOSE, -- 1.5.1.4 From sashak at voltaire.com Thu Nov 15 05:44:58 2007 From: sashak at 
voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 15:44:58 +0200 Subject: [ofa-general] Re: [PATCH] osm: handle first syntax error in policy file In-Reply-To: <473C3585.5010108@dev.mellanox.co.il> References: <473C3585.5010108@dev.mellanox.co.il> Message-ID: <20071115134458.GA21832@sashak.voltaire.com> Hi Yevgeny, On 14:03 Thu 15 Nov , Yevgeny Kliteynik wrote: > When parsing QoS policy file for the first time, print > first syntax error to stdout and to the osm log. > > Signed-off-by: Yevgeny Kliteynik > --- > opensm/opensm/osm_qos_parser.y | 11 ++++++++++- > 1 files changed, 10 insertions(+), 1 deletions(-) > > diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y > index 4738831..71f41fe 100644 > --- a/opensm/opensm/osm_qos_parser.y > +++ b/opensm/opensm/osm_qos_parser.y > @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() > > void __qos_parser_error (char *s) > { > + static boolean_t first_time = TRUE; Could we avoid using static variables? (But seems __qos_parser_error() function itself should be declared as static). Sasha > + char * last_text_read = __parser_strip_white(__qos_parser_text); > OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); > osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, > "__qos_parser_error: ERR AC05: " > "Syntax error (line %d:%d): %s. " > "Last text read: \"%s\"\n", > - line_num, column_num, s, __parser_strip_white(__qos_parser_text)); > + line_num, column_num, s, last_text_read); > + if (first_time) > + { > + first_time = FALSE; > + printf("Error parsing QoS Policy File (line %d:%d): \"%s\". 
" > + "Last text read: \"%s\".\n", > + line_num, column_num, s, last_text_read); > + } > OSM_LOG_EXIT(p_qos_parser_osm_log); > } > > -- > 1.5.1.4 > > From kliteyn at dev.mellanox.co.il Thu Nov 15 05:56:05 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 15:56:05 +0200 Subject: [ofa-general] Re: [PATCH] osm: handle first syntax error in policy file In-Reply-To: <20071115134458.GA21832@sashak.voltaire.com> References: <473C3585.5010108@dev.mellanox.co.il> <20071115134458.GA21832@sashak.voltaire.com> Message-ID: <473C4FF5.40706@dev.mellanox.co.il> Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 14:03 Thu 15 Nov , Yevgeny Kliteynik wrote: >> When parsing QoS policy file for the first time, print >> first syntax error to stdout and to the osm log. >> >> Signed-off-by: Yevgeny Kliteynik >> --- >> opensm/opensm/osm_qos_parser.y | 11 ++++++++++- >> 1 files changed, 10 insertions(+), 1 deletions(-) >> >> diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y >> index 4738831..71f41fe 100644 >> --- a/opensm/opensm/osm_qos_parser.y >> +++ b/opensm/opensm/osm_qos_parser.y >> @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() >> >> void __qos_parser_error (char *s) >> { >> + static boolean_t first_time = TRUE; > > Could we avoid using static variables? Would you prefer global flags instead? Is there any other way? Nothing comes to mind right now... > (But seems __qos_parser_error() function itself should be declared as static). Sorry, can't do - it's defined by yacc, and it's even declared as extern in this file. -- Yevgeny > Sasha > > >> + char * last_text_read = __parser_strip_white(__qos_parser_text); >> OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); >> osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, >> "__qos_parser_error: ERR AC05: " >> "Syntax error (line %d:%d): %s. 
" >> "Last text read: \"%s\"\n", >> - line_num, column_num, s, __parser_strip_white(__qos_parser_text)); >> + line_num, column_num, s, last_text_read); >> + if (first_time) >> + { >> + first_time = FALSE; >> + printf("Error parsing QoS Policy File (line %d:%d): \"%s\". " >> + "Last text read: \"%s\".\n", >> + line_num, column_num, s, last_text_read); >> + } >> OSM_LOG_EXIT(p_qos_parser_osm_log); >> } >> >> -- >> 1.5.1.4 >> >> > From hrosenstock at xsigo.com Thu Nov 15 06:30:17 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Thu, 15 Nov 2007 06:30:17 -0800 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <20071115103754.GM17237@sashak.voltaire.com> References: <20071115103754.GM17237@sashak.voltaire.com> Message-ID: <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > lid output format unification is needed. Print LIDs as decimal in > ibtracert. I'd prefer to see this done as some sort of option. Also, I think hex is better for MLIDs. It would be best to see the bigger picture for what will change to support decimal LIDs in management and ibutils but this is a first trial balloon IMO. 
-- Hal > > Signed-off-by: Sasha Khapyorsky > --- > infiniband-diags/src/ibtracert.c | 10 +++++----- > 1 files changed, 5 insertions(+), 5 deletions(-) > > diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c > index 010f45f..284ae2a 100644 > --- a/infiniband-diags/src/ibtracert.c > +++ b/infiniband-diags/src/ibtracert.c > @@ -207,7 +207,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) > > nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > - fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", > + fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid %u-%u \"%s\"\n", > prompt, > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > node->nodeguid, node->type == IB_NODE_SWITCH ? 0 : port->portnum, > @@ -231,7 +231,7 @@ dump_route(int dump, Node *node, int outport, Port *port) > fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", > outport, port->portguid, port->portnum); > else > - fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid 0x%x-0x%x \"%s\"\n", > + fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid %u-%u \"%s\"\n", > outport, > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > port->portguid, port->portnum, > @@ -640,7 +640,7 @@ dump_mcpath(Node *node, int dumplevel) > nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > if (!node->dist) { > - printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", > + printf("From %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > node->nodeguid, node->ports->portnum, node->ports->lid, > node->ports->lid + (1 << node->ports->lmc) - 1, > @@ -655,7 +655,7 @@ dump_mcpath(Node *node, int dumplevel) > (node->type <= IB_NODE_MAX ? 
node_type_str[node->type] : "???"), > node->nodeguid, node->upport); > else > - printf("[%d] -> %s 0x%" PRIx64 "[%d] lid 0x%x \"%s\"\n", > + printf("[%d] -> %s 0x%" PRIx64 "[%d] lid %u \"%s\"\n", > node->ports->remoteport->portnum, > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > node->nodeguid, node->upport, > @@ -664,7 +664,7 @@ dump_mcpath(Node *node, int dumplevel) > > if (node->dist < 0) > /* target node */ > - printf("To %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", > + printf("To %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > node->nodeguid, node->ports->portnum, node->ports->lid, > node->ports->lid + (1 << node->ports->lmc) - 1, From kliteyn at dev.mellanox.co.il Thu Nov 15 06:46:28 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 16:46:28 +0200 Subject: [ofa-general] Re: [PATCH] osm: handle first syntax error in policy file In-Reply-To: <473C4FF5.40706@dev.mellanox.co.il> References: <473C3585.5010108@dev.mellanox.co.il> <20071115134458.GA21832@sashak.voltaire.com> <473C4FF5.40706@dev.mellanox.co.il> Message-ID: <473C5BC4.5040009@dev.mellanox.co.il> Yevgeny Kliteynik wrote: > Sasha Khapyorsky wrote: >> Hi Yevgeny, >> >> On 14:03 Thu 15 Nov , Yevgeny Kliteynik wrote: >>> When parsing QoS policy file for the first time, print >>> first syntax error to stdout and to the osm log. >>> >>> Signed-off-by: Yevgeny Kliteynik >>> --- >>> opensm/opensm/osm_qos_parser.y | 11 ++++++++++- >>> 1 files changed, 10 insertions(+), 1 deletions(-) >>> >>> diff --git a/opensm/opensm/osm_qos_parser.y >>> b/opensm/opensm/osm_qos_parser.y >>> index 4738831..71f41fe 100644 >>> --- a/opensm/opensm/osm_qos_parser.y >>> +++ b/opensm/opensm/osm_qos_parser.y >>> @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() >>> >>> void __qos_parser_error (char *s) >>> { >>> + static boolean_t first_time = TRUE; >> >> Could we avoid using static variables? 
> > Would you prefer global flags instead? > Is there any other way? Nothing comes to mind right now... I can also remove the flag and have the error message printed to stderr with every sweep, until the syntax error is fixed. Sounds better? -- Yevgeny >> (But seems __qos_parser_error() function itself should be declared as >> static). > > Sorry, can't do - it's defined by yacc, and > it's even declared as extern in this file. > > -- Yevgeny > >> Sasha >> >> >>> + char * last_text_read = __parser_strip_white(__qos_parser_text); >>> OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); >>> osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, >>> "__qos_parser_error: ERR AC05: " >>> "Syntax error (line %d:%d): %s. " >>> "Last text read: \"%s\"\n", >>> - line_num, column_num, s, >>> __parser_strip_white(__qos_parser_text)); >>> + line_num, column_num, s, last_text_read); >>> + if (first_time) >>> + { >>> + first_time = FALSE; >>> + printf("Error parsing QoS Policy File (line %d:%d): \"%s\". " >>> + "Last text read: \"%s\".\n", >>> + line_num, column_num, s, last_text_read); >>> + } >>> OSM_LOG_EXIT(p_qos_parser_osm_log); >>> } >>> >>> -- >>> 1.5.1.4 >>> >>> >> > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From kliteyn at dev.mellanox.co.il Thu Nov 15 06:58:27 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 16:58:27 +0200 Subject: [ofa-general] [PATCH] osm: remove unused 'is_ipoib' parameter when adding mcast group Message-ID: <473C5E93.7070703@dev.mellanox.co.il> Remove unused 'is_ipoib' parameter when creating mcast group for ipoib partitions. 
Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_prtn.c | 4 ++-- opensm/opensm/osm_prtn_config.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opensm/opensm/osm_prtn.c b/opensm/opensm/osm_prtn.c index 151c4aa..8b20459 100644 --- a/opensm/opensm/osm_prtn.c +++ b/opensm/opensm/osm_prtn.c @@ -189,7 +189,7 @@ static const ib_gid_t osm_ts_ipoib_mgid = { ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, osm_subn_t * p_subn, osm_prtn_t * p, - unsigned is_ipoib, uint8_t rate, + uint8_t rate, uint8_t mtu, uint8_t scope) { ib_member_rec_t mc_rec; @@ -338,7 +338,7 @@ static ib_api_status_t osm_prtn_make_default(osm_log_t * const p_log, osm_prtn_add_port(p_log, p_subn, p, p_subn->sm_port_guid, TRUE); if (no_config) - osm_prtn_add_mcgroup(p_log, p_subn, p, 1, 0, 0, 0); + osm_prtn_add_mcgroup(p_log, p_subn, p, 0, 0, 0); _err: return status; diff --git a/opensm/opensm/osm_prtn_config.c b/opensm/opensm/osm_prtn_config.c index 2124c2d..1253031 100644 --- a/opensm/opensm/osm_prtn_config.c +++ b/opensm/opensm/osm_prtn_config.c @@ -82,7 +82,7 @@ extern ib_api_status_t osm_prtn_add_port(osm_log_t * p_log, ib_net64_t guid, boolean_t full); extern ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, osm_subn_t * p_subn, osm_prtn_t * p, - unsigned is_ipoib, uint8_t rate, + uint8_t rate, uint8_t mtu, uint8_t scope); static int partition_create(unsigned lineno, struct part_conf *conf, @@ -121,7 +121,7 @@ static int partition_create(unsigned lineno, struct part_conf *conf, if (conf->is_ipoib) osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, - conf->is_ipoib, (uint8_t) conf->rate, + (uint8_t) conf->rate, (uint8_t) conf->mtu, (uint8_t) conf->scope); -- 1.5.1.4 From kliteyn at mellanox.co.il Wed Nov 14 21:12:57 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 15 Nov 2007 07:12:57 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-15:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail 
- please do NOT reply] OpenSM binary date = 2007-11-14 OpenSM git rev = Wed_Nov_14_20:28:12_2007 [c37b7050b85318978811506752b2c1ade26735b3] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From sashak at voltaire.com Thu Nov 15 07:19:33 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 17:19:33 +0200 Subject: [ofa-general] Re: [PATCH] osm: handle first syntax error in policy file In-Reply-To: <473C4FF5.40706@dev.mellanox.co.il> References: <473C3585.5010108@dev.mellanox.co.il> <20071115134458.GA21832@sashak.voltaire.com> <473C4FF5.40706@dev.mellanox.co.il> Message-ID: <20071115151933.GC21832@sashak.voltaire.com> On 15:56 Thu 15 Nov , Yevgeny Kliteynik wrote: > Sasha Khapyorsky wrote: > > Hi Yevgeny, > > On 14:03 Thu 15 Nov , Yevgeny Kliteynik wrote: > >> When parsing QoS policy file for the first time, print > >> first syntax error to stdout and to the osm log. 
> >> > >> Signed-off-by: Yevgeny Kliteynik > >> --- > >> opensm/opensm/osm_qos_parser.y | 11 ++++++++++- > >> 1 files changed, 10 insertions(+), 1 deletions(-) > >> > >> diff --git a/opensm/opensm/osm_qos_parser.y > >> b/opensm/opensm/osm_qos_parser.y > >> index 4738831..71f41fe 100644 > >> --- a/opensm/opensm/osm_qos_parser.y > >> +++ b/opensm/opensm/osm_qos_parser.y > >> @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() > >> > >> void __qos_parser_error (char *s) > >> { > >> + static boolean_t first_time = TRUE; > > Could we avoid using static variables? > > Would you prefer global flags instead? Not really. > Is there any other way? Nothing comes to mind right now... Could you pass parameters to parser, then this flag could be kept as part of the structure. > > > (But seems __qos_parser_error() function itself should be declared as > > static). > > Sorry, can't do - it's defined by yacc, and > it's even declared as extern in this file. Isn't it hand made part? This works for me: diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index 4738831..d9e1567 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -124,9 +124,9 @@ static void __parser_add_partition_list_to_port_map( static void __parser_add_map_to_port_map( cl_qmap_t * p_dmap, cl_map_t * p_smap); +static void __qos_parser_error (char *s); extern char * __qos_parser_text; -extern void __qos_parser_error (char *s); extern int __qos_parser_lex (void); extern FILE * __qos_parser_in; extern int errno; @@ -1871,7 +1871,7 @@ int __qos_parser_wrap() /*************************************************** ***************************************************/ -void __qos_parser_error (char *s) +static void __qos_parser_error (char *s) { OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, Sasha From sashak at voltaire.com Thu Nov 15 07:23:17 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 
17:23:17 +0200 Subject: [ofa-general] Re: [PATCH] osm: handle first syntax error in policy file In-Reply-To: <473C5BC4.5040009@dev.mellanox.co.il> References: <473C3585.5010108@dev.mellanox.co.il> <20071115134458.GA21832@sashak.voltaire.com> <473C4FF5.40706@dev.mellanox.co.il> <473C5BC4.5040009@dev.mellanox.co.il> Message-ID: <20071115152317.GD21832@sashak.voltaire.com> On 16:46 Thu 15 Nov , Yevgeny Kliteynik wrote: > Yevgeny Kliteynik wrote: > > Sasha Khapyorsky wrote: > >> Hi Yevgeny, > >> > >> On 14:03 Thu 15 Nov , Yevgeny Kliteynik wrote: > >>> When parsing QoS policy file for the first time, print > >>> first syntax error to stdout and to the osm log. > >>> > >>> Signed-off-by: Yevgeny Kliteynik > >>> --- > >>> opensm/opensm/osm_qos_parser.y | 11 ++++++++++- > >>> 1 files changed, 10 insertions(+), 1 deletions(-) > >>> > >>> diff --git a/opensm/opensm/osm_qos_parser.y > >>> b/opensm/opensm/osm_qos_parser.y > >>> index 4738831..71f41fe 100644 > >>> --- a/opensm/opensm/osm_qos_parser.y > >>> +++ b/opensm/opensm/osm_qos_parser.y > >>> @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() > >>> > >>> void __qos_parser_error (char *s) > >>> { > >>> + static boolean_t first_time = TRUE; > >> > >> Could we avoid using static variables? > > Would you prefer global flags instead? > > Is there any other way? Nothing comes to mind right now... > > I can also remove the flag and have the error message printed > to stderr with every sweep, until the syntax error is fixed. > Sounds better? Perhaps. If there are no "false alarm" or "just warnings" it may be useful to have it permanently on. Sasha > > -- Yevgeny > > >> (But seems __qos_parser_error() function itself should be declared as > >> static). > > Sorry, can't do - it's defined by yacc, and > > it's even declared as extern in this file. 
> > -- Yevgeny > >> Sasha > >> > >> > >>> + char * last_text_read = __parser_strip_white(__qos_parser_text); > >>> OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); > >>> osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, > >>> "__qos_parser_error: ERR AC05: " > >>> "Syntax error (line %d:%d): %s. " > >>> "Last text read: \"%s\"\n", > >>> - line_num, column_num, s, > >>> __parser_strip_white(__qos_parser_text)); > >>> + line_num, column_num, s, last_text_read); > >>> + if (first_time) > >>> + { > >>> + first_time = FALSE; > >>> + printf("Error parsing QoS Policy File (line %d:%d): \"%s\". " > >>> + "Last text read: \"%s\".\n", > >>> + line_num, column_num, s, last_text_read); > >>> + } > >>> OSM_LOG_EXIT(p_qos_parser_osm_log); > >>> } > >>> > >>> -- > >>> 1.5.1.4 > >>> > >>> > >> > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > From jackm at dev.mellanox.co.il Thu Nov 15 07:18:47 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Thu, 15 Nov 2007 17:18:47 +0200 Subject: [ofa-general] [PATCH] mlx4: fix qp state bounds check for mlx4_qp_modify Message-ID: <200711151718.48203.jackm@dev.mellanox.co.il> mlx4: fix state check in mlx4_qp_modify. cur_state is mistakenly checked twice. One of the checks should use new_state, rather than cur_state, so that the subsequent array-value check will have in-bounds indices. Signed-off-by: Jack Morgenstein --- This one has been around since commit 225c7b1feef1b41170f7037a5b10a65cd8a42c54, and even survived commit 287aa83dffd1b39859f49d73b0d67f57106de5f1. It's an Oops waiting to happen.
Please commit this as a bug fix to 2.6.24 diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index cc4b1be..c17b19d 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -113,7 +113,7 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_cmd_mailbox *mailbox; int ret = 0; - if (cur_state >= MLX4_QP_NUM_STATE || cur_state >= MLX4_QP_NUM_STATE || + if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || !op[cur_state][new_state]) return -EINVAL; From sashak at voltaire.com Thu Nov 15 07:25:44 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 17:25:44 +0200 Subject: [ofa-general] Re: [PATCH] osm: fixing memory leak in QoS policy In-Reply-To: <473C3476.1050003@dev.mellanox.co.il> References: <473C3476.1050003@dev.mellanox.co.il> Message-ID: <20071115152544.GE21832@sashak.voltaire.com> On 13:58 Thu 15 Nov , Yevgeny Kliteynik wrote: > Fixing memory leak in QoS policy > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. Sasha From sashak at voltaire.com Thu Nov 15 07:35:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 17:35:35 +0200 Subject: [ofa-general] Re: [PATCH] osm: fixing memory leak in node description In-Reply-To: <473C4815.20904@dev.mellanox.co.il> References: <473C4815.20904@dev.mellanox.co.il> Message-ID: <20071115153535.GF21832@sashak.voltaire.com> On 15:22 Thu 15 Nov , Yevgeny Kliteynik wrote: > Fix memory leak in node description > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. 
Sasha From kliteyn at dev.mellanox.co.il Thu Nov 15 07:30:11 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 17:30:11 +0200 Subject: [ofa-general] Re: [PATCH] osm: handle first syntax error in policy file In-Reply-To: <20071115152317.GD21832@sashak.voltaire.com> References: <473C3585.5010108@dev.mellanox.co.il> <20071115134458.GA21832@sashak.voltaire.com> <473C4FF5.40706@dev.mellanox.co.il> <473C5BC4.5040009@dev.mellanox.co.il> <20071115152317.GD21832@sashak.voltaire.com> Message-ID: <473C6603.6010707@dev.mellanox.co.il> Sasha Khapyorsky wrote: > On 16:46 Thu 15 Nov , Yevgeny Kliteynik wrote: >> Yevgeny Kliteynik wrote: >>> Sasha Khapyorsky wrote: >>>> Hi Yevgeny, >>>> >>>> On 14:03 Thu 15 Nov , Yevgeny Kliteynik wrote: >>>>> When parsing QoS policy file for the first time, print >>>>> first syntax error to stdout and to the osm log. >>>>> >>>>> Signed-off-by: Yevgeny Kliteynik >>>>> --- >>>>> opensm/opensm/osm_qos_parser.y | 11 ++++++++++- >>>>> 1 files changed, 10 insertions(+), 1 deletions(-) >>>>> >>>>> diff --git a/opensm/opensm/osm_qos_parser.y >>>>> b/opensm/opensm/osm_qos_parser.y >>>>> index 4738831..71f41fe 100644 >>>>> --- a/opensm/opensm/osm_qos_parser.y >>>>> +++ b/opensm/opensm/osm_qos_parser.y >>>>> @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() >>>>> >>>>> void __qos_parser_error (char *s) >>>>> { >>>>> + static boolean_t first_time = TRUE; >>>> Could we avoid using static variables? >>> Would you prefer global flags instead? >>> Is there any other way? Nothing comes to mind right now... >> I can also remove the flag and have the error message printed >> to stderr with every sweep, until the syntax error is fixed. >> Sounds better? > > Perhaps. If there are no "false alarm" or "just warnings" it may be > useful to have it permanently on. No false alarms - syntax error means that policy file won't be used at all. I'll repost the patch. 
-- Yevgeny > Sasha > >> -- Yevgeny >> >>>> (But seems __qos_parser_error() function itself should be declared as >>>> static). >>> Sorry, can't do - it's defined by yacc, and >>> it's even declared as extern in this file. >>> -- Yevgeny >>>> Sasha >>>> >>>> >>>>> + char * last_text_read = __parser_strip_white(__qos_parser_text); >>>>> OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); >>>>> osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, >>>>> "__qos_parser_error: ERR AC05: " >>>>> "Syntax error (line %d:%d): %s. " >>>>> "Last text read: \"%s\"\n", >>>>> - line_num, column_num, s, >>>>> __parser_strip_white(__qos_parser_text)); >>>>> + line_num, column_num, s, last_text_read); >>>>> + if (first_time) >>>>> + { >>>>> + first_time = FALSE; >>>>> + printf("Error parsing QoS Policy File (line %d:%d): \"%s\". " >>>>> + "Last text read: \"%s\".\n", >>>>> + line_num, column_num, s, last_text_read); >>>>> + } >>>>> OSM_LOG_EXIT(p_qos_parser_osm_log); >>>>> } >>>>> >>>>> -- >>>>> 1.5.1.4 >>>>> >>>>> >>> _______________________________________________ >>> general mailing list >>> general at lists.openfabrics.org >>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>> To unsubscribe, please visit >>> http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Thu Nov 15 07:43:54 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 17:43:54 +0200 Subject: [ofa-general] Re: [PATCH] osm: remove unused 'is_ipoib' parameter when adding mcast group In-Reply-To: <473C5E93.7070703@dev.mellanox.co.il> References: <473C5E93.7070703@dev.mellanox.co.il> Message-ID: <20071115154354.GG21832@sashak.voltaire.com> On 16:58 Thu 15 Nov , Yevgeny Kliteynik wrote: > Remove unused 'is_ipoib' parameter when > creating mcast group for ipoib partitions. > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. 
Sasha From kliteyn at dev.mellanox.co.il Thu Nov 15 07:36:50 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Thu, 15 Nov 2007 17:36:50 +0200 Subject: [ofa-general] Re: [PATCH] osm: handle first syntax error in policy file In-Reply-To: <20071115151933.GC21832@sashak.voltaire.com> References: <473C3585.5010108@dev.mellanox.co.il> <20071115134458.GA21832@sashak.voltaire.com> <473C4FF5.40706@dev.mellanox.co.il> <20071115151933.GC21832@sashak.voltaire.com> Message-ID: <473C6792.8000904@dev.mellanox.co.il> Sasha Khapyorsky wrote: > On 15:56 Thu 15 Nov , Yevgeny Kliteynik wrote: >> Sasha Khapyorsky wrote: >>> Hi Yevgeny, >>> On 14:03 Thu 15 Nov , Yevgeny Kliteynik wrote: >>>> When parsing QoS policy file for the first time, print >>>> first syntax error to stdout and to the osm log. >>>> >>>> Signed-off-by: Yevgeny Kliteynik >>>> --- >>>> opensm/opensm/osm_qos_parser.y | 11 ++++++++++- >>>> 1 files changed, 10 insertions(+), 1 deletions(-) >>>> >>>> diff --git a/opensm/opensm/osm_qos_parser.y >>>> b/opensm/opensm/osm_qos_parser.y >>>> index 4738831..71f41fe 100644 >>>> --- a/opensm/opensm/osm_qos_parser.y >>>> +++ b/opensm/opensm/osm_qos_parser.y >>>> @@ -1873,12 +1873,21 @@ int __qos_parser_wrap() >>>> >>>> void __qos_parser_error (char *s) >>>> { >>>> + static boolean_t first_time = TRUE; >>> Could we avoid using static variables? >> Would you prefer global flags instead? > > Not really. > >> Is there any other way? Nothing comes to mind right now... > > Could you pass parameters to parser, then this flag could be kept as > part of the structure. > >>> (But seems __qos_parser_error() function itself should be declared as >>> static). >> Sorry, can't do - it's defined by yacc, and >> it's even declared as extern in this file. > > Isn't it hand made part? > > This works for me: > Indeed... Anyway, as I said, I'll repost the patch. 
-- Yevgeny > diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y > index 4738831..d9e1567 100644 > --- a/opensm/opensm/osm_qos_parser.y > +++ b/opensm/opensm/osm_qos_parser.y > @@ -124,9 +124,9 @@ static void __parser_add_partition_list_to_port_map( > static void __parser_add_map_to_port_map( > cl_qmap_t * p_dmap, > cl_map_t * p_smap); > +static void __qos_parser_error (char *s); > > extern char * __qos_parser_text; > -extern void __qos_parser_error (char *s); > extern int __qos_parser_lex (void); > extern FILE * __qos_parser_in; > extern int errno; > @@ -1871,7 +1871,7 @@ int __qos_parser_wrap() > /*************************************************** > ***************************************************/ > > -void __qos_parser_error (char *s) > +static void __qos_parser_error (char *s) > { > OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); > osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, > > > Sasha > From sashak at voltaire.com Thu Nov 15 08:08:13 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 18:08:13 +0200 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071115160813.GH21832@sashak.voltaire.com> On 06:30 Thu 15 Nov , Hal Rosenstock wrote: > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > lid output format unification is needed. Print LIDs as decimal in > > ibtracert. > > I'd prefer to see this done as some sort of option. It is possible, but I think it is overkill. > Also, I think hex is > better for MLIDs. Yes, there will still be a hex view for MLIDs.
> It would be best to see the bigger picture for what will change to > support decimal LIDs in management and ibutils but this is a first trial > balloon IMO. For both management and ibutils bugs were filed. We need to start somewhere... Sasha From yaronh at voltaire.com Thu Nov 15 07:56:27 2007 From: yaronh at voltaire.com (Yaron Haviv) Date: Thu, 15 Nov 2007 17:56:27 +0200 Subject: [ofa-general] Adaptive Routing in InfiniBand at SC07 Today Message-ID: For anyone that happened to be at SC07 this week We will have a BoF session to openly discuss Adaptive Routing in InfiniBand, room A1/A6 See: http://sc07.supercomputing.org/schedule/event_detail.php?evid=11328 Presenters will be myself, Matt Leininger, and Gilad (Mellanox) * The general idea is to describe the requirements / needs * Look at some potential solutions * Have an open discussion with the industry on the required next steps Hope to see you later today, Yaron From terball at icqmail.com Thu Nov 15 08:35:20 2007 From: terball at icqmail.com (Alphonse Ouellette) Date: Thu, 15 Nov 2007 16:35:20 +0000 Subject: [ofa-general] Police believe the man is now on the run in Thailand Message-ID: <01c827a5$83188b10$55479a51@terball> A Lithuanian court convicted Cantat in 2004 of beating 41-year-old French actress Marie Trintignant to death in a Vilnius hotel room, and he was ordered to spend eight years behind bars. He was released early for good behavior.The charges resulted from an August 6 incident in which Spears allegedly hit another car in a Studio City, California, parking lot.
The paparazzi captured the accident on video.The victim's mother, Nadine Trintignant, had opposed Cantat's early release, saying it was a blow for the those fighting to end violence against women.After holding talks with the president, Tymoshenko and the leaders of the pro-Yushchenko party signed an agreement that gives Tymoshenko's party the right to name the prime minister -- virtually assuring she would get the post -- and allows Yushchenko's party to name the parliament speaker.Spears and Federline have been embroiled in a bitter custody fight over their sons, Sean Preston, 2, and Jayden James, 1.KIEV, Ukraine (AP) -- Inspired by President Viktor Yushchenko's tacit endorsement, Ukraine's pro-Western parties moved closer to restoring their Orange Revolution alliance when they signed an informal agreement vowing to form a governing coalition.Together, the parties won 228 seats in the 450-member legislature, Central Election Commission spokesman Konstantin Khivrenko said -- two clear of a majority. The rival party led by Prime Minister Viktor Yanukovych won more votes than any other, but it was only enough for 175 seats.The traffic charges are the latest in what has been a difficult year for the former teen sensation. Timeline: Britney Spears in the news ťHer divorce attorney, Laura Wasser, resigned last month as her legal representative after telling reporters the singer "just wants to be a mom."Former prime minister Yulia Tymoshenko (left) has received support from Ukranian President Yushchenko -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- A non-text attachment was scrubbed... 
Name: pic01 Type: image/gif Size: 15837 bytes Desc: not available URL: From berrypicker at jwmfoleyinc.com Thu Nov 15 10:30:56 2007 From: berrypicker at jwmfoleyinc.com (June Simpson) Date: Thu, 15 Nov 2007 13:30:56 -0500 Subject: [ofa-general] Microsoft Office 2007 Enterprise for 79, Retails @ 899 (You Save 819) Message-ID: <000001c827a4$6acd0500$0100007f@localhost> mindjet mindmanager pro 7.0 - 39 adobe golive cs2 - 49 adobe photoshop cs2 v 9.0 - 69 media tools professional 5 - 39 adobe indesign cs2 - 59 symantec norton 360 - 29 graphisoft archicad 9.0 r1 international - 69 borland developer studio 2006 - 149 type cheapxpsoft7. com in Internet Explorer From yaronh at voltaire.com Thu Nov 15 08:57:37 2007 From: yaronh at voltaire.com (Yaron Haviv) Date: Thu, 15 Nov 2007 18:57:37 +0200 Subject: [ofa-general] Adaptive Routing in InfiniBand at SC07 Today In-Reply-To: References: Message-ID: I didn't mention the hour it's at 12:15 Yaron > -----Original Message----- > From: general-bounces at lists.openfabrics.org [mailto:general- > bounces at lists.openfabrics.org] On Behalf Of Yaron Haviv > Sent: Thursday, November 15, 2007 10:56 AM > To: openib-general at openib.org; lwg at infinibandta.org > Subject: [ofa-general] Adaptive Routing in InfiniBand at SC07 Today > > For anyone that happened to be at SC07 this week > We will have a BoF session to openly discuss Adaptive Routing in > InfiniBand, room A1/A6 > See: http://sc07.supercomputing.org/schedule/event_detail.php?evid=11328 > > Presenters will be myself, Matt Leininger, and Gilad (Mellanox) > * The general idea is to describe the requirements / needs > * Look at some potential solutions > * Have an open discussion with the industry on the required next steps > > Hope to see you later today, Yaron > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit 
http://openib.org/mailman/listinfo/openib- > general From arthur.jones at qlogic.com Thu Nov 15 09:10:33 2007 From: arthur.jones at qlogic.com (Arthur Jones) Date: Thu, 15 Nov 2007 09:10:33 -0800 Subject: [ofa-general] Re: [PATCH] QLogic InfiniPath: convert ipath_eep_sem to mutex In-Reply-To: References: <20071113184503.GE30483@traven> Message-ID: <20071115171033.GD5630@bauxite.pathscale.com> hi roland, thanks for picking this up... On Wed, Nov 14, 2007 at 12:56:12PM -0800, Roland Dreier wrote: > [ Forwarding to ipath entry from MAINTAINERS file... if someone from > Qlogic will ACK this, I'll add it to my tree for 2.6.25 - Roland ] > > QLogic InfiniPath: convert the semaphore ipath_eep_sem to the mutex > API > > Signed-off-by: Matthias Kaehlcke Acked-by: Michael Albaugh Tested-by: Arthur Jones arthur From whitefishery at chinagyzs.com Thu Nov 15 09:23:42 2007 From: whitefishery at chinagyzs.com (Carlos Reed) Date: Thu, 15 Nov 2007 17:23:42 +0000 Subject: [ofa-general] Adobe Master Suite for XP/Vista for 299, Retails @ 2499 (You save 2199) Message-ID: <000001c827ab$ae773d00$0100007f@localhost> microsoft frontpage 2003 - 29 sony vegas 6 - 69 microsoft money home & business 7 - 39 adobe fireworks cs3 - 59 luxology modo 301 for mac - 129 symantec norton antivirus 10.1 for mac - 29 adobe illustrator cs2 - 59 realize voice 3.51 - 29 visit cheapoemsoft3 .com in Internet Explorer From weiny2 at llnl.gov Thu Nov 15 09:40:14 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 15 Nov 2007 09:40:14 -0800 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071115094014.419113b7.weiny2@llnl.gov> On Thu, 15 Nov 2007 06:30:17 -0800 Hal Rosenstock wrote: > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > As stated in bug#504 
(https://bugs.openfabrics.org/show_bug.cgi?id=504) > > lid output format unification is needed. Print LIDs as decimal in > > ibtracert. > > I'd prefer to see this done as some sort of option. Also, I think hex is > better for MLIDs. While I support the output of decimal for the diags I don't know if this changes any of the scripts. We should check that and I don't have the time right now. Sasha do any of the scripts use ibtracert? Does anyone else on the list use the output? Ira > It would be best to see the bigger picture for what will change to > support decimal LIDs in management and ibutils but this is a first trial > balloon IMO. > > -- Hal > > > > > Signed-off-by: Sasha Khapyorsky > > --- > > infiniband-diags/src/ibtracert.c | 10 +++++----- > > 1 files changed, 5 insertions(+), 5 deletions(-) > > > > diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c > > index 010f45f..284ae2a 100644 > > --- a/infiniband-diags/src/ibtracert.c > > +++ b/infiniband-diags/src/ibtracert.c > > @@ -207,7 +207,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) > > > > nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > > > - fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", > > + fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid %u-%u \"%s\"\n", > > prompt, > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > node->nodeguid, node->type == IB_NODE_SWITCH ? 0 : port->portnum, > > @@ -231,7 +231,7 @@ dump_route(int dump, Node *node, int outport, Port *port) > > fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", > > outport, port->portguid, port->portnum); > > else > > - fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid 0x%x-0x%x \"%s\"\n", > > + fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid %u-%u \"%s\"\n", > > outport, > > (node->type <= IB_NODE_MAX ? 
node_type_str[node->type] : "???"), > > port->portguid, port->portnum, > > @@ -640,7 +640,7 @@ dump_mcpath(Node *node, int dumplevel) > > nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > > > if (!node->dist) { > > - printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", > > + printf("From %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > node->nodeguid, node->ports->portnum, node->ports->lid, > > node->ports->lid + (1 << node->ports->lmc) - 1, > > @@ -655,7 +655,7 @@ dump_mcpath(Node *node, int dumplevel) > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > node->nodeguid, node->upport); > > else > > - printf("[%d] -> %s 0x%" PRIx64 "[%d] lid 0x%x \"%s\"\n", > > + printf("[%d] -> %s 0x%" PRIx64 "[%d] lid %u \"%s\"\n", > > node->ports->remoteport->portnum, > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > node->nodeguid, node->upport, > > @@ -664,7 +664,7 @@ dump_mcpath(Node *node, int dumplevel) > > > > if (node->dist < 0) > > /* target node */ > > - printf("To %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", > > + printf("To %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", > > (node->type <= IB_NODE_MAX ? 
node_type_str[node->type] : "???"), > > node->nodeguid, node->ports->portnum, node->ports->lid, > > node->ports->lid + (1 << node->ports->lmc) - 1, > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sashak at voltaire.com Thu Nov 15 12:16:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 22:16:35 +0200 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <20071115094014.419113b7.weiny2@llnl.gov> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> <20071115094014.419113b7.weiny2@llnl.gov> Message-ID: <20071115201635.GI21832@sashak.voltaire.com> On 09:40 Thu 15 Nov , Ira Weiny wrote: > On Thu, 15 Nov 2007 06:30:17 -0800 > Hal Rosenstock wrote: > > > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > > lid output format unification is needed. Print LIDs as decimal in > > > ibtracert. > > > > I'd prefer to see this done as some sort of option. Also, I think hex is > > better for MLIDs. > > While I support the output of decimal for the diags I don't know if this > changes any of the scripts. We should check that and I don't have the time > right now. > > Sasha do any of the scripts use ibtracert? I cannot find - 'grep ibtracert infiniband-diags/scripts/*' returns nothing. > Does anyone else on the list use > the output? Still waiting for comments, but it is unlikely IMO. 
Sasha From sashak at voltaire.com Thu Nov 15 12:17:42 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 22:17:42 +0200 Subject: [ofa-general] [PATCH] opensm/osmtest: update port chooser In-Reply-To: <11951179291471-git-send-email-sashak@voltaire.com> References: <11951179291471-git-send-email-sashak@voltaire.com> Message-ID: <20071115201742.GJ21832@sashak.voltaire.com> Minimalistic update of the osmtest port chooser - this will work in the same fashion as it was before osm_vendor_get_all_port_attr() update. Signed-off-by: Sasha Khapyorsky --- opensm/osmtest/main.c | 7 ++++--- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/opensm/osmtest/main.c b/opensm/osmtest/main.c index 1c9004b..dade6da 100644 --- a/opensm/osmtest/main.c +++ b/opensm/osmtest/main.c @@ -263,15 +263,16 @@ ib_net64_t get_port_guid(IN osmtest_t * p_osmt, uint64_t port_guid) return (0); } - if (port_guid == 0 || num_ports == 1) { + if (num_ports == 1) { printf("using default guid 0x%" PRIx64 "\n", cl_hton64(attr_array[0].port_guid)); return (attr_array[0].port_guid); } for (i = 0; i < num_ports; i++) { - if (attr_array[i].port_guid == port_guid) - return port_guid; + if (attr_array[i].port_guid == port_guid || + (!port_guid && attr_array[i].link_state > IB_LINK_DOWN)) + return attr_array[i].port_guid; } return 0; -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Thu Nov 15 12:18:56 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 15 Nov 2007 22:18:56 +0200 Subject: [ofa-general] [PATCH] opensm/libvendor: remove __osmv_get_lid_and_sm_lid_by_port_guid() In-Reply-To: <11951179291471-git-send-email-sashak@voltaire.com> References: <11951179291471-git-send-email-sashak@voltaire.com> Message-ID: <20071115201856.GK21832@sashak.voltaire.com> __osmv_get_lid_and_sm_lid_by_port_guid() used undocumented osm_vendor_get_all_port_attr() in order to resolve number of ports on the system. 
This whole model is not very effective - it does a full system scan again and again, although only changes to the currently bound port's LIDs (BaseLid and SMLid) are interesting. This patch optimizes this code - finally __osmv_get_lid_and_sm_lid_by_port_guid() is replaced by the simpler update_umad_port(). Signed-off-by: Sasha Khapyorsky --- opensm/libvendor/osm_vendor_ibumad_sa.c | 149 ++++++------------------------- 1 files changed, 27 insertions(+), 122 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad_sa.c b/opensm/libvendor/osm_vendor_ibumad_sa.c index 4b7ae4f..b06cc69 100644 --- a/opensm/libvendor/osm_vendor_ibumad_sa.c +++ b/opensm/libvendor/osm_vendor_ibumad_sa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. * @@ -55,11 +55,8 @@ typedef struct _osmv_sa_bind_info { osm_log_t *p_log; osm_vendor_t *p_vendor; osm_mad_pool_t *p_mad_pool; - uint64_t port_guid; cl_event_t sync_event; - uint64_t last_lids_update_sec; - uint16_t lid; - uint16_t sm_lid; + time_t last_lids_update_sec; } osmv_sa_bind_info_t; /***************************************************************************** @@ -219,93 +216,18 @@ static void __osmv_sa_mad_err_cb(IN void *bind_context, IN osm_madw_t * p_madw) } /***************************************************************************** - This routine needs to be invoked on every send - since the SM LID and Local - lid might change. To do that without any major performance impact we cache - the results and time that they were obtained. Refresh only twice a minute. - To avoid the need to use statics and risk a race - we require the refresh time - to be stored in the context of the results. Also this covers cases where - we query for multiple guids. + Update lids of vendor umad_port.
*****************************************************************************/ -static ib_api_status_t -__osmv_get_lid_and_sm_lid_by_port_guid(IN osm_vendor_t * const p_vend, - IN ib_net64_t port_guid, - IN OUT uint64_t * p_lids_update_time_sec, - OUT uint16_t * lid, - OUT uint16_t * sm_lid) +static ib_api_status_t update_umad_port(osm_vendor_t * p_vend) { - ib_api_status_t status; - ib_port_attr_t *p_attr_array; - uint32_t num_ports = MAX_PORTS; - uint32_t port_num; - - OSM_LOG_ENTER(p_vend->p_log, __osmv_get_lid_and_sm_lid_by_port_guid); - - /* use previous values if current time is close enough to previous query */ - if ((time(NULL) <= *p_lids_update_time_sec + 30) && *lid && *sm_lid) { - osm_log(p_vend->p_log, OSM_LOG_DEBUG, - "__osmv_get_lid_and_sm_lid_by_port_guid: " - "Using previously stored lid:0x%04x sm_lid:0x%04x\n", - *lid, *sm_lid); - status = IB_SUCCESS; - goto Exit; - } - - /* obtain the number of available ports */ - num_ports = MAX_PORTS; - status = osm_vendor_get_all_port_attr(p_vend, NULL, &num_ports); - if (status != IB_INSUFFICIENT_MEMORY) { - osm_log(p_vend->p_log, OSM_LOG_ERROR, - "__osmv_get_lid_and_sm_lid_by_port_guid: ERR 5503: " - "expected to get IB_INSUFFICIENT_MEMORY but got: %s\n", - ib_get_err_str(status) - ); - status = IB_ERROR; - goto Exit; - } - - osm_log(p_vend->p_log, OSM_LOG_DEBUG, - "__osmv_get_lid_and_sm_lid_by_port_guid: " - "Found total of %u ports. 
Looking for guid:0x%016" PRIx64 "\n", - num_ports, cl_ntoh64(port_guid) - ); - - /* allocate the attributes */ - p_attr_array = - (ib_port_attr_t *) malloc(sizeof(ib_port_attr_t) * num_ports); - - /* obtain the attributes */ - status = osm_vendor_get_all_port_attr(p_vend, p_attr_array, &num_ports); - if (status != IB_SUCCESS) { - osm_log(p_vend->p_log, OSM_LOG_ERROR, - "__osmv_get_lid_and_sm_lid_by_port_guid: ERR 5504: " - "Failed to get port attributes (error: %s)\n", - ib_get_err_str(status) - ); - free(p_attr_array); - goto Exit; - } - - status = IB_ERROR; - /* find the port requested in the list */ - for (port_num = 0; (port_num < num_ports) && (status == IB_ERROR); - port_num++) { - if (p_attr_array[port_num].port_guid == port_guid) { - *lid = p_attr_array[port_num].lid; - *sm_lid = p_attr_array[port_num].sm_lid; - *p_lids_update_time_sec = time(NULL); - status = IB_SUCCESS; - osm_log(p_vend->p_log, OSM_LOG_DEBUG, - "__osmv_get_lid_and_sm_lid_by_port_guid: " - "Found guid:0x%016" PRIx64 " with idx:%d\n", - cl_ntoh64(port_guid), port_num); - } - } - - free(p_attr_array); - - Exit: - OSM_LOG_EXIT(p_vend->p_log); - return (status); + umad_port_t port; + if (umad_get_port(p_vend->umad_port.ca_name, + p_vend->umad_port.portnum, &port) < 0) + return IB_ERROR; + p_vend->umad_port.base_lid = port.base_lid; + p_vend->umad_port.sm_lid = port.sm_lid; + umad_release_port(&port); + return IB_SUCCESS; } /***************************************************************************** @@ -316,7 +238,6 @@ osmv_bind_sa(IN osm_vendor_t * const p_vend, { osm_bind_info_t bind_info; osm_log_t *p_log = p_vend->p_log; - ib_api_status_t status = IB_SUCCESS; osmv_sa_bind_info_t *p_sa_bind_info; cl_status_t cl_status; @@ -348,10 +269,8 @@ osmv_bind_sa(IN osm_vendor_t * const p_vend, /* store some important context */ p_sa_bind_info->p_log = p_log; - p_sa_bind_info->port_guid = port_guid; p_sa_bind_info->p_mad_pool = p_mad_pool; p_sa_bind_info->p_vendor = p_vend; - 
p_sa_bind_info->last_lids_update_sec = 0; /* Bind to the lower level */ p_sa_bind_info->h_bind = osm_vendor_bind(p_vend, &bind_info, p_mad_pool, __osmv_sa_mad_rcv_cb, __osmv_sa_mad_err_cb, p_sa_bind_info); /* context provided to CBs */ @@ -365,21 +284,8 @@ osmv_bind_sa(IN osm_vendor_t * const p_vend, goto Exit; } - /* obtain the sm_lid from the vendor */ - status = - __osmv_get_lid_and_sm_lid_by_port_guid(p_vend, port_guid, - &p_sa_bind_info-> - last_lids_update_sec, - &p_sa_bind_info->lid, - &p_sa_bind_info->sm_lid); - if (status != IB_SUCCESS) { - free(p_sa_bind_info); - p_sa_bind_info = OSM_BIND_INVALID_HANDLE; - osm_log(p_log, OSM_LOG_ERROR, - "osmv_bind_sa: ERR 5507: " - "Failed to obtain the SM lid\n"); - goto Exit; - } + /* update time umad_port is initilized now */ + p_sa_bind_info->last_lids_update_sec = time(NULL); /* initialize the sync_event */ cl_event_construct(&p_sa_bind_info->sync_event); @@ -465,18 +371,15 @@ __osmv_send_sa_req(IN osmv_sa_bind_info_t * p_bind, (actually it is cached in the bind object and refreshed every 30sec by this proc ) */ - status = - __osmv_get_lid_and_sm_lid_by_port_guid(p_bind->p_vendor, - p_bind->port_guid, - &p_bind-> - last_lids_update_sec, - &p_bind->lid, - &p_bind->sm_lid); - if (status != IB_SUCCESS) { - osm_log(p_log, OSM_LOG_ERROR, - "__osmv_send_sa_req: ERR 5509: " - "Failed to obtain the SM lid\n"); - goto Exit; + if (time(NULL) > p_bind->last_lids_update_sec + 30) { + status = update_umad_port(p_bind->p_vendor); + if (status != IB_SUCCESS) { + osm_log(p_log, OSM_LOG_ERROR, + "__osmv_send_sa_req: ERR 5509: " + "Failed to obtain the SM lid\n"); + goto Exit; + } + p_bind->last_lids_update_sec = time(NULL); } /* Get a MAD wrapper for the send */ @@ -529,8 +432,10 @@ __osmv_send_sa_req(IN osmv_sa_bind_info_t * p_bind, /* Provide the address to send to */ - p_madw->mad_addr.dest_lid = cl_hton16(p_bind->sm_lid); - p_madw->mad_addr.addr_type.smi.source_lid = cl_hton16(p_bind->lid); + p_madw->mad_addr.dest_lid = + 
cl_hton16(p_bind->p_vendor->umad_port.sm_lid); + p_madw->mad_addr.addr_type.smi.source_lid = + cl_hton16(p_bind->p_vendor->umad_port.base_lid); p_madw->mad_addr.addr_type.gsi.remote_qp = CL_HTON32(1); p_madw->resp_expected = TRUE; p_madw->fail_msg = CL_DISP_MSGID_NONE; -- 1.5.3.rc2.29.gc4640f From krkumar2 at in.ibm.com Thu Nov 15 19:46:59 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Fri, 16 Nov 2007 09:16:59 +0530 Subject: [ofa-general] Re: [PATCH] IPoIB: Remove redundant check in xmit handler In-Reply-To: <20071115050537.7100.93755.sendpatchset@K50wks273871wss.in.ibm.com> Message-ID: Hi Roland, > This check was added because of a real problem seen in practice a > while ago. Has something changed in the tx queue locking that makes > it redundant now? I am not sure of how it was earlier, but currently a device's xmit can be called only on one cpu at a time (by holding the __LINK_STATE_QDISC_RUNNING bit in qdisc_run). And queue_stopped check is present before xmit. > I seem to remember that I could make the problem race trigger pretty > fast by making the tx queue very small so that it got stopped a lot. I just tested with a smaller queue size (tx queue size=4), put a debug in the queue_stopped check in xmit(), and a counter to find how many times the queue was stopped (in ipoib_send). After a 20 min test run with 64 threads, the queue was stopped 16.5 million times, but the debug never hit. I tested with buffer sizes varying from 128 to 16K bytes (though TSO/GSO is not implemented in IPoIB anyway). 
Thanks, - KK From rvm at obsidianresearch.com Fri Nov 16 00:08:40 2007 From: rvm at obsidianresearch.com (Rolf Manderscheid) Date: Fri, 16 Nov 2007 01:08:40 -0700 Subject: [ofa-general] [PATCH] opensm: support multiple routers in a subnet Message-ID: <20071116080840.GC30090@obsidianresearch.com> Hi Sasha, If a path record query is made for an off-subnet DGID, the SA needs to return a path record where the DLID points to the router port that handles the DGID prefix. In the case of a subnet with only one router, the SA could just pick "the router", and that's exactly what the ROUTER_EXP code did. However, ROUTER_EXP did not look beyond the first available router. When additional routers are added, the SA needs more information. The mechanism for gathering this information has not yet been specified, so in the meantime, this patch adds a configuration file that specifies which router ports handle which prefixes. The patch also removes all occurrences of ROUTER_EXP ifdefs. The default behaviour remains unchanged with one minor exception: hop limits are set to 0xFF for path records to multicast DGIDs if the scope is non-local and to unicast DGIDs if off-subnet. This used to happen for ROUTER_EXP only. Now, the same binary can be configured at run-time to enable the ROUTER_EXP behaviour with a generic configuration file, or to handle multiple routers on a subnet with a more explicit configuration file. See the man page for details. Signed-off-by: Rolf Manderscheid -- One consequence of this patch is that people accustomed to using ROUTER_EXP will need to specify a configuration file to get the same behaviour. I toyed with the idea of keeping one ROUTER_EXP ifdef to control the default behaviour, but then we're back to having two versions of opensm with different default behaviours, and the counter-intuitive: empty cfg file != non-existent cfg file. One of the goals was to get to a single standard binary. 
So, to help avoid surprises, I actually added back one ifdef ROUTER_EXP which causes the compilation to fail with a useful message. This only helps those who both build and configure their special ROUTER_EXP opensms, but I suspect that's most. Thanks to Hal for reviewing early versions of this patch and providing feedback. Rolf --- diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h index aa8d378..db58919 100644 --- a/opensm/include/opensm/osm_base.h +++ b/opensm/include/opensm/osm_base.h @@ -253,6 +253,22 @@ BEGIN_C_DECLS #endif /* __WIN__ */ /***********/ +/****d* OpenSM: Base/OSM_DEFAULT_PREFIX_ROUTES_FILE +* NAME +* OSM_DEFAULT_PREFIX_ROUTES_FILE +* +* DESCRIPTION +* Specifies the default prefix routes file name +* +* SYNOPSIS +*/ +#ifdef __WIN__ +#define OSM_DEFAULT_PREFIX_ROUTES_FILE strcat(GetOsmCachePath(), "osm-prefix-routes.conf") +#else +#define OSM_DEFAULT_PREFIX_ROUTES_FILE "/etc/ofa/opensm-prefix-routes.conf" +#endif +/***********/ + /****d* OpenSM: Base/OSM_DEFAULT_SWEEP_INTERVAL_SECS * NAME * OSM_DEFAULT_SWEEP_INTERVAL_SECS diff --git a/opensm/include/opensm/osm_prefix_route.h b/opensm/include/opensm/osm_prefix_route.h new file mode 100644 index 0000000..cebd532 --- /dev/null +++ b/opensm/include/opensm/osm_prefix_route.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef _OSM_PREFIX_ROUTES_H_ +#define _OSM_PREFIX_ROUTES_H_ + +#include +#include + +typedef struct { + cl_list_item_t list_item; /* must be first */ + uint64_t prefix; /* network order, zero means "any" */ + uint64_t guid; /* network order, zero means "any" */ +} osm_prefix_route_t; + +#ifdef ROUTER_EXP +#error ROUTER_EXP is deprecated, specify prefix routes at runtime instead (see opensm man page for details) +#endif + +#endif /* _OSM_PREFIX_ROUTES_H_ */ diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h index 452098b..b67add3 100644 --- a/opensm/include/opensm/osm_subnet.h +++ b/opensm/include/opensm/osm_subnet.h @@ -54,6 +54,7 @@ #include #include #include +#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -298,6 +299,7 @@ typedef struct _osm_subn_opt { #endif /* ENABLE_OSM_PERF_MGR */ char *event_plugin_name; char *node_name_map_name; + char *prefix_routes_file; } osm_subn_opt_t; /* * FIELDS @@ -550,6 +552,7 @@ typedef struct _osm_subn { cl_qmap_t node_guid_tbl; cl_qmap_t port_guid_tbl; cl_qmap_t rtr_guid_tbl; + cl_qlist_t prefix_routes_list; cl_qmap_t prtn_pkey_tbl; cl_qmap_t mgrp_mlid_tbl; cl_qmap_t sm_guid_tbl; diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 index ef12980..d5434be 100644 --- a/opensm/man/opensm.8 +++ b/opensm/man/opensm.8 @@ -20,6 +20,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA) [\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] [\-B | \-\-daemon] [\-I | \-\-inactive] [\-\-perfmgr] [\-\-perfmgr_sweep_time_s ] +[\-\-prefix_routes_file ] [\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] .SH DESCRIPTION @@ -201,6 +202,13 @@ is accumulative. This option defines the optional partition configuration file. The default name is \'/etc/opensm/opensm-partitions.conf\'. .TP +.BI --prefix_routes_file= path +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. 
The +.B PREFIX ROUTES +section below describes the format of the configuration file. +The default path is \fB\%/etc/ofa/opensm\-prefix\-routes.conf\fP. +.TP \fB\-Q\fR, \fB\-\-qos\fR This option enables QoS setup. It is disabled by default. .TP @@ -465,6 +473,39 @@ Examples: qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, qos_swe_high_limit=0 +.SH PREFIX ROUTES +.PP +Prefix routes control how the SA responds to path record queries for +off-subnet DGIDs. By default, the SA fails such queries. +Note that IBA does not specify how the SA should obtain off-subnet path +record information. +The prefix routes configuration is meant as a stop-gap until the +specification is completed. +.PP +Each line in the configuration file is a 64-bit prefix followed by a +64-bit GUID, separated by white space. +The GUID specifies the router port on the local subnet that will +handle the prefix. +Blank lines are ignored, as is anything between a \fB#\fP character +and the end of the line. +The prefix and GUID are both in hex, the leading 0x is optional. +Either, or both, can be wild-carded by specifying an +asterisk instead of an explicit prefix or GUID. +.PP +When responding to a path record query for an off-subnet DGID, +opensm searches for the first prefix match in the configuration file. +Therefore, the order of the lines in the configuration file is important: +a wild-carded prefix at the beginning of the configuration file renders +all subsequent lines useless. +If there is no match, then opensm fails the query. +It is legal to repeat prefixes in the configuration file, +opensm will return the path to the first available matching router. +A configuration file with a single line where both prefix and GUID +are wild-carded means that a path record query specifying any +off-subnet DGID should return a path to the first available router. +This configuration yields the same behaviour formerly achieved by +compiling opensm with -DROUTER_EXP. 
+ .SH ROUTING .PP OpenSM now offers five routing engines: @@ -872,6 +913,10 @@ a file compatible with dump_lfts.sh output. This file can be used as input for forwarding tables loading by 'file' routing engine. Both or one of options -U and -M can be specified together with \'-R file\'. +.SH FILES +.TP +.B /etc/ofa/opensm-prefix-routes.conf +default prefix routes file. .SH AUTHORS .TP diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c index 13c9f70..4b99dd0 100644 --- a/opensm/opensm/main.c +++ b/opensm/opensm/main.c @@ -291,6 +291,11 @@ void show_usage(void) printf("--perfmgr_sweep_time_s \n" " PerfMgr sweep interval in seconds.\n\n"); #endif + printf("--prefix_routes_file \n" + " This option specifies the prefix routes file.\n" + " Prefix routes control how the SA responds to path record\n" + " queries for off-subnet DGIDs. Default file is:\n" + " "OSM_DEFAULT_PREFIX_ROUTES_FILE"\n\n"); printf("-v\n" "--verbose\n" " This option increases the log verbosity level.\n" @@ -609,6 +614,7 @@ int main(int argc, char *argv[]) {"perfmgr", 0, NULL, 1}, {"perfmgr_sweep_time_s", 1, NULL, 2}, #endif + {"prefix_routes_file", 1, NULL, 3}, {NULL, 0, NULL, 0} /* Required at the end of the array */ }; @@ -911,6 +917,9 @@ int main(int argc, char *argv[]) break; #endif /* ENABLE_OSM_PERF_MGR */ + case 3: + opt.prefix_routes_file = optarg; + break; case 'h': case '?': case ':': diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c index ce75ec8..2597046 100644 --- a/opensm/opensm/osm_sa_path_record.c +++ b/opensm/opensm/osm_sa_path_record.c @@ -69,10 +69,9 @@ #include #include #include -#ifdef ROUTER_EXP #include #include -#endif +#include #define OSM_PR_RCV_POOL_MIN_SIZE 64 #define OSM_PR_RCV_POOL_GROW_SIZE 64 @@ -858,19 +857,12 @@ __osm_pr_rcv_build_pr(IN osm_pr_rcv_t * const p_rcv, { const osm_physp_t *p_src_physp; const osm_physp_t *p_dest_physp; -#ifdef ROUTER_EXP boolean_t is_nonzero_gid = 0; -#endif OSM_LOG_ENTER(p_rcv->p_log, 
__osm_pr_rcv_build_pr); p_src_physp = p_src_port->p_physp; -#ifndef ROUTER_EXP - p_dest_physp = p_dest_port->p_physp; - p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dest_physp); - p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid(p_dest_physp); -#else if (p_dgid) { if (memcmp(p_dgid, &zero_gid, sizeof(*p_dgid))) is_nonzero_gid = 1; @@ -886,7 +878,6 @@ __osm_pr_rcv_build_pr(IN osm_pr_rcv_t * const p_rcv, p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid(p_dest_physp); } -#endif p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp); p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid(p_src_physp); @@ -895,11 +886,10 @@ __osm_pr_rcv_build_pr(IN osm_pr_rcv_t * const p_rcv, p_pr->slid = cl_hton16(src_lid_ho); p_pr->hop_flow_raw &= cl_hton32(1 << 31); -#ifdef ROUTER_EXP + /* Only set HopLimit if going through a router */ if (is_nonzero_gid) p_pr->hop_flow_raw |= cl_hton32(IB_HOPLIMIT_MAX); -#endif p_pr->pkey = p_parms->pkey; ib_path_rec_set_sl(p_pr, p_parms->sl); @@ -1262,10 +1252,8 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, ib_net64_t dest_guid; ib_api_status_t status; ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; -#ifdef ROUTER_EXP osm_router_t *p_rtr; osm_port_t *p_rtr_port; -#endif OSM_LOG_ENTER(p_rcv->p_log, __osm_pr_rcv_get_end_points); @@ -1359,20 +1347,47 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, "Non local DGID subnet prefix 0x%016" PRIx64 "\n", cl_ntoh64(p_pr->dgid.unicast.prefix)); -#ifndef ROUTER_EXP - /* - This 'error' is the client's fault (bad gid) so - don't enter it as an error in our own log. - Return an error response to the client. 
- */ - sa_status = IB_SA_MAD_STATUS_INVALID_GID; - goto Exit; -#else - /* Just use "first" router (if it exists) for now */ - p_rtr = - (osm_router_t *) cl_qmap_head(&p_rcv-> - p_subn-> - rtr_guid_tbl); + + /* Find the router port that is configured to handle + this prefix, if any: */ + osm_prefix_route_t *route = NULL; + osm_prefix_route_t *r = (osm_prefix_route_t *) + cl_qlist_head(&p_rcv->p_subn->prefix_routes_list); + + while (r != (osm_prefix_route_t *) + cl_qlist_end(&p_rcv->p_subn->prefix_routes_list)) + { + if (r->prefix == p_pr->dgid.unicast.prefix || + r->prefix == 0) + { + route = r; + break; + } + r = (osm_prefix_route_t *) cl_qlist_next(&r->list_item); + } + + if (! route) { + /* + This 'error' is the client's fault (bad gid) so + don't enter it as an error in our own log. + Return an error response to the client. + */ + sa_status = IB_SA_MAD_STATUS_INVALID_GID; + goto Exit; + } else if (route->guid == 0) { + /* first router */ + p_rtr = (osm_router_t *) + cl_qmap_head(&p_rcv-> + p_subn-> + rtr_guid_tbl); + } else { + p_rtr = (osm_router_t *) + cl_qmap_get(&p_rcv-> + p_subn-> + rtr_guid_tbl, + route->guid); + } + if (p_rtr == (osm_router_t *) cl_qmap_end(&p_rcv-> p_subn-> @@ -1380,7 +1395,7 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, { osm_log(p_rcv->p_log, OSM_LOG_ERROR, "__osm_pr_rcv_get_end_points: ERR 1F22: " - "Off subnet DGID but no routers found\n"); + "Off subnet DGID but router not found\n"); sa_status = IB_SA_MAD_STATUS_INVALID_GID; goto Exit; @@ -1390,7 +1405,6 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, dest_guid = osm_port_get_guid(p_rtr_port); if (p_dgid) *p_dgid = p_pr->dgid; -#endif } } @@ -2134,22 +2148,14 @@ void osm_pr_rcv_process(IN void *context, IN void *data) &sl, &flow_label, &hop_limit); ib_path_rec_set_sl(&p_pr_item->path_rec, sl); ib_path_rec_set_qos_class(&p_pr_item->path_rec, 0); -#ifndef ROUTER_EXP - p_pr_item->path_rec.hop_flow_raw = - cl_hton32(hop_limit) | (flow_label << 8); -#else + 
/* HopLimit is not yet set in non link local MC groups */ /* If it were, this would not be needed */ - if (ib_mgid_get_scope - (&p_mgrp->mcmember_rec.mgid) == MC_SCOPE_LINK_LOCAL) - p_pr_item->path_rec. - hop_flow_raw = - cl_hton32(hop_limit) | (flow_label << 8); - else - p_pr_item->path_rec. - hop_flow_raw = - cl_hton32(IB_HOPLIMIT_MAX) | (flow_label << 8); -#endif + if (ib_mgid_get_scope(&p_mgrp->mcmember_rec.mgid) != MC_SCOPE_LINK_LOCAL) + hop_limit = IB_HOPLIMIT_MAX; + + p_pr_item->path_rec.hop_flow_raw = + cl_hton32(hop_limit) | (flow_label << 8); cl_qlist_insert_tail(&pr_list, (cl_list_item_t *) & p_pr_item->pool_item); diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index 0f109a5..834d283 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -91,6 +91,7 @@ void osm_subn_construct(IN osm_subn_t * const p_subn) cl_qmap_init(&p_subn->sm_guid_tbl); cl_qlist_init(&p_subn->sa_sr_list); cl_qlist_init(&p_subn->sa_infr_list); + cl_qlist_init(&p_subn->prefix_routes_list); cl_qmap_init(&p_subn->rtr_guid_tbl); cl_qmap_init(&p_subn->prtn_pkey_tbl); cl_qmap_init(&p_subn->mgrp_mlid_tbl); @@ -475,6 +476,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const p_opt) p_opt->exit_on_fatal = TRUE; p_opt->enable_quirks = FALSE; p_opt->no_clients_rereg = FALSE; + p_opt->prefix_routes_file = OSM_DEFAULT_PREFIX_ROUTES_FILE; subn_set_default_qos_options(&p_opt->qos_options); subn_set_default_qos_options(&p_opt->qos_ca_options); subn_set_default_qos_options(&p_opt->qos_sw0_options); @@ -686,6 +688,112 @@ subn_dump_qos_options(FILE * file, /********************************************************************** **********************************************************************/ +static ib_api_status_t +append_prefix_route(IN osm_subn_t * const p_subn, uint64_t prefix, uint64_t guid) +{ + osm_prefix_route_t *route; + + route = malloc(sizeof *route); + if (! 
route) { + osm_log(&p_subn->p_osm->log, OSM_LOG_ERROR, "%s: out of memory", __FUNCTION__); + return IB_ERROR; + } + + route->prefix = cl_hton64(prefix); + route->guid = cl_hton64(guid); + cl_qlist_insert_tail(&p_subn->prefix_routes_list, &route->list_item); + return IB_SUCCESS; +} + +static ib_api_status_t +osm_parse_prefix_routes_file(IN osm_subn_t * const p_subn) +{ + osm_log_t *log = &p_subn->p_osm->log; + FILE *fp; + char buf[1024]; + int line = 0; + int errors = 0; + + while (! cl_is_qlist_empty(&p_subn->prefix_routes_list)) { + cl_list_item_t *item = cl_qlist_remove_head(&p_subn->prefix_routes_list); + free(item); + } + + fp = fopen(p_subn->opt.prefix_routes_file, "r"); + if (! fp) { + if (errno == ENOENT) + return IB_SUCCESS; + + osm_log(log, OSM_LOG_ERROR, "%s: fopen(%s) failed: %s", + __FUNCTION__, p_subn->opt.prefix_routes_file, strerror(errno)); + return IB_ERROR; + } + + while (fgets(buf, sizeof buf, fp) != NULL) { + char *p_prefix, *p_guid, *p_extra, *p_last, *p_end; + uint64_t prefix, guid; + + line++; + if (errors > 10) + break; + + p_prefix = strtok_r(buf, " \t\n", &p_last); + if (! p_prefix) + continue; /* ignore blank lines */ + + if (*p_prefix == '#') + continue; /* ignore comment lines */ + + p_guid = strtok_r(NULL, " \t\n", &p_last); + if (! 
p_guid) { + osm_log(log, OSM_LOG_ERROR, "%s:%d: missing GUID\n", + p_subn->opt.prefix_routes_file, line); + errors++; + continue; + } + + p_extra = strtok_r(NULL, " \t\n", &p_last); + if (p_extra && *p_extra != '#') { + osm_log(log, OSM_LOG_INFO, "%s:%d: extra tokens ignored\n", + p_subn->opt.prefix_routes_file, line); + } + + if (strcmp(p_prefix, "*") == 0) + prefix = 0; + else { + prefix = strtoull(p_prefix, &p_end, 16); + if (*p_end != '\0') { + osm_log(log, OSM_LOG_ERROR, "%s:%d: illegal prefix: %s\n", + p_subn->opt.prefix_routes_file, line, p_prefix); + errors++; + continue; + } + } + + if (strcmp(p_guid, "*") == 0) + guid = 0; + else { + guid = strtoull(p_guid, &p_end, 16); + if (*p_end != '\0' && *p_end != '#') { + osm_log(log, OSM_LOG_ERROR, "%s:%d: illegal GUID: %s\n", + p_subn->opt.prefix_routes_file, line, p_guid); + errors++; + continue; + } + } + + if (append_prefix_route(p_subn, prefix, guid) != IB_SUCCESS) { + errors++; + break; + } + } + + fclose(fp); + return (errors == 0) ? 
IB_SUCCESS : IB_ERROR; +} + +/********************************************************************** + **********************************************************************/ ib_api_status_t osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn) { char *p_cache_dir = getenv("OSM_CACHE_DIR"); @@ -745,6 +853,8 @@ ib_api_status_t osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn) if (p_subn->opt.qos) osm_qos_parse_policy_file(p_subn); + osm_parse_prefix_routes_file(p_subn); + return IB_SUCCESS; } @@ -1285,6 +1395,9 @@ ib_api_status_t osm_subn_parse_conf_file(IN osm_subn_opt_t * const p_opts) opts_unpack_boolean("no_clients_rereg", p_key, p_val, &p_opts->no_clients_rereg); + + opts_unpack_charp("prefix_routes_file", + p_key, p_val, &p_opts->prefix_routes_file); } fclose(opts_file); @@ -1606,6 +1719,11 @@ ib_api_status_t osm_subn_write_conf_file(IN osm_subn_opt_t * const p_opts) "QoS Router ports options", "qos_rtr", &p_opts->qos_rtr_options); + fprintf(opts_file, + "# Prefix routes file name\n" + "prefix_routes_file %s\n\n", + p_opts->prefix_routes_file); + /* optional string attributes ... 
*/ fclose(opts_file); From loselism at npc-color.com Fri Nov 16 00:36:16 2007 From: loselism at npc-color.com (Cathrin Anderson) Date: Fri, 16 Nov 2007 11:36:16 +0300 Subject: [ofa-general] Microsoft Office 2007 Enterprise for 79, Retails @ 899 (You Save 819) Message-ID: <000001c8282b$15880680$0100007f@localhost> cakewalk sonar 6 producer edition - 69 virtualdj 4.3 for mac - 39 adobe acrobat 3d - 59 virtual pc 7.0 for mac - 49 webeasy pro 6.0 - 39 adobe photoshop cs2 v 9.0 - 69 readiris pro 11.5 for mac - 39 adobe contribute cs3 - 39 type cheapxpsoft8 .com in Internet Explorer From jkabelitz at web.de Fri Nov 16 00:56:35 2007 From: jkabelitz at web.de (=?iso-8859-15?Q?J=FCrgen_Kabelitz?=) Date: Fri, 16 Nov 2007 09:56:35 +0100 Subject: [ofa-general] ofed 1.2 and lustreFS Message-ID: <622750768@web.de> Hello I have problems to build the ofed 1.2 software against the Lustre Linux Kernel. linux Kernel: linux-2.6.16-46-0.14_lustre.16.3 OFED software: ofed-1.2 When I build the software I got the following error: Building ofa_user RPMs. Please wait... Running rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-dapl --with-ipoibtools --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibmad --with-libibumad --with-libibverbs --with-libmthca --with-opensm --with-librdmacm --with-libsdp --with-openib-diags --with-sdpnetstat --with-srptools --with-mstflint --with-perftest --with-tvflash --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-dapl --with-ipoibtools --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibmad --with-libibumad --with-libibverbs --with-libmthca --with-opensm --with-librdmacm --with-libsdp --with-openib-diags --with-sdpnetstat --with-srptools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /usr/src/OFED-1.2/SRPMS/ofa_user-1.2-0.src.rpm Building ofa_kernel RPMs. 
Please wait... Running rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-cxgb3-mod --with-ipoib-mod --with-mthca-mod --with-sdp-mod --with-srp-mod --with-core-mod --with-user_mad-mod --with-user_access-mod --with-addr_trans-mod --with-rds-mod ' --define 'KVERSION 2.6.16-46-0.14_lustre.1.6.3smp' --define 'KSRC /lib/modules/2.6.16-46-0.14_lustre.1.6.3smp/build' --define 'build_kernel_ib 1' --define 'build_kernel_ib_devel 1' --define 'NETWORK_CONF_DIR /etc/sysconfig/network' --define 'modprobe_update 1' --define 'include_ipoib_conf 1' /usr/src/OFED-1.2/SRPMS/ofa_kernel-1.2-0.src.rpm ERROR: Failed executing "rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-cxgb3-mod --with-ipoib-mod --with-mthca-mod --with-sdp-mod --with-srp-mod --with-core-mod --with-user_mad-mod --with-user_access-mod --with-addr_trans-mod --with-rds-mod ' --define 'KVERSION 2.6.16-46-0.14_lustre.1.6.3smp' --define 'KSRC /lib/modules/2.6.16-46-0.14_lustre.1.6.3smp/build' --define 'build_kernel_ib 1' --define 'build_kernel_ib_devel 1' --define 'NETWORK_CONF_DIR /etc/sysconfig/network' --define 'modprobe_update 1' --define 'include_ipoib_conf 1' /usr/src/OFED-1.2/SRPMS/ofa_kernel-1.2-0.src.rpm" See log file: /tmp/OFED.build.19811.log What is going wrong? J. Kabelitz _____________________________________________________________________ Der WEB.DE SmartSurfer hilft bis zu 70% Ihrer Onlinekosten zu sparen! http://smartsurfer.web.de/?mc=100071&distributionid=000000000066 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: OFED.build.19811.log.gz Type: application/x-gzip Size: 140893 bytes Desc: not available URL: From vlad at lists.openfabrics.org Fri Nov 16 02:52:00 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 16 Nov 2007 02:52:00 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071116-0200 daily build status Message-ID: <20071116105200.D5862E60879@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.12 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on x86_64 with 
linux-2.6.17 Passed on ia64 with linux-2.6.12 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast 
/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast 
/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** 
[/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071116-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From barclaysbankaudit5 at gmail.com Fri Nov 16 04:20:53 2007 From: barclaysbankaudit5 at gmail.com (BARCLAYS BANK) Date: Fri, 16 Nov 2007 12:20:53 +0000 Subject: [ofa-general] BARCLAYS BANK NOTIFICATION Message-ID: Attn, ****CONFIRM THIS 
MAIL URGENTLY**** Series of mail has been sent to you without responds. We have been directed by the Bank of New York and the World Bank to contact the beneficiary of this fund. Sir, We write to inform you that a Floating sum was recovered by the World Bank, and was wired to our Bank (Barclays Bank Plc), London United Kingdomin your favor. Since last 15 Days, all efforts to reach you have been to no avail, but yesterday again after the auditing and cross examination of your file, we discover that your fund has long been delayed due to some International Monetary Regulations, this fund was supposed to be credited to your Bank Account. We therefore request that you identify with us as well as reconfirm your Banking details to avoid any wrong Transfer of this fund. We did not want to mention the amount, as we are not too sure if we are communicating with the right beneficiary or not? Our Bank is ready to effect this Payment immediately as soon as the original beneficiary is identified. Sir, you are therefore advised to send a copy of your ID or International passport, to our Office here in London United Kingdom. You are also requested to quote correctly the amount in which you are expecting from African Bank, and Bank In particular. If the information you provide are confirmed right, we shall be able to proceed in the transfer hence we don't want to talk to the wrong person. Forward to us immediately the requested information's for our perusal, quoting the expected Transfer Sum correctly, as well as your private telephone / fax numbers. Do not forget to reconfirm your accounts to avoid any mistakes. Your can send your responds directly to the Auditing Department on barclaysbankaudit6 at gmail.com Best Regards. *Mr. Pyle Michael Lee * *(Operations Department)** * Tel: +44- 703 1 84 6552 (Direct Line) Website: http://www.barclays.co.uk/ *Email: barclaysbankaudit6 at gmail.com * -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From akepner at sgi.com Thu Nov 15 12:23:02 2007 From: akepner at sgi.com (akepner at sgi.com) Date: Thu, 15 Nov 2007 12:23:02 -0800 Subject: [ofa-general] IPoIB CQ overrun Message-ID: <20071115202302.GK5448@sgi.com> We have a large (~1800 node) IB cluster of x86_64 machines, and we're having some significant problems with IPoIB. The thing that all the IPoIB failures have in common seems to be an appearance of a "CQ overrun" in syslog, e.g.: ib_mthca 0000:06:00.0: CQ overrun on CQN 180082 From there things go badly in different ways - tx_timeouts, oopses, etc. Sometimes things just start working again after a few minutes. The appearance of these failures seems to be well correlated with the size of the machine. I don't think there are any problems until the machine is built up to about its maximum size, and then they become pretty common. We are using MT25204 HCAs with 1.2.0 firmware, and OFED 1.2. Does this ring a bell with anyone? -- Arthur From arthur.jones at qlogic.com Fri Nov 16 07:35:51 2007 From: arthur.jones at qlogic.com (Arthur Jones) Date: Fri, 16 Nov 2007 07:35:51 -0800 Subject: [ofa-general] [PATCH] IB/ipath -- more fixes for 2.6.24 Message-ID: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> hi roland, here are more bugfix patches from ralph for 2.6.24. they can also be pulled from: git://git.qlogic.com/ipath-linux-2.6 for-roland arthur From arthur.jones at qlogic.com Fri Nov 16 07:35:56 2007 From: arthur.jones at qlogic.com (Arthur Jones) Date: Fri, 16 Nov 2007 07:35:56 -0800 Subject: [ofa-general] [PATCH 1/4] IB/ipath - fix ibv_resize_cq() In-Reply-To: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> References: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> Message-ID: <20071116153556.13665.47188.stgit@eng-46.internal.keyresearch.com> From: Ralph Campbell The wrong offset was being returned to libipathverbs so that when ibv_resize_cq() calls mmap(), it always fails.
Signed-off-by: Ralph Campbell --- drivers/infiniband/hw/ipath/ipath_cq.c | 19 ++++++++++++++----- 1 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 08d8ae1..d1380c7 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -395,12 +395,9 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail; } - /* - * Return the address of the WC as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. */ if (udata && udata->outlen >= sizeof(__u64)) { - __u64 offset = (__u64) wc; + __u64 offset = 0; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (ret) @@ -450,6 +447,18 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) struct ipath_mmap_info *ip = cq->ip; ipath_update_mmap_info(dev, ip, sz, wc); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, &dev->pending_mmaps); From arthur.jones at qlogic.com Fri Nov 16 07:36:01 2007 From: arthur.jones at qlogic.com (Arthur Jones) Date: Fri, 16 Nov 2007 07:36:01 -0800 Subject: [ofa-general] [PATCH 2/4] IB/ipath - Fix QP clean up if error in creation In-Reply-To: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> References: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> Message-ID: <20071116153601.13665.21072.stgit@eng-46.internal.keyresearch.com> From: Ralph Campbell This patch fixes the code which frees the partially allocated QP resources if there was an error while creating the QP. In particular, the QPN wasn't deallocated and the QP wasn't removed from the hash table. 
Signed-off-by: Ralph Campbell --- drivers/infiniband/hw/ipath/ipath_qp.c | 15 +++++++++------ 1 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 6a41fdb..b997ff8 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -835,7 +835,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->qp_type); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + vfree(qp->r_rq.wq); + goto bail_qp; } qp->ip = NULL; ipath_reset_qp(qp); @@ -863,7 +864,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, sizeof(offset)); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + goto bail_ip; } } else { u32 s = sizeof(struct ipath_rwq) + @@ -875,7 +876,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->r_rq.wq); if (!qp->ip) { ret = ERR_PTR(-ENOMEM); - goto bail_rwq; + goto bail_ip; } err = ib_copy_to_udata(udata, &(qp->ip->offset), @@ -907,9 +908,11 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail; bail_ip: - kfree(qp->ip); -bail_rwq: - vfree(qp->r_rq.wq); + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + ipath_free_qp(&dev->qp_table, qp); bail_qp: kfree(qp); bail_swq: From arthur.jones at qlogic.com Fri Nov 16 07:36:06 2007 From: arthur.jones at qlogic.com (Arthur Jones) Date: Fri, 16 Nov 2007 07:36:06 -0800 Subject: [ofa-general] [PATCH 3/4] IB/ipath - fix ibv_modify_srq() In-Reply-To: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> References: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> Message-ID: <20071116153606.13665.11613.stgit@eng-46.internal.keyresearch.com> From: Ralph Campbell The wrong offset was being returned to libipathverbs so that when ibv_modify_srq() calls mmap(), it always fails. 
Signed-off-by: Ralph Campbell --- drivers/infiniband/hw/ipath/ipath_srq.c | 42 ++++++++++++++++++------------- 1 files changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 40c36ec..434da62 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -211,11 +211,11 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); + struct ipath_rwq *wq; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { struct ipath_rwq *owq; - struct ipath_rwq *wq; struct ipath_rwqe *p; u32 sz, size, n, head, tail; @@ -236,27 +236,20 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail; } - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. */ if (udata && udata->inlen >= sizeof(__u64)) { __u64 offset_addr; - __u64 offset = (__u64) wq; + __u64 offset = 0; ret = ib_copy_from_udata(&offset_addr, udata, sizeof(offset_addr)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; udata->outbuf = (void __user *) offset_addr; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; } spin_lock_irq(&srq->rq.lock); @@ -277,10 +270,8 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, else n -= tail; if (size <= n) { - spin_unlock_irq(&srq->rq.lock); - vfree(wq); ret = -EINVAL; - goto bail; + goto bail_unlock; } n = 0; p = wq->wq; @@ -314,6 +305,18 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, u32 s = sizeof(struct ipath_rwq) + size * sz; ipath_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. 
+ */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, @@ -328,7 +331,12 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, srq->limit = attr->srq_limit; spin_unlock_irq(&srq->rq.lock); } + goto bail; +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); bail: return ret; } From arthur.jones at qlogic.com Fri Nov 16 07:36:11 2007 From: arthur.jones at qlogic.com (Arthur Jones) Date: Fri, 16 Nov 2007 07:36:11 -0800 Subject: [ofa-general] [PATCH 4/4] IB/ipath - Normalize error return codes for different posts In-Reply-To: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> References: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> Message-ID: <20071116153611.13665.27415.stgit@eng-46.internal.keyresearch.com> From: Ralph Campbell The error codes for ib_post_send(), ib_post_recv(), and ib_post_srq_recv() were inconsistent. Use EINVAL for too many SGEs and ENOMEM for too many WRs. 
Signed-off-by: Ralph Campbell --- drivers/infiniband/hw/ipath/ipath_srq.c | 2 +- drivers/infiniband/hw/ipath/ipath_verbs.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 434da62..2fef36f 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -59,7 +59,7 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 74f77e7..c4c9984 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -302,8 +302,10 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) next = qp->s_head + 1; if (next >= qp->s_size) next = 0; - if (next == qp->s_last) - goto bail_inval; + if (next == qp->s_last) { + ret = -ENOMEM; + goto bail; + } wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; @@ -404,7 +406,7 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } From philippe.gregoire at cea.fr Fri Nov 16 08:37:03 2007 From: philippe.gregoire at cea.fr (Philippe Gregoire) Date: Fri, 16 Nov 2007 17:37:03 +0100 Subject: [ofa-general] Re: [openSM] Pkey index and Pkey value In-Reply-To: <829ded920711122126k2bd7f3adu8859799442701602@mail.gmail.com> References: <829ded920711120045s1c03e008k5c8fa3034744601b@mail.gmail.com> <829ded920711122126k2bd7f3adu8859799442701602@mail.gmail.com> Message-ID: <473DC72F.5040806@cea.fr> Keshetti Mahesh a écrit : >> No; there is no requirement for a pkey value to be in the same index >> in different ports. 
>> > > But if you want to run an MPI application in all the ports belonging to > one partition (e.g: partition2 in my previous exmple) a conflict will arise > as most of the MPI implementations have provision of allowing only one > Pkey index (e.g: ib_pkey_ix in openMPI). This problem can be easily solved > by storing pkey value at the same pkey index in all ports belonging to same > partition. > > -Mahesh > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > No, some nodes in the cluster may have a specific partitions configuration so for the same partition (ex 0x8088) pkey index may change on nodes. For more informations, See https://bugs.openfabrics.org/show_bug.cgi?id=519 That's why a new configuration parameter has been created for mvapich : # grep -C2 VIADEV_DEFAULT_PKEY /usr/mpi/gcc/mvapich-0.9.9/etc/mvapich.conf # VIADEV_DEFAULT_MAX_RDMA_DST_OPS #----------------------------------------------------------------------------------------- # VIADEV_DEFAULT_PKEY_IX # # Default value:none # Index of predefined partition #----------------------------------------------------------------------------------------- # VIADEV_DEFAULT_PKEY # # Default value:none # Name (key) of predefined partition. One can pass it both in decimal end hexadecimal # format. This parameter prevail over VIADEV_DEFAULT_PKEY_IX, when defined. 
#----------------------------------------------------------------------------------------- I'm wondering if the ib_pkey_val parameter has not the same goal for OPENMPI : see https://svn.open-mpi.org/trac/ompi/changeset/14463 Philippe From arlin.r.davis at intel.com Fri Nov 16 13:50:51 2007 From: arlin.r.davis at intel.com (Davis, Arlin R) Date: Fri, 16 Nov 2007 13:50:51 -0800 Subject: [ofa-general] [PATCH] v2 dapltest - RHEL5.1 ia64 build problems, convert timestamps to gettimeofday Message-ID: Fix RHEL5.1 ia64 v2 dapltest build problems with get_cycles by moving to gettimeofday. James, I kept the get_cycle code in with compile-time switch (RDTSC_TIMERS) and fixed the ia64 build issue just in case some cannot live without it. If you think we should just blow it all away let me know and I will update the patch. - Change dapltest timing to use gettimeofday instead of get_cycles for better portability. Signed-off by: Arlin Davis   diff --git a/test/dapltest/mdep/linux/dapl_mdep_user.c b/test/dapltest/mdep/linux/dapl_mdep_user.c index 015e53c..6402623 100644 --- a/test/dapltest/mdep/linux/dapl_mdep_user.c +++ b/test/dapltest/mdep/linux/dapl_mdep_user.c @@ -181,6 +181,7 @@ DT_Mdep_GetTime (void) return tv.tv_sec * 1000 + tv.tv_usec / 1000; } +#ifdef RDTSC_TIMERS double DT_Mdep_GetCpuMhz ( void ) @@ -231,6 +232,15 @@ DT_Mdep_GetCpuMhz ( return strtod (mhz_str, NULL) / DT_TSC_BASE; } +#else /* !RDTSC_TIMERS */ + +double +DT_Mdep_GetCpuMhz ( + void ) +{ + return 1; +} +#endif unsigned long diff --git a/test/dapltest/mdep/linux/dapl_mdep_user.h b/test/dapltest/mdep/linux/dapl_mdep_user.h index 4aa25d3..153c8c1 100755 --- a/test/dapltest/mdep/linux/dapl_mdep_user.h +++ b/test/dapltest/mdep/linux/dapl_mdep_user.h @@ -43,10 +43,6 @@ #include #include -#ifdef __ia64__ -#include -#endif - /* Default Device Name */ #define DT_MdepDeviceName "ofa-v2-ib0" @@ -111,7 +107,7 @@ typedef struct /* * Timing */ - +#ifdef RDTSC_TIMERS typedef unsigned long long int DT_Mdep_TimeStamp; 
static _INLINE_ DT_Mdep_TimeStamp @@ -122,12 +118,10 @@ DT_Mdep_GetTimeStamp ( void ) __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); return x; #else - #ifdef __ia64__ - unsigned long x; - - x = get_cycles (); - return x; + unsigned long ret; + __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret)); + return ret; #else #if defined(__PPC__) || defined(__PPC64__) unsigned int tbl, tbu0, tbu1; @@ -149,6 +143,21 @@ DT_Mdep_GetTimeStamp ( void ) #endif #endif } +#else /* !RDTSC_TIMERS */ +/* + * Get timestamp, microseconds, (relative to some fixed point) + */ +typedef double DT_Mdep_TimeStamp; + +static _INLINE_ DT_Mdep_TimeStamp +DT_Mdep_GetTimeStamp ( void ) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000 + tv.tv_usec); +} +#endif + /* * Define long format types to be used in *printf format strings. We diff --git a/test/dapltest/test/dapl_performance_stats.c b/test/dapltest/test/dapl_performance_stats.c index ce29272..0b1eaf1 100644 --- a/test/dapltest/test/dapl_performance_stats.c +++ b/test/dapltest/test/dapl_performance_stats.c @@ -281,7 +281,7 @@ DT_performance_stats_print ( " Operation Type : %s\n" " Number of Operations : %u\n" " Segment Size : %u\n" - " Number of Segments : %u bytes\n" + " Number of Segments : %u \n" " Pipeline Length : %u\n\n", DT_PerformanceModeToString (cmd->mode), DT_TransferTypeToString (cmd->op.transfer_type), From jenos at ncsa.uiuc.edu Fri Nov 16 14:30:54 2007 From: jenos at ncsa.uiuc.edu (Jeremy Enos) Date: Fri, 16 Nov 2007 16:30:54 -0600 Subject: [ofa-general] rhel5 updated, ofed 1.2.5.2 breaks In-Reply-To: <4738B3EB.8030304@ncsa.uiuc.edu> References: <47352125.2040206@ncsa.uiuc.edu> <4738B3EB.8030304@ncsa.uiuc.edu> Message-ID: <473E1A1E.2090409@ncsa.uiuc.edu> Reverting the kernel back to 2.6.18-8.1.15.el5 allowed it to build. Still broken for the current kernel though. Jeremy Jeremy Enos wrote: > Technically, after the updates, I guess I'm working with rhel5.1 now. 
> (x86_64, if that matters) > thx- > > Jeremy > > Jeremy Enos wrote: >> 232 updates to rhel5 in the last 2 weeks, so I decided to update. >> Went from kernel-2.6.18-53.el5 to kernel-2.6.18-8.1.15.el5. Now ofed >> build barks about various kernel headers during the build. >> See build log at: >> http://yams.ncsa.uiuc.edu/~jenos/OFED.build.26335.log >> >> Any ideas? thx- >> >> Jeremy Enos >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From chrise at sgi.com Fri Nov 16 15:23:31 2007 From: chrise at sgi.com (Chris Elmquist) Date: Fri, 16 Nov 2007 17:23:31 -0600 Subject: [ofa-general] IPoIB CQ overrun In-Reply-To: <20071115202302.GK5448@sgi.com> References: <20071115202302.GK5448@sgi.com> Message-ID: <20071116232331.GK24803@sgi.com> On Thursday (11/15/2007 at 12:23PM -0800), akepner at sgi.com wrote: > > We have a large (~1800 node) IB cluster of x86_64 machines, and > we're having some significant problems with IPoIB. > > The thing that all the IPoIB failures have in common seems to be > an appearance of a "CQ overrun" in syslog, e.g.: > > ib_mthca 0000:06:00.0: CQ overrun on CQN 180082 > > >From there things go badly in different ways - tx_timeouts, > oopses, etc. Sometimes things just start working again after > a few minutes. > > The appearance of these failures seems to be well correlated > with the size of the machine. I don't think there any problems > until the machine is built up to about its maximum size, and > then they become pretty common. 
> > We are using MT25204 HCAs with 1.2.0 firmware, and OFED 1.2. > > Does this ring a bell with anyone? I can perhaps elaborate a little more on the test case we are using to expose this situation... On 1024 (or more) nodes, nttcp -i is started as a "tcp socket server". Eight copies are started, each on a different tcp port (5000 ... 5007). On another client node, as few as 1024 and as many as 8192 nttcp clients are launched from that node to all of the 1024 others. We can have one connection between the client and each node or we can have eight connections between the client and each node. The nttcp test is run for 120 secs and in these scenarios, all connections get established, nttcp moves data, and never fails. We get expected performance. If the node count is increased to 1152, then things start to become unreliable. We will see connections fail to be established when we try to do 8 per node. If we do one per node, they will all establish and run. In fact, we can do one per node across 1664 and that will succeed also. So the problem seems to be related to the total number of nodes on the fabric as well as how many TCP connections you try to establish to each node. One is tempted to believe it is a problem at the single node that is opening all of these connections to the others... but the failure occurs on the nodes being connected to-- the nttcp servers-- with the CQ overrun and TX WATCHDOG TIMEOUTS, etc. The final outcome of which is that we lose all TCP connectivity over IB to the affected nodes for some period of time. Sometimes they come back, sometimes they don't and sometimes it's seconds and sometimes it's minutes before they come back. Not very deterministic. cje -- Chris Elmquist mailto:chrise at sgi.com (651)683-3093 Silicon Graphics, Inc. 
Eagan, MN From rvm at obsidianresearch.com Fri Nov 16 15:32:22 2007 From: rvm at obsidianresearch.com (Rolf Manderscheid) Date: Fri, 16 Nov 2007 16:32:22 -0700 Subject: [ofa-general] PATCH diags: add saquery option to get path records by GIDs Message-ID: <20071116233222.GE30090@obsidianresearch.com> Hi Sasha, This patch adds a --sgid-to-dgid option to saquery, useful for validating configuration of opensm prefix routes. Rolf Signed-off-by: Rolf Manderscheid --- man/saquery.8 | 14 +++++++- src/saquery.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 101 insertions(+), 7 deletions(-) diff --git a/infiniband-diags/man/saquery.8 b/infiniband-diags/man/saquery.8 index 516ae85..6860062 100644 --- a/infiniband-diags/man/saquery.8 +++ b/infiniband-diags/man/saquery.8 @@ -6,8 +6,10 @@ saquery \- query InfiniBand subnet administration attributes .SH SYNOPSIS .B saquery [\-h] [\-d] [\-p] [\-N] [\-\-list | \-D] [\-S] [\-I] [\-L] [\-l] [\-G] [\-O] -[\-U] [\-c] [\-s] [\-g] [\-m] [--src-to-dst ] [\-C ca_name] -[\-P ca_port] [\-t(imeout) ] [\-\-node\-name\-map ] +[\-U] [\-c] [\-s] [\-g] [\-m] [\-C ca_name] [\-P ca_port] [\-t(imeout) ] +[\-\-src\-to\-dst ] +[\-\-sgid\-to\-dgid ] +[\-\-node\-name\-map ] [ | | ] .SH DESCRIPTION @@ -66,6 +68,14 @@ description for each entry. Example: saquery -m 0xc000 get a PathRecord for where src and dst are either node names or LIDs .TP +.B \-\-sgid\-to\-dgid +get a PathRecord for +.I sgid +to +.I dgid +where both GIDs are in an IPv6 format acceptable to +.BR inet_pton (3). +.TP \fB\-C\fR use the specified ca_name. 
.TP diff --git a/infiniband-diags/src/saquery.c b/infiniband-diags/src/saquery.c index c6cc0a2..d060aa7 100644 --- a/infiniband-diags/src/saquery.c +++ b/infiniband-diags/src/saquery.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -782,6 +783,50 @@ get_print_path_rec_lid(osm_bind_handle_t bind_handle, } static ib_api_status_t +get_print_path_rec_gid(osm_bind_handle_t bind_handle, + const ib_gid_t *src_gid, + const ib_gid_t *dst_gid) +{ + int i = 0; + ib_path_rec_t *path_record = NULL; + osmv_query_req_t req; + osmv_gid_pair_t gid_pair; + ib_api_status_t status; + + gid_pair.src_gid = *src_gid; + gid_pair.dest_gid = *dst_gid; + + memset( &req, 0, sizeof( req ) ); + + req.query_type = OSMV_QUERY_PATH_REC_BY_GIDS; + req.timeout_ms = sa_timeout_ms; + req.retry_cnt = 1; + req.flags = OSM_SA_FLAGS_SYNC; + req.query_context = NULL; + req.pfn_query_cb = query_res_cb; + req.p_query_input = (void *)&gid_pair; + req.sm_key = 0; + + if ((status = osmv_query_sa(bind_handle, &req)) != IB_SUCCESS) { + fprintf(stderr, "ERROR: Query SA failed: %s\n", + ib_get_err_str(status)); + return (status); + } + if (result.status != IB_SUCCESS) { + fprintf(stderr, "ERROR: Query result returned: %s\n", + ib_get_err_str(result.status)); + return (result.status); + } + status = result.status; + for (i = 0; i < result.result_cnt; i++) { + path_record = osmv_get_query_path_rec(result.p_result_madw, i); + print_path_record(path_record); + } + return_mad(); + return (status); +} + +static ib_api_status_t get_print_class_port_info(osm_bind_handle_t bind_handle) { int i = 0; @@ -1046,8 +1091,8 @@ static void usage(void) { fprintf(stderr, "Usage: %s [-h -d -p -N] [--list | -D] [-S -I -L -l -G" - " -O -U -c -s -g -m --src-to-dst -C " - "-P -t(imeout) ] [ | | ]\n", + " -O -U -c -s -g -m --src-to-dst --sgid-to-dgid " + "-C -P -t(imeout) ] [ | | ]\n", argv0); fprintf(stderr, " Queries node records by default\n"); fprintf(stderr, " -d enable debugging\n"); @@ -1070,8 
+1115,11 @@ usage(void) " only for group specified\n"); fprintf(stderr, " specified, for example 'saquery -m 0xC000')\n"); fprintf(stderr, " --src-to-dst get a PathRecord for \n" - " where src amd dst are either node " + " where src and dst are either node " "names or LIDs\n"); + fprintf(stderr, " --sgid-to-dgid get a PathRecord for \n" + " where sgid and dgid are addresses in " + "IPv6 format\n"); fprintf(stderr, " -C specify the SA query HCA\n"); fprintf(stderr, " -P specify the SA query port\n"); fprintf(stderr, " -t | --timeout specify the SA query " @@ -1089,6 +1137,8 @@ main(int argc, char **argv) osm_bind_handle_t bind_handle; char *src = NULL; char *dst = NULL; + char *sgid = NULL; + char *dgid = NULL; ib_net16_t src_lid; ib_net16_t dst_lid; ib_api_status_t status; @@ -1115,8 +1165,9 @@ main(int argc, char **argv) {"help", 0, 0, 'h'}, {"list", 0, 0, 'D'}, {"src-to-dst", 1, 0, 1}, + {"sgid-to-dgid", 1, 0, 2}, {"timeout", 1, 0, 't'}, - {"node-name-map", 1, 0, 2}, + {"node-name-map", 1, 0, 3}, { } }; @@ -1142,7 +1193,25 @@ main(int argc, char **argv) query_type = IB_MAD_ATTR_PATH_RECORD; break; } - case 2: + case 2: + { + char *opt = strdup(optarg); + char *tok1 = strtok(opt, "-"); + char *tok2 = strtok(NULL, "\0"); + + if (tok1 && tok2) { + sgid = strdup(tok1); + dgid = strdup(tok2); + } else { + fprintf(stderr, + "ERROR: --sgid-to-dgid -\n"); + usage(); + } + free(opt); + query_type = IB_MAD_ATTR_PATH_RECORD; + break; + } + case 3: node_name_map_file = strdup(optarg); break; case 'p': @@ -1264,6 +1333,21 @@ main(int argc, char **argv) } else { status = get_print_path_rec_lid(bind_handle, src_lid, dst_lid); } + } else if (sgid && dgid) { + struct in6_addr src_addr, dst_addr; + + if (inet_pton(AF_INET6, sgid, &src_addr) <= 0) { + fprintf(stderr, "invalid src gid: %s\n", sgid); + exit(-1); + } + if (inet_pton(AF_INET6, dgid, &dst_addr) <= 0) { + fprintf(stderr, "invalid dst gid: %s\n", dgid); + exit(-1); + } + status = get_print_path_rec_gid( + bind_handle, + 
(ib_gid_t *) &src_addr.s6_addr, + (ib_gid_t *) &dst_addr.s6_addr); } else { status = print_path_records(bind_handle); } From unmonastic at ennisfootball.com Fri Nov 16 22:07:35 2007 From: unmonastic at ennisfootball.com (Shane Hernandez) Date: Sat, 17 Nov 2007 11:07:35 +0500 Subject: [ofa-general] Adobe Font Folio 11 MAC/XP/Vista for 189, Retails @ 2599 (You save 2409) Message-ID: <000001c828df$9bfe2000$0100007f@localhost> adobe acrobat 8.0 professional - 79 steinberg nuendo 3.1 - 99 intuit quicken premier 2008 - 29 mindjet mindmanager 7 for mac - 39 autodesk architectural studio 3.0 - 39 creative suite standard - 99 crystal reports professional edition 11 - 69 cakewalk sonar 6 producer edition - 69 visit cheapoemsoft3 .com in Internet Explorer From sean.hefty at intel.com Fri Nov 16 23:17:40 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 16 Nov 2007 23:17:40 -0800 Subject: [ofa-general] RE: disconnect issues/questions In-Reply-To: <15ddcffd0711142358m55192a25qaa2e419045f6d0ea@mail.gmail.com> References: <15ddcffd0711142341g7b83d917t2fcc4b9a64e54f55@mail.gmail.com> <15ddcffd0711142358m55192a25qaa2e419045f6d0ea@mail.gmail.com> Message-ID: <000001c828e9$f0ad4f40$2ccc180a@amr.corp.intel.com> >A) it seems that other than some error handling code, only the flow of >rdma_disconnect() and RDMA_CM_EVENT_REJECTED move the QP associated >with this ID to the ERROR state. From that I conclude that in order to >get flushes on all the WR posted to the QP one must call >rdma_disconnect() in both sides of the connection. Am I right, is this >what we want? if yes, lets document this. Correct - both sides must call disconnect. rdma_cm.7 and rdma_disconnect.3 man pages call this out to some degree. >B) will RDMA_CM_EVENT_DISCONNECTED event would --always-- be generated >also for the side that called rdma_disconnect()? in both cases (yes >and no), we need to document this. I need to verify this. 
- Sean From vlad at lists.openfabrics.org Sat Nov 17 02:50:35 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 17 Nov 2007 02:50:35 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071117-0200 daily build status Message-ID: <20071117105035.7C199E60A17@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.14 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.16 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.12 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.20 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.17 Passed on powerpc with linux-2.6.15 Passed on 
ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.22 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In 
function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': 
/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071117-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From hrosenstock at xsigo.com Sat Nov 17 08:31:39 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 17 Nov 2007 08:31:39 -0800 Subject: [ofa-general] Re: [openSM] Pkey index and Pkey value In-Reply-To: <473DC72F.5040806@cea.fr> References: <829ded920711120045s1c03e008k5c8fa3034744601b@mail.gmail.com> <829ded920711122126k2bd7f3adu8859799442701602@mail.gmail.com> <473DC72F.5040806@cea.fr> Message-ID: 
<1195317099.6525.28.camel@hrosenstock-ws.xsigo.com> On Fri, 2007-11-16 at 17:37 +0100, Philippe Gregoire wrote: > Keshetti Mahesh a écrit : > >> No; there is no requirement for a pkey value to be in the same index > >> in different ports. > >> > > > > But if you want to run an MPI application in all the ports belonging to > > one partition (e.g: partition2 in my previous exmple) a conflict will arise > > as most of the MPI implementations have provision of allowing only one > > Pkey index (e.g: ib_pkey_ix in openMPI). This problem can be easily solved > > by storing pkey value at the same pkey index in all ports belonging to same > > partition. > > > > -Mahesh > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > > > No, some nodes in the cluster may have a specific partitions configuration > so for the same partition (ex 0x8088) pkey index may change on nodes. > > For more informations, > See https://bugs.openfabrics.org/show_bug.cgi?id=519 > > That's why a new configuration parameter has been created for mvapich : > # grep -C2 VIADEV_DEFAULT_PKEY /usr/mpi/gcc/mvapich-0.9.9/etc/mvapich.conf > # VIADEV_DEFAULT_MAX_RDMA_DST_OPS > #----------------------------------------------------------------------------------------- > # VIADEV_DEFAULT_PKEY_IX > # > # Default value:none > # Index of predefined partition > #----------------------------------------------------------------------------------------- > # VIADEV_DEFAULT_PKEY > # > # Default value:none > # Name (key) of predefined partition. One can pass it both in decimal > end hexadecimal > # format. This parameter prevail over VIADEV_DEFAULT_PKEY_IX, when defined. 
> #----------------------------------------------------------------------------------------- > > I'm wondering if the ib_pkey_val parameter has not the same goal for > OPENMPI : > > see https://svn.open-mpi.org/trac/ompi/changeset/14463 I recently discussed this with Jeff Squyres and he showed me a recent checkin (not sure for which OpenMPI version) which supports either pkey value or index. So it looks like both OpenMPI and MVAPICH support this. Not sure about any commercial MPIs. Bottom line, pkey value will always work; pkey index is SM dependent. -- Hal > Philippe > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Sat Nov 17 08:35:01 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 17 Nov 2007 08:35:01 -0800 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <20071115160813.GH21832@sashak.voltaire.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> <20071115160813.GH21832@sashak.voltaire.com> Message-ID: <1195317301.6525.32.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-15 at 18:08 +0200, Sasha Khapyorsky wrote: > On 06:30 Thu 15 Nov , Hal Rosenstock wrote: > > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > > lid output format unification is needed. Print LIDs as decimal in > > > ibtracert. > > > > I'd prefer to see this done as some sort of option. > > It is possible, but I think it is overkill. I don't think it is in the short term if it is not all going to be converted. Will all the changes make OFED 1.3 ? > > Also, I think hex is > > better for MLIDs. > > Yes, there still be hex view for MLIDs. 
> > > It would be best to see the bigger picture for what will change to > > support decimal LIDs in management and ibutils but this is a first trial > > balloon IMO. > > For both management and ibutils bugs were filed. We need to start > somewhere... Yes, but IMO OpenSM is a critical piece to be consistent with here. A quick scan shows many displays in hex of unicast LIDs. -- Hal > Sasha From kliteyn at mellanox.co.il Fri Nov 16 21:12:44 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 17 Nov 2007 07:12:44 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-17:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-16 OpenSM git rev = Thu_Nov_15_22:11:52_2007 [0f02129fba975d28b123104af97786ac578b3c90] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From hrosenstock at xsigo.com Sat Nov 17 08:56:19 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 17 Nov 2007 08:56:19 -0800 Subject: [ofa-general] Re: [PATCH] 
infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <20071115094014.419113b7.weiny2@llnl.gov> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> <20071115094014.419113b7.weiny2@llnl.gov> Message-ID: <1195318579.6525.44.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-15 at 09:40 -0800, Ira Weiny wrote: > On Thu, 15 Nov 2007 06:30:17 -0800 > Hal Rosenstock wrote: > > > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > > lid output format unification is needed. Print LIDs as decimal in > > > ibtracert. > > > > I'd prefer to see this done as some sort of option. Also, I think hex is > > better for MLIDs. > > While I support the output of decimal for the diags I don't know if this > changes any of the scripts. We should check that and I don't have the time > right now. > > Sasha do any of the scripts use ibtracert? Does anyone else on the list use > the output? Don't know but this is one other reason to maintain output backward compatibility and make this an option. -- Hal > Ira > > > It would be best to see the bigger picture for what will change to > > support decimal LIDs in management and ibutils but this is a first trial > > balloon IMO. 
> > > > -- Hal > > > > > > > > Signed-off-by: Sasha Khapyorsky > > > --- > > > infiniband-diags/src/ibtracert.c | 10 +++++----- > > > 1 files changed, 5 insertions(+), 5 deletions(-) > > > > > > diff --git a/infiniband-diags/src/ibtracert.c b/infiniband-diags/src/ibtracert.c > > > index 010f45f..284ae2a 100644 > > > --- a/infiniband-diags/src/ibtracert.c > > > +++ b/infiniband-diags/src/ibtracert.c > > > @@ -207,7 +207,7 @@ dump_endnode(int dump, char *prompt, Node *node, Port *port) > > > > > > nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > > > > > - fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid 0x%x-0x%x \"%s\"\n", > > > + fprintf(f, "%s %s {0x%016" PRIx64 "} portnum %d lid %u-%u \"%s\"\n", > > > prompt, > > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > > node->nodeguid, node->type == IB_NODE_SWITCH ? 0 : port->portnum, > > > @@ -231,7 +231,7 @@ dump_route(int dump, Node *node, int outport, Port *port) > > > fprintf(f, "[%d] -> {0x%016" PRIx64 "}[%d]\n", > > > outport, port->portguid, port->portnum); > > > else > > > - fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid 0x%x-0x%x \"%s\"\n", > > > + fprintf(f, "[%d] -> %s port {0x%016" PRIx64 "}[%d] lid %u-%u \"%s\"\n", > > > outport, > > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > > port->portguid, port->portnum, > > > @@ -640,7 +640,7 @@ dump_mcpath(Node *node, int dumplevel) > > > nodename = remap_node_name(node_name_map, node->nodeguid, node->nodedesc); > > > > > > if (!node->dist) { > > > - printf("From %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", > > > + printf("From %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", > > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > > node->nodeguid, node->ports->portnum, node->ports->lid, > > > node->ports->lid + (1 << node->ports->lmc) - 1, > > > @@ -655,7 +655,7 @@ dump_mcpath(Node *node, int dumplevel) > > > (node->type <= IB_NODE_MAX ? 
node_type_str[node->type] : "???"), > > > node->nodeguid, node->upport); > > > else > > > - printf("[%d] -> %s 0x%" PRIx64 "[%d] lid 0x%x \"%s\"\n", > > > + printf("[%d] -> %s 0x%" PRIx64 "[%d] lid %u \"%s\"\n", > > > node->ports->remoteport->portnum, > > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > > node->nodeguid, node->upport, > > > @@ -664,7 +664,7 @@ dump_mcpath(Node *node, int dumplevel) > > > > > > if (node->dist < 0) > > > /* target node */ > > > - printf("To %s 0x%" PRIx64 " port %d lid 0x%x-0x%x \"%s\"\n", > > > + printf("To %s 0x%" PRIx64 " port %d lid %u-%u \"%s\"\n", > > > (node->type <= IB_NODE_MAX ? node_type_str[node->type] : "???"), > > > node->nodeguid, node->ports->portnum, node->ports->lid, > > > node->ports->lid + (1 << node->ports->lmc) - 1, > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sashak at voltaire.com Sat Nov 17 09:35:36 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sat, 17 Nov 2007 17:35:36 +0000 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <1195317301.6525.32.camel@hrosenstock-ws.xsigo.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> <20071115160813.GH21832@sashak.voltaire.com> <1195317301.6525.32.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071117173536.GA3719@sashak.voltaire.com> On 08:35 Sat 17 Nov , Hal Rosenstock wrote: > On Thu, 2007-11-15 at 18:08 +0200, Sasha Khapyorsky wrote: > > On 06:30 Thu 15 Nov , Hal Rosenstock wrote: > > > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > > > lid output format unification is needed. 
Print LIDs as decimal in > > > > ibtracert. > > > > > > I'd prefer to see this done as some sort of option. > > > > It is possible, but I think it is overkill. > > I don't think it is in the short term if it is not all going to be > converted. Will all the changes make OFED 1.3 ? I think to push it. Sasha > > > Also, I think hex is > > > better for MLIDs. > > > > Yes, there still be hex view for MLIDs. > > > > > It would be best to see the bigger picture for what will change to > > > support decimal LIDs in management and ibutils but this is a first trial > > > balloon IMO. > > > > For both management and ibutils bugs were filled. We need to start > > somewhere... > > Yes, but IMO OpenSM is a critical piece to be consistent with here IMO. > A quick scan shows many displays in hex of unicast LIDs. > > -- Hal > > > Sasha From lloydsbuddy at gmail.com Sat Nov 17 09:06:18 2007 From: lloydsbuddy at gmail.com (LLOYDS BUDDY) Date: Sat, 17 Nov 2007 12:06:18 -0500 (EST) Subject: [ofa-general] JOB OFFER Message-ID: <2149.207.226.43.4.1195319178.squirrel@megamallz.com> Capital Gems Company Ltd (CHINA) 107 Dongsi Bei Dajie Dongcheng District Beijing 100007 China Ref: 44/28/CH Attn: Mr./Mrs, I am Mr. Lloyds Buddy Chief Personnel Officer Gems Company Ltd (China).We are a trading company that is into the hire and sales of precious stones like: Ruby, Sapphire, Emerald, Tanzanite, Morganite, Amethyst, Citrine, Peridot etc. which can be readily adapted for customers specific requirements to the America, Asia, South America and selected locations in Europe. 
Currently our company Capital Gems in China is encountering problems by retrieving/collecting payment from its customers/client located in America, Asia, South America and selected locations in Europe, due to the fact that we sell goods to these customers/clients on credit base and we are now searching for a company or individual who can represent our company in terms of helping us Collecting these payment in his/her country and earn 10% commission of every payment made through you or your company on behalf of Capital Gems Company Ltd (China). If you know you are not trustwothy please do not bother to reply,because a lot of money is involved here. All I am asking you is, if you would like to earn 10% of any amount you will help to collect from customers. One of the vital reason for this new development is that before, things were not as difficult as this for Capital Gems Company Ltd and exporters in the China because cheques from these countries never took time before they are cleared in our territory, but now it takes weeks and this slows down the production of the company.Some of these customers even issued cheques that will not be clear eventually at the end of the day and these has being creating more financial problems for the company (Capital Gems Company Ltd) and including other Chinese businessmen. The best part of this job offer is that you do not have to leave your present place of employment because it is on a part-time basis. You can ask any question concerning the areas where you are not clear and I assure you that we will answer them to the best of our knowledge. Thank you. If you are interested,forward to us the following information. Name In Full Home Address City State Zip Code Country Home and Office phone Fax No If Any Age Nationality Mr Lloyds Buddy. 
Capital Gems Company Ltd NB: PLEASE DO NOT APPLY IF YOU ARE NOT FROM THE U.S.A.,CANADA,EUROPE, AUSTRALIA OR ASIA.THESE ARE THE ONLY PLACES WHERE I NEED REPRESENTATIVES.OUR COMPANY'S LINE OF BUSINESS IS VERY LUCRATIVE AND YOU WILL NOT REGRET BEING A MEMBER OF THE TEAM. Copyright(C)2007,Capital Gems Company Ltd. All Rights Reserved From jgunthorpe at obsidianresearch.com Sat Nov 17 12:33:27 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Sat, 17 Nov 2007 13:33:27 -0700 Subject: [ofa-general] [PATCH] opensm: support multiple routers in a subnet In-Reply-To: <20071116080840.GC30090@obsidianresearch.com> References: <20071116080840.GC30090@obsidianresearch.com> Message-ID: <20071117203327.GB32058@obsidianresearch.com> On Fri, Nov 16, 2007 at 01:08:40AM -0700, Rolf Manderscheid wrote: > If a path record query is made for an off-subnet DGID, the SA needs to > return a path record where the DLID points to the router port that > handles the DGID prefix. In the case of a subnet with only one > router, the SA could just pick "the router", and that's exactly what > the ROUTER_EXP code did. However, ROUTER_EXP did not look beyond the > first available router. One thing we learned during the show is that the format of this file should be improved slightly. I suggest copying the usual linux 'ip route' syntax 2001::/64 via fe80::xxx:xxxx:xxxx The important thing is adding the prefix bits, but also normalizing the information to match IPv6 conventions. In future other keywords beyond 'via' could be added, like 'mtu' and so on.
The special word 'default' means ::/0 Jason From jgunthorpe at obsidianresearch.com Sat Nov 17 12:41:11 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Sat, 17 Nov 2007 13:41:11 -0700 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071117204111.GC32058@obsidianresearch.com> On Thu, Nov 15, 2007 at 06:30:17AM -0800, Hal Rosenstock wrote: > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > lid output format unification is needed. Print LIDs as decimal in > > ibtracert. > > I'd prefer to see this done as some sort of option. Also, I think hex is > better for MLIDs. FWIW, we consistently use hex notation in our switch products in the format '0x10/16' which specifies both the LMC and the LID in a compact manner. Since both GIDs, GUIDs and MACs are printed in hex, choosing decimal for lid seems like an inconsistent choice to me. It also makes it harder to read out the LMC bits. Also, this same kind of unification is needed for GID's. They should always be printed and accepted in IPv6 format, not 128 bit decimal. Jason From hrosenstock at xsigo.com Sat Nov 17 12:52:10 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sat, 17 Nov 2007 12:52:10 -0800 Subject: [ofa-general] [PATCH 1/3] opensm/libvendor: remove not used umad_ca field In-Reply-To: <11951173841652-git-send-email-sashak@voltaire.com> References: <11951173841159-git-send-email-sashak@voltaire.com> <11951173841652-git-send-email-sashak@voltaire.com> Message-ID: <1195332730.6727.47.camel@hrosenstock-ws.xsigo.com> On Thu, 2007-11-15 at 11:03 +0200, Sasha Khapyorsky wrote: > Remove not really used (but leaked) umad_ca field from ibumad vendor > structure. 
> > Signed-off-by: Sasha Khapyorsky > --- > opensm/include/vendor/osm_vendor_ibumad.h | 1 - > opensm/libvendor/osm_vendor_ibumad.c | 18 ------------------ > 2 files changed, 0 insertions(+), 19 deletions(-) > > diff --git a/opensm/include/vendor/osm_vendor_ibumad.h b/opensm/include/vendor/osm_vendor_ibumad.h > index 743b393..84fd21a 100644 > --- a/opensm/include/vendor/osm_vendor_ibumad.h > +++ b/opensm/include/vendor/osm_vendor_ibumad.h > @@ -158,7 +158,6 @@ typedef struct _osm_vendor { > osm_bind_handle_t agents[UMAD_CA_MAX_AGENTS]; > char ca_names[OSM_UMAD_MAX_CAS][UMAD_CA_NAME_LEN]; > vendor_match_tbl_t mtbl; > - umad_ca_t umad_ca; > umad_port_t umad_port; > pthread_mutex_t cb_mutex; > pthread_mutex_t match_tbl_mutex; > diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c > index 240a97b..9e186d5 100644 > --- a/opensm/libvendor/osm_vendor_ibumad.c > +++ b/opensm/libvendor/osm_vendor_ibumad.c > @@ -703,24 +703,6 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, > } > > /* Port found, try to open it */ > - if (umad_get_ca(p_vend->ca_names[ca], &p_vend->umad_ca) < 0) { > - osm_log(p_vend->p_log, OSM_LOG_ERROR, > - "osm_vendor_open_port: ERR 542A: " > - "umad_get_ca() failed\n"); > - goto Exit; > - } > - > - /* Validate that node is an IB node type */ > - if (p_vend->umad_ca.node_type < 1 || p_vend->umad_ca.node_type > 3) { Where is this check done now ? This was put in to skip iWARP nodes. Have you tried that configuration ? 
-- Hal > - osm_log(p_vend->p_log, OSM_LOG_ERROR, > - "osm_vendor_open_port: ERR 542D: " > - "Node type %d is not an IB node type\n", > - p_vend->umad_ca.node_type); > - fprintf(stderr, "Node type %d is not an IB node type\n", > - p_vend->umad_ca.node_type); > - goto Exit; > - } > - > if (umad_get_port(p_vend->ca_names[ca], i, &p_vend->umad_port) < 0) { > osm_log(p_vend->p_log, OSM_LOG_ERROR, > "osm_vendor_open_port: ERR 542B: " From jgunthorpe at obsidianresearch.com Sat Nov 17 12:53:49 2007 From: jgunthorpe at obsidianresearch.com (Jason Gunthorpe) Date: Sat, 17 Nov 2007 13:53:49 -0700 Subject: [ofa-general] [PATCH] libibmad/dump.c: Support link speed and width vendor extensions In-Reply-To: <1194968323.6542.213.camel@hrosenstock-ws.xsigo.com> References: <1194968323.6542.213.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071117205349.GD32058@obsidianresearch.com> On Tue, Nov 13, 2007 at 07:38:43AM -0800, Hal Rosenstock wrote: > libibmad/dump.c: Support link speed and width vendor extensions > > When decoding values, handle vendor extensions to link speed and width > including accommodating a "documentation" change between IBA 1.2 and > 1.2.1 Isn't this better done with a bitmap match? 
const unsigned int widths[] = {1,4,8,12}; char S[300]; int off = 0; for (unsigned int I = 0; I != sizeof(width)/sizeof(width[0]) && off < sizeof(S); I++) if ((width >> I) & 1) off += snprintf(S,sizeof(S)-off,"%uX ",widths[I]); Regards, Jason From sean.hefty at intel.com Sat Nov 17 20:01:59 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Sat, 17 Nov 2007 20:01:59 -0800 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <1195056883.14106.90.camel@hrosenstock-ws.xsigo.com> References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il><1195046307.14106.72.camel@hrosenstock-ws.xsigo.com><473B14A3.5090703@dev.mellanox.co.il> <1195056883.14106.90.camel@hrosenstock-ws.xsigo.com> Message-ID: <000b01c82997$c4f8f670$0c02a8c0@amr.corp.intel.com> >> And as you've mentioned, some rules may overlap. For instance, >> if the rule for all the RDS traffic will appear before the iSER >> rule, then iSER requests will be caught by the RDS rule. > >That doesn't sound so good but I don't see a good alternative here other >than for this case to put the iSER rule first. The other fallback is the >more detailed configuration but RDS falls into the generic range >category which is problematic in terms of this (and can't be >differentiated by ServiceID unlike the other ULPs). I'm not overly familiar with the details of RDS, but even if the active side uses a dynamic service ID, I would expect the passive side to use something well known. - Sean From mccreary at darwinmag.com Sat Nov 17 21:21:08 2007 From: mccreary at darwinmag.com (Alison Cruz) Date: Sat, 17 Nov 2007 21:21:08 -0800 Subject: [ofa-general] Stop paying so much for what you know already. Message-ID: <01c8295f$c4ed3110$e2bd647a@mccreary> Obtain the degree you deserve, based on your present knowledge and work experience. A prosperous future, money earning power, and the Admiration of all. Degrees from an Established, Prestigious, Leading Institution. Your Degree will show exactly what you really can do.
Get the Job, Promotion, Business Opportunity and Social Advancement you Desire! Eliminates classrooms and traveling. Achieve your Bach elors, Masters, M B A, or Ph D in the field of your expertise Professional and affordable Call now - your Graduation is a phone call away. Please call: 1-630-225-5210 thong of buckskin, and fastened his canoe to the stalks of the peculiar smell--one so fragrant and tempting to those who are to observe their proceedings. After discussing the matter for some From jackm at dev.mellanox.co.il Sat Nov 17 22:35:37 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Sun, 18 Nov 2007 08:35:37 +0200 Subject: [ofa-general] [RFC] mlx4: Add some form of error-path debug output In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E2D6@mtlexch01.mtl.com> References: <200711151920.59592.jackm@dev.mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E2D6@mtlexch01.mtl.com> Message-ID: <200711180835.37745.jackm@dev.mellanox.co.il> Roland, I quickly dashed off an example of how I can rapidly implement some form of debug printouts to assist in supporting the driver. The philosophy I used below is to focus more on errors which would be returned as -EINVAL -- to provide a bit more detail. Also, I wanted the facility to be on-off switchable at run-time (i.e., module-parameter controlled). Finally, although I think that it would be nice to have component-based trace capability, (i.e., treat the debug level parameter as a bitmask), I think we can do without this for now (since we are only implementing error path printouts). Let me know what you think. - Jack (P.S., I've not yet compiled this, so it may have bugs). 
============================================================= diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d8287d9..970e181 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -43,7 +43,7 @@ #include "mlx4_ib.h" #include "user.h" -#define DRV_NAME "mlx4_ib" +#define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "0.01" #define DRV_RELDATE "May 1, 2006" @@ -52,6 +52,14 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +#ifdef CONFIG_MLX4_DEBUG + +int mlx4_ib_debug_level = 0; +module_param_named(debug_level, mlx4_ib_debug_level, int, 0644); +MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); + +#endif /* CONFIG_MLX4_DEBUG */ + static const char mlx4_ib_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 2869765..3664f86 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -43,6 +43,25 @@ #include #include + +#define MLX4_IB_DRV_NAME "mlx4_ib" + +#ifdef CONFIG_MLX4_DEBUG +extern int mlx4_ib_debug_level; + +#define mlx4_ib_dbg(format, arg...) \ + do { \ + if (mlx4_ib_debug_level) \ + printk(KERN_DEBUG MLX4_IB_DRV_NAME ", %s: " format "\n",\ + __func__, ## arg); \ + } while (0) + +#else /* CONFIG_MLX4_DEBUG */ + +#define mlx4_ib_dbg(format, arg...) 
do {} while (0) + +#endif /* CONFIG_MLX4_DEBUG */ + enum { MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4 }; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8cba9c5..fe48b13 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -213,19 +213,25 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes || - cap->max_recv_sge > dev->dev->caps.max_rq_sg) + cap->max_recv_sge > dev->dev->caps.max_rq_sg) { + mlx4_ib_dbg("Requested RQ size (sge or wr) too large"); return -EINVAL; + } if (has_srq) { /* QPs attached to an SRQ should have no RQ */ - if (cap->max_recv_wr) + if (cap->max_recv_wr) { + mlx4_ib_dbg("non-zero RQ size for QP using SRQ"); return -EINVAL; + } qp->rq.wqe_cnt = qp->rq.max_gs = 0; } else { /* HW requires >= 1 RQ entry with >= 1 gather entry */ - if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { + mlx4_ib_dbg("user QP RQ has 0 wr's or 0 sge's"); return -EINVAL; + } qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); @@ -245,16 +251,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, if (cap->max_send_wr > dev->dev->caps.max_wqes || cap->max_send_sge > dev->dev->caps.max_sq_sg || cap->max_inline_data + send_wqe_overhead(type) + - sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) { + mlx4_ib_dbg("Requested SQ resources exceed device maxima"); return -EINVAL; + } /* * For MLX transport we need 2 extra S/G entries: * one for the header and one for the checksum at the end */ if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && - cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) + cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) { + 
mlx4_ib_dbg("No space for SQP hdr/csum sge's"); return -EINVAL; + } qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg), @@ -297,8 +307,10 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev, if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes || ucmd->log_sq_stride > ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) || - ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) + ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE) { + mlx4_ib_dbg("Requested max wqes or wqe stride exceeds max"); return -EINVAL; + } qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; qp->sq.wqe_shift = ucmd->log_sq_stride; @@ -350,23 +362,30 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); + mlx4_ib_dbg("ib_umem_get error (%d)", err); goto err; } err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem), ilog2(qp->umem->page_size), &qp->mtt); - if (err) + if (err) { + mlx4_ib_dbg("mlx4_mtt_init error (%d)", err); goto err_buf; + } err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem); - if (err) + if (err) { + mlx4_ib_dbg("mlx4_ib_umem_write_mtt error (%d)", err); goto err_mtt; + } if (!init_attr->srq) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); - if (err) + if (err) { + mlx4_ib_dbg("mlx4_ib_db_map_user error (%d)", err); goto err_mtt; + } } } else { qp->sq_no_prefetch = 0; @@ -390,12 +409,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, &qp->mtt); - if (err) + if (err) { + mlx4_ib_dbg("kernel qp mlx4_mtt_init error (%d)", err); goto err_buf; + } err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); - if (err) + if (err) { + mlx4_ib_dbg("mlx4_buf_write_mtt error (%d)", err); goto err_mtt; + } qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), 
GFP_KERNEL); From eli at dev.mellanox.co.il Sun Nov 18 00:27:23 2007 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Sun, 18 Nov 2007 10:27:23 +0200 Subject: [ofa-general] IPoIB CQ overrun In-Reply-To: <20071116232331.GK24803@sgi.com> References: <20071115202302.GK5448@sgi.com> <20071116232331.GK24803@sgi.com> Message-ID: <1195374443.2802.31.camel@mtls03> Can you tell how IPOIB is configured - connected mode or datagram mode? Also can you send more context from /var/log/messages? Especially can you rerun with debug enabled and send the output? Enabling debug can be done by: echo 1 > /sys/module/ib_ipoib/parameters/debug_level On Fri, 2007-11-16 at 17:23 -0600, Chris Elmquist wrote: > On Thursday (11/15/2007 at 12:23PM -0800), akepner at sgi.com wrote: > > > > We have a large (~1800 node) IB cluster of x86_64 machines, and > > we're having some significant problems with IPoIB. > > > > The thing that all the IPoIB failures have in common seems to be > > an appearance of a "CQ overrun" in syslog, e.g.: > > > > ib_mthca 0000:06:00.0: CQ overrun on CQN 180082 > > > > >From there things go badly in different ways - tx_timeouts, > > oopses, etc. Sometimes things just start working again after > > a few minutes. > > > > The appearance of these failures seems to be well correlated > > with the size of the machine. I don't think there any problems > > until the machine is built up to about its maximum size, and > > then they become pretty common. > > > > We are using MT25204 HCAs with 1.2.0 firmware, and OFED 1.2. > > > > Does this ring a bell with anyone? > > I can perhaps elaborate a little more on the test case we are using to > expose this situation... > > On 1024 (or more) nodes, nttcp -i is started as a "tcp socket server". > Eight copies are started, each on a different tcp port (5000 ... 5007). > > On another client node, as few as 1024 and as many as 8192 nttcp clients > are launched from that node to all of the 1024 others. 
We can have > one connection between the client and each node or we can have eight > connections between the client and each node. The nttcp test is run > for 120 secs and in these scenarios, all connections get established, > nttcp moves data, and never fails. We get expected performance. > > If the node count is increased to 1152, then things start to become > unreliable. We will see connections fail to be established when we try > to do 8 per node. If we do one per node, they will all establish and run. > In fact, we can do one per node across 1664 and that will succeed also. > > So the problem seems to be related to the total number of nodes on > the fabric as well as how many TCP connections you try to establish to > each node. > > One is tempted to believe it is a problem at the single node that is > opening all of these connections to the others... but the failure occurs > on the nodes being connected to-- the nttcp servers-- with the CQ overrun > and TX WATCHDOG TIMEOUTS, etc. The final outcome of which is that we lose > all TCP connectivity over IB to the affected nodes for some period of time. > Sometimes they come back, sometimes they don't and sometimes it's seconds > and sometimes it's minutes before they come back. Not very deterministic.
> > cje From vlad at lists.openfabrics.org Sun Nov 18 02:50:09 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 18 Nov 2007 02:50:09 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071118-0200 daily build status Message-ID: <20071118105009.F1325E608B9@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.19 Passed on ppc64 with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.22 Passed on powerpc with linux-2.6.14 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.13 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.14 Passed 
on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 
'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: 
*** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 
'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071118-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From kliteyn at dev.mellanox.co.il Sun Nov 18 03:59:01 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 13:59:01 +0200 Subject: [ofa-general] [PATCH] osm: use OSM_DEFAULT_PARTITION_CONFIG_FILE instead of hard-coded string Message-ID: <47402905.6030302@dev.mellanox.co.il> Use default partition file instead of a hard-coded string 
Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_prtn.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/opensm/osm_prtn.c b/opensm/opensm/osm_prtn.c index 8b20459..ec42da1 100644 --- a/opensm/opensm/osm_prtn.c +++ b/opensm/opensm/osm_prtn.c @@ -355,7 +355,7 @@ ib_api_status_t osm_prtn_make_partitions(osm_log_t * const p_log, osm_prtn_t *p; file_name = p_subn->opt.partition_config_file ? - p_subn->opt.partition_config_file : "/etc/osm-partitions.conf"; + p_subn->opt.partition_config_file : OSM_DEFAULT_PARTITION_CONFIG_FILE; if (stat(file_name, &statbuf)) is_config = FALSE; -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Sun Nov 18 04:02:47 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 14:02:47 +0200 Subject: [ofa-general] [PATCH] osm: moving call to QoS policy file parser to osm_qos_setup() Message-ID: <474029E7.605@dev.mellanox.co.il> When QoS policy is being set up, partitions have to already exist. Moving call to policy file parser from osm_subn_rescan_conf_files() to osm_qos_setup(). 
Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos.c | 4 ++++ opensm/opensm/osm_subnet.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opensm/opensm/osm_qos.c b/opensm/opensm/osm_qos.c index c6641fc..1c1e1f1 100644 --- a/opensm/opensm/osm_qos.c +++ b/opensm/opensm/osm_qos.c @@ -52,6 +52,7 @@ #include #include #include +#include struct qos_config { uint8_t max_vls; @@ -296,6 +297,9 @@ osm_signal_t osm_qos_setup(osm_opensm_t * p_osm) cl_plock_excl_acquire(&p_osm->lock); + /* read QoS policy config file */ + osm_qos_parse_policy_file(&p_osm->subn); + p_tbl = &p_osm->subn.port_guid_tbl; p_next = cl_qmap_head(p_tbl); while (p_next != cl_qmap_end(p_tbl)) { diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c index 0f109a5..15aecf2 100644 --- a/opensm/opensm/osm_subnet.c +++ b/opensm/opensm/osm_subnet.c @@ -741,10 +741,6 @@ ib_api_status_t osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn) } fclose(opts_file); - /* read QoS policy config file */ - if (p_subn->opt.qos) - osm_qos_parse_policy_file(p_subn); - return IB_SUCCESS; } -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Sun Nov 18 04:07:37 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 14:07:37 +0200 Subject: [ofa-general] [PATCH] osm: improving error reporting function of the QoS parser Message-ID: <47402B09.70308@dev.mellanox.co.il> [this patch replaces "handle first syntax error in policy file" patch] Improving error reporting function of the QoS parser: - Making it static - Printing error message to stderr as well as to the log - Function now can get formatted string as an argument Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos_parser.y | 22 +++++++++++++++++----- 1 files changed, 17 insertions(+), 5 deletions(-) diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index 4738831..bf560aa 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -47,6 +47,7 @@ 
#include #include +#include #include #include #include @@ -125,8 +126,9 @@ static void __parser_add_map_to_port_map( cl_qmap_t * p_dmap, cl_map_t * p_smap); +static void __qos_parser_error(const char *format, ...); + extern char * __qos_parser_text; -extern void __qos_parser_error (char *s); extern int __qos_parser_lex (void); extern FILE * __qos_parser_in; extern int errno; @@ -1871,14 +1873,24 @@ int __qos_parser_wrap() /*************************************************** ***************************************************/ -void __qos_parser_error (char *s) +static void __qos_parser_error(const char *format, ...) { + char s[9999]; + va_list pvar; + OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); + + va_start(pvar, format); + vsprintf(s, format, pvar); + va_end(pvar); + osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, "__qos_parser_error: ERR AC05: " - "Syntax error (line %d:%d): %s. " - "Last text read: \"%s\"\n", - line_num, column_num, s, __parser_strip_white(__qos_parser_text)); + "Syntax error (line %d:%d): %s", + line_num, column_num, s); + fprintf(stderr, + "Error in QoS Policy File (line %d:%d): %s.\n", + line_num, column_num, s); OSM_LOG_EXIT(p_qos_parser_osm_log); } -- 1.5.1.4 From or.gerlitz at gmail.com Sun Nov 18 04:37:47 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Sun, 18 Nov 2007 07:37:47 -0500 Subject: [ofa-general] Re: disconnect issues/questions In-Reply-To: <000001c828e9$f0ad4f40$2ccc180a@amr.corp.intel.com> References: <15ddcffd0711142341g7b83d917t2fcc4b9a64e54f55@mail.gmail.com> <15ddcffd0711142358m55192a25qaa2e419045f6d0ea@mail.gmail.com> <000001c828e9$f0ad4f40$2ccc180a@amr.corp.intel.com> Message-ID: <15ddcffd0711180437s3f583227n136d6903dd4af5d8@mail.gmail.com> On Nov 17, 2007 2:17 AM, Sean Hefty wrote: > Correct - both sides must call disconnect. rdma_cm.7 and rdma_disconnect.3 man > pages call this out to some degree. 
OK, when I read the man pages it was not fully clear to me, I'll look there again and see if I can come up with a sentence that we could add to clarify this. > >B) will RDMA_CM_EVENT_DISCONNECTED event would --always-- be generated > >also for the side that called rdma_disconnect()? in both cases (yes > >and no), we need to document this. > > I need to verify this. OK, many thanks. Or. From kliteyn at dev.mellanox.co.il Sun Nov 18 04:45:10 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 14:45:10 +0200 Subject: [ofa-general] [PATCH] osm: QoS - adding simplified syntax for policy definition Message-ID: <474033D6.2000307@dev.mellanox.co.il> This patch adds simplified syntax for QoS definition in QoS Policy file. Using this syntax the administrator is able to define QoS policy per ULP and/or per Service ID and/or per partition (pkey). Here's an example of the policy file with the new syntax, which is added in a new section called qos-ulps: qos-ulps sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 sdp, port-num 10000-20000, 0xfffd : 2 sdp : 0 #default SL for SDP srp, target-port-guid 0x1234-0x1235 : 2 iser, port-num 0x3234-0x3235 : 4 #SL for iSER with specific target ports iser : 5 #default SL for iSER rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 rds : 0 #default SL for RDS ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 ipoib : 6 #default IPoIB partition - pkey=0x7FFF any, pkey 0x0ABC : 3 any, pkey 0x0ABD-0x0ABF,0x0BBD-0x0BBA : 4 any, service-id 0x6234 : 2 any, target-port-guid 0x2234-0xF235 : 2 default : 0 #default SL end-qos-ulps Since any section of the policy file is optional as long as basic rules of the file are kept, the above example can serve as a complete QoS policy file - short and clear.
I suspect that most of the administrators will use only this syntax, but if someone wishes to manage QoS in more detailed manner, there's always the rest of the policy file to do so. Signed-off-by: Yevgeny Kliteynik --- opensm/include/opensm/osm_qos_policy.h | 5 + opensm/opensm/osm_qos_parser.l | 46 +++ opensm/opensm/osm_qos_parser.y | 637 +++++++++++++++++++++++++++++++- opensm/opensm/osm_qos_policy.c | 79 ++++- 4 files changed, 756 insertions(+), 11 deletions(-) diff --git a/opensm/include/opensm/osm_qos_policy.h b/opensm/include/opensm/osm_qos_policy.h index 61fc325..d61c269 100644 --- a/opensm/include/opensm/osm_qos_policy.h +++ b/opensm/include/opensm/osm_qos_policy.h @@ -59,6 +59,11 @@ #define OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH 128 #define OSM_QOS_POLICY_DEFAULT_LEVEL_NAME "default" +#define OSM_QOS_POLICY_ULP_SDP_SERVICE_ID 0x0000000000010000ULL +#define OSM_QOS_POLICY_ULP_RDS_SERVICE_ID 0x0000000001060000ULL +#define OSM_QOS_POLICY_ULP_ISER_SERVICE_ID 0x0000000001060000ULL +#define OSM_QOS_POLICY_ULP_ISER_PORT 0x035C + #define OSM_QOS_POLICY_NODE_TYPE_CA (((uint8_t)1)< 15) + { + __qos_parser_error("illegal SL value"); + return 1; + } + __default_simple_qos_level.sl = (uint8_t)(*p_tmp_num); + __default_simple_qos_level.sl_set = TRUE; + free(p_tmp_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + } + + | qos_ulp_type_any_service list_of_ranges TK_DOTDOT { + /* "any, service-id ... : sl" - one instance of list of ranges */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("ULP rule doesn't have service ids"); + return 1; + } + + /* get all the service id ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_any_pkey list_of_ranges TK_DOTDOT { + /* "any, pkey ... 
: sl" - one instance of list of ranges */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("ULP rule doesn't have pkeys"); + return 1; + } + + /* get all the pkey ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_any_target_port_guid list_of_ranges TK_DOTDOT { + /* any, target-port-guid ... : sl */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_ULP_Targets_"); + p_current_port_group->use = strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the destination + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_sdp_default { + /* "sdp : sl" - default SL for SDP */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + range_arr[0][1] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID + 0xFFFF; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_sdp_port list_of_ranges TK_DOTDOT { + /* sdp with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if 
(!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("SDP ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + __qos_parser_error("SDP port number out of range"); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + } + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_rds_default { + /* "rds : sl" - default SL for RDS */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + range_arr[0][1] = OSM_QOS_POLICY_ULP_RDS_SERVICE_ID + 0xFFFF; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_rds_port list_of_ranges TK_DOTDOT { + /* rds with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("RDS ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + __qos_parser_error("SDP port number out of range"); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + } + + 
p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_iser_default { + /* "iSER : sl" - default SL for iSER */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = range_arr[0][1] = + OSM_QOS_POLICY_ULP_ISER_SERVICE_ID + OSM_QOS_POLICY_ULP_ISER_PORT; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_iser_port list_of_ranges TK_DOTDOT { + /* iser with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("iSER ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + __qos_parser_error("SDP port number out of range"); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; + } + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_srp_guid list_of_ranges TK_DOTDOT { + /* srp with target guids - this rule is similar + to writing 'any' ulp with target port guids */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("SRP ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_SRP_Targets_"); + p_current_port_group->use = 
strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the destination + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_ipoib_default { + /* ipoib w/o any pkeys (default pkey) */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = range_arr[0][1] = 0x7fff; + + /* + * Although we know that the default partition exists, + * we still need to validate it by checking that it has + * at least two full members. Otherwise IPoIB won't work. + */ + if (__validate_pkeys(range_arr, 1, TRUE)) + return 1; + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_ipoib_pkey list_of_ranges TK_DOTDOT { + /* ipoib with pkeys */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("IPoIB ULP rule doesn't have pkeys"); + return 1; + } + + /* get all the pkey ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + /* + * Validate pkeys. + * For IPoIB pkeys the validation is strict. + * If some problem would be found, parsing will + * be aborted with a proper error messages. 
+ */ + if (__validate_pkeys(range_arr, range_len, TRUE)) + return 1; + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = range_len; + + } qos_ulp_sl + ; + +qos_ulp_type_any_service: TK_ULP_ANY_SERVICE_ID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_pkey: TK_ULP_ANY_PKEY + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_target_port_guid: TK_ULP_ANY_TARGET_PORT_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_sdp_default: TK_ULP_SDP_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_sdp_port: TK_ULP_SDP_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_rds_default: TK_ULP_RDS_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_rds_port: TK_ULP_RDS_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_iser_default: TK_ULP_ISER_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_iser_port: TK_ULP_ISER_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_srp_guid: TK_ULP_SRP_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_ipoib_default: TK_ULP_IPOIB_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_ipoib_pkey: TK_ULP_IPOIB_PKEY + { __parser_ulp_match_rule_start(); }; + + +qos_ulp_sl: single_number { + /* get the SL for ULP rules */ + cl_list_iterator_t list_iterator; + uint64_t * p_tmp_num; + uint8_t sl; + + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_tmp_num = (uint64_t*)cl_list_obj(list_iterator); + if (*p_tmp_num > 15) + { + __qos_parser_error("illegal SL value"); + return 1; + } + + sl = (uint8_t)(*p_tmp_num); + free(p_tmp_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + + p_current_qos_match_rule->p_qos_level = + &osm_qos_policy_simple_qos_levels[sl]; + p_current_qos_match_rule->qos_level_name = + strdup(osm_qos_policy_simple_qos_levels[sl].name); + + if (__parser_ulp_match_rule_end()) + return 1; + } + ; + /* * port_group_entry values: * port_group_name @@ 
-1814,10 +2289,19 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) if (first_time) { first_time = FALSE; + __setup_simple_qos_levels(); + __setup_ulp_match_rules(); osm_log(p_qos_parser_osm_log, OSM_LOG_INFO, "osm_qos_parse_policy_file: Loading QoS policy file (%s)\n", p_subn->opt.qos_policy_file); } + else + /* + * ULP match rules list was emptied at the end of + * previous parsing iteration. + * What's left is to clear simple QoS levels. + */ + __clear_simple_qos_levels(); column_num = 1; line_num = 1; @@ -1843,6 +2327,9 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) goto Exit; } + /* add generated ULP match rules to the usual match rules */ + __process_ulp_match_rules(); + if (osm_qos_policy_validate(p_subn->p_qos_policy,p_qos_parser_osm_log)) { osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, @@ -2068,6 +2555,26 @@ static int __parser_match_rule_end() /*************************************************** ***************************************************/ +static void __parser_ulp_match_rule_start() +{ + p_current_qos_match_rule = osm_qos_policy_match_rule_create(); +} + +/*************************************************** + ***************************************************/ + +static int __parser_ulp_match_rule_end() +{ + CL_ASSERT(p_current_qos_match_rule->p_qos_level); + cl_list_insert_tail(&__ulp_match_rules, + p_current_qos_match_rule); + p_current_qos_match_rule = NULL; + return 0; +} + +/*************************************************** + ***************************************************/ + static void __parser_tmp_struct_init() { tmp_parser_struct.str[0] = '\0'; @@ -2110,6 +2617,73 @@ static void __parser_tmp_struct_destroy() /*************************************************** ***************************************************/ +#define __SIMPLE_QOS_LEVEL_NAME "SimpleQoSLevel_SL" +#define __SIMPLE_QOS_LEVEL_DEFAULT_NAME "SimpleQoSLevel_DEFAULT" + +static void __setup_simple_qos_levels() +{ + uint8_t i; + char 
tmp_buf[30]; + memset(osm_qos_policy_simple_qos_levels, 0, + sizeof(osm_qos_policy_simple_qos_levels)); + for (i = 0; i < 16; i++) + { + osm_qos_policy_simple_qos_levels[i].sl = i; + osm_qos_policy_simple_qos_levels[i].sl_set = TRUE; + sprintf(tmp_buf, "%s%u", __SIMPLE_QOS_LEVEL_NAME, i); + osm_qos_policy_simple_qos_levels[i].name = strdup(tmp_buf); + } + + memset(&__default_simple_qos_level, 0, + sizeof(__default_simple_qos_level)); + __default_simple_qos_level.name = + strdup(__SIMPLE_QOS_LEVEL_DEFAULT_NAME); +} + +/*************************************************** + ***************************************************/ + +static void __clear_simple_qos_levels() +{ + /* + * Simple QoS levels are static. + * What's left is to invalidate default simple QoS level. + */ + __default_simple_qos_level.sl_set = FALSE; +} + +/*************************************************** + ***************************************************/ + +static void __setup_ulp_match_rules() +{ + cl_list_construct(&__ulp_match_rules); + cl_list_init(&__ulp_match_rules, 10); +} + +/*************************************************** + ***************************************************/ + +static void __process_ulp_match_rules() +{ + cl_list_iterator_t list_iterator; + osm_qos_match_rule_t *p_qos_match_rule = NULL; + + list_iterator = cl_list_head(&__ulp_match_rules); + while (list_iterator != cl_list_end(&__ulp_match_rules)) + { + p_qos_match_rule = (osm_qos_match_rule_t *) cl_list_obj(list_iterator); + if (p_qos_match_rule) + cl_list_insert_tail(&p_qos_policy->qos_match_rules, + p_qos_match_rule); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&__ulp_match_rules); +} + +/*************************************************** + ***************************************************/ + static int OSM_CDECL __cmp_num_range( const void * p1, @@ -2394,3 +2968,64 @@ static void __parser_add_map_to_port_map( /*************************************************** 
***************************************************/ + +static int __validate_pkeys( uint64_t ** range_arr, + unsigned range_len, + boolean_t is_ipoib) +{ + unsigned i; + uint64_t pkey_64; + ib_net16_t pkey; + osm_prtn_t * p_prtn; + + if (!range_arr || !range_len) + return 0; + + for (i = 0; i < range_len; i++) { + for (pkey_64 = range_arr[i][0]; pkey_64 <= range_arr[i][1]; pkey_64++) { + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); + p_prtn = (osm_prtn_t *) + cl_qmap_get(&p_qos_policy->p_subn->prtn_pkey_tbl, pkey); + + if (p_prtn == (osm_prtn_t *)cl_qmap_end( + &p_qos_policy->p_subn->prtn_pkey_tbl)) + p_prtn = NULL; + + if (is_ipoib) { + /* + * Be very strict for IPoIB partition: + * - the partition for the pkey have to exist + * - it has to have at least 2 full members + */ + if (!p_prtn) { + __qos_parser_error("IPoIB partition, pkey 0x%04X - " + "partition doesn't exist", + cl_ntoh16(pkey)); + return 1; + } + else if (cl_map_count(&p_prtn->full_guid_tbl) < 2) { + __qos_parser_error("IPoIB partition, pkey 0x%04X - " + "partition has less than two full members", + cl_ntoh16(pkey)); + return 1; + } + } + else if (!p_prtn) { + /* + * For non-IPoIB pkey we just want to check that + * the relevant partition exists. + * And even if it doesn't, don't exit - just print + * error message and continue. 
+ */ + osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, + "__validate_pkeys: ERR AC02: pkey 0x%04X - " + "partition doesn't exist", + cl_ntoh16(pkey)); + } + } + } + return 0; +} + +/*************************************************** + ***************************************************/ diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c index 34f72b0..c0a7810 100644 --- a/opensm/opensm/osm_qos_policy.c +++ b/opensm/opensm/osm_qos_policy.c @@ -56,6 +56,8 @@ #include #include +extern osm_qos_level_t __default_simple_qos_level; + /*************************************************** ***************************************************/ @@ -780,8 +782,11 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, osm_qos_port_group_t *p_port_group = NULL; osm_qos_match_rule_t *p_qos_match_rule = NULL; char *str; - unsigned i; + unsigned i, j; int res = 0; + uint64_t pkey_64; + ib_net16_t pkey; + osm_prtn_t * p_prtn; OSM_LOG_ENTER(p_log, osm_qos_policy_validate); @@ -790,12 +795,20 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, p_qos_policy->p_default_qos_level = __qos_policy_get_qos_level_by_name(p_qos_policy, OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); if (!p_qos_policy->p_default_qos_level) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC10: " - "Default qos-level (%s) not defined.\n", - OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); - res = 1; - goto Exit; + /* There's no default QoS level in the usual qos-level section. 
+ Check whether the 'simple' default QoS level that can be + defined in the qos-ulp section exists */ + if (__default_simple_qos_level.sl_set) { + p_qos_policy->p_default_qos_level = &__default_simple_qos_level; + } + else { + osm_log(p_log, OSM_LOG_ERROR, + "osm_qos_policy_validate: ERR AC10: " + "Default qos-level (%s) not defined.\n", + OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); + res = 1; + goto Exit; + } } /* scan all the match rules, and fill the lists of pointers to @@ -813,9 +826,10 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, /* find the matching qos-level for each match-rule */ - p_qos_match_rule->p_qos_level = - __qos_policy_get_qos_level_by_name(p_qos_policy, - p_qos_match_rule->qos_level_name); + if (!p_qos_match_rule->p_qos_level) + p_qos_match_rule->p_qos_level = + __qos_policy_get_qos_level_by_name(p_qos_policy, + p_qos_match_rule->qos_level_name); if (!p_qos_match_rule->p_qos_level) { osm_log(p_log, OSM_LOG_ERROR, @@ -887,6 +901,51 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, } } + /* + * Scan all the pkeys in matching rule, and if the + * partition for these pkeys exists, set the SL + * according to the QoS Level. + * Warn if there's mismatch between QoS level SL + * and Partition SL. 
+ */ + + for (j = 0; j < p_qos_match_rule->pkey_range_len; j++) { + for ( pkey_64 = p_qos_match_rule->pkey_range_arr[i][0]; + pkey_64 <= p_qos_match_rule->pkey_range_arr[i][1]; + pkey_64++) { + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); + p_prtn = (osm_prtn_t *)cl_qmap_get( + &p_qos_policy->p_subn->prtn_pkey_tbl, pkey); + + if (p_prtn == (osm_prtn_t *)cl_qmap_end( + &p_qos_policy->p_subn->prtn_pkey_tbl)) { + /* partition for this pkey not found */ + osm_log(p_log, + OSM_LOG_ERROR, + "osm_qos_policy_validate: ERR AC14: " + "pkey 0x%04X in match rule - " + "partition doesn't exist\n", + cl_ntoh16(pkey)); + continue; + } + + if (p_qos_match_rule->p_qos_level->sl_set && + p_prtn->sl != p_qos_match_rule->p_qos_level->sl) { + /* overriding partition's SL */ + osm_log(p_log, + OSM_LOG_ERROR, + "osm_qos_policy_validate: ERR AC15: " + "pkey 0x%04X in match rule - " + "overriding partition SL (%u) " + "with QoS Level SL (%u)\n", + cl_ntoh16(pkey), + p_prtn->sl, + p_qos_match_rule->p_qos_level->sl); + p_prtn->sl = p_qos_match_rule->p_qos_level->sl; + } + } + } + /* done with the current match-rule */ match_rules_list_iterator = -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Sun Nov 18 04:49:28 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 14:49:28 +0200 Subject: [ofa-general] [PATCH] osm: QoS - adding simplified syntax for policy definition In-Reply-To: <474033D6.2000307@dev.mellanox.co.il> References: <474033D6.2000307@dev.mellanox.co.il> Message-ID: <474034D8.6060206@dev.mellanox.co.il> Yevgeny Kliteynik wrote: > This patch adds simplified syntax for QoS definition in QoS Policy file. > Using this syntax the administrator is able to define QoS policy per > ULP and/or per Service ID and/or per partition (pkey). 
> > Here's an example of the policy file with the new syntax, which is added > in a new section called qos-policy: Correction: it's called "qos-ulps" -- Yevgeny > qos-ulps > sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 > sdp, port-num 10000-20000, 0xfffd : 2 > sdp : 0 #default SL for SDP > srp, target-port-guid 0x1234-0x1235 : 2 > iser, port-num 0x3234-0x3235 : 4 #SL for iSER with specific target ports > iser : 5 #default SL for iSER > rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 > rds : 0 #default SL for RDS > ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 > ipoib : 6 #default IPoIB partition - pkey=0x7FFF > any, pkey 0x0ABC : 3 > any, pkey 0x0ABD-0x0ABF,0x0BBD-0x0BBA : 4 > any, service-id 0x6234 : 2 > any, target-port-guid 0x2234-0xF235 : 2 > default : 0 #default SL > end-qos-ulps > > Since any section of the policy file is optional as long as basic rules > of the file are kept, the above example can serve as a complete QoS > policy file - short and clear. > > I suspect that most of the administrators will use only this syntax, but > if someone wishes to manage QoS in a more detailed manner, there's always > the rest of the policy file to do so.
> > Signed-off-by: Yevgeny Kliteynik > --- > opensm/include/opensm/osm_qos_policy.h | 5 + > opensm/opensm/osm_qos_parser.l | 46 +++ > opensm/opensm/osm_qos_parser.y | 637 +++++++++++++++++++++++++++++++- > opensm/opensm/osm_qos_policy.c | 79 ++++- > 4 files changed, 756 insertions(+), 11 deletions(-) > > diff --git a/opensm/include/opensm/osm_qos_policy.h b/opensm/include/opensm/osm_qos_policy.h > index 61fc325..d61c269 100644 > --- a/opensm/include/opensm/osm_qos_policy.h > +++ b/opensm/include/opensm/osm_qos_policy.h > @@ -59,6 +59,11 @@ > #define OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH 128 > #define OSM_QOS_POLICY_DEFAULT_LEVEL_NAME "default" > > +#define OSM_QOS_POLICY_ULP_SDP_SERVICE_ID 0x0000000000010000ULL > +#define OSM_QOS_POLICY_ULP_RDS_SERVICE_ID 0x0000000001060000ULL > +#define OSM_QOS_POLICY_ULP_ISER_SERVICE_ID 0x0000000001060000ULL > +#define OSM_QOS_POLICY_ULP_ISER_PORT 0x035C > + > #define OSM_QOS_POLICY_NODE_TYPE_CA (((uint8_t)1)<<0) > #define OSM_QOS_POLICY_NODE_TYPE_SWITCH (((uint8_t)1)<<1) > #define OSM_QOS_POLICY_NODE_TYPE_ROUTER (((uint8_t)1)<<2) > diff --git a/opensm/opensm/osm_qos_parser.l b/opensm/opensm/osm_qos_parser.l > index bfc4637..41f8720 100644 > --- a/opensm/opensm/osm_qos_parser.l > +++ b/opensm/opensm/osm_qos_parser.l > @@ -105,19 +105,34 @@ static void reset_new_line_flags(); > #define START_RATE_LIMIT {in_single_number = TRUE;} /* single number */ > #define START_PACKET_LIFE {in_single_number = TRUE;} /* single number */ > > +#define START_ULP_DEFAULT {in_single_number = TRUE;} /* single number */ > +#define START_ULP_ANY {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ > +#define START_ULP_SDP_DEFAULT {in_single_number = TRUE;} /* single number */ > +#define START_ULP_SDP_PORT {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ > +#define START_ULP_RDS_DEFAULT {in_single_number = TRUE;} /* single number */ > +#define START_ULP_RDS_PORT {in_list_of_num_ranges = TRUE;} /*
comma-separated list of hex or dec num ranges */ > +#define START_ULP_ISER_DEFAULT {in_single_number = TRUE;} /* single number */ > +#define START_ULP_ISER_PORT {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ > +#define START_ULP_SRP_GUID {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ > +#define START_ULP_IPOIB_DEFAULT {in_single_number = TRUE;} /* single number */ > +#define START_ULP_IPOIB_PKEY {in_list_of_num_ranges = TRUE;} /* comma-separated list of hex or dec num ranges */ > > > %} > > %option nounput > > +QOS_ULPS_START qos\-ulps > +QOS_ULPS_END end\-qos\-ulps > PORT_GROUPS_START port\-groups > PORT_GROUPS_END end\-port\-groups > PORT_GROUP_START port\-group > PORT_GROUP_END end\-port\-group > +PORT_NUM port\-num > NAME name > USE use > PORT_GUID port\-guid > +TARGET_PORT_GUID target\-port\-guid > PORT_NAME port\-name > PARTITION partition > NODE_TYPE node\-type > @@ -167,10 +182,19 @@ SWITCH [Ss][Ww][Ii][Tt][Cc][Hh] > SELF [Ss][Ee][Ll][Ff] > ALL [Aa][Ll][Ll] > > +ULP_SDP [Ss][Dd][Pp] > +ULP_SRP [Ss][Rr][Pp] > +ULP_RDS [Rr][Dd][Ss] > +ULP_IPOIB [Ii][Pp][Oo][Ii][Bb] > +ULP_ISER [Ii][Ss][Ee][Rr] > +ULP_ANY [Aa][Nn][Yy] > +ULP_DEFAULT [Dd][Ee][Ff][Aa][Uu][Ll][Tt] > + > WHITE [ \t]+ > NEW_LINE \n > COMMENT \#.*\n > WHITE_DOTDOT_WHITE [ \t]*:[ \t]* > +WHITE_COMMA_WHITE [ \t]*,[ \t]* > QUOTED_TEXT \"[^\"]*\" > > %% > @@ -181,6 +205,9 @@ QUOTED_TEXT \"[^\"]*\" > {WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; } > {NEW_LINE} { SAVE_POS; RESET_NEW_LINE_FLAGS; } > > +{QOS_ULPS_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_ULPS_START; } > +{QOS_ULPS_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_QOS_ULPS_END; } > + > {PORT_GROUPS_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_PORT_GROUPS_START; } > {PORT_GROUPS_END} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_PORT_GROUPS_END; } > {PORT_GROUP_START} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; return TK_PORT_GROUP_START; } > 
@@ -242,6 +269,25 @@ QUOTED_TEXT \"[^\"]*\" > {SELF} { SAVE_POS; if (in_node_type) return TK_NODE_TYPE_SELF; __qos_parser_lval = strdup(__qos_parser_text); return TK_TEXT; } > {ALL} { SAVE_POS; if (in_node_type) return TK_NODE_TYPE_ALL; __qos_parser_lval = strdup(__qos_parser_text); return TK_TEXT; } > > +{ULP_DEFAULT}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_DEFAULT; return TK_ULP_DEFAULT; } > +{ULP_ANY}{WHITE_COMMA_WHITE}{SERVICE_ID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_SERVICE_ID; } > +{ULP_ANY}{WHITE_COMMA_WHITE}{PKEY} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_PKEY; } > +{ULP_ANY}{WHITE_COMMA_WHITE}{TARGET_PORT_GUID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_ANY; return TK_ULP_ANY_TARGET_PORT_GUID; } > + > +{ULP_SDP}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_SDP_DEFAULT; return TK_ULP_SDP_DEFAULT; } > +{ULP_SDP}{WHITE_COMMA_WHITE}{PORT_NUM} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_SDP_PORT; return TK_ULP_SDP_PORT; } > + > +{ULP_RDS}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_RDS_DEFAULT; return TK_ULP_RDS_DEFAULT; } > +{ULP_RDS}{WHITE_COMMA_WHITE}{PORT_NUM} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_RDS_PORT; return TK_ULP_RDS_PORT; } > + > +{ULP_ISER}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_SDP_DEFAULT; return TK_ULP_ISER_DEFAULT; } > +{ULP_ISER}{WHITE_COMMA_WHITE}{PORT_NUM} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_SDP_PORT; return TK_ULP_ISER_PORT; } > + > +{ULP_SRP}{WHITE_COMMA_WHITE}{TARGET_PORT_GUID} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_SRP_GUID; return TK_ULP_SRP_GUID; } > + > +{ULP_IPOIB}{WHITE_DOTDOT_WHITE} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_IPOIB_DEFAULT; return TK_ULP_IPOIB_DEFAULT; } > +{ULP_IPOIB}{WHITE_COMMA_WHITE}{PKEY} { SAVE_POS; HANDLE_IF_IN_DESCRIPTION; START_ULP_IPOIB_PKEY; return TK_ULP_IPOIB_PKEY; } > + > 0[xX][0-9a-fA-F]+ { > 
SAVE_POS; > __qos_parser_lval = strdup(__qos_parser_text); > diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y > index bf560aa..f136af4 100644 > --- a/opensm/opensm/osm_qos_parser.y > +++ b/opensm/opensm/osm_qos_parser.y > @@ -91,6 +91,9 @@ static int __parser_qos_level_end(); > static void __parser_match_rule_start(); > static int __parser_match_rule_end(); > > +static void __parser_ulp_match_rule_start(); > +static int __parser_ulp_match_rule_end(); > + > static void __rangelist2rangearr( > cl_list_t * p_list, > uint64_t ** * p_arr, > @@ -126,6 +129,15 @@ static void __parser_add_map_to_port_map( > cl_qmap_t * p_dmap, > cl_map_t * p_smap); > > +static int __validate_pkeys( > + uint64_t ** range_arr, > + unsigned range_len, > + boolean_t is_ipoib); > + > +static void __setup_simple_qos_levels(); > +static void __clear_simple_qos_levels(); > +static void __setup_ulp_match_rules(); > +static void __process_ulp_match_rules(); > static void __qos_parser_error(const char *format, ...); > > extern char * __qos_parser_text; > @@ -148,6 +160,20 @@ osm_qos_level_t * p_current_qos_level = NULL; > osm_qos_match_rule_t * p_current_qos_match_rule = NULL; > osm_log_t * p_qos_parser_osm_log; > > +/* 16 Simple QoS Levels - one for each SL */ > +static osm_qos_level_t osm_qos_policy_simple_qos_levels[16]; > + > +/* Default Simple QoS Level */ > +osm_qos_level_t __default_simple_qos_level; > + > +/* > + * List of match rules that will be generated by the > + * qos-ulp section. These rules are concatenated to > + * the end of the usual matching rules list at the > + * end of parsing. 
> + */ > +static cl_list_t __ulp_match_rules; > + > /***************************************************/ > > %} > @@ -159,6 +185,9 @@ osm_log_t * p_qos_parser_osm_log; > %token TK_ASTERISK > %token TK_TEXT > > +%token TK_QOS_ULPS_START > +%token TK_QOS_ULPS_END > + > %token TK_PORT_GROUPS_START > %token TK_PORT_GROUPS_END > %token TK_PORT_GROUP_START > @@ -220,6 +249,19 @@ osm_log_t * p_qos_parser_osm_log; > %token TK_NODE_TYPE_SELF > %token TK_NODE_TYPE_ALL > > +%token TK_ULP_DEFAULT > +%token TK_ULP_ANY_SERVICE_ID > +%token TK_ULP_ANY_PKEY > +%token TK_ULP_ANY_TARGET_PORT_GUID > +%token TK_ULP_SDP_DEFAULT > +%token TK_ULP_SDP_PORT > +%token TK_ULP_RDS_DEFAULT > +%token TK_ULP_RDS_PORT > +%token TK_ULP_ISER_DEFAULT > +%token TK_ULP_ISER_PORT > +%token TK_ULP_SRP_GUID > +%token TK_ULP_IPOIB_DEFAULT > +%token TK_ULP_IPOIB_PKEY > > %start head > > @@ -232,13 +274,42 @@ qos_policy_entries: /* empty */ > | qos_policy_entries qos_policy_entry > ; > > -qos_policy_entry: port_groups_section > +qos_policy_entry: qos_ulps_section > + | port_groups_section > | qos_setup_section > | qos_levels_section > | qos_match_rules_section > ; > > /* > + * Parsing qos-ulps: > + * ------------------- > + * qos-ulps > + * default : 0 #default SL > + * sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 > + * sdp, port-num 10000-20000 : 2 > + * sdp : 0 #default SL for SDP > + * srp, target-port-guid 0x1234 : 2 > + * rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 > + * rds, : 0 #default SL for RDS > + * iser, port-num 900 : 5 #SL for iSER where target port is 900 > + * iser : 4 #default SL for iSER > + * ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 > + * ipoib : 6 #default IPoIB partition - pkey=0x7FFF > + * any, service-id 0x6234 : 2 > + * any, pkey 0x0ABC : 3 > + * any, target-port-guid 0x0ABC-0xFFFFF : 6 > + * end-qos-ulps > + */ > + > +qos_ulps_section: TK_QOS_ULPS_START qos_ulps TK_QOS_ULPS_END > + ; > + > +qos_ulps: qos_ulp > + 
| qos_ulps qos_ulp > + ; > + > + /* > * Parsing port groups: > * ------------------- > * port-groups > @@ -536,6 +607,410 @@ qos_match_rule_entry: qos_match_rule_use > | qos_match_rule_pkey > ; > > + > + /* > + * Parsing qos-ulps: > + * ----------------- > + * default > + * sdp > + * sdp with port-num > + * rds > + * rds with port-num > + * srp with port-guid > + * iser > + * iser with port-num > + * ipoib > + * ipoib with pkey > + * any with service-id > + * any with pkey > + * any with target-port-guid > + */ > + > +qos_ulp: TK_ULP_DEFAULT single_number { > + /* parsing default ulp rule: "default: num" */ > + cl_list_iterator_t list_iterator; > + uint64_t * p_tmp_num; > + > + list_iterator = cl_list_head(&tmp_parser_struct.num_list); > + p_tmp_num = (uint64_t*)cl_list_obj(list_iterator); > + if (*p_tmp_num > 15) > + { > + __qos_parser_error("illegal SL value"); > + return 1; > + } > + __default_simple_qos_level.sl = (uint8_t)(*p_tmp_num); > + __default_simple_qos_level.sl_set = TRUE; > + free(p_tmp_num); > + cl_list_remove_all(&tmp_parser_struct.num_list); > + } > + > + | qos_ulp_type_any_service list_of_ranges TK_DOTDOT { > + /* "any, service-id ... : sl" - one instance of list of ranges */ > + uint64_t ** range_arr; > + unsigned range_len; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("ULP rule doesn't have service ids"); > + return 1; > + } > + > + /* get all the service id ranges */ > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + > + p_current_qos_match_rule->service_id_range_arr = range_arr; > + p_current_qos_match_rule->service_id_range_len = range_len; > + > + } qos_ulp_sl > + > + | qos_ulp_type_any_pkey list_of_ranges TK_DOTDOT { > + /* "any, pkey ... 
: sl" - one instance of list of ranges */ > + uint64_t ** range_arr; > + unsigned range_len; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("ULP rule doesn't have pkeys"); > + return 1; > + } > + > + /* get all the pkey ranges */ > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + > + p_current_qos_match_rule->pkey_range_arr = range_arr; > + p_current_qos_match_rule->pkey_range_len = range_len; > + > + } qos_ulp_sl > + > + | qos_ulp_type_any_target_port_guid list_of_ranges TK_DOTDOT { > + /* any, target-port-guid ... : sl */ > + uint64_t ** range_arr; > + unsigned range_len; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("ULP rule doesn't have port guids"); > + return 1; > + } > + > + /* create a new port group with these ports */ > + __parser_port_group_start(); > + > + p_current_port_group->name = strdup("_ULP_Targets_"); > + p_current_port_group->use = strdup("Generated from ULP rules"); > + > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + > + __parser_add_guid_range_to_port_map( > + &p_current_port_group->port_map, > + range_arr, > + range_len); > + > + /* add this port group to the destination > + groups of the current match rule */ > + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, > + p_current_port_group); > + > + __parser_port_group_end(); > + > + } qos_ulp_sl > + > + | qos_ulp_type_sdp_default { > + /* "sdp : sl" - default SL for SDP */ > + uint64_t ** range_arr = > + (uint64_t **)malloc(sizeof(uint64_t *)); > + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); > + range_arr[0][0] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; > + range_arr[0][1] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID + 0xFFFF; > + > + p_current_qos_match_rule->service_id_range_arr = range_arr; > + p_current_qos_match_rule->service_id_range_len = 1; > + > + } qos_ulp_sl > + > + | 
qos_ulp_type_sdp_port list_of_ranges TK_DOTDOT { > + /* sdp with port numbers */ > + uint64_t ** range_arr; > + unsigned range_len; > + unsigned i; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("SDP ULP rule doesn't have port numbers"); > + return 1; > + } > + > + /* get all the port ranges */ > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + /* now translate these port numbers into service ids */ > + for (i = 0; i < range_len; i++) > + { > + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) > + { > + __qos_parser_error("SDP port number out of range"); > + return 1; > + } > + range_arr[i][0] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; > + range_arr[i][1] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; > + } > + > + p_current_qos_match_rule->service_id_range_arr = range_arr; > + p_current_qos_match_rule->service_id_range_len = range_len; > + > + } qos_ulp_sl > + > + | qos_ulp_type_rds_default { > + /* "rds : sl" - default SL for RDS */ > + uint64_t ** range_arr = > + (uint64_t **)malloc(sizeof(uint64_t *)); > + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); > + range_arr[0][0] = OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; > + range_arr[0][1] = OSM_QOS_POLICY_ULP_RDS_SERVICE_ID + 0xFFFF; > + > + p_current_qos_match_rule->service_id_range_arr = range_arr; > + p_current_qos_match_rule->service_id_range_len = 1; > + > + } qos_ulp_sl > + > + | qos_ulp_type_rds_port list_of_ranges TK_DOTDOT { > + /* rds with port numbers */ > + uint64_t ** range_arr; > + unsigned range_len; > + unsigned i; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("RDS ULP rule doesn't have port numbers"); > + return 1; > + } > + > + /* get all the port ranges */ > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + /* now translate these port numbers into service ids */ > + for (i = 0; i < range_len; i++) > + { > + if 
(range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) > + { > + __qos_parser_error("SDP port number out of range"); > + return 1; > + } > + range_arr[i][0] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; > + range_arr[i][1] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; > + } > + > + p_current_qos_match_rule->service_id_range_arr = range_arr; > + p_current_qos_match_rule->service_id_range_len = range_len; > + > + } qos_ulp_sl > + > + | qos_ulp_type_iser_default { > + /* "iSER : sl" - default SL for iSER */ > + uint64_t ** range_arr = > + (uint64_t **)malloc(sizeof(uint64_t *)); > + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); > + range_arr[0][0] = range_arr[0][1] = > + OSM_QOS_POLICY_ULP_ISER_SERVICE_ID + OSM_QOS_POLICY_ULP_ISER_PORT; > + > + p_current_qos_match_rule->service_id_range_arr = range_arr; > + p_current_qos_match_rule->service_id_range_len = 1; > + > + } qos_ulp_sl > + > + | qos_ulp_type_iser_port list_of_ranges TK_DOTDOT { > + /* iser with port numbers */ > + uint64_t ** range_arr; > + unsigned range_len; > + unsigned i; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("iSER ULP rule doesn't have port numbers"); > + return 1; > + } > + > + /* get all the port ranges */ > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + /* now translate these port numbers into service ids */ > + for (i = 0; i < range_len; i++) > + { > + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) > + { > + __qos_parser_error("SDP port number out of range"); > + return 1; > + } > + range_arr[i][0] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; > + range_arr[i][1] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; > + } > + > + p_current_qos_match_rule->service_id_range_arr = range_arr; > + p_current_qos_match_rule->service_id_range_len = range_len; > + > + } qos_ulp_sl > + > + | qos_ulp_type_srp_guid list_of_ranges TK_DOTDOT { > + /* srp with target guids - this rule is similar > + to writing 'any' ulp with target 
port guids */ > + uint64_t ** range_arr; > + unsigned range_len; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("SRP ULP rule doesn't have port guids"); > + return 1; > + } > + > + /* create a new port group with these ports */ > + __parser_port_group_start(); > + > + p_current_port_group->name = strdup("_SRP_Targets_"); > + p_current_port_group->use = strdup("Generated from ULP rules"); > + > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + > + __parser_add_guid_range_to_port_map( > + &p_current_port_group->port_map, > + range_arr, > + range_len); > + > + /* add this port group to the destination > + groups of the current match rule */ > + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, > + p_current_port_group); > + > + __parser_port_group_end(); > + > + } qos_ulp_sl > + > + | qos_ulp_type_ipoib_default { > + /* ipoib w/o any pkeys (default pkey) */ > + uint64_t ** range_arr = > + (uint64_t **)malloc(sizeof(uint64_t *)); > + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); > + range_arr[0][0] = range_arr[0][1] = 0x7fff; > + > + /* > + * Although we know that the default partition exists, > + * we still need to validate it by checking that it has > + * at least two full members. Otherwise IPoIB won't work. > + */ > + if (__validate_pkeys(range_arr, 1, TRUE)) > + return 1; > + > + p_current_qos_match_rule->pkey_range_arr = range_arr; > + p_current_qos_match_rule->pkey_range_len = 1; > + > + } qos_ulp_sl > + > + | qos_ulp_type_ipoib_pkey list_of_ranges TK_DOTDOT { > + /* ipoib with pkeys */ > + uint64_t ** range_arr; > + unsigned range_len; > + > + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) > + { > + __qos_parser_error("IPoIB ULP rule doesn't have pkeys"); > + return 1; > + } > + > + /* get all the pkey ranges */ > + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, > + &range_arr, > + &range_len ); > + > + /* > + * Validate pkeys. 
> + * For IPoIB pkeys the validation is strict. > + * If some problem would be found, parsing will > + * be aborted with a proper error messages. > + */ > + if (__validate_pkeys(range_arr, range_len, TRUE)) > + return 1; > + > + p_current_qos_match_rule->pkey_range_arr = range_arr; > + p_current_qos_match_rule->pkey_range_len = range_len; > + > + } qos_ulp_sl > + ; > + > +qos_ulp_type_any_service: TK_ULP_ANY_SERVICE_ID > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_any_pkey: TK_ULP_ANY_PKEY > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_any_target_port_guid: TK_ULP_ANY_TARGET_PORT_GUID > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_sdp_default: TK_ULP_SDP_DEFAULT > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_sdp_port: TK_ULP_SDP_PORT > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_rds_default: TK_ULP_RDS_DEFAULT > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_rds_port: TK_ULP_RDS_PORT > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_iser_default: TK_ULP_ISER_DEFAULT > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_iser_port: TK_ULP_ISER_PORT > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_srp_guid: TK_ULP_SRP_GUID > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_ipoib_default: TK_ULP_IPOIB_DEFAULT > + { __parser_ulp_match_rule_start(); }; > + > +qos_ulp_type_ipoib_pkey: TK_ULP_IPOIB_PKEY > + { __parser_ulp_match_rule_start(); }; > + > + > +qos_ulp_sl: single_number { > + /* get the SL for ULP rules */ > + cl_list_iterator_t list_iterator; > + uint64_t * p_tmp_num; > + uint8_t sl; > + > + list_iterator = cl_list_head(&tmp_parser_struct.num_list); > + p_tmp_num = (uint64_t*)cl_list_obj(list_iterator); > + if (*p_tmp_num > 15) > + { > + __qos_parser_error("illegal SL value"); > + return 1; > + } > + > + sl = (uint8_t)(*p_tmp_num); > + free(p_tmp_num); > + cl_list_remove_all(&tmp_parser_struct.num_list); > + > + 
p_current_qos_match_rule->p_qos_level = > + &osm_qos_policy_simple_qos_levels[sl]; > + p_current_qos_match_rule->qos_level_name = > + strdup(osm_qos_policy_simple_qos_levels[sl].name); > + > + if (__parser_ulp_match_rule_end()) > + return 1; > + } > + ; > + > /* > * port_group_entry values: > * port_group_name > @@ -1814,10 +2289,19 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) > if (first_time) > { > first_time = FALSE; > + __setup_simple_qos_levels(); > + __setup_ulp_match_rules(); > osm_log(p_qos_parser_osm_log, OSM_LOG_INFO, > "osm_qos_parse_policy_file: Loading QoS policy file (%s)\n", > p_subn->opt.qos_policy_file); > } > + else > + /* > + * ULP match rules list was emptied at the end of > + * previous parsing iteration. > + * What's left is to clear simple QoS levels. > + */ > + __clear_simple_qos_levels(); > > column_num = 1; > line_num = 1; > @@ -1843,6 +2327,9 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) > goto Exit; > } > > + /* add generated ULP match rules to the usual match rules */ > + __process_ulp_match_rules(); > + > if (osm_qos_policy_validate(p_subn->p_qos_policy,p_qos_parser_osm_log)) > { > osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, > @@ -2068,6 +2555,26 @@ static int __parser_match_rule_end() > /*************************************************** > ***************************************************/ > > +static void __parser_ulp_match_rule_start() > +{ > + p_current_qos_match_rule = osm_qos_policy_match_rule_create(); > +} > + > +/*************************************************** > + ***************************************************/ > + > +static int __parser_ulp_match_rule_end() > +{ > + CL_ASSERT(p_current_qos_match_rule->p_qos_level); > + cl_list_insert_tail(&__ulp_match_rules, > + p_current_qos_match_rule); > + p_current_qos_match_rule = NULL; > + return 0; > +} > + > +/*************************************************** > + ***************************************************/ > + > static void 
__parser_tmp_struct_init() > { > tmp_parser_struct.str[0] = '\0'; > @@ -2110,6 +2617,73 @@ static void __parser_tmp_struct_destroy() > /*************************************************** > ***************************************************/ > > +#define __SIMPLE_QOS_LEVEL_NAME "SimpleQoSLevel_SL" > +#define __SIMPLE_QOS_LEVEL_DEFAULT_NAME "SimpleQoSLevel_DEFAULT" > + > +static void __setup_simple_qos_levels() > +{ > + uint8_t i; > + char tmp_buf[30]; > + memset(osm_qos_policy_simple_qos_levels, 0, > + sizeof(osm_qos_policy_simple_qos_levels)); > + for (i = 0; i < 16; i++) > + { > + osm_qos_policy_simple_qos_levels[i].sl = i; > + osm_qos_policy_simple_qos_levels[i].sl_set = TRUE; > + sprintf(tmp_buf, "%s%u", __SIMPLE_QOS_LEVEL_NAME, i); > + osm_qos_policy_simple_qos_levels[i].name = strdup(tmp_buf); > + } > + > + memset(&__default_simple_qos_level, 0, > + sizeof(__default_simple_qos_level)); > + __default_simple_qos_level.name = > + strdup(__SIMPLE_QOS_LEVEL_DEFAULT_NAME); > +} > + > +/*************************************************** > + ***************************************************/ > + > +static void __clear_simple_qos_levels() > +{ > + /* > + * Simple QoS levels are static. > + * What's left is to invalidate default simple QoS level. 
> + */ > + __default_simple_qos_level.sl_set = FALSE; > +} > + > +/*************************************************** > + ***************************************************/ > + > +static void __setup_ulp_match_rules() > +{ > + cl_list_construct(&__ulp_match_rules); > + cl_list_init(&__ulp_match_rules, 10); > +} > + > +/*************************************************** > + ***************************************************/ > + > +static void __process_ulp_match_rules() > +{ > + cl_list_iterator_t list_iterator; > + osm_qos_match_rule_t *p_qos_match_rule = NULL; > + > + list_iterator = cl_list_head(&__ulp_match_rules); > + while (list_iterator != cl_list_end(&__ulp_match_rules)) > + { > + p_qos_match_rule = (osm_qos_match_rule_t *) cl_list_obj(list_iterator); > + if (p_qos_match_rule) > + cl_list_insert_tail(&p_qos_policy->qos_match_rules, > + p_qos_match_rule); > + list_iterator = cl_list_next(list_iterator); > + } > + cl_list_remove_all(&__ulp_match_rules); > +} > + > +/*************************************************** > + ***************************************************/ > + > static int OSM_CDECL > __cmp_num_range( > const void * p1, > @@ -2394,3 +2968,64 @@ static void __parser_add_map_to_port_map( > > /*************************************************** > ***************************************************/ > + > +static int __validate_pkeys( uint64_t ** range_arr, > + unsigned range_len, > + boolean_t is_ipoib) > +{ > + unsigned i; > + uint64_t pkey_64; > + ib_net16_t pkey; > + osm_prtn_t * p_prtn; > + > + if (!range_arr || !range_len) > + return 0; > + > + for (i = 0; i < range_len; i++) { > + for (pkey_64 = range_arr[i][0]; pkey_64 <= range_arr[i][1]; pkey_64++) { > + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); > + p_prtn = (osm_prtn_t *) > + cl_qmap_get(&p_qos_policy->p_subn->prtn_pkey_tbl, pkey); > + > + if (p_prtn == (osm_prtn_t *)cl_qmap_end( > + &p_qos_policy->p_subn->prtn_pkey_tbl)) > + p_prtn = NULL; > + > + if (is_ipoib) { > + /* > + 
* Be very strict for IPoIB partition: > + * - the partition for the pkey have to exist > + * - it has to have at least 2 full members > + */ > + if (!p_prtn) { > + __qos_parser_error("IPoIB partition, pkey 0x%04X - " > + "partition doesn't exist", > + cl_ntoh16(pkey)); > + return 1; > + } > + else if (cl_map_count(&p_prtn->full_guid_tbl) < 2) { > + __qos_parser_error("IPoIB partition, pkey 0x%04X - " > + "partition has less than two full members", > + cl_ntoh16(pkey)); > + return 1; > + } > + } > + else if (!p_prtn) { > + /* > + * For non-IPoIB pkey we just want to check that > + * the relevant partition exists. > + * And even if it doesn't, don't exit - just print > + * error message and continue. > + */ > + osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, > + "__validate_pkeys: ERR AC02: pkey 0x%04X - " > + "partition doesn't exist", > + cl_ntoh16(pkey)); > + } > + } > + } > + return 0; > +} > + > +/*************************************************** > + ***************************************************/ > diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c > index 34f72b0..c0a7810 100644 > --- a/opensm/opensm/osm_qos_policy.c > +++ b/opensm/opensm/osm_qos_policy.c > @@ -56,6 +56,8 @@ > #include > #include > > +extern osm_qos_level_t __default_simple_qos_level; > + > /*************************************************** > ***************************************************/ > > @@ -780,8 +782,11 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, > osm_qos_port_group_t *p_port_group = NULL; > osm_qos_match_rule_t *p_qos_match_rule = NULL; > char *str; > - unsigned i; > + unsigned i, j; > int res = 0; > + uint64_t pkey_64; > + ib_net16_t pkey; > + osm_prtn_t * p_prtn; > > OSM_LOG_ENTER(p_log, osm_qos_policy_validate); > > @@ -790,12 +795,20 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, > p_qos_policy->p_default_qos_level = > __qos_policy_get_qos_level_by_name(p_qos_policy, OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); > if 
(!p_qos_policy->p_default_qos_level) { > - osm_log(p_log, OSM_LOG_ERROR, > - "osm_qos_policy_validate: ERR AC10: " > - "Default qos-level (%s) not defined.\n", > - OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); > - res = 1; > - goto Exit; > + /* There's no default QoS level in the usual qos-level section. > + Check whether the 'simple' default QoS level that can be > + defined in the qos-ulp section exists */ > + if (__default_simple_qos_level.sl_set) { > + p_qos_policy->p_default_qos_level = &__default_simple_qos_level; > + } > + else { > + osm_log(p_log, OSM_LOG_ERROR, > + "osm_qos_policy_validate: ERR AC10: " > + "Default qos-level (%s) not defined.\n", > + OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); > + res = 1; > + goto Exit; > + } > } > > /* scan all the match rules, and fill the lists of pointers to > @@ -813,9 +826,10 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, > > /* find the matching qos-level for each match-rule */ > > - p_qos_match_rule->p_qos_level = > - __qos_policy_get_qos_level_by_name(p_qos_policy, > - p_qos_match_rule->qos_level_name); > + if (!p_qos_match_rule->p_qos_level) > + p_qos_match_rule->p_qos_level = > + __qos_policy_get_qos_level_by_name(p_qos_policy, > + p_qos_match_rule->qos_level_name); > > if (!p_qos_match_rule->p_qos_level) { > osm_log(p_log, OSM_LOG_ERROR, > @@ -887,6 +901,51 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, > } > } > > + /* > + * Scan all the pkeys in matching rule, and if the > + * partition for these pkeys exists, set the SL > + * according to the QoS Level. > + * Warn if there's mismatch between QoS level SL > + * and Partition SL. 
> + */ > + > + for (j = 0; j < p_qos_match_rule->pkey_range_len; j++) { > + for ( pkey_64 = p_qos_match_rule->pkey_range_arr[i][0]; > + pkey_64 <= p_qos_match_rule->pkey_range_arr[i][1]; > + pkey_64++) { > + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); > + p_prtn = (osm_prtn_t *)cl_qmap_get( > + &p_qos_policy->p_subn->prtn_pkey_tbl, pkey); > + > + if (p_prtn == (osm_prtn_t *)cl_qmap_end( > + &p_qos_policy->p_subn->prtn_pkey_tbl)) { > + /* partition for this pkey not found */ > + osm_log(p_log, > + OSM_LOG_ERROR, > + "osm_qos_policy_validate: ERR AC14: " > + "pkey 0x%04X in match rule - " > + "partition doesn't exist\n", > + cl_ntoh16(pkey)); > + continue; > + } > + > + if (p_qos_match_rule->p_qos_level->sl_set && > + p_prtn->sl != p_qos_match_rule->p_qos_level->sl) { > + /* overriding partition's SL */ > + osm_log(p_log, > + OSM_LOG_ERROR, > + "osm_qos_policy_validate: ERR AC15: " > + "pkey 0x%04X in match rule - " > + "overriding partition SL (%u) " > + "with QoS Level SL (%u)\n", > + cl_ntoh16(pkey), > + p_prtn->sl, > + p_qos_match_rule->p_qos_level->sl); > + p_prtn->sl = p_qos_match_rule->p_qos_level->sl; > + } > + } > + } > + > /* done with the current match-rule */ > > match_rules_list_iterator = From kliteyn at dev.mellanox.co.il Sun Nov 18 05:04:00 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 15:04:00 +0200 Subject: [ofa-general] [PATCH] osm: cosmetics - removing obsolete comment Message-ID: <47403840.7070900@dev.mellanox.co.il> Removing obsolete comment. Since port names are converted to guids and added to the usual guid map of the port group, no need to treat the port names separately here. 
Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos_policy.c | 10 ---------- 1 files changed, 0 insertions(+), 10 deletions(-) diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c index c0a7810..b95e651 100644 --- a/opensm/opensm/osm_qos_policy.c +++ b/opensm/opensm/osm_qos_policy.c @@ -557,16 +557,6 @@ __qos_policy_is_port_in_group(osm_subn_t * p_subn, cl_qmap_end(&p_port_group->port_map)) return TRUE; - /* check whether this port's name matches any of group's names */ - - /* - * TODO: check port names - * - * char desc[IB_NODE_DESCRIPTION_SIZE + 1]; - * memcpy(desc, p_node->node_desc.description, IB_NODE_DESCRIPTION_SIZE); - * desc[IB_NODE_DESCRIPTION_SIZE] = '\0'; - */ - return FALSE; } /* __qos_policy_is_port_in_group() */ -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Sun Nov 18 05:05:54 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 15:05:54 +0200 Subject: [ofa-general] [PATCH] osm: Using new format of QoS parser error function Message-ID: <474038B2.3090205@dev.mellanox.co.il> Using new format of QoS parser error function Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos_parser.y | 11 ++++++++--- 1 files changed, 8 insertions(+), 3 deletions(-) diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index f136af4..9424462 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -1100,14 +1100,17 @@ port_group_port_name: port_group_port_name_start string_list { port_str = strrchr(tmp_str, '/'); if (!port_str || (strlen(port_str) < 3) || (port_str[1] != 'p' && port_str[1] != 'P')) { - yyerror("illegal port name"); + __qos_parser_error("'%s' - illegal port name", + tmp_str); free(tmp_str); cl_list_remove_all(&tmp_parser_struct.str_list); return 1; } if (!(port_num = strtoul(&port_str[2],NULL,0))) { - yyerror("illegal port number in port name"); + __qos_parser_error( + "'%s' - illegal port number in port name", + tmp_str); free(tmp_str); 
cl_list_remove_all(&tmp_parser_struct.str_list); return 1; @@ -1123,7 +1126,9 @@ port_group_port_name: port_group_port_name_start string_list { /* we found the node, now get the right port */ p_physp = osm_node_get_physp_ptr(p_node, port_num); if (!p_physp) { - yyerror("port number out of range in port name"); + __qos_parser_error( + "'%s' - port number out of range in port name", + tmp_str); free(tmp_str); cl_list_remove_all(&tmp_parser_struct.str_list); return 1; -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Sun Nov 18 05:19:19 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 15:19:19 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <000b01c82997$c4f8f670$0c02a8c0@amr.corp.intel.com> References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il><1195046307.14106.72.camel@hrosenstock-ws.xsigo.com><473B14A3.5090703@dev.mellanox.co.il> <1195056883.14106.90.camel@hrosenstock-ws.xsigo.com> <000b01c82997$c4f8f670$0c02a8c0@amr.corp.intel.com> Message-ID: <47403BD7.2020007@dev.mellanox.co.il> Sean Hefty wrote: >>> And as you've mentioned, some rules may overlap. For instance, >>> if the rule for all the RDS traffic will appear before the iSER >>> rule, then iSER requests will be caught by the RDS rule. >> That doesn't sound so good but I don't see a good alternative here other >> than for this case to put the iSER rule first. The other fallback is the >> more detailed configuration but RDS falls into the generic range >> category which is problematic in terms of this (and can't be >> differentiated by ServiceID unlike the other ULPs). > > I'm not overly familiar with the details of RDS, but event if the active side > uses a dynamic service ID, I would expect the passive side to use something well > known. Couldn't agree more. 
That's why I think that although there are cases where this simplified way of defining SLs per ULP plus target TCP port won't be useful, in many cases it would actually make the administrator's life easier. -- Yevgeny > - Sean > From Arkady.Kanevsky at netapp.com Sun Nov 18 07:05:18 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Sun, 18 Nov 2007 10:05:18 -0500 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: <47403BD7.2020007@dev.mellanox.co.il> References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il><1195046307.14106.72.camel@hrosenstock-ws.xsigo.com><473B14A3.5090703@dev.mellanox.co.il><1195056883.14106.90.camel@hrosenstock-ws.xsigo.com><000b01c82997$c4f8f670$0c02a8c0@amr.corp.intel.com> <47403BD7.2020007@dev.mellanox.co.il> Message-ID: I think providing QoS per vertical (that is ULP) is less prevalent than providing QoS per horizontal, that is application, which uses multiple ULPs. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Yevgeny Kliteynik [mailto:kliteyn at dev.mellanox.co.il] > Sent: Sunday, November 18, 2007 8:19 AM > To: Sean Hefty > Cc: gdror at mellanox.co.il; 'Hal Rosenstock'; > general at lists.openfabrics.org > Subject: Re: [ofa-general] RE: QoS for iSER > > Sean Hefty wrote: > >>> And as you've mentioned, some rules may overlap. For instance, if > >>> the rule for all the RDS traffic will appear before the > iSER rule, > >>> then iSER requests will be caught by the RDS rule. > >> That doesn't sound so good but I don't see a good alternative here > >> other than for this case to put the iSER rule first. The other > >> fallback is the more detailed configuration but RDS falls into the > >> generic range category which is problematic in terms of this (and > >> can't be differentiated by ServiceID unlike the other ULPs).
> > > > I'm not overly familiar with the details of RDS, but event if the > > active side uses a dynamic service ID, I would expect the > passive side > > to use something well known. > > Couldn't agree more. > That's why I think that although there are cases where this > simplified way of defining SLs per ULP plus target TCP port > won't be useful, in many cases it would actually make the > administrator's life easier. > > -- Yevgeny > > > - Sean > > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From kliteyn at dev.mellanox.co.il Sun Nov 18 07:21:24 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 17:21:24 +0200 Subject: [ofa-general] RE: QoS for iSER In-Reply-To: References: <473822FD.20208@Voltaire.COM><473ABBCE.8010109@dev.mellanox.co.il><1195046307.14106.72.camel@hrosenstock-ws.xsigo.com><473B14A3.5090703@dev.mellanox.co.il><1195056883.14106.90.camel@hrosenstock-ws.xsigo.com><000b01c82997$c4f8f670$0c02a8c0@amr.corp.intel.com> <47403BD7.2020007@dev.mellanox.co.il> Message-ID: <47405874.4000606@dev.mellanox.co.il> Kanevsky, Arkady wrote: > I think providing QoS per vertical (that is ULP) is less prevelant > as providing QoS per horizontal, that is application, which uses > miltiple ULPs. No problem, there's a way to define it per application too. Administrator will just have to define qos-level, and set of match-rules for this application that point to the defined qos-level. The cons here is that administrator will have to really understand what ULPs and which ports this application is using, but I can't think of any way to simplify this task. Perhaps in the future we will have examples for common applications. -- Yevgeny > Arkady Kanevsky email: arkady at netapp.com > Network Appliance Inc. 
phone: 781-768-5395 > 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 > Waltham, MA 02451 central phone: 781-768-5300 > > >> -----Original Message----- >> From: Yevgeny Kliteynik [mailto:kliteyn at dev.mellanox.co.il] >> Sent: Sunday, November 18, 2007 8:19 AM >> To: Sean Hefty >> Cc: gdror at mellanox.co.il; 'Hal Rosenstock'; >> general at lists.openfabrics.org >> Subject: Re: [ofa-general] RE: QoS for iSER >> >> Sean Hefty wrote: >>>>> And as you've mentioned, some rules may overlap. For instance, if >>>>> the rule for all the RDS traffic will appear before the >> iSER rule, >>>>> then iSER requests will be caught by the RDS rule. >>>> That doesn't sound so good but I don't see a good alternative here >>>> other than for this case to put the iSER rule first. The other >>>> fallback is the more detailed configuration but RDS falls into the >>>> generic range category which is problematic in terms of this (and >>>> can't be differentiated by ServiceID unlike the other ULPs). >>> I'm not overly familiar with the details of RDS, but event if the >>> active side uses a dynamic service ID, I would expect the >> passive side >>> to use something well known. >> Couldn't agree more. >> That's why I think that although there are cases where this >> simplified way of defining SLs per ULP plus target TCP port >> won't be useful, in many cases it would actually make the >> administrator's life easier. 
>> >> -- Yevgeny >> >>> - Sean >>> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general >> > From sashak at voltaire.com Sun Nov 18 09:05:25 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 18 Nov 2007 17:05:25 +0000 Subject: [ofa-general] [PATCH 1/3] opensm/libvendor: remove not used umad_ca field In-Reply-To: <1195332730.6727.47.camel@hrosenstock-ws.xsigo.com> References: <11951173841159-git-send-email-sashak@voltaire.com> <11951173841652-git-send-email-sashak@voltaire.com> <1195332730.6727.47.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071118170525.GA6245@sashak.voltaire.com> On 12:52 Sat 17 Nov , Hal Rosenstock wrote: > On Thu, 2007-11-15 at 11:03 +0200, Sasha Khapyorsky wrote: > > Remove not really used (but leaked) umad_ca field from ibumad vendor > > structure. 
> > > > Signed-off-by: Sasha Khapyorsky > > --- > > opensm/include/vendor/osm_vendor_ibumad.h | 1 - > > opensm/libvendor/osm_vendor_ibumad.c | 18 ------------------ > > 2 files changed, 0 insertions(+), 19 deletions(-) > > > > diff --git a/opensm/include/vendor/osm_vendor_ibumad.h b/opensm/include/vendor/osm_vendor_ibumad.h > > index 743b393..84fd21a 100644 > > --- a/opensm/include/vendor/osm_vendor_ibumad.h > > +++ b/opensm/include/vendor/osm_vendor_ibumad.h > > @@ -158,7 +158,6 @@ typedef struct _osm_vendor { > > osm_bind_handle_t agents[UMAD_CA_MAX_AGENTS]; > > char ca_names[OSM_UMAD_MAX_CAS][UMAD_CA_NAME_LEN]; > > vendor_match_tbl_t mtbl; > > - umad_ca_t umad_ca; > > umad_port_t umad_port; > > pthread_mutex_t cb_mutex; > > pthread_mutex_t match_tbl_mutex; > > diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c > > index 240a97b..9e186d5 100644 > > --- a/opensm/libvendor/osm_vendor_ibumad.c > > +++ b/opensm/libvendor/osm_vendor_ibumad.c > > @@ -703,24 +703,6 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, > > } > > > > /* Port found, try to open it */ > > - if (umad_get_ca(p_vend->ca_names[ca], &p_vend->umad_ca) < 0) { > > - osm_log(p_vend->p_log, OSM_LOG_ERROR, > > - "osm_vendor_open_port: ERR 542A: " > > - "umad_get_ca() failed\n"); > > - goto Exit; > > - } > > - > > - /* Validate that node is an IB node type */ > > - if (p_vend->umad_ca.node_type < 1 || p_vend->umad_ca.node_type > 3) { > > Where is this check done now ? This was put in to skip iWARP nodes. Have > you tried that configuration ? With iWARP? No. Actually if it is important we can put this check back and to use just local umad_ca variable which will be freed locally. Sounds good? 
Sasha From hrosenstock at xsigo.com Sun Nov 18 10:12:39 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Sun, 18 Nov 2007 10:12:39 -0800 Subject: [ofa-general] [PATCH 1/3] opensm/libvendor: remove not used umad_ca field In-Reply-To: <20071118170525.GA6245@sashak.voltaire.com> References: <11951173841159-git-send-email-sashak@voltaire.com> <11951173841652-git-send-email-sashak@voltaire.com> <1195332730.6727.47.camel@hrosenstock-ws.xsigo.com> <20071118170525.GA6245@sashak.voltaire.com> Message-ID: <1195409559.6727.92.camel@hrosenstock-ws.xsigo.com> On Sun, 2007-11-18 at 17:05 +0000, Sasha Khapyorsky wrote: > On 12:52 Sat 17 Nov , Hal Rosenstock wrote: > > On Thu, 2007-11-15 at 11:03 +0200, Sasha Khapyorsky wrote: > > > Remove not really used (but leaked) umad_ca field from ibumad vendor > > > structure. > > > > > > Signed-off-by: Sasha Khapyorsky > > > --- > > > opensm/include/vendor/osm_vendor_ibumad.h | 1 - > > > opensm/libvendor/osm_vendor_ibumad.c | 18 ------------------ > > > 2 files changed, 0 insertions(+), 19 deletions(-) > > > > > > diff --git a/opensm/include/vendor/osm_vendor_ibumad.h b/opensm/include/vendor/osm_vendor_ibumad.h > > > index 743b393..84fd21a 100644 > > > --- a/opensm/include/vendor/osm_vendor_ibumad.h > > > +++ b/opensm/include/vendor/osm_vendor_ibumad.h > > > @@ -158,7 +158,6 @@ typedef struct _osm_vendor { > > > osm_bind_handle_t agents[UMAD_CA_MAX_AGENTS]; > > > char ca_names[OSM_UMAD_MAX_CAS][UMAD_CA_NAME_LEN]; > > > vendor_match_tbl_t mtbl; > > > - umad_ca_t umad_ca; > > > umad_port_t umad_port; > > > pthread_mutex_t cb_mutex; > > > pthread_mutex_t match_tbl_mutex; > > > diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c > > > index 240a97b..9e186d5 100644 > > > --- a/opensm/libvendor/osm_vendor_ibumad.c > > > +++ b/opensm/libvendor/osm_vendor_ibumad.c > > > @@ -703,24 +703,6 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, > > > } > > > > > > /* Port found, try to open it */ > > > 
- if (umad_get_ca(p_vend->ca_names[ca], &p_vend->umad_ca) < 0) { > > > - osm_log(p_vend->p_log, OSM_LOG_ERROR, > > > - "osm_vendor_open_port: ERR 542A: " > > > - "umad_get_ca() failed\n"); > > > - goto Exit; > > > - } > > > - > > > - /* Validate that node is an IB node type */ > > > - if (p_vend->umad_ca.node_type < 1 || p_vend->umad_ca.node_type > 3) { > > > > Where is this check done now ? This was put in to skip iWARP nodes. Have > > you tried that configuration ? > > With iWARP? No. > > Actually if it is important It needs to work with a mix of iWARP and IB adapters in the machine. > we can put this check back and to use just > local umad_ca variable which will be freed locally. Sounds good? Yes, that looks like it would work fine. -- Hal > Sasha From kliteyn at mellanox.co.il Sat Nov 17 21:25:40 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 18 Nov 2007 07:25:40 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-18:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-17 OpenSM git rev = Thu_Nov_15_22:11:52_2007 [0f02129fba975d28b123104af97786ac578b3c90] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 
4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From sashak at voltaire.com Sun Nov 18 10:48:53 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 18 Nov 2007 18:48:53 +0000 Subject: [ofa-general] [PATCH] opensm/vendor: be sure that node has IB type In-Reply-To: <1195409559.6727.92.camel@hrosenstock-ws.xsigo.com> References: <11951173841159-git-send-email-sashak@voltaire.com> <11951173841652-git-send-email-sashak@voltaire.com> <1195332730.6727.47.camel@hrosenstock-ws.xsigo.com> <20071118170525.GA6245@sashak.voltaire.com> <1195409559.6727.92.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071118184853.GC6245@sashak.voltaire.com> This returns node type check back - be sure it is IB (and not iWARP). Signed-off-by: Sasha Khapyorsky --- opensm/libvendor/osm_vendor_ibumad.c | 21 +++++++++++++++++++++ 1 files changed, 21 insertions(+), 0 deletions(-) diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 331244f..462ad9c 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -604,6 +604,7 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, IN const ib_net64_t port_guid) { ib_net64_t portguids[OSM_UMAD_MAX_PORTS_PER_CA + 1]; + umad_ca_t umad_ca; int i = 0, umad_port_id = -1; char *name; int ca, r; @@ -650,6 +651,26 @@ osm_vendor_open_port(IN osm_vendor_t * const p_vend, goto Exit; _found: + /* Validate that node is an IB node type (not iWARP) */ + if (umad_get_ca(name, &umad_ca) < 0) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_open_port: ERR 542A: " + "umad_get_ca() failed\n"); + goto Exit; + } + + if (umad_ca.node_type < 1 || umad_ca.node_type > 3) { + osm_log(p_vend->p_log, OSM_LOG_ERROR, + "osm_vendor_open_port: ERR 542D: " + "Type %d of node \'%s\' is not an IB node type\n", + umad_ca.node_type, umad_ca.ca_name); + 
fprintf(stderr, "Type %d of node \'%s\' is not an IB node type\n", + umad_ca.node_type, umad_ca.ca_name); + umad_release_ca(&umad_ca); + goto Exit; + } + umad_release_ca(&umad_ca); + /* Port found, try to open it */ if (umad_get_port(name, i, &p_vend->umad_port) < 0) { osm_log(p_vend->p_log, OSM_LOG_ERROR, -- 1.5.3.rc2.29.gc4640f From sashak at voltaire.com Sun Nov 18 10:59:29 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 18 Nov 2007 18:59:29 +0000 Subject: [ofa-general] Re: [PATCH] osm: use OSM_DEFAULT_PARTITION_CONFIG_FILE instead of hard-coded string In-Reply-To: <47402905.6030302@dev.mellanox.co.il> References: <47402905.6030302@dev.mellanox.co.il> Message-ID: <20071118185929.GE6245@sashak.voltaire.com> On 13:59 Sun 18 Nov , Yevgeny Kliteynik wrote: > Use default partition file instead of a hard-coded string > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. Sasha From sashak at voltaire.com Sun Nov 18 11:11:42 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 18 Nov 2007 19:11:42 +0000 Subject: [ofa-general] Re: [PATCH] osm: moving call to QoS policy file parser to osm_qos_setup() In-Reply-To: <474029E7.605@dev.mellanox.co.il> References: <474029E7.605@dev.mellanox.co.il> Message-ID: <20071118191142.GF6245@sashak.voltaire.com> On 14:02 Sun 18 Nov , Yevgeny Kliteynik wrote: > When QoS policy is being set up, partitions have to already exist. Makes sense. > Moving call to policy file parser from osm_subn_rescan_conf_files() > to osm_qos_setup(). > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. 
Sasha From sashak at voltaire.com Sun Nov 18 11:17:13 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 18 Nov 2007 19:17:13 +0000 Subject: [ofa-general] Re: [PATCH] osm: improving error reporting function of the QoS parser In-Reply-To: <47402B09.70308@dev.mellanox.co.il> References: <47402B09.70308@dev.mellanox.co.il> Message-ID: <20071118191713.GG6245@sashak.voltaire.com> Hi Yevgeny, On 14:07 Sun 18 Nov , Yevgeny Kliteynik wrote: > [this patch replaces "handle first syntax error in policy file" patch] > > Improving error reporting function of the QoS parser: > - Making it static > - Printing error message to stderr as well as to the log > - Function now can get formatted string as an argument > > Signed-off-by: Yevgeny Kliteynik > --- > opensm/opensm/osm_qos_parser.y | 22 +++++++++++++++++----- > 1 files changed, 17 insertions(+), 5 deletions(-) > > diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y > index 4738831..bf560aa 100644 > --- a/opensm/opensm/osm_qos_parser.y > +++ b/opensm/opensm/osm_qos_parser.y > @@ -47,6 +47,7 @@ > > #include > #include > +#include > #include > #include > #include > @@ -125,8 +126,9 @@ static void __parser_add_map_to_port_map( > cl_qmap_t * p_dmap, > cl_map_t * p_smap); > > +static void __qos_parser_error(const char *format, ...); > + > extern char * __qos_parser_text; > -extern void __qos_parser_error (char *s); > extern int __qos_parser_lex (void); > extern FILE * __qos_parser_in; > extern int errno; > @@ -1871,14 +1873,24 @@ int __qos_parser_wrap() > /*************************************************** > ***************************************************/ > > -void __qos_parser_error (char *s) > +static void __qos_parser_error(const char *format, ...) > { > + char s[9999]; What about to have smaller buffer (let's say 256) and to use vsnprintf() below to prevent overflow? 
Sasha > + va_list pvar; > + > OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); > + > + va_start(pvar, format); > + vsprintf(s, format, pvar); > + va_end(pvar); > + > osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, > "__qos_parser_error: ERR AC05: " > - "Syntax error (line %d:%d): %s. " > - "Last text read: \"%s\"\n", > - line_num, column_num, s, __parser_strip_white(__qos_parser_text)); > + "Syntax error (line %d:%d): %s", > + line_num, column_num, s); > + fprintf(stderr, > + "Error in QoS Policy File (line %d:%d): %s.\n", > + line_num, column_num, s); > OSM_LOG_EXIT(p_qos_parser_osm_log); > } > > -- > 1.5.1.4 > > From sashak at voltaire.com Sun Nov 18 11:19:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Sun, 18 Nov 2007 19:19:35 +0000 Subject: [ofa-general] Re: [PATCH] osm: cosmetics - removing obsolete comment In-Reply-To: <47403840.7070900@dev.mellanox.co.il> References: <47403840.7070900@dev.mellanox.co.il> Message-ID: <20071118191935.GH6245@sashak.voltaire.com> On 15:04 Sun 18 Nov , Yevgeny Kliteynik wrote: > Removing obsolete comment. > Since port names are converted to guids and added to the usual > guid map of the port group, no need to treat the port names > separately here. > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. 
Sasha From kliteyn at dev.mellanox.co.il Sun Nov 18 13:06:09 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Sun, 18 Nov 2007 23:06:09 +0200 Subject: [ofa-general] Re: [PATCH] osm: improving error reporting function of the QoS parser In-Reply-To: <20071118191713.GG6245@sashak.voltaire.com> References: <47402B09.70308@dev.mellanox.co.il> <20071118191713.GG6245@sashak.voltaire.com> Message-ID: <4740A941.9090807@dev.mellanox.co.il> Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 14:07 Sun 18 Nov , Yevgeny Kliteynik wrote: >> [this patch replaces "handle first syntax error in policy file" patch] >> >> Improving error reporting function of the QoS parser: >> - Making it static >> - Printing error message to stderr as well as to the log >> - Function now can get formatted string as an argument >> >> Signed-off-by: Yevgeny Kliteynik >> --- >> opensm/opensm/osm_qos_parser.y | 22 +++++++++++++++++----- >> 1 files changed, 17 insertions(+), 5 deletions(-) >> >> diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y >> index 4738831..bf560aa 100644 >> --- a/opensm/opensm/osm_qos_parser.y >> +++ b/opensm/opensm/osm_qos_parser.y >> @@ -47,6 +47,7 @@ >> >> #include >> #include >> +#include >> #include >> #include >> #include >> @@ -125,8 +126,9 @@ static void __parser_add_map_to_port_map( >> cl_qmap_t * p_dmap, >> cl_map_t * p_smap); >> >> +static void __qos_parser_error(const char *format, ...); >> + >> extern char * __qos_parser_text; >> -extern void __qos_parser_error (char *s); >> extern int __qos_parser_lex (void); >> extern FILE * __qos_parser_in; >> extern int errno; >> @@ -1871,14 +1873,24 @@ int __qos_parser_wrap() >> /*************************************************** >> ***************************************************/ >> >> -void __qos_parser_error (char *s) >> +static void __qos_parser_error(const char *format, ...) 
>> { >> + char s[9999]; > > What about to have smaller buffer (let's say 256) and to use vsnprintf() > below to prevent overflow? Having smaller buffer won't matter - this function is not used as log function for errors, but as kind of "fatal" for QoS parser, so allocating a string only when there is a syntax error in the policy file doesn't look like an issue to me. However, vsnprintf() is probably a good idea. -- Yevgeny > Sasha > >> + va_list pvar; >> + >> OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); >> + >> + va_start(pvar, format); >> + vsprintf(s, format, pvar); >> + va_end(pvar); >> + >> osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, >> "__qos_parser_error: ERR AC05: " >> - "Syntax error (line %d:%d): %s. " >> - "Last text read: \"%s\"\n", >> - line_num, column_num, s, __parser_strip_white(__qos_parser_text)); >> + "Syntax error (line %d:%d): %s", >> + line_num, column_num, s); >> + fprintf(stderr, >> + "Error in QoS Policy File (line %d:%d): %s.\n", >> + line_num, column_num, s); >> OSM_LOG_EXIT(p_qos_parser_osm_log); >> } >> >> -- >> 1.5.1.4 >> >> > From keshetti85-student at yahoo.co.in Sun Nov 18 23:57:41 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Mon, 19 Nov 2007 13:27:41 +0530 Subject: [ofa-general] Failure in ibdmchk utility with LMC > 0 Message-ID: <829ded920711182357r3897e56crca9419f1873fa5ef@mail.gmail.com> When I run ibdmchk with LMC=1 on my cluster it is failing with below warnings. 
<=================================================> -W- Found - un-assigned hops for node:U1 to lid:3) -W- Found - un-assigned hops for node:U1 to lid:7) -W- Found - un-assigned hops for node:U1 to lid:9) -W- Found - un-assigned hops for node:U1 to lid:11) -W- Found - un-assigned hops for node:U1 to lid:13) -W- Found - un-assigned hops for node:U1 to lid:15) -W- Found - un-assigned hops for node:U1 to lid:17) -W- Found - un-assigned hops for node:U1 to lid:19) -W- Found - un-assigned hops for node:U1 to lid:21) -W- Found - un-assigned hops for node:U1 to lid:23) -W- Found - un-assigned hops for node:U2 to lid:3) -W- Found - un-assigned hops for node:U2 to lid:7) -W- Found - un-assigned hops for node:U2 to lid:9) -W- Found - un-assigned hops for node:U2 to lid:11) -W- Found - un-assigned hops for node:U2 to lid:13) -W- Found - un-assigned hops for node:U2 to lid:15) -W- Found - un-assigned hops for node:U2 to lid:17) -W- Found - un-assigned hops for node:U2 to lid:19) -W- Found - un-assigned hops for node:U2 to lid:21) -W- Found - un-assigned hops for node:U2 to lid:23) -W- Found - un-assigned hops for node:U3 to lid:3) -W- Found - un-assigned hops for node:U3 to lid:7) -W- Found - un-assigned hops for node:U3 to lid:9) -W- Found - un-assigned hops for node:U3 to lid:11) -W- Found - un-assigned hops for node:U3 to lid:13) -W- Found - un-assigned hops for node:U3 to lid:15) -W- Found - un-assigned hops for node:U3 to lid:17) -W- Found - un-assigned hops for node:U3 to lid:19) -W- Found - un-assigned hops for node:U3 to lid:21) -W- Found - un-assigned hops for node:U3 to lid:23) ------------------ NUM ALTERNATE PORTS TO CA HISTOGRAM -------------------- Describes how many out ports on every switch have the same Min Hop to each target CA. Or in other words how many alternate routes are possible at the switch level. This is useful to show the symmetry of the cluster. 
OUT-PORTS NUM-SW-LID-PAIRS --------------------------------------------------------------------------- -W- Found - un-reachable lids. -E- Fail to update Min Hops Tables. <=================================================> Can anyone of you explain me what do they mean? (Please find contents of osm.fdbs, osm-subnet.list and complete output of the ibdmchk in the attachments.) opensm is running as, > opensm -l 1 -D 0x43 & -Mahesh. -------------- next part -------------- A non-text attachment was scrubbed... Name: osm.fdbs Type: application/octet-stream Size: 2103 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: osm-subnet.lst Type: application/octet-stream Size: 9426 bytes Desc: not available URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: ibdmchk_output Type: application/octet-stream Size: 6009 bytes Desc: not available URL: From elenafg at mailrus.ru Mon Nov 19 01:55:50 2007 From: elenafg at mailrus.ru (Elena) Date: Mon, 19 Nov 2007 12:55:50 +0300 Subject: [ofa-general] Re: Re[1]: Message-ID: <20071119095551.CD893700009C@mwinf6006.orange.nl> Hi, My name is Elena, I have 30-years and I live in small city in Russian province. I have a 6-years daughter, her father abandoned us and we live with my mother. Recently my mother lost job due to old age and our situation became very difficult. The prices for gas and electricity became very high in the last months and we cannot use it to heat our home anymore. The winter is coming and temperature is very cold here already. The radio say it will be till minus 30 degrees in the near weeks. We do not know what to do and we very afraid. The only way for us to heat our home is to use portable wood burning stove which gives heat with burning wood or coal. We have a lot of wood in our region, therefore this way is very good for us and it will heat our home the whole winter with minimal charges. 
I work in library and after my job I allowed to use computer. I finded your address in internet and may be you can help us. We need portable stove, but we cannot buy it because it cost 6980 roubles (equivalent of 197 Euros) but my salary is very small. May be you have any used wood burning stove which you don't use anymore, then we would be very grateful to you if you can donate it to us and organize transport of its to our address. This stoves are different, and they weight between 60-150kg. I would like to send you picture of us, but unfortunately I don't have digital picture which can be transmited by computer. I send you kind greetings and hope to hear from you soon. Elena. From vlad at lists.openfabrics.org Mon Nov 19 02:48:54 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 19 Nov 2007 02:48:54 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071119-0200 daily build status Message-ID: <20071119104854.E769AE603B6@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.16 Passed on powerpc 
with linux-2.6.14 Passed on ppc64 with linux-2.6.18 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.17 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: Build failed on x86_64 with linux-2.6.16.43-0.3-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.43-0.3-smp_x86_64_check/drivers/infiniband] Error 2 
make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.43-0.3-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.43-0.3-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.16.21-0.8-smp Log: /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-smp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-smp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.16.21-0.8-smp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ppc64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c: In 
function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_ppc64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_ppc64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ppc64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-42.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-42.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-42.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-42.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.18-8.el5 Log: /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.18-8.el5_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.18-8.el5' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on x86_64 with linux-2.6.9-55.ELsmp Log: /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': 
/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-55.ELsmp_x86_64_check/drivers/infiniband] Error 2 make[1]: *** [_module_/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.9-55.ELsmp_x86_64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/x86_64/linux-2.6.9-55.ELsmp' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- Build failed on ia64 with linux-2.6.16.21-0.8-default Log: /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:253: warning: assignment makes pointer from integer without a cast /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c: In function 'iser_data_buf_aligned_len': /home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.c:311: warning: passing argument 2 of 'ib_sg_dma_address' makes pointer from integer without a cast make[4]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser/iser_memory.o] Error 1 make[3]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband/ulp/iser] Error 2 make[2]: *** [/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-default_ia64_check/drivers/infiniband] Error 2 make[1]: *** 
[_module_/home/vlad/tmp/ofa_1_3_kernel-20071119-0200_linux-2.6.16.21-0.8-default_ia64_check] Error 2 make[1]: Leaving directory `/home/vlad/kernel.org/ia64/linux-2.6.16.21-0.8-default' make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From kliteyn at dev.mellanox.co.il Mon Nov 19 04:34:27 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 19 Nov 2007 14:34:27 +0200 Subject: [ofa-general] [PATCH v2] osm: improving error reporting function of the QoS parser Message-ID: <474182D3.6050708@dev.mellanox.co.il> Improving error reporting function of the QoS parser: - Making it static - Printing error message to stderr as well as to the log - Function now can get formatted string as an argument Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos_parser.y | 22 +++++++++++++++++----- 1 files changed, 17 insertions(+), 5 deletions(-) diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index 4738831..ca696d7 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -47,6 +47,7 @@ #include #include +#include #include #include #include @@ -125,8 +126,9 @@ static void __parser_add_map_to_port_map( cl_qmap_t * p_dmap, cl_map_t * p_smap); +static void __qos_parser_error(const char *format, ...); + extern char * __qos_parser_text; -extern void __qos_parser_error (char *s); extern int __qos_parser_lex (void); extern FILE * __qos_parser_in; extern int errno; @@ -1871,14 +1873,24 @@ int __qos_parser_wrap() /*************************************************** ***************************************************/ -void __qos_parser_error (char *s) +static void __qos_parser_error(const char *format, ...) 
{ + char s[256]; + va_list pvar; + OSM_LOG_ENTER(p_qos_parser_osm_log, __qos_parser_error); + + va_start(pvar, format); + vsnprintf(s, 256, format, pvar); + va_end(pvar); + osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, "__qos_parser_error: ERR AC05: " - "Syntax error (line %d:%d): %s. " - "Last text read: \"%s\"\n", - line_num, column_num, s, __parser_strip_white(__qos_parser_text)); + "Syntax error (line %d:%d): %s", + line_num, column_num, s); + fprintf(stderr, + "Error in QoS Policy File (line %d:%d): %s.\n", + line_num, column_num, s); OSM_LOG_EXIT(p_qos_parser_osm_log); } -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Mon Nov 19 04:44:14 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 19 Nov 2007 14:44:14 +0200 Subject: [ofa-general] [PATCH] osm: broken logic when scanning subnet for PIR request Message-ID: <4741851E.7010308@dev.mellanox.co.il> Fixing broken logic when scanning subnet for PIR request, that was causing some ports to be scanned twice if two ports of the same node belong to the same fabric. Fixing to iterate through nodes instead of ports. 
Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_sa_portinfo_record.c | 17 +++++++++-------- 1 files changed, 9 insertions(+), 8 deletions(-) diff --git a/opensm/opensm/osm_sa_portinfo_record.c b/opensm/opensm/osm_sa_portinfo_record.c index 8b107de..22869e6 100644 --- a/opensm/opensm/osm_sa_portinfo_record.c +++ b/opensm/opensm/osm_sa_portinfo_record.c @@ -461,7 +461,7 @@ __osm_sa_pir_check_physp(IN osm_pir_rcv_t * const p_rcv, **********************************************************************/ static void __osm_sa_pir_by_comp_mask(IN osm_pir_rcv_t * const p_rcv, - IN const osm_port_t * const p_port, + IN const osm_node_t * const p_node, osm_pir_search_ctxt_t * const p_ctxt) { const ib_portinfo_record_t *p_rcvd_rec; @@ -477,12 +477,12 @@ __osm_sa_pir_by_comp_mask(IN osm_pir_rcv_t * const p_rcv, comp_mask = p_ctxt->comp_mask; p_req_physp = p_ctxt->p_req_physp; - num_ports = osm_node_get_num_physp(p_port->p_node); + num_ports = osm_node_get_num_physp(p_node); if (comp_mask & IB_PIR_COMPMASK_PORTNUM) { if (p_rcvd_rec->port_num < num_ports) { p_physp = - osm_node_get_physp_ptr(p_port->p_node, + osm_node_get_physp_ptr(p_node, p_rcvd_rec->port_num); /* Check that the p_physp is valid, and that the p_physp and the p_req_physp share a pkey. 
*/ @@ -495,7 +495,7 @@ __osm_sa_pir_by_comp_mask(IN osm_pir_rcv_t * const p_rcv, } else { for (port_num = 0; port_num < num_ports; port_num++) { p_physp = - osm_node_get_physp_ptr(p_port->p_node, port_num); + osm_node_get_physp_ptr(p_node, port_num); if (!osm_physp_is_valid(p_physp)) continue; @@ -518,10 +518,10 @@ static void __osm_sa_pir_by_comp_mask_cb(IN cl_map_item_t * const p_map_item, IN void *context) { - const osm_port_t *const p_port = (osm_port_t *) p_map_item; + const osm_node_t *const p_node = (osm_node_t *) p_map_item; osm_pir_search_ctxt_t *const p_ctxt = (osm_pir_search_ctxt_t *) context; - __osm_sa_pir_by_comp_mask(p_ctxt->p_rcv, p_port, p_ctxt); + __osm_sa_pir_by_comp_mask(p_ctxt->p_rcv, p_node, p_ctxt); } /********************************************************************** @@ -641,9 +641,10 @@ void osm_pir_rcv_process(IN void *ctx, IN void *data) if (status == IB_SUCCESS) { if (p_port) - __osm_sa_pir_by_comp_mask(p_rcv, p_port, &context); + __osm_sa_pir_by_comp_mask(p_rcv, p_port->p_node, + &context); else - cl_qmap_apply_func(&p_rcv->p_subn->port_guid_tbl, + cl_qmap_apply_func(&p_rcv->p_subn->node_guid_tbl, __osm_sa_pir_by_comp_mask_cb, &context); } -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Mon Nov 19 05:02:23 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 19 Nov 2007 15:02:23 +0200 Subject: [ofa-general] [PATCH v2] osm: Using new format of QoS parser error function Message-ID: <4741895F.2090809@dev.mellanox.co.il> Using new format of QoS parser error function Signed-off-by: Yevgeny Kliteynik --- opensm/opensm/osm_qos_parser.y | 11 ++++++++--- 1 files changed, 8 insertions(+), 3 deletions(-) diff --git a/opensm/opensm/osm_qos_parser.y b/opensm/opensm/osm_qos_parser.y index ca696d7..f078a86 100644 --- a/opensm/opensm/osm_qos_parser.y +++ b/opensm/opensm/osm_qos_parser.y @@ -625,14 +625,17 @@ port_group_port_name: port_group_port_name_start string_list { port_str = strrchr(tmp_str, '/'); if (!port_str || 
(strlen(port_str) < 3) || (port_str[1] != 'p' && port_str[1] != 'P')) { - yyerror("illegal port name"); + __qos_parser_error("'%s' - illegal port name", + tmp_str); free(tmp_str); cl_list_remove_all(&tmp_parser_struct.str_list); return 1; } if (!(port_num = strtoul(&port_str[2],NULL,0))) { - yyerror("illegal port number in port name"); + __qos_parser_error( + "'%s' - illegal port number in port name", + tmp_str); free(tmp_str); cl_list_remove_all(&tmp_parser_struct.str_list); return 1; @@ -648,7 +651,9 @@ port_group_port_name: port_group_port_name_start string_list { /* we found the node, now get the right port */ p_physp = osm_node_get_physp_ptr(p_node, port_num); if (!p_physp) { - yyerror("port number out of range in port name"); + __qos_parser_error( + "'%s' - port number out of range in port name", + tmp_str); free(tmp_str); cl_list_remove_all(&tmp_parser_struct.str_list); return 1; -- 1.5.1.4 From kliteyn at dev.mellanox.co.il Mon Nov 19 05:04:42 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 19 Nov 2007 15:04:42 +0200 Subject: [ofa-general] [PATCH v2] osm: QoS - adding simplified syntax for policy definition Message-ID: <474189EA.3070500@dev.mellanox.co.il> This patch adds simplified syntax for QoS definition in QoS Policy file. Using this syntax the administrator is able to define QoS policy per ULP and/or per Service ID and/or per partition (pkey). 
Here's an example of the policy file with the new syntax, which is added in a new section called qos-policy: qos-ulps sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 sdp, port-num 10000-20000, 0xfffd : 2 sdp : 0 #default SL for SDP srp, target-port-guid 0x1234-0x1235 : 2 iser, port-num 0x3234-0x3235 : 4 #SL for iSER with specific target ports iser : 5 #default SL for iSER rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 rds : 0 #default SL for RDS ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 ipoib : 6 #default IPoIB partition - pkey=0x7FFF any, pkey 0x0ABC : 3 any, pkey 0x0ABD-0x0ABF,0x0BBD-0x0BBA : 4 any, service-id 0x6234 : 2 any, target-port-guid 0x2234-0xF235 : 2 default : 0 #default SL end-qos-ulps Since any section of the policy file is optional as long as the basic rules of the file are kept, the above example can serve as a complete QoS policy file - short and clear. I suspect that most of the administrators will use only this syntax, but if someone wishes to manage QoS in a more detailed manner, there's always the rest of the policy file to do so. 
Signed-off-by: Yevgeny Kliteynik --- opensm/include/opensm/osm_qos_policy.h | 5 + opensm/opensm/osm_qos_parser.l | 46 +++ opensm/opensm/osm_qos_parser.y | 637 +++++++++++++++++++++++++++++++- opensm/opensm/osm_qos_policy.c | 79 ++++- 4 files changed, 756 insertions(+), 11 deletions(-) diff --git a/opensm/include/opensm/osm_qos_policy.h b/opensm/include/opensm/osm_qos_policy.h index 61fc325..d61c269 100644 --- a/opensm/include/opensm/osm_qos_policy.h +++ b/opensm/include/opensm/osm_qos_policy.h @@ -59,6 +59,11 @@ #define OSM_QOS_POLICY_MAX_PORTS_ON_SWITCH 128 #define OSM_QOS_POLICY_DEFAULT_LEVEL_NAME "default" +#define OSM_QOS_POLICY_ULP_SDP_SERVICE_ID 0x0000000000010000ULL +#define OSM_QOS_POLICY_ULP_RDS_SERVICE_ID 0x0000000001060000ULL +#define OSM_QOS_POLICY_ULP_ISER_SERVICE_ID 0x0000000001060000ULL +#define OSM_QOS_POLICY_ULP_ISER_PORT 0x035C + #define OSM_QOS_POLICY_NODE_TYPE_CA (((uint8_t)1)< 15) + { + __qos_parser_error("illegal SL value"); + return 1; + } + __default_simple_qos_level.sl = (uint8_t)(*p_tmp_num); + __default_simple_qos_level.sl_set = TRUE; + free(p_tmp_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + } + + | qos_ulp_type_any_service list_of_ranges TK_DOTDOT { + /* "any, service-id ... : sl" - one instance of list of ranges */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("ULP rule doesn't have service ids"); + return 1; + } + + /* get all the service id ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_any_pkey list_of_ranges TK_DOTDOT { + /* "any, pkey ... 
: sl" - one instance of list of ranges */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("ULP rule doesn't have pkeys"); + return 1; + } + + /* get all the pkey ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_any_target_port_guid list_of_ranges TK_DOTDOT { + /* any, target-port-guid ... : sl */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_ULP_Targets_"); + p_current_port_group->use = strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the destination + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_sdp_default { + /* "sdp : sl" - default SL for SDP */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + range_arr[0][1] = OSM_QOS_POLICY_ULP_SDP_SERVICE_ID + 0xFFFF; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_sdp_port list_of_ranges TK_DOTDOT { + /* sdp with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if 
(!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("SDP ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + __qos_parser_error("SDP port number out of range"); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_SDP_SERVICE_ID; + } + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_rds_default { + /* "rds : sl" - default SL for RDS */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + range_arr[0][1] = OSM_QOS_POLICY_ULP_RDS_SERVICE_ID + 0xFFFF; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_rds_port list_of_ranges TK_DOTDOT { + /* rds with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("RDS ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + __qos_parser_error("SDP port number out of range"); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_RDS_SERVICE_ID; + } + + 
p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_iser_default { + /* "iSER : sl" - default SL for iSER */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = range_arr[0][1] = + OSM_QOS_POLICY_ULP_ISER_SERVICE_ID + OSM_QOS_POLICY_ULP_ISER_PORT; + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_iser_port list_of_ranges TK_DOTDOT { + /* iser with port numbers */ + uint64_t ** range_arr; + unsigned range_len; + unsigned i; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("iSER ULP rule doesn't have port numbers"); + return 1; + } + + /* get all the port ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + /* now translate these port numbers into service ids */ + for (i = 0; i < range_len; i++) + { + if (range_arr[i][0] > 0xFFFF || range_arr[i][1] > 0xFFFF) + { + __qos_parser_error("SDP port number out of range"); + return 1; + } + range_arr[i][0] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; + range_arr[i][1] += OSM_QOS_POLICY_ULP_ISER_SERVICE_ID; + } + + p_current_qos_match_rule->service_id_range_arr = range_arr; + p_current_qos_match_rule->service_id_range_len = range_len; + + } qos_ulp_sl + + | qos_ulp_type_srp_guid list_of_ranges TK_DOTDOT { + /* srp with target guids - this rule is similar + to writing 'any' ulp with target port guids */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("SRP ULP rule doesn't have port guids"); + return 1; + } + + /* create a new port group with these ports */ + __parser_port_group_start(); + + p_current_port_group->name = strdup("_SRP_Targets_"); + p_current_port_group->use = 
strdup("Generated from ULP rules"); + + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + __parser_add_guid_range_to_port_map( + &p_current_port_group->port_map, + range_arr, + range_len); + + /* add this port group to the destination + groups of the current match rule */ + cl_list_insert_tail(&p_current_qos_match_rule->destination_group_list, + p_current_port_group); + + __parser_port_group_end(); + + } qos_ulp_sl + + | qos_ulp_type_ipoib_default { + /* ipoib w/o any pkeys (default pkey) */ + uint64_t ** range_arr = + (uint64_t **)malloc(sizeof(uint64_t *)); + range_arr[0] = (uint64_t *)malloc(2*sizeof(uint64_t)); + range_arr[0][0] = range_arr[0][1] = 0x7fff; + + /* + * Although we know that the default partition exists, + * we still need to validate it by checking that it has + * at least two full members. Otherwise IPoIB won't work. + */ + if (__validate_pkeys(range_arr, 1, TRUE)) + return 1; + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = 1; + + } qos_ulp_sl + + | qos_ulp_type_ipoib_pkey list_of_ranges TK_DOTDOT { + /* ipoib with pkeys */ + uint64_t ** range_arr; + unsigned range_len; + + if (!cl_list_count(&tmp_parser_struct.num_pair_list)) + { + __qos_parser_error("IPoIB ULP rule doesn't have pkeys"); + return 1; + } + + /* get all the pkey ranges */ + __rangelist2rangearr( &tmp_parser_struct.num_pair_list, + &range_arr, + &range_len ); + + /* + * Validate pkeys. + * For IPoIB pkeys the validation is strict. + * If some problem would be found, parsing will + * be aborted with a proper error messages. 
+ */ + if (__validate_pkeys(range_arr, range_len, TRUE)) + return 1; + + p_current_qos_match_rule->pkey_range_arr = range_arr; + p_current_qos_match_rule->pkey_range_len = range_len; + + } qos_ulp_sl + ; + +qos_ulp_type_any_service: TK_ULP_ANY_SERVICE_ID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_pkey: TK_ULP_ANY_PKEY + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_any_target_port_guid: TK_ULP_ANY_TARGET_PORT_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_sdp_default: TK_ULP_SDP_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_sdp_port: TK_ULP_SDP_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_rds_default: TK_ULP_RDS_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_rds_port: TK_ULP_RDS_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_iser_default: TK_ULP_ISER_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_iser_port: TK_ULP_ISER_PORT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_srp_guid: TK_ULP_SRP_GUID + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_ipoib_default: TK_ULP_IPOIB_DEFAULT + { __parser_ulp_match_rule_start(); }; + +qos_ulp_type_ipoib_pkey: TK_ULP_IPOIB_PKEY + { __parser_ulp_match_rule_start(); }; + + +qos_ulp_sl: single_number { + /* get the SL for ULP rules */ + cl_list_iterator_t list_iterator; + uint64_t * p_tmp_num; + uint8_t sl; + + list_iterator = cl_list_head(&tmp_parser_struct.num_list); + p_tmp_num = (uint64_t*)cl_list_obj(list_iterator); + if (*p_tmp_num > 15) + { + __qos_parser_error("illegal SL value"); + return 1; + } + + sl = (uint8_t)(*p_tmp_num); + free(p_tmp_num); + cl_list_remove_all(&tmp_parser_struct.num_list); + + p_current_qos_match_rule->p_qos_level = + &osm_qos_policy_simple_qos_levels[sl]; + p_current_qos_match_rule->qos_level_name = + strdup(osm_qos_policy_simple_qos_levels[sl].name); + + if (__parser_ulp_match_rule_end()) + return 1; + } + ; + /* * port_group_entry values: * port_group_name @@ 
-1819,10 +2294,19 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) if (first_time) { first_time = FALSE; + __setup_simple_qos_levels(); + __setup_ulp_match_rules(); osm_log(p_qos_parser_osm_log, OSM_LOG_INFO, "osm_qos_parse_policy_file: Loading QoS policy file (%s)\n", p_subn->opt.qos_policy_file); } + else + /* + * ULP match rules list was emptied at the end of + * previous parsing iteration. + * What's left is to clear simple QoS levels. + */ + __clear_simple_qos_levels(); column_num = 1; line_num = 1; @@ -1848,6 +2332,9 @@ int osm_qos_parse_policy_file(IN osm_subn_t * const p_subn) goto Exit; } + /* add generated ULP match rules to the usual match rules */ + __process_ulp_match_rules(); + if (osm_qos_policy_validate(p_subn->p_qos_policy,p_qos_parser_osm_log)) { osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, @@ -2073,6 +2560,26 @@ static int __parser_match_rule_end() /*************************************************** ***************************************************/ +static void __parser_ulp_match_rule_start() +{ + p_current_qos_match_rule = osm_qos_policy_match_rule_create(); +} + +/*************************************************** + ***************************************************/ + +static int __parser_ulp_match_rule_end() +{ + CL_ASSERT(p_current_qos_match_rule->p_qos_level); + cl_list_insert_tail(&__ulp_match_rules, + p_current_qos_match_rule); + p_current_qos_match_rule = NULL; + return 0; +} + +/*************************************************** + ***************************************************/ + static void __parser_tmp_struct_init() { tmp_parser_struct.str[0] = '\0'; @@ -2115,6 +2622,73 @@ static void __parser_tmp_struct_destroy() /*************************************************** ***************************************************/ +#define __SIMPLE_QOS_LEVEL_NAME "SimpleQoSLevel_SL" +#define __SIMPLE_QOS_LEVEL_DEFAULT_NAME "SimpleQoSLevel_DEFAULT" + +static void __setup_simple_qos_levels() +{ + uint8_t i; + char 
tmp_buf[30]; + memset(osm_qos_policy_simple_qos_levels, 0, + sizeof(osm_qos_policy_simple_qos_levels)); + for (i = 0; i < 16; i++) + { + osm_qos_policy_simple_qos_levels[i].sl = i; + osm_qos_policy_simple_qos_levels[i].sl_set = TRUE; + sprintf(tmp_buf, "%s%u", __SIMPLE_QOS_LEVEL_NAME, i); + osm_qos_policy_simple_qos_levels[i].name = strdup(tmp_buf); + } + + memset(&__default_simple_qos_level, 0, + sizeof(__default_simple_qos_level)); + __default_simple_qos_level.name = + strdup(__SIMPLE_QOS_LEVEL_DEFAULT_NAME); +} + +/*************************************************** + ***************************************************/ + +static void __clear_simple_qos_levels() +{ + /* + * Simple QoS levels are static. + * What's left is to invalidate default simple QoS level. + */ + __default_simple_qos_level.sl_set = FALSE; +} + +/*************************************************** + ***************************************************/ + +static void __setup_ulp_match_rules() +{ + cl_list_construct(&__ulp_match_rules); + cl_list_init(&__ulp_match_rules, 10); +} + +/*************************************************** + ***************************************************/ + +static void __process_ulp_match_rules() +{ + cl_list_iterator_t list_iterator; + osm_qos_match_rule_t *p_qos_match_rule = NULL; + + list_iterator = cl_list_head(&__ulp_match_rules); + while (list_iterator != cl_list_end(&__ulp_match_rules)) + { + p_qos_match_rule = (osm_qos_match_rule_t *) cl_list_obj(list_iterator); + if (p_qos_match_rule) + cl_list_insert_tail(&p_qos_policy->qos_match_rules, + p_qos_match_rule); + list_iterator = cl_list_next(list_iterator); + } + cl_list_remove_all(&__ulp_match_rules); +} + +/*************************************************** + ***************************************************/ + static int OSM_CDECL __cmp_num_range( const void * p1, @@ -2399,3 +2973,64 @@ static void __parser_add_map_to_port_map( /*************************************************** 
***************************************************/ + +static int __validate_pkeys( uint64_t ** range_arr, + unsigned range_len, + boolean_t is_ipoib) +{ + unsigned i; + uint64_t pkey_64; + ib_net16_t pkey; + osm_prtn_t * p_prtn; + + if (!range_arr || !range_len) + return 0; + + for (i = 0; i < range_len; i++) { + for (pkey_64 = range_arr[i][0]; pkey_64 <= range_arr[i][1]; pkey_64++) { + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); + p_prtn = (osm_prtn_t *) + cl_qmap_get(&p_qos_policy->p_subn->prtn_pkey_tbl, pkey); + + if (p_prtn == (osm_prtn_t *)cl_qmap_end( + &p_qos_policy->p_subn->prtn_pkey_tbl)) + p_prtn = NULL; + + if (is_ipoib) { + /* + * Be very strict for IPoIB partition: + * - the partition for the pkey have to exist + * - it has to have at least 2 full members + */ + if (!p_prtn) { + __qos_parser_error("IPoIB partition, pkey 0x%04X - " + "partition doesn't exist", + cl_ntoh16(pkey)); + return 1; + } + else if (cl_map_count(&p_prtn->full_guid_tbl) < 2) { + __qos_parser_error("IPoIB partition, pkey 0x%04X - " + "partition has less than two full members", + cl_ntoh16(pkey)); + return 1; + } + } + else if (!p_prtn) { + /* + * For non-IPoIB pkey we just want to check that + * the relevant partition exists. + * And even if it doesn't, don't exit - just print + * error message and continue. 
+ */ + osm_log(p_qos_parser_osm_log, OSM_LOG_ERROR, + "__validate_pkeys: ERR AC02: pkey 0x%04X - " + "partition doesn't exist", + cl_ntoh16(pkey)); + } + } + } + return 0; +} + +/*************************************************** + ***************************************************/ diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c index 60b818d..b95e651 100644 --- a/opensm/opensm/osm_qos_policy.c +++ b/opensm/opensm/osm_qos_policy.c @@ -56,6 +56,8 @@ #include #include +extern osm_qos_level_t __default_simple_qos_level; + /*************************************************** ***************************************************/ @@ -770,8 +772,11 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, osm_qos_port_group_t *p_port_group = NULL; osm_qos_match_rule_t *p_qos_match_rule = NULL; char *str; - unsigned i; + unsigned i, j; int res = 0; + uint64_t pkey_64; + ib_net16_t pkey; + osm_prtn_t * p_prtn; OSM_LOG_ENTER(p_log, osm_qos_policy_validate); @@ -780,12 +785,20 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, p_qos_policy->p_default_qos_level = __qos_policy_get_qos_level_by_name(p_qos_policy, OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); if (!p_qos_policy->p_default_qos_level) { - osm_log(p_log, OSM_LOG_ERROR, - "osm_qos_policy_validate: ERR AC10: " - "Default qos-level (%s) not defined.\n", - OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); - res = 1; - goto Exit; + /* There's no default QoS level in the usual qos-level section. 
+ Check whether the 'simple' default QoS level that can be + defined in the qos-ulp section exists */ + if (__default_simple_qos_level.sl_set) { + p_qos_policy->p_default_qos_level = &__default_simple_qos_level; + } + else { + osm_log(p_log, OSM_LOG_ERROR, + "osm_qos_policy_validate: ERR AC10: " + "Default qos-level (%s) not defined.\n", + OSM_QOS_POLICY_DEFAULT_LEVEL_NAME); + res = 1; + goto Exit; + } } /* scan all the match rules, and fill the lists of pointers to @@ -803,9 +816,10 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, /* find the matching qos-level for each match-rule */ - p_qos_match_rule->p_qos_level = - __qos_policy_get_qos_level_by_name(p_qos_policy, - p_qos_match_rule->qos_level_name); + if (!p_qos_match_rule->p_qos_level) + p_qos_match_rule->p_qos_level = + __qos_policy_get_qos_level_by_name(p_qos_policy, + p_qos_match_rule->qos_level_name); if (!p_qos_match_rule->p_qos_level) { osm_log(p_log, OSM_LOG_ERROR, @@ -877,6 +891,51 @@ int osm_qos_policy_validate(osm_qos_policy_t * p_qos_policy, } } + /* + * Scan all the pkeys in matching rule, and if the + * partition for these pkeys exists, set the SL + * according to the QoS Level. + * Warn if there's mismatch between QoS level SL + * and Partition SL. 
+ */ + + for (j = 0; j < p_qos_match_rule->pkey_range_len; j++) { + for ( pkey_64 = p_qos_match_rule->pkey_range_arr[i][0]; + pkey_64 <= p_qos_match_rule->pkey_range_arr[i][1]; + pkey_64++) { + pkey = cl_hton16((uint16_t)(pkey_64 & 0x7fff)); + p_prtn = (osm_prtn_t *)cl_qmap_get( + &p_qos_policy->p_subn->prtn_pkey_tbl, pkey); + + if (p_prtn == (osm_prtn_t *)cl_qmap_end( + &p_qos_policy->p_subn->prtn_pkey_tbl)) { + /* partition for this pkey not found */ + osm_log(p_log, + OSM_LOG_ERROR, + "osm_qos_policy_validate: ERR AC14: " + "pkey 0x%04X in match rule - " + "partition doesn't exist\n", + cl_ntoh16(pkey)); + continue; + } + + if (p_qos_match_rule->p_qos_level->sl_set && + p_prtn->sl != p_qos_match_rule->p_qos_level->sl) { + /* overriding partition's SL */ + osm_log(p_log, + OSM_LOG_ERROR, + "osm_qos_policy_validate: ERR AC15: " + "pkey 0x%04X in match rule - " + "overriding partition SL (%u) " + "with QoS Level SL (%u)\n", + cl_ntoh16(pkey), + p_prtn->sl, + p_qos_match_rule->p_qos_level->sl); + p_prtn->sl = p_qos_match_rule->p_qos_level->sl; + } + } + } + /* done with the current match-rule */ match_rules_list_iterator = -- 1.5.1.4 From kliteyn at mellanox.co.il Sun Nov 18 21:13:02 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 19 Nov 2007 07:13:02 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-19:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-18 OpenSM git rev = Thu_Nov_15_22:11:52_2007 [0f02129fba975d28b123104af97786ac578b3c90] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast 
IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From ossrosch at linux.vnet.ibm.com Mon Nov 19 06:25:57 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Mon, 19 Nov 2007 15:25:57 +0100 Subject: [ofa-general] [PATCH] ofed-1.3: Remove hvcall.h backport file for rhel-5.1 Message-ID: <200711191525.59412.ossrosch@linux.vnet.ibm.com> This patch deletes the backport file hvcall.h for RHEL-5.1. The backport is no longer needed because RHEL-5.1 already contains the needed hvcalls. regards Stefan Signed-off-by: Stefan Roscher --- hvcall.h | 32 -------------------------------- 1 file changed, 32 deletions(-) diff -Nurp ofa_kernel-1.3_old/kernel_addons/backport/2.6.18-EL5.1/include/asm/hvcall.h ofa_kernel-1.3_new/kernel_addons/backport/2.6.18-EL5.1/include/asm/hvcall.h --- ofa_kernel-1.3_old/kernel_addons/backport/2.6.18-EL5.1/include/asm/hvcall.h 2007-11-01 08:11:21.000000000 -0400 +++ ofa_kernel-1.3_new/kernel_addons/backport/2.6.18-EL5.1/include/asm/hvcall.h 1969-12-31 19:00:00.000000000 -0500 @@ -1,32 +0,0 @@ -#ifndef ASM_HVCALL_BACKPORT_2618_H -#define ASM_HVCALL_BACKPORT_2618_H - -#include_next - -#ifdef __KERNEL__ - -#define PLPAR_HCALL9_BUFSIZE 9 - -inline static long plpar_hcall9(unsigned long opcode, - unsigned long *retbuf, - unsigned long arg1, /* This patch fixes a migth_sleep kernel warning with using sdp + ehca device driver. 
We have to change the kmem_cache_alloc() attribute from GFP_KERNEL to GFP_ATOMIC because sdp is running within a spinlock during ah allocation. regards Stefan Signed-off-by: Stefan Roscher --- ehca_av.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff -Nurp ofa_kernel-1.3_old/drivers/infiniband/hw/ehca/ehca_av.c ofa_kernel-1.3_new/drivers/infiniband/hw/ehca/ehca_av.c --- ofa_kernel-1.3_old/drivers/infiniband/hw/ehca/ehca_av.c 2007-11-19 14:32:35.000000000 +0100 +++ ofa_kernel-1.3_new/drivers/infiniband/hw/ehca/ehca_av.c 2007-11-19 14:40:16.000000000 +0100 @@ -57,7 +57,7 @@ struct ib_ah *ehca_create_ah(struct ib_p struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - av = kmem_cache_alloc(av_cache, GFP_KERNEL); + av = kmem_cache_alloc(av_cache, GFP_ATOMIC); if (!av) { ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", pd, ah_attr); From jim at mellanox.com Mon Nov 19 06:27:51 2007 From: jim at mellanox.com (Jim Mott) Date: Mon, 19 Nov 2007 06:27:51 -0800 Subject: [ofa-general] [PATCH 1/1] SDP - Fix a memory leak in bzcopy Message-ID: The Mellanox regression group found and fixed a problem that could cause memory leaks in the bzcopy code. Signed-off-by: Jim Mott --- Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== --- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-19 08:56:45.000000000 -0600 +++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-19 09:19:24.000000000 -0600 @@ -1248,8 +1248,10 @@ * fits into the frags array in the skb. 
*/ size_goal = size_goal / PAGE_SIZE + 1; - if (size_goal >= MAX_SKB_FRAGS) + if (size_goal >= MAX_SKB_FRAGS) { + kfree(bz); return NULL; + } addr = (unsigned long)base; From ossrosch at linux.vnet.ibm.com Mon Nov 19 06:29:43 2007 From: ossrosch at linux.vnet.ibm.com (Stefan Roscher) Date: Mon, 19 Nov 2007 15:29:43 +0100 Subject: [ofa-general] [PATCH ofed-1.3] ehca: Fix dma_map_page() call for rhel-4.5 backport Message-ID: <200711191529.45675.ossrosch@linux.vnet.ibm.com> This patch fixes a wrong function call in backport file for rhel-4.5. Because rhel-4.5 is not supporting dma_map_page() for ibmebus devices we have to call ibmebus_map_single() explicitly. regards Stefan Signed-off-by: Stefan Roscher --- backport_ehca_4_rhel45_dma_fix.patch | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff -Nurp ofa_kernel-1.3_old/kernel_patches/backport/2.6.9_U5/backport_ehca_4_rhel45_dma_fix.patch ofa_kernel-1.3_new/kernel_patches/backport/2.6.9_U5/backport_ehca_4_rhel45_dma_fix.patch --- ofa_kernel-1.3_old/kernel_patches/backport/2.6.9_U5/backport_ehca_4_rhel45_dma_fix.patch 1970-01-01 01:00:00.000000000 +0100 +++ ofa_kernel-1.3_new/kernel_patches/backport/2.6.9_U5/backport_ehca_4_rhel45_dma_fix.patch 2007-11-19 14:29:12.000000000 +0100 @@ -0,0 +1,21 @@ +diff -Nurp ofa_kernel-1.3_old/drivers/infiniband/hw/ehca/ehca_dma.c ofa_kernel-1.3_new/drivers/infiniband/hw/ehca/ehca_dma.c +--- ofa_kernel-1.3_old/drivers/infiniband/hw/ehca/ehca_dma.c 2007-11-19 14:15:01.000000000 +0100 ++++ ofa_kernel-1.3_new/drivers/infiniband/hw/ehca/ehca_dma.c 2007-11-19 14:20:09.000000000 +0100 +@@ -131,14 +131,15 @@ static u64 ehca_dma_map_page(struct ib_d + size_t size, + enum dma_data_direction direction) + { +- return dma_map_page(dev->dma_device, page, offset, size, direction); ++ return ibmebus_map_single(dev, (page_address(page) + offset), ++ size, direction); + } + + static void ehca_dma_unmap_page(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction) 
+ { +- dma_unmap_page(dev->dma_device, addr, size, direction); ++ ibmebus_unmap_single(dev, addr, size, direction); + } + + int ehca_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, From sashak at voltaire.com Mon Nov 19 07:33:09 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 15:33:09 +0000 Subject: [ofa-general] Re: [PATCH v2] osm: improving error reporting function of the QoS parser In-Reply-To: <474182D3.6050708@dev.mellanox.co.il> References: <474182D3.6050708@dev.mellanox.co.il> Message-ID: <20071119153309.GB5986@sashak.voltaire.com> On 14:34 Mon 19 Nov , Yevgeny Kliteynik wrote: > Improving error reporting function of the QoS parser: > - Making it static > - Printing error message to stderr as well as to the log > - Function now can get formatted string as an argument > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. Sasha From sashak at voltaire.com Mon Nov 19 07:34:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 15:34:35 +0000 Subject: [ofa-general] Re: [PATCH] osm: broken logic when scanning subnet for PIR request In-Reply-To: <4741851E.7010308@dev.mellanox.co.il> References: <4741851E.7010308@dev.mellanox.co.il> Message-ID: <20071119153435.GC5986@sashak.voltaire.com> On 14:44 Mon 19 Nov , Yevgeny Kliteynik wrote: > Fixing broken logic when scanning subnet for PIR request, > that was causing some ports to be scanned twice if two ports > of the same node belong to the same fabric. > Fixing to iterate through nodes instead of ports. > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. 
Sasha From sashak at voltaire.com Mon Nov 19 07:39:16 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 15:39:16 +0000 Subject: [ofa-general] Re: [PATCH v2] osm: Using new format of QoS parser error function In-Reply-To: <4741895F.2090809@dev.mellanox.co.il> References: <4741895F.2090809@dev.mellanox.co.il> Message-ID: <20071119153916.GD5986@sashak.voltaire.com> On 15:02 Mon 19 Nov , Yevgeny Kliteynik wrote: > Using new format of QoS parser error function > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. Sasha From sashak at voltaire.com Mon Nov 19 08:08:48 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 16:08:48 +0000 Subject: [ofa-general] Re: [PATCH v2] osm: QoS - adding simplified syntax for policy definition In-Reply-To: <474189EA.3070500@dev.mellanox.co.il> References: <474189EA.3070500@dev.mellanox.co.il> Message-ID: <20071119160848.GG5986@sashak.voltaire.com> On 15:04 Mon 19 Nov , Yevgeny Kliteynik wrote: > This patch adds simplified syntax for QoS definition in QoS Policy file. > Using this syntax the administrator is able to define QoS policy per > ULP and/or per Service ID and/or per partition (pkey). 
> > Here's an example of the policy file with the new syntax, which is added > in a new section called qos-ulps: > > qos-ulps > sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 > sdp, port-num 10000-20000, 0xfffd : 2 > sdp : 0 #default SL for SDP > srp, target-port-guid 0x1234-0x1235 : 2 > iser, port-num 0x3234-0x3235 : 4 #SL for iSER with specific target ports > iser : 5 #default SL for iSER > rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 > rds : 0 #default SL for RDS > ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 > ipoib : 6 #default IPoIB partition - pkey=0x7FFF > any, pkey 0x0ABC : 3 > any, pkey 0x0ABD-0x0ABF,0x0BBD-0x0BBA : 4 > any, service-id 0x6234 : 2 > any, target-port-guid 0x2234-0xF235 : 2 > default : 0 #default SL > end-qos-ulps > > Since any section of the policy file is optional as long as basic rules > of the file are kept, the above example can serve as a complete QoS > policy file - short and clear. > > I suspect that most of the administrators will use only this syntax, but > if someone wishes to manage QoS in more detailed manner, there's always > the rest of the policy file to do so. > > Signed-off-by: Yevgeny Kliteynik Applied. Thanks. Sasha From sashak at voltaire.com Mon Nov 19 08:10:14 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 16:10:14 +0000 Subject: [ofa-general] Re: [PATCH v2] osm: QoS - adding simplified syntax for policy definition In-Reply-To: <474189EA.3070500@dev.mellanox.co.il> References: <474189EA.3070500@dev.mellanox.co.il> Message-ID: <20071119161014.GH5986@sashak.voltaire.com> Hi Yevgeny, On 15:04 Mon 19 Nov , Yevgeny Kliteynik wrote: > This patch adds simplified syntax for QoS definition in QoS Policy file. > Using this syntax the administrator is able to define QoS policy per > ULP and/or per Service ID and/or per partition (pkey). 
> > Here's an example of the policy file with the new syntax, which is added > in a new section called qos-ulps: > > qos-ulps > sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 > sdp, port-num 10000-20000, 0xfffd : 2 > sdp : 0 #default SL for SDP > srp, target-port-guid 0x1234-0x1235 : 2 > iser, port-num 0x3234-0x3235 : 4 #SL for iSER with specific target ports > iser : 5 #default SL for iSER > rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 > rds : 0 #default SL for RDS > ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 > ipoib : 6 #default IPoIB partition - pkey=0x7FFF > any, pkey 0x0ABC : 3 > any, pkey 0x0ABD-0x0ABF,0x0BBD-0x0BBA : 4 > any, service-id 0x6234 : 2 > any, target-port-guid 0x2234-0xF235 : 2 > default : 0 #default SL > end-qos-ulps > > Since any section of the policy file is optional as long as basic rules > of the file are kept, the above example can serve as a complete QoS > policy file - short and clear. I think it would be a good idea to add to opensm/doc|man a document that describes QoS setup in detail. 
Sasha From hanafim.ctr at asc.hpc.mil Mon Nov 19 09:44:28 2007 From: hanafim.ctr at asc.hpc.mil (MAHMOUD HANAFI) Date: Mon, 19 Nov 2007 12:44:28 -0500 Subject: [ofa-general] CentOS4.5 build errors Message-ID: <4741CB7C.3050309@asc.hpc.mil> I am getting the following errors during build on centos4.5 with 2.6.9-55.0.12.ELsmp kernel OFED Version: OFED-1.2.5.3] gcc -m32 -g -O2 -m32 -g -O2 -o src/.libs/perfquery src_perfquery-perfquery.o -L/usr/lib -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibcommon -libcommon -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibumad -libumad -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibmad -libmad -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/opensm -lopensm -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/libvendor -losmvendor -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/complib -losmcomp ../libibcommon/.libs/libibcommon.so ../libibumad/.libs/libibumad.so ../libibmad/.libs/libibmad.so -Wl,--rpath -Wl,/usr/ofed/lib src_perfquery-perfquery.o(.text+0x6a2): In function `main': src/perfquery.c:229: undefined reference to `port_performance_ext_query' src_perfquery-perfquery.o(.text+0x6d0):src/perfquery.c:232: undefined reference to `mad_dump_perfcounters_ext' src_perfquery-perfquery.o(.text+0x7d6):src/perfquery.c:245: undefined reference to `port_performance_ext_reset' collect2: ld returned 1 exit status make[2]: *** [src/perfquery] Error 1 make[2]: Leaving directory `/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/diags' make[1]: *** [all] Error 2 make[1]: Leaving directory `/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/diags' make: *** [diags] Error 2 error: Bad exit status from /var/tmp/rpm-tmp.23212 (%install) RPM build errors: user vlad does not exist - using root group vlad does not exist - using root user vlad does not exist - using root group vlad does 
not exist - using root Bad exit status from /var/tmp/rpm-tmp.23212 (%install) ERROR: Failed executing "rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr/ofed' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-dapl --with-ipoibtools --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibmad --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-opensm --with-librdmacm --with-libsdp --with-openib-diags --with-qlvnictools --with-sdpnetstat --with-srptools --with-mstflint --with-perftest --with-tvflash --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-dapl --with-ipoibtools --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibmad --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-opensm --with-librdmacm --with-libsdp --with-openib-diags --with-qlvnictools --with-sdpnetstat --with-srptools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /export/DISTRO/centos4.5/OFED-1.2.5.3/SRPMS/ofa_user-1.2.5.3-0.src.rpm" [root at spirit-adm OFED-1.2.5.3]# uname -a Linux spirit-adm 2.6.9-55.0.12.ELsmp #1 SMP Fri Nov 2 12:38:56 EDT 2007 x86_64 x86_64 x86_64 GNU/Linux [root at spirit-adm OFED-1.2.5.3]# -- Mahmoud Hanafi Senior System Administrator ASC/MSRC www.asc.hpc.mil 2435 5th Street WPAFB, OHIO 45433 (937) 255-1536 From sashak at voltaire.com Mon Nov 19 10:26:12 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 18:26:12 +0000 Subject: [ofa-general] CentOS4.5 build errors In-Reply-To: <4741CB7C.3050309@asc.hpc.mil> References: <4741CB7C.3050309@asc.hpc.mil> Message-ID: <20071119182612.GK5986@sashak.voltaire.com> On 12:44 Mon 19 Nov , MAHMOUD HANAFI wrote: > I am getting the following errors during build on centos4.5 with > 2.6.9-55.0.12.ELsmp kernel > > OFED Version: OFED-1.2.5.3] > > > gcc -m32 -g -O2 -m32 -g -O2 -o 
src/.libs/perfquery src_perfquery-perfquery.o > -L/usr/lib > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibcommon > -libcommon > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibumad > -libumad > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibmad > -libmad > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/opensm > -lopensm > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/libvendor > -losmvendor > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/complib > -losmcomp ../libibcommon/.libs/libibcommon.so > ../libibumad/.libs/libibumad.so ../libibmad/.libs/libibmad.so -Wl,--rpath > -Wl,/usr/ofed/lib > src_perfquery-perfquery.o(.text+0x6a2): In function `main': > src/perfquery.c:229: undefined reference to `port_performance_ext_query' > src_perfquery-perfquery.o(.text+0x6d0):src/perfquery.c:232: undefined > reference to `mad_dump_perfcounters_ext' > src_perfquery-perfquery.o(.text+0x7d6):src/perfquery.c:245: undefined > reference to `port_performance_ext_reset' > collect2: ld returned 1 exit status > make[2]: *** [src/perfquery] Error 1 You need to build libibmad before diags. Look at the management README file for details. 
The SensorStat from E T gu is already operating in facilities that are raving about the incredible relief they have from there energy bills. 5. E tGU is launching a huge media campaign to increase investor awareness next week. EnerBrite is in the right place at the right time. Providing working solutions to the energy crisis we are facing. This penny stock has the potential to double or even triple early investor.s returns. Read up and move on et GU. From mhanafi at csc.com Mon Nov 19 10:26:54 2007 From: mhanafi at csc.com (Mahmoud Hanafi) Date: Mon, 19 Nov 2007 13:26:54 -0500 Subject: [ofa-general] CentOS4.5 build errors In-Reply-To: <20071119182612.GK5986@sashak.voltaire.com> Message-ID: I was using the build.sh script. Shouldn't the script build in order? Mahmoud Hanafi Sr. System Administrator CSC HPC COE Bld. 676 2435 Fifth Street WPAFB, Ohio 45433 (937) 255-1536 Computer Sciences Corporation Registered Office: 2100 East Grand Avenue, El Segundo California 90245, USA Registered in USA No: C-489-59 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- This is a PRIVATE message. If you are not the intended recipient, please delete without copying and kindly advise us by e-mail of the mistake in delivery. NOTE: Regardless of content, this e-mail shall not operate to bind CSC to any order or other contract unless pursuant to explicit written agreement or government initiative expressly permitting the use of e-mail for such purpose. 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Sasha Khapyorsky Sent by: general-bounces at lists.openfabrics.org 11/19/2007 01:26 PM To MAHMOUD HANAFI cc openib-general at openib.org Subject Re: [ofa-general] CentOS4.5 build errors On 12:44 Mon 19 Nov , MAHMOUD HANAFI wrote: > I am getting the following errors during build on centos4.5 with > 2.6.9-55.0.12.ELsmp kernel > > OFED Version: OFED-1.2.5.3] > > > gcc -m32 -g -O2 -m32 -g -O2 -o src/.libs/perfquery src_perfquery-perfquery.o > -L/usr/lib > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibcommon > -libcommon > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibumad > -libumad > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/libibmad > -libmad > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/opensm > -lopensm > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/libvendor > -losmvendor > -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5.3/src/userspace/management/osm/complib > -losmcomp ../libibcommon/.libs/libibcommon.so > ../libibumad/.libs/libibumad.so ../libibmad/.libs/libibmad.so -Wl,--rpath > -Wl,/usr/ofed/lib > src_perfquery-perfquery.o(.text+0x6a2): In function `main': > src/perfquery.c:229: undefined reference to `port_performance_ext_query' > src_perfquery-perfquery.o(.text+0x6d0):src/perfquery.c:232: undefined > reference to `mad_dump_perfcounters_ext' > src_perfquery-perfquery.o(.text+0x7d6):src/perfquery.c:245: undefined > reference to `port_performance_ext_reset' > collect2: ld returned 1 exit status > make[2]: *** [src/perfquery] Error 1 You need to build before diags. Looks at management README file for details. 
Sasha _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- An HTML attachment was scrubbed... URL: From kliteyn at dev.mellanox.co.il Mon Nov 19 11:30:16 2007 From: kliteyn at dev.mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 19 Nov 2007 21:30:16 +0200 Subject: [ofa-general] Re: [PATCH v2] osm: QoS - adding simplified syntax for policy definition In-Reply-To: <20071119161014.GH5986@sashak.voltaire.com> References: <474189EA.3070500@dev.mellanox.co.il> <20071119161014.GH5986@sashak.voltaire.com> Message-ID: <4741E448.3080002@dev.mellanox.co.il> Sasha Khapyorsky wrote: > Hi Yevgeny, > > On 15:04 Mon 19 Nov , Yevgeny Kliteynik wrote: >> This patch adds simplified syntax for QoS definition in QoS Policy file. >> Using this syntax the administrator is able to define QoS policy per >> ULP and/or per Service ID and/or per partition (pkey). 
>> >> Here's an example of the policy file with the new syntax, which is added >> in a new section called qos-ulps: >> >> qos-ulps >> sdp, port-num 30000 : 1 #SL for SDP when destination port is 30000 >> sdp, port-num 10000-20000, 0xfffd : 2 >> sdp : 0 #default SL for SDP >> srp, target-port-guid 0x1234-0x1235 : 2 >> iser, port-num 0x3234-0x3235 : 4 #SL for iSER with specific target ports >> iser : 5 #default SL for iSER >> rds, port-num 25000 : 2 #SL for RDS when destination port is 25000 >> rds : 0 #default SL for RDS >> ipoib, pkey 0x0001 : 5 #SL for IPoIB on partition with pkey 0x0001 >> ipoib : 6 #default IPoIB partition - pkey=0x7FFF >> any, pkey 0x0ABC : 3 >> any, pkey 0x0ABD-0x0ABF,0x0BBD-0x0BBA : 4 >> any, service-id 0x6234 : 2 >> any, target-port-guid 0x2234-0xF235 : 2 >> default : 0 #default SL >> end-qos-ulps >> >> Since any section of the policy file is optional as long as basic rules >> of the file are kept, the above example can serve as a complete QoS >> policy file - short and clear. > > I think it would be a good idea to add to opensm/doc|man a document that > describes QoS setup in detail. Sure - it's on my to do list. -- Yevgeny > Sasha > From sashak at voltaire.com Mon Nov 19 12:18:44 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 20:18:44 +0000 Subject: [ofa-general] Re: [PATCH] opensm: support multiple routers in a subnet In-Reply-To: <20071116080840.GC30090@obsidianresearch.com> References: <20071116080840.GC30090@obsidianresearch.com> Message-ID: <20071119201844.GL5986@sashak.voltaire.com> Hi Rolf, On 01:08 Fri 16 Nov , Rolf Manderscheid wrote: > Hi Sasha, > > If a path record query is made for an off-subnet DGID, the SA needs to > return a path record where the DLID points to the router port that > handles the DGID prefix. In the case of a subnet with only one > router, the SA could just pick "the router", and that's exactly what > the ROUTER_EXP code did. 
However, ROUTER_EXP did not look beyond the > first available router. > > When additional routers are added, the SA needs more information. The > mechanism for gathering this information has not yet been specified, > so in the meantime, this patch adds a configuration file that > specifies which router ports handle which prefixes. > > The patch also removes all occurrences of ROUTER_EXP ifdefs. The > default behaviour remains unchanged with one minor exception: > hop limits are set to 0xFF for path records to multicast DGIDs if > the scope is non-local and to unicast DGIDs if off-subnet. > This used to happen for ROUTER_EXP only. > > Now, the same binary can be configured at run-time to enable the > ROUTER_EXP behaviour with a generic configuration file, or to handle > multiple routers on a subnet with a more explicit configuration file. > See the man page for details. > > Signed-off-by: Rolf Manderscheid Applied. Thanks. I like the idea and the patch. However have some mostly minor comments. I will be "out of office" and likely off-line for next couple of days, so I will do some changes in-place. The rest (less critical) improvements could be added as subsequent patch. > > -- > > One consequence of this patch is that people accustomed to using > ROUTER_EXP will need to specify a configuration file to get the > same behaviour. I toyed with the idea of keeping one ROUTER_EXP ifdef > to control the default behaviour, but then we're back to having two > versions of opensm with different default behaviours, and the > counter-intuitive: empty cfg file != non-existent cfg file. One of > the goals was to get to a single standard binary. So, to help avoid > surprises, I actually added back one ifdef ROUTER_EXP which causes the > compilation to fail with a useful message. This only helps those who > both build and configure their special ROUTER_EXP opensms, but I > suspect that's most. > > Thanks to Hal for reviewing early versions of this patch and providing > feedback. 
> > Rolf > > --- > diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h > index aa8d378..db58919 100644 > --- a/opensm/include/opensm/osm_base.h > +++ b/opensm/include/opensm/osm_base.h > @@ -253,6 +253,22 @@ BEGIN_C_DECLS > #endif /* __WIN__ */ > /***********/ > > +/****d* OpenSM: Base/OSM_DEFAULT_PREFIX_ROUTES_FILE > +* NAME > +* OSM_DEFAULT_PREFIX_ROUTES_FILE > +* > +* DESCRIPTION > +* Specifies the default prefix routes file name > +* > +* SYNOPSIS > +*/ > +#ifdef __WIN__ > +#define OSM_DEFAULT_PREFIX_ROUTES_FILE strcat(GetOsmCachePath(), "osm-prefix-routes.conf") > +#else > +#define OSM_DEFAULT_PREFIX_ROUTES_FILE "/etc/ofa/opensm-prefix-routes.conf" > +#endif > +/***********/ > + Recently we switched to using configurable config directory for OpenSM. I will add this separately. > /****d* OpenSM: Base/OSM_DEFAULT_SWEEP_INTERVAL_SECS > * NAME > * OSM_DEFAULT_SWEEP_INTERVAL_SECS > diff --git a/opensm/include/opensm/osm_prefix_route.h b/opensm/include/opensm/osm_prefix_route.h > new file mode 100644 > index 0000000..cebd532 > --- /dev/null > +++ b/opensm/include/opensm/osm_prefix_route.h > @@ -0,0 +1,52 @@ > +/* > + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. > + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. > + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. 
You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: > + * > + * Redistribution and use in source and binary forms, with or > + * without modification, are permitted provided that the following > + * conditions are met: > + * > + * - Redistributions of source code must retain the above > + * copyright notice, this list of conditions and the following > + * disclaimer. > + * > + * - Redistributions in binary form must reproduce the above > + * copyright notice, this list of conditions and the following > + * disclaimer in the documentation and/or other materials > + * provided with the distribution. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > + * SOFTWARE. > + * > + */ > + > +#ifndef _OSM_PREFIX_ROUTES_H_ > +#define _OSM_PREFIX_ROUTES_H_ > + > +#include We are not using *_osd.h header files directly. There should be just #include Changing this. > +#include > + > +typedef struct { > + cl_list_item_t list_item; /* must be first */ > + uint64_t prefix; /* network order, zero means "any" */ > + uint64_t guid; /* network order, zero means "any" */ > +} osm_prefix_route_t; > + > +#ifdef ROUTER_EXP > +#error ROUTER_EXP is deprecated, specify prefix routes at runtime instead (see opensm man page for details) > +#endif > + > +#endif /* _OSM_PREFIX_ROUTES_H_ */ What do you think? 
Will it be helpful to merge osm_prefix_routes.h and osm_router.h (or actually to put 'struct osm_prefix_route' definition in osm_router.h)? When adding new header file - you need to add this to include/Makefile.am EXTRA_DIST list, it is in order to not break 'make dist' functionality. I'm adding this now. > diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h > index 452098b..b67add3 100644 > --- a/opensm/include/opensm/osm_subnet.h > +++ b/opensm/include/opensm/osm_subnet.h > @@ -54,6 +54,7 @@ > #include > #include > #include > +#include > > #ifdef __cplusplus > # define BEGIN_C_DECLS extern "C" { > @@ -298,6 +299,7 @@ typedef struct _osm_subn_opt { > #endif /* ENABLE_OSM_PERF_MGR */ > char *event_plugin_name; > char *node_name_map_name; > + char *prefix_routes_file; > } osm_subn_opt_t; > /* > * FIELDS > @@ -550,6 +552,7 @@ typedef struct _osm_subn { > cl_qmap_t node_guid_tbl; > cl_qmap_t port_guid_tbl; > cl_qmap_t rtr_guid_tbl; > + cl_qlist_t prefix_routes_list; > cl_qmap_t prtn_pkey_tbl; > cl_qmap_t mgrp_mlid_tbl; > cl_qmap_t sm_guid_tbl; > diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 > index ef12980..d5434be 100644 > --- a/opensm/man/opensm.8 > +++ b/opensm/man/opensm.8 > @@ -20,6 +20,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA) > [\-Q | \-\-qos] [\-N | \-\-no_part_enforce] [\-y | \-\-stay_on_fatal] > [\-B | \-\-daemon] [\-I | \-\-inactive] > [\-\-perfmgr] [\-\-perfmgr_sweep_time_s ] > +[\-\-prefix_routes_file ] > [\-v(erbose)] [\-V] [\-D ] [\-d(ebug) ] [\-h(elp)] [\-?] > > .SH DESCRIPTION > @@ -201,6 +202,13 @@ is accumulative. > This option defines the optional partition configuration file. > The default name is \'/etc/opensm/opensm-partitions.conf\'. > .TP > +.BI --prefix_routes_file= path I guess here should be also something like: "This option specifies the prefix routes file..." > +Prefix routes control how the SA responds to path record queries for > +off-subnet DGIDs. 
By default, the SA fails such queries. The > +.B PREFIX ROUTES > +section below describes the format of the configuration file. > +The default path is \fB\%/etc/ofa/opensm\-prefix\-routes.conf\fP. > +.TP > \fB\-Q\fR, \fB\-\-qos\fR > This option enables QoS setup. It is disabled by default. > .TP > @@ -465,6 +473,39 @@ Examples: > qos_ca_sl2vl=0,1,2,3,5,5,5,12,12,0, > qos_swe_high_limit=0 > > +.SH PREFIX ROUTES > +.PP > +Prefix routes control how the SA responds to path record queries for > +off-subnet DGIDs. By default, the SA fails such queries. > +Note that IBA does not specify how the SA should obtain off-subnet path > +record information. > +The prefix routes configuration is meant as a stop-gap until the > +specification is completed. > +.PP > +Each line in the configuration file is a 64-bit prefix followed by a > +64-bit GUID, separated by white space. > +The GUID specifies the router port on the local subnet that will > +handle the prefix. > +Blank lines are ignored, as is anything between a \fB#\fP character > +and the end of the line. > +The prefix and GUID are both in hex, the leading 0x is optional. > +Either, or both, can be wild-carded by specifying an > +asterisk instead of an explicit prefix or GUID. > +.PP > +When responding to a path record query for an off-subnet DGID, > +opensm searches for the first prefix match in the configuration file. > +Therefore, the order of the lines in the configuration file is important: > +a wild-carded prefix at the beginning of the configuration file renders > +all subsequent lines useless. > +If there is no match, then opensm fails the query. > +It is legal to repeat prefixes in the configuration file, > +opensm will return the path to the first available matching router. > +A configuration file with a single line where both prefix and GUID > +are wild-carded means that a path record query specifying any > +off-subnet DGID should return a path to the first available router. 
> +This configuration yields the same behaviour formerly achieved by > +compiling opensm with -DROUTER_EXP. > + > .SH ROUTING > .PP > OpenSM now offers five routing engines: > @@ -872,6 +913,10 @@ a file compatible with dump_lfts.sh output. This file can be used > as input for forwarding tables loading by 'file' routing engine. > Both or one of options -U and -M can be specified together with \'-R file\'. > > +.SH FILES > +.TP > +.B /etc/ofa/opensm-prefix-routes.conf > +default prefix routes file. > > .SH AUTHORS > .TP > diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c > index 13c9f70..4b99dd0 100644 > --- a/opensm/opensm/main.c > +++ b/opensm/opensm/main.c > @@ -291,6 +291,11 @@ void show_usage(void) > printf("--perfmgr_sweep_time_s \n" > " PerfMgr sweep interval in seconds.\n\n"); > #endif > + printf("--prefix_routes_file \n" > + " This option specifies the prefix routes file.\n" > + " Prefix routes control how the SA responds to path record\n" > + " queries for off-subnet DGIDs. 
Default file is:\n" > + " "OSM_DEFAULT_PREFIX_ROUTES_FILE"\n\n"); > printf("-v\n" > "--verbose\n" > " This option increases the log verbosity level.\n" > @@ -609,6 +614,7 @@ int main(int argc, char *argv[]) > {"perfmgr", 0, NULL, 1}, > {"perfmgr_sweep_time_s", 1, NULL, 2}, > #endif > + {"prefix_routes_file", 1, NULL, 3}, > {NULL, 0, NULL, 0} /* Required at the end of the array */ > }; > > @@ -911,6 +917,9 @@ int main(int argc, char *argv[]) > break; > #endif /* ENABLE_OSM_PERF_MGR */ > > + case 3: > + opt.prefix_routes_file = optarg; > + break; > case 'h': > case '?': > case ':': > diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c > index ce75ec8..2597046 100644 > --- a/opensm/opensm/osm_sa_path_record.c > +++ b/opensm/opensm/osm_sa_path_record.c > @@ -69,10 +69,9 @@ > #include > #include > #include > -#ifdef ROUTER_EXP > #include > #include > -#endif > +#include > > #define OSM_PR_RCV_POOL_MIN_SIZE 64 > #define OSM_PR_RCV_POOL_GROW_SIZE 64 > @@ -858,19 +857,12 @@ __osm_pr_rcv_build_pr(IN osm_pr_rcv_t * const p_rcv, > { > const osm_physp_t *p_src_physp; > const osm_physp_t *p_dest_physp; > -#ifdef ROUTER_EXP > boolean_t is_nonzero_gid = 0; > -#endif > > OSM_LOG_ENTER(p_rcv->p_log, __osm_pr_rcv_build_pr); > > p_src_physp = p_src_port->p_physp; > -#ifndef ROUTER_EXP > - p_dest_physp = p_dest_port->p_physp; > > - p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dest_physp); > - p_pr->dgid.unicast.interface_id = osm_physp_get_port_guid(p_dest_physp); > -#else > if (p_dgid) { > if (memcmp(p_dgid, &zero_gid, sizeof(*p_dgid))) > is_nonzero_gid = 1; > @@ -886,7 +878,6 @@ __osm_pr_rcv_build_pr(IN osm_pr_rcv_t * const p_rcv, > p_pr->dgid.unicast.interface_id = > osm_physp_get_port_guid(p_dest_physp); > } > -#endif > > p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp); > p_pr->sgid.unicast.interface_id = osm_physp_get_port_guid(p_src_physp); > @@ -895,11 +886,10 @@ __osm_pr_rcv_build_pr(IN osm_pr_rcv_t * const 
p_rcv, > p_pr->slid = cl_hton16(src_lid_ho); > > p_pr->hop_flow_raw &= cl_hton32(1 << 31); > -#ifdef ROUTER_EXP > + > /* Only set HopLimit if going through a router */ > if (is_nonzero_gid) > p_pr->hop_flow_raw |= cl_hton32(IB_HOPLIMIT_MAX); > -#endif > > p_pr->pkey = p_parms->pkey; > ib_path_rec_set_sl(p_pr, p_parms->sl); > @@ -1262,10 +1252,8 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, > ib_net64_t dest_guid; > ib_api_status_t status; > ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; > -#ifdef ROUTER_EXP > osm_router_t *p_rtr; > osm_port_t *p_rtr_port; > -#endif > > OSM_LOG_ENTER(p_rcv->p_log, __osm_pr_rcv_get_end_points); > > @@ -1359,20 +1347,47 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, > "Non local DGID subnet prefix 0x%016" > PRIx64 "\n", > cl_ntoh64(p_pr->dgid.unicast.prefix)); This is part of osm_log(.. OSM_LOG_VERBOSE ..), we are going to make this prefix routes support to be mainstream, do you think verbosity of this message should be decreased (to let's say OSM_LOG_DEBUG)? > -#ifndef ROUTER_EXP > - /* > - This 'error' is the client's fault (bad gid) so > - don't enter it as an error in our own log. > - Return an error response to the client. > - */ > - sa_status = IB_SA_MAD_STATUS_INVALID_GID; > - goto Exit; > -#else > - /* Just use "first" router (if it exists) for now */ > - p_rtr = > - (osm_router_t *) cl_qmap_head(&p_rcv-> > - p_subn-> > - rtr_guid_tbl); > + > + /* Find the router port that is configured to handle > + this prefix, if any: */ > + osm_prefix_route_t *route = NULL; > + osm_prefix_route_t *r = (osm_prefix_route_t *) > + cl_qlist_head(&p_rcv->p_subn->prefix_routes_list); > + > + while (r != (osm_prefix_route_t *) > + cl_qlist_end(&p_rcv->p_subn->prefix_routes_list)) > + { > + if (r->prefix == p_pr->dgid.unicast.prefix || > + r->prefix == 0) > + { > + route = r; > + break; > + } > + r = (osm_prefix_route_t *) cl_qlist_next(&r->list_item); > + } > + > + if (! 
route) { > + /* > + This 'error' is the client's fault (bad gid) so > + don't enter it as an error in our own log. > + Return an error response to the client. > + */ > + sa_status = IB_SA_MAD_STATUS_INVALID_GID; > + goto Exit; > + } else if (route->guid == 0) { > + /* first router */ > + p_rtr = (osm_router_t *) > + cl_qmap_head(&p_rcv-> > + p_subn-> > + rtr_guid_tbl); > + } else { > + p_rtr = (osm_router_t *) > + cl_qmap_get(&p_rcv-> > + p_subn-> > + rtr_guid_tbl, > + route->guid); > + } > + > if (p_rtr == > (osm_router_t *) cl_qmap_end(&p_rcv-> > p_subn-> All this "non-local" section looks for me as good candidate for separate function. > @@ -1380,7 +1395,7 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, > { > osm_log(p_rcv->p_log, OSM_LOG_ERROR, > "__osm_pr_rcv_get_end_points: ERR 1F22: " > - "Off subnet DGID but no routers found\n"); > + "Off subnet DGID but router not found\n"); > sa_status = > IB_SA_MAD_STATUS_INVALID_GID; > goto Exit; > @@ -1390,7 +1405,6 @@ __osm_pr_rcv_get_end_points(IN osm_pr_rcv_t * const p_rcv, > dest_guid = osm_port_get_guid(p_rtr_port); > if (p_dgid) > *p_dgid = p_pr->dgid; > -#endif > } > } > > @@ -2134,22 +2148,14 @@ void osm_pr_rcv_process(IN void *context, IN void *data) > &sl, &flow_label, &hop_limit); > ib_path_rec_set_sl(&p_pr_item->path_rec, sl); > ib_path_rec_set_qos_class(&p_pr_item->path_rec, 0); > -#ifndef ROUTER_EXP > - p_pr_item->path_rec.hop_flow_raw = > - cl_hton32(hop_limit) | (flow_label << 8); > -#else > + > /* HopLimit is not yet set in non link local MC groups */ > /* If it were, this would not be needed */ > - if (ib_mgid_get_scope > - (&p_mgrp->mcmember_rec.mgid) == MC_SCOPE_LINK_LOCAL) > - p_pr_item->path_rec. > - hop_flow_raw = > - cl_hton32(hop_limit) | (flow_label << 8); > - else > - p_pr_item->path_rec. 
> - hop_flow_raw = > - cl_hton32(IB_HOPLIMIT_MAX) | (flow_label << 8); > -#endif > + if (ib_mgid_get_scope(&p_mgrp->mcmember_rec.mgid) != MC_SCOPE_LINK_LOCAL) > + hop_limit = IB_HOPLIMIT_MAX; > + > + p_pr_item->path_rec.hop_flow_raw = > + cl_hton32(hop_limit) | (flow_label << 8); > > cl_qlist_insert_tail(&pr_list, (cl_list_item_t *) > & p_pr_item->pool_item); > diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c > index 0f109a5..834d283 100644 > --- a/opensm/opensm/osm_subnet.c > +++ b/opensm/opensm/osm_subnet.c > @@ -91,6 +91,7 @@ void osm_subn_construct(IN osm_subn_t * const p_subn) > cl_qmap_init(&p_subn->sm_guid_tbl); > cl_qlist_init(&p_subn->sa_sr_list); > cl_qlist_init(&p_subn->sa_infr_list); > + cl_qlist_init(&p_subn->prefix_routes_list); And also prefix_routes_list elements should be freed on subnet object destruction. I'm adding this now. Thanks for your great work! Sasha > cl_qmap_init(&p_subn->rtr_guid_tbl); > cl_qmap_init(&p_subn->prtn_pkey_tbl); > cl_qmap_init(&p_subn->mgrp_mlid_tbl); > @@ -475,6 +476,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const p_opt) > p_opt->exit_on_fatal = TRUE; > p_opt->enable_quirks = FALSE; > p_opt->no_clients_rereg = FALSE; > + p_opt->prefix_routes_file = OSM_DEFAULT_PREFIX_ROUTES_FILE; > subn_set_default_qos_options(&p_opt->qos_options); > subn_set_default_qos_options(&p_opt->qos_ca_options); > subn_set_default_qos_options(&p_opt->qos_sw0_options); > @@ -686,6 +688,112 @@ subn_dump_qos_options(FILE * file, > > /********************************************************************** > **********************************************************************/ > +static ib_api_status_t > +append_prefix_route(IN osm_subn_t * const p_subn, uint64_t prefix, uint64_t guid) > +{ > + osm_prefix_route_t *route; > + > + route = malloc(sizeof *route); > + if (! 
route) { > + osm_log(&p_subn->p_osm->log, OSM_LOG_ERROR, "%s: out of memory", __FUNCTION__); > + return IB_ERROR; > + } > + > + route->prefix = cl_hton64(prefix); > + route->guid = cl_hton64(guid); > + cl_qlist_insert_tail(&p_subn->prefix_routes_list, &route->list_item); > + return IB_SUCCESS; > +} > + > +static ib_api_status_t > +osm_parse_prefix_routes_file(IN osm_subn_t * const p_subn) > +{ > + osm_log_t *log = &p_subn->p_osm->log; > + FILE *fp; > + char buf[1024]; > + int line = 0; > + int errors = 0; > + > + while (! cl_is_qlist_empty(&p_subn->prefix_routes_list)) { > + cl_list_item_t *item = cl_qlist_remove_head(&p_subn->prefix_routes_list); > + free(item); > + } > + > + fp = fopen(p_subn->opt.prefix_routes_file, "r"); > + if (! fp) { > + if (errno == ENOENT) > + return IB_SUCCESS; > + > + osm_log(log, OSM_LOG_ERROR, "%s: fopen(%s) failed: %s", > + __FUNCTION__, p_subn->opt.prefix_routes_file, strerror(errno)); > + return IB_ERROR; > + } > + > + while (fgets(buf, sizeof buf, fp) != NULL) { > + char *p_prefix, *p_guid, *p_extra, *p_last, *p_end; > + uint64_t prefix, guid; > + > + line++; > + if (errors > 10) > + break; > + > + p_prefix = strtok_r(buf, " \t\n", &p_last); > + if (! p_prefix) > + continue; /* ignore blank lines */ > + > + if (*p_prefix == '#') > + continue; /* ignore comment lines */ > + > + p_guid = strtok_r(NULL, " \t\n", &p_last); > + if (! 
p_guid) { > + osm_log(log, OSM_LOG_ERROR, "%s:%d: missing GUID\n", > + p_subn->opt.prefix_routes_file, line); > + errors++; > + continue; > + } > + > + p_extra = strtok_r(NULL, " \t\n", &p_last); > + if (p_extra && *p_extra != '#') { > + osm_log(log, OSM_LOG_INFO, "%s:%d: extra tokens ignored\n", > + p_subn->opt.prefix_routes_file, line); > + } > + > + if (strcmp(p_prefix, "*") == 0) > + prefix = 0; > + else { > + prefix = strtoull(p_prefix, &p_end, 16); > + if (*p_end != '\0') { > + osm_log(log, OSM_LOG_ERROR, "%s:%d: illegal prefix: %s\n", > + p_subn->opt.prefix_routes_file, line, p_prefix); > + errors++; > + continue; > + } > + } > + > + if (strcmp(p_guid, "*") == 0) > + guid = 0; > + else { > + guid = strtoull(p_guid, &p_end, 16); > + if (*p_end != '\0' && *p_end != '#') { > + osm_log(log, OSM_LOG_ERROR, "%s:%d: illegal GUID: %s\n", > + p_subn->opt.prefix_routes_file, line, p_guid); > + errors++; > + continue; > + } > + } > + > + if (append_prefix_route(p_subn, prefix, guid) != IB_SUCCESS) { > + errors++; > + break; > + } > + } > + > + fclose(fp); > + return (errors == 0) ? 
IB_SUCCESS : IB_ERROR; > +} > + > +/********************************************************************** > + **********************************************************************/ > ib_api_status_t osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn) > { > char *p_cache_dir = getenv("OSM_CACHE_DIR"); > @@ -745,6 +853,8 @@ ib_api_status_t osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn) > if (p_subn->opt.qos) > osm_qos_parse_policy_file(p_subn); > > + osm_parse_prefix_routes_file(p_subn); > + > return IB_SUCCESS; > } > > @@ -1285,6 +1395,9 @@ ib_api_status_t osm_subn_parse_conf_file(IN osm_subn_opt_t * const p_opts) > > opts_unpack_boolean("no_clients_rereg", > p_key, p_val, &p_opts->no_clients_rereg); > + > + opts_unpack_charp("prefix_routes_file", > + p_key, p_val, &p_opts->prefix_routes_file); > } > fclose(opts_file); > > @@ -1606,6 +1719,11 @@ ib_api_status_t osm_subn_write_conf_file(IN osm_subn_opt_t * const p_opts) > "QoS Router ports options", "qos_rtr", > &p_opts->qos_rtr_options); > > + fprintf(opts_file, > + "# Prefix routes file name\n" > + "prefix_routes_file %s\n\n", > + p_opts->prefix_routes_file); > + > /* optional string attributes ... */ > > fclose(opts_file); From sashak at voltaire.com Mon Nov 19 12:22:15 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 20:22:15 +0000 Subject: [ofa-general] [PATCH] opensm: support multiple routers in a subnet In-Reply-To: <20071117203327.GB32058@obsidianresearch.com> References: <20071116080840.GC30090@obsidianresearch.com> <20071117203327.GB32058@obsidianresearch.com> Message-ID: <20071119202215.GM5986@sashak.voltaire.com> On 13:33 Sat 17 Nov , Jason Gunthorpe wrote: > On Fri, Nov 16, 2007 at 01:08:40AM -0700, Rolf Manderscheid wrote: > > > If a path record query is made for an off-subnet DGID, the SA needs to > > return a path record where the DLID points to the router port that > > handles the DGID prefix. 
In the case of a subnet with only one > > router, the SA could just pick "the router", and that's exactly what > > the ROUTER_EXP code did. However, ROUTER_EXP did not look beyond the > > first available router. > > One thing we learned during the show is that the format of this file > should be improved slightly. I suggest copying the usual linux 'ip > route' syntax > > 2001::/64 via fe80::xxx:xxxx:xxxx > > The important thing is adding the prefix bits, but also normalzing the > information to match IPv6 convections. In future other keywords beyond > 'via' could be added, like 'mut' and or so on. > > The special word 'default' means ::/0 It is nice idea. For me it looks such improvements could be done subsequently and in way which preserved "backward compatibility", not sure this is so important yet. Sasha From hrosenstock at xsigo.com Mon Nov 19 12:14:52 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 19 Nov 2007 12:14:52 -0800 Subject: [ofa-general] Re: [PATCH] opensm: support multiple routers in a subnet In-Reply-To: <20071119201844.GL5986@sashak.voltaire.com> References: <20071116080840.GC30090@obsidianresearch.com> <20071119201844.GL5986@sashak.voltaire.com> Message-ID: <1195503292.6727.232.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-19 at 20:18 +0000, Sasha Khapyorsky wrote: > Hi Rolf, > > On 01:08 Fri 16 Nov , Rolf Manderscheid wrote: > > Hi Sasha, > > > > If a path record query is made for an off-subnet DGID, the SA needs to > > return a path record where the DLID points to the router port that > > handles the DGID prefix. In the case of a subnet with only one > > router, the SA could just pick "the router", and that's exactly what > > the ROUTER_EXP code did. However, ROUTER_EXP did not look beyond the > > first available router. > > > > When additional routers are added, the SA needs more information. 
The > > mechanism for gathering this information has not yet been specified, > > so in the meantime, this patch adds a configuration file that > > specifies which router ports handle which prefixes. > > > > The patch also removes all occurrences of ROUTER_EXP ifdefs. The > > default behaviour remains unchanged with one minor exception: > > hop limits are set to 0xFF for path records to multicast DGIDs if > > the scope is non-local and to unicast DGIDs if off-subnet. > > This used to happen for ROUTER_EXP only. > > > > Now, the same binary can be configured at run-time to enable the > > ROUTER_EXP behaviour with a generic configuration file, or to handle > > multiple routers on a subnet with a more explicit configuration file. > > See the man page for details. > > > > Signed-off-by: Rolf Manderscheid > > Applied. Thanks. > > I like the idea and the patch. > > However have some mostly minor comments. I will be "out of office" and > likely off-line for next couple of days, so I will do some changes > in-place. The rest (less critical) improvements could be added as > subsequent patch. > > > > > -- > > > > One consequence of this patch is that people accustomed to using > > ROUTER_EXP will need to specify a configuration file to get the > > same behaviour. I toyed with the idea of keeping one ROUTER_EXP ifdef > > to control the default behaviour, but then we're back to having two > > versions of opensm with different default behaviours, and the > > counter-intuitive: empty cfg file != non-existent cfg file. One of > > the goals was to get to a single standard binary. So, to help avoid > > surprises, I actually added back one ifdef ROUTER_EXP which causes the > > compilation to fail with a useful message. This only helps those who > > both build and configure their special ROUTER_EXP opensms, but I > > suspect that's most. > > > > Thanks to Hal for reviewing early versions of this patch and providing > > feedback. 
> > > > Rolf > > > > --- > > diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h > > index aa8d378..db58919 100644 > > --- a/opensm/include/opensm/osm_base.h > > +++ b/opensm/include/opensm/osm_base.h > > @@ -253,6 +253,22 @@ BEGIN_C_DECLS > > #endif /* __WIN__ */ > > /***********/ > > > > +/****d* OpenSM: Base/OSM_DEFAULT_PREFIX_ROUTES_FILE > > +* NAME > > +* OSM_DEFAULT_PREFIX_ROUTES_FILE > > +* > > +* DESCRIPTION > > +* Specifies the default prefix routes file name > > +* > > +* SYNOPSIS > > +*/ > > +#ifdef __WIN__ > > +#define OSM_DEFAULT_PREFIX_ROUTES_FILE strcat(GetOsmCachePath(), "osm-prefix-routes.conf") > > +#else > > +#define OSM_DEFAULT_PREFIX_ROUTES_FILE "/etc/ofa/opensm-prefix-routes.conf" > > +#endif > > +/***********/ > > + > > Recently we switched to using configurable config directory for OpenSM. > I will add this separately. > > > /****d* OpenSM: Base/OSM_DEFAULT_SWEEP_INTERVAL_SECS > > * NAME > > * OSM_DEFAULT_SWEEP_INTERVAL_SECS > > diff --git a/opensm/include/opensm/osm_prefix_route.h b/opensm/include/opensm/osm_prefix_route.h > > new file mode 100644 > > index 0000000..cebd532 > > --- /dev/null > > +++ b/opensm/include/opensm/osm_prefix_route.h > > @@ -0,0 +1,52 @@ > > +/* > > + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. > > + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. > > + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. > > + * > > + * This software is available to you under a choice of one of two > > + * licenses. 
You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > + * > > + * Redistribution and use in source and binary forms, with or > > + * without modification, are permitted provided that the following > > + * conditions are met: > > + * > > + * - Redistributions of source code must retain the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer. > > + * > > + * - Redistributions in binary form must reproduce the above > > + * copyright notice, this list of conditions and the following > > + * disclaimer in the documentation and/or other materials > > + * provided with the distribution. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND > > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS > > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN > > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN > > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > + * SOFTWARE. > > + * > > + */ > > + > > +#ifndef _OSM_PREFIX_ROUTES_H_ > > +#define _OSM_PREFIX_ROUTES_H_ > > + > > +#include > > We are not using *_osd.h header files directly. There should be just > > #include > > Changing this. 
> > > +#include > > + > > +typedef struct { > > + cl_list_item_t list_item; /* must be first */ > > + uint64_t prefix; /* network order, zero means "any" */ > > + uint64_t guid; /* network order, zero means "any" */ > > +} osm_prefix_route_t; > > + > > +#ifdef ROUTER_EXP > > +#error ROUTER_EXP is deprecated, specify prefix routes at runtime instead (see opensm man page for details) > > +#endif > > + > > +#endif /* _OSM_PREFIX_ROUTES_H_ */ > > What do you think? Will it be helpful to merge osm_prefix_routes.h and > osm_router.h (or actually to put 'struct osm_prefix_route' definition > in osm_router.h)? I think this will ultimately be a separate SM attribute for the router so separating this seems better to me. > When adding new header file - you need to add this to > include/Makefile.am EXTRA_DIST list, it is in order to not break > 'make dist' functionality. I'm adding this now. -- Hal [snip...] From sashak at voltaire.com Mon Nov 19 12:56:29 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 20:56:29 +0000 Subject: [ofa-general] Re: PATCH diags: add saquery option to get path records by GIDs In-Reply-To: <20071116233222.GE30090@obsidianresearch.com> References: <20071116233222.GE30090@obsidianresearch.com> Message-ID: <20071119205629.GN5986@sashak.voltaire.com> On 16:32 Fri 16 Nov , Rolf Manderscheid wrote: > Hi Sasha, > > This patch adds a --sgid-to-dgid option to saquery, useful for > validating configuration of opensm prefix routes. > > Rolf > > Signed-off-by: Rolf Manderscheid Applied. Thanks. 
Sasha From sashak at voltaire.com Mon Nov 19 13:44:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 21:44:35 +0000 Subject: [ofa-general] Re: [ANNOUNCE] management tarballs release Message-ID: <20071119214435.GR5986@sashak.voltaire.com> Hi, There is a new release of the management (OpenSM and infiniband diagnostics) tarballs available in: http://www.openfabrics.org/downloads/management/ md5sum: 25b9491f90c7e851f5bafd556bcac5f6 libibcommon-1.0.6.tar.gz cec79c655914b83d13457d9a4096ef2b libibumad-1.1.5.tar.gz 169ddc65af6d1cfe8a92988ce7a60627 libibmad-1.1.4.tar.gz f8abde643dab46ce9b1dfec2fa644ab5 infiniband-diags-1.3.4.tar.gz 30f2260c4fe2ad0737d644e737b0d3bf opensm-3.1.7.tar.gz (libibcommon was not changed since last release). Sasha From jsquyres at cisco.com Mon Nov 19 13:32:28 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Mon, 19 Nov 2007 16:32:28 -0500 Subject: [ofa-general] OT: MPI 2.1: Getting the band back together Message-ID: (I realize that this post is slightly off-topic, but at least some of you on this list care about MPI and we're trying to spread the word as far as possible, so I hope the rest of you can forgive this OT posting. Thanks) All MPI users / developers: At both the Euro PVM/MPI'07 and the SC'07 conferences, we had productive meetings about how to progress the MPI standard: what to do about pending bugs in the MPI-1 and MPI-2 specs, new ideas that are emerging for possible successors to MPI-2, etc. It has been determined that the MPI Forum is formally going to re-convene and start having both electronic and physical meetings to resolve these issues. The first meeting is going to be in Chicago, IL, USA in early January 2008. I strongly encourage all interested parties to participate in the process. Please see the mpi-21 mailing list archives (particularly over the past week) and join the list yourself: http://www.mpi-forum.org/mpi2_1/index.htm Please spread the word. Thanks. 
-- Jeff Squyres Cisco Systems From sashak at voltaire.com Mon Nov 19 14:00:04 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 22:00:04 +0000 Subject: [ofa-general] [PATCH] libibmad/dump.c: Support link speed and width vendor extensions In-Reply-To: <20071117205349.GD32058@obsidianresearch.com> References: <1194968323.6542.213.camel@hrosenstock-ws.xsigo.com> <20071117205349.GD32058@obsidianresearch.com> Message-ID: <20071119220004.GS5986@sashak.voltaire.com> On 13:53 Sat 17 Nov , Jason Gunthorpe wrote: > On Tue, Nov 13, 2007 at 07:38:43AM -0800, Hal Rosenstock wrote: > > libibmad/dump.c: Support link speed and width vendor extensions > > > > When decoding values, handle vendor extensions to link speed and width > > including accommodating a "documentation" change between IBA 1.2 and > > 1.2.1 > > Isn't this better done with a bitmap match? Yes, it is better IMO. Sasha > > const unsigned int widths[] = {1,4,8,12}; > > char S[300]; > int off = 0; > for (unsigned int I = 0; I != sizeof(width)/sizeof(width[0]) && off < sizeof(S); I++) > if ((width >> I) & 1) > off += snprintf(S,sizeof(S)-off,"%uX ",widths[I]); > > Regards, > Jason From sashak at voltaire.com Mon Nov 19 14:08:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 19 Nov 2007 22:08:35 +0000 Subject: [ofa-general] I'm not here this week Message-ID: <20071119220835.GU5986@sashak.voltaire.com> Hi, I'm traveling this week (starting today) and will likely be off-line most of the time. I hope nothing urgent happens during those days. Sasha From swise at opengridcomputing.com Mon Nov 19 14:14:17 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 19 Nov 2007 16:14:17 -0600 Subject: [ofa-general] [GIT PULL] ofed-1.3 - cxgb3 rh5.1 backport Message-ID: <47420AB9.50306@opengridcomputing.com> Vlad, I've added a RH5.1 backport for cxgb3. Please pull from: git://git.openfabrics.org/~swise/ofed-1.3 stevo Thanks, Steve.
From hanafim.ctr at asc.hpc.mil Mon Nov 19 14:22:45 2007 From: hanafim.ctr at asc.hpc.mil (MAHMOUD HANAFI) Date: Mon, 19 Nov 2007 17:22:45 -0500 Subject: [ofa-general] ofed build errors with centos4.5 2.6.9-55.0.12.ELsmp Message-ID: <47420CB5.5090906@asc.hpc.mil> I have been unable to build any of the 1.2.5.x using the build.sh script. Has any one seen this before... thanks, ---- Building ofa_user RPMs. Please wait... Running rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr/local/ofed' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --with-mstflint --with-perftest --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /export/DISTRO/centos4.5/OFED-1.2.5/SRPMS/ofa_user-1.2.5-0.src.rpm - ERROR: Failed executing "rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr/local/ofed' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --with-mstflint --with-perftest --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' 
/export/DISTRO/centos4.5/OFED-1.2.5/SRPMS/ofa_user-1.2.5-0.src.rpm" See log file: /tmp/OFED.build.24587.log Press Enter to continue... ---output from /tmp/OFED.build.24587.log--- gcc -shared .libs/src_librdmacm_la-cma.o -L/usr/lib -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/libibverbs/src -libverbs -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm -m32 -Wl,--version-script=./src/librdmacm.map -Wl,-soname -Wl,librdmacm.so.1 -o src/.libs/librdmacm.so.1.0.0 (cd src/.libs && rm -f librdmacm.so.1 && ln -s librdmacm.so.1.0.0 librdmacm.so.1) (cd src/.libs && rm -f librdmacm.so && ln -s librdmacm.so.1.0.0 librdmacm.so) ar cru src/.libs/librdmacm.a src_librdmacm_la-cma.o ranlib src/.libs/librdmacm.a creating src/librdmacm.la (cd src/.libs && rm -f librdmacm.la && ln -s ../librdmacm.la librdmacm.la) if gcc -DHAVE_CONFIG_H -I. -I. -I. -I./include -I../libibverbs/include -g -Wall -D_GNU_SOURCE -m32 -g -O2 -L/usr/lib -MT cmatose.o -MD -MP -MF ".deps/cmatose.Tpo" -c -o cmatose.o `test -f 'examples/cmatose.c' || echo './'`examples/cmatose.c; \ then mv -f ".deps/cmatose.Tpo" ".deps/cmatose.Po"; else rm -f ".deps/cmatose.Tpo"; exit 1; fi /bin/sh ./libtool --tag=CC --mode=link gcc -g -Wall -D_GNU_SOURCE -m32 -g -O2 -L/usr/lib -L../libibverbs/src -libverbs -L. 
-o examples/ucmatose cmatose.o ./src/librdmacm.la gcc -g -Wall -D_GNU_SOURCE -m32 -g -O2 -o examples/.libs/ucmatose cmatose.o -L/usr/lib -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/libibverbs/src -libverbs -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm ./src/.libs/librdmacm.so -Wl,--rpath -Wl,/usr/local/ofed/lib ./src/.libs/librdmacm.so: undefined reference to `ibv_copy_ah_attr_from_kern' collect2: ld returned 1 exit status make[2]: *** [examples/ucmatose] Error 1 make[2]: Leaving directory `/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm' make[1]: *** [all] Error 2 make[1]: Leaving directory `/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm' make: *** [librdmacm] Error 2 error: Bad exit status from /var/tmp/rpm-tmp.98738 (%install) RPM build errors: user vlad does not exist - using root group vlad does not exist - using root user vlad does not exist - using root group vlad does not exist - using root Bad exit status from /var/tmp/rpm-tmp.98738 (%install) ERROR: Failed executing "rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr/local/ofed' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --with-mstflint --with-perftest --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /export/DISTRO/centos4.5/OFED-1.2.5/SRPMS/ofa_user-1.2.5-0.src.rpm" ~ From hanafim.ctr at asc.hpc.mil Mon Nov 19 14:22:45 2007 From: hanafim.ctr at asc.hpc.mil (MAHMOUD HANAFI) Date: Mon, 19 Nov 2007 17:22:45 -0500 Subject: [ofa-general] ofed 
build errors with centos4.5 2.6.9-55.0.12.ELsmp Message-ID: <47420CB5.5090906@asc.hpc.mil> I have been unable to build any of the 1.2.5.x using the build.sh script. Has any one seen this before... thanks, ---- Building ofa_user RPMs. Please wait... Running rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr/local/ofed' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --with-mstflint --with-perftest --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /export/DISTRO/centos4.5/OFED-1.2.5/SRPMS/ofa_user-1.2.5-0.src.rpm - ERROR: Failed executing "rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr/local/ofed' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --with-mstflint --with-perftest --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /export/DISTRO/centos4.5/OFED-1.2.5/SRPMS/ofa_user-1.2.5-0.src.rpm" See log file: /tmp/OFED.build.24587.log Press Enter to continue... 
---output from /tmp/OFED.build.24587.log--- gcc -shared .libs/src_librdmacm_la-cma.o -L/usr/lib -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/libibverbs/src -libverbs -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm -m32 -Wl,--version-script=./src/librdmacm.map -Wl,-soname -Wl,librdmacm.so.1 -o src/.libs/librdmacm.so.1.0.0 (cd src/.libs && rm -f librdmacm.so.1 && ln -s librdmacm.so.1.0.0 librdmacm.so.1) (cd src/.libs && rm -f librdmacm.so && ln -s librdmacm.so.1.0.0 librdmacm.so) ar cru src/.libs/librdmacm.a src_librdmacm_la-cma.o ranlib src/.libs/librdmacm.a creating src/librdmacm.la (cd src/.libs && rm -f librdmacm.la && ln -s ../librdmacm.la librdmacm.la) if gcc -DHAVE_CONFIG_H -I. -I. -I. -I./include -I../libibverbs/include -g -Wall -D_GNU_SOURCE -m32 -g -O2 -L/usr/lib -MT cmatose.o -MD -MP -MF ".deps/cmatose.Tpo" -c -o cmatose.o `test -f 'examples/cmatose.c' || echo './'`examples/cmatose.c; \ then mv -f ".deps/cmatose.Tpo" ".deps/cmatose.Po"; else rm -f ".deps/cmatose.Tpo"; exit 1; fi /bin/sh ./libtool --tag=CC --mode=link gcc -g -Wall -D_GNU_SOURCE -m32 -g -O2 -L/usr/lib -L../libibverbs/src -libverbs -L. 
-o examples/ucmatose cmatose.o ./src/librdmacm.la gcc -g -Wall -D_GNU_SOURCE -m32 -g -O2 -o examples/.libs/ucmatose cmatose.o -L/usr/lib -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/libibverbs/src -libverbs -L/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm ./src/.libs/librdmacm.so -Wl,--rpath -Wl,/usr/local/ofed/lib ./src/.libs/librdmacm.so: undefined reference to `ibv_copy_ah_attr_from_kern' collect2: ld returned 1 exit status make[2]: *** [examples/ucmatose] Error 1 make[2]: Leaving directory `/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm' make[1]: *** [all] Error 2 make[1]: Leaving directory `/var/tmp/OFEDRPM/BUILD/ofa_user-1.2.5/src/userspace/librdmacm' make: *** [librdmacm] Error 2 error: Bad exit status from /var/tmp/rpm-tmp.98738 (%install) RPM build errors: user vlad does not exist - using root group vlad does not exist - using root user vlad does not exist - using root group vlad does not exist - using root Bad exit status from /var/tmp/rpm-tmp.98738 (%install) ERROR: Failed executing "rpmbuild --rebuild --define '_topdir /var/tmp/OFEDRPM' --define '_prefix /usr/local/ofed' --define 'build_root /var/tmp/OFED' --define 'configure_options --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --with-mstflint --with-perftest --sysconfdir=/etc --mandir=/usr/share/man' --define 'configure_options32 --with-libcxgb3 --with-libibcm --with-libibcommon --with-libibumad --with-libibverbs --with-libipathverbs --with-libmlx4 --with-libmthca --with-librdmacm --with-qlvnictools --sysconfdir=/etc --mandir=/usr/share/man' --define 'build_32bit 1' --define '_mandir /usr/share/man' /export/DISTRO/centos4.5/OFED-1.2.5/SRPMS/ofa_user-1.2.5-0.src.rpm" ~ From hrosenstock at xsigo.com Mon Nov 19 14:27:11 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 19 Nov 2007 14:27:11 -0800 Subject: [ofa-general] [PATCH] 
libibmad/dump.c: Support link speed and width vendor extensions In-Reply-To: <20071119220004.GS5986@sashak.voltaire.com> References: <1194968323.6542.213.camel@hrosenstock-ws.xsigo.com> <20071117205349.GD32058@obsidianresearch.com> <20071119220004.GS5986@sashak.voltaire.com> Message-ID: <1195511231.6727.251.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-19 at 22:00 +0000, Sasha Khapyorsky wrote: > On 13:53 Sat 17 Nov , Jason Gunthorpe wrote: > > On Tue, Nov 13, 2007 at 07:38:43AM -0800, Hal Rosenstock wrote: > > > libibmad/dump.c: Support link speed and width vendor extensions > > > > > > When decoding values, handle vendor extensions to link speed and width > > > including accommodating a "documentation" change between IBA 1.2 and > > > 1.2.1 > > > > Isn't this better done with a bitmap match? > > Yes, it is better IMO. I think this is applicable elsewhere as well. I'll see if I can cook up a patch in my spare time. -- Hal > Sasha > > > > > const unsigned int widths[] = {1,4,8,12}; > > > > char S[300]; > > int off = 0; > > for (unsigned int I = 0; I != sizeof(width)/sizeof(width[0]) && off < sizeof(S); I++) > > if ((width >> I) & 1) > > off += snprintf(S,sizeof(S)-off,"%uX ",widths[I]); > > > > Regards, > > Jason From perkinjo at cse.ohio-state.edu Mon Nov 19 16:03:04 2007 From: perkinjo at cse.ohio-state.edu (Jonathan Perkins) Date: Mon, 19 Nov 2007 19:03:04 -0500 Subject: [ofa-general] MVAPICH2 1.0.1 SRPM Available Message-ID: <47422438.8030902@cse.ohio-state.edu> Vlad: Hi, I recently uploaded a new SRPM for MVAPICH2 to the openfabrics server. This is located in ~perkinjo/ofed_1_3/ and is identified by the latest.txt file. -- Jonathan Perkins http://www.cse.ohio-state.edu/~perkinjo From disconcertsyop at idt.net Mon Nov 19 16:24:30 2007 From: disconcertsyop at idt.net (Avis Moran) Date: Mon, 19 Nov 2007 20:24:30 -0400 Subject: [ofa-general] For General attention Message-ID: <295586737.51582012098484@idt.net> An HTML attachment was scrubbed... 
URL: From joe at perches.com Mon Nov 19 17:48:11 2007 From: joe at perches.com (Joe Perches) Date: Mon, 19 Nov 2007 17:48:11 -0800 Subject: [ofa-general] [PATCH 19/59] drivers/infiniband: Add missing "space" In-Reply-To: <1195523331-15303-19-git-send-email-joe@perches.com> References: 1234567 <1195523331-15303-1-git-send-email-joe@perches.com> <1195523331-15303-2-git-send-email-joe@perches.com> <1195523331-15303-3-git-send-email-joe@perches.com> <1195523331-15303-4-git-send-email-joe@perches.com> <1195523331-15303-5-git-send-email-joe@perches.com> <1195523331-15303-6-git-send-email-joe@perches.com> <1195523331-15303-7-git-send-email-joe@perches.com> <1195523331-15303-8-git-send-email-joe@perches.com> <1195523331-15303-9-git-send-email-joe@perches.com> <1195523331-15303-10-git-send-email-joe@perches.com> <1195523331-15303-11-git-send-email-joe@perches.com> <1195523331-15303-12-git-send-email-joe@perches.com> <1195523331-15303-13-git-send-email-joe@perches.com> <1195523331-15303-14-git-send-email-joe@perches.com> <1195523331-15303-15-git-send-email-joe@perches.com> <1195523331-15303-16-git-send-email-joe@perches.com> <1195523331-15303-17-git-send-email-joe@perches.com> <1195523331-15303-18-git-send-email-joe@perches.com> <1195523331-15303-19-git-send-email-joe@perches.com> Message-ID: <1195523331-15303-20-git-send-email-joe@perches.com> Signed-off-by: Joe Perches --- drivers/infiniband/hw/ehca/ehca_cq.c | 2 +- drivers/infiniband/hw/ehca/ehca_qp.c | 6 +++--- drivers/infiniband/ulp/iser/iser_initiator.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 79c25f5..0467c15 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -246,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, } else { if (h_ret != H_PAGE_REGISTERED) { ehca_err(device, "Registration of page failed " - "ehca_cq=%p 
cq_num=%x h_ret=%li" + "ehca_cq=%p cq_num=%x h_ret=%li " "counter=%i act_pages=%i", my_cq, my_cq->cq_number, h_ret, counter, param.act_pages); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 2e3e654..091985b 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -858,7 +858,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT" + ehca_err(pd->device, "Could not modify SRQ to INIT " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -872,7 +872,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ" + ehca_err(pd->device, "Could not enable SRQ " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; @@ -886,7 +886,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, update_mask, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR" + ehca_err(pd->device, "Could not modify SRQ to RTR " "ehca_qp=%p qp_num=%x h_ret=%li", my_qp, my_qp->real_qp_num, hret); goto create_srq2; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index a6f2303..ba1b455 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -561,7 +561,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, if (opcode == ISCSI_OP_SCSI_CMD_RSP) { itt = get_itt(hdr->itt); /* mask out cid and age bits */ if (!(itt < session->cmds_max)) - iser_err("itt can't be matched to task!!!" + iser_err("itt can't be matched to task!!! 
" "conn %p opcode %d cmds_max %d itt %d\n", conn->iscsi_conn,opcode,session->cmds_max,itt); /* use the mapping given with the cmds array indexed by itt */ -- 1.5.3.5.652.gf192c From krkumar2 at in.ibm.com Mon Nov 19 19:52:10 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Tue, 20 Nov 2007 09:22:10 +0530 Subject: [ofa-general] Re: [PATCH] IPoIB: Remove redundant check in xmit handler In-Reply-To: Message-ID: Hi Roland, I am not sure if my answer was clear, so I will try to be clearer: qdisc_run() first checks netif_queue_stopped(dev), and then if it can get the __LINK_STATE_QDISC_RUNNING bit, it calls __qdisc_run() which will do the actual xmit. Subsequent calls to xmit within __qdisc_run checks for netif_queue_stopped. So there is no way that xmit can be called with a stopped queue as the core does it for every skb. And no other cpu can execute this at the same time as the RUNNING bit is held. So this is a completely safe removal of check for every skb. I have tested this code extensively as part of batching skbs and have never hit it. Thanks, - KK > Hi Roland, > > > This check was added because of a real problem seen in practice a > > while ago. Has something changed in the tx queue locking that makes > > it redundant now? > > I am not sure of how it was earlier, but currently a device's xmit can be > called > only on one cpu at a time (by holding the __LINK_STATE_QDISC_RUNNING > bit in qdisc_run). And queue_stopped check is present before xmit. > > > I seem to remember that I could make the problem race trigger pretty > > fast by making the tx queue very small so that it got stopped a lot. > > I just tested with a smaller queue size (tx queue size=4), put a debug in > the > queue_stopped check in xmit(), and a counter to find how many times the > queue was stopped (in ipoib_send). After a 20 min test run with 64 threads, > the queue was stopped 16.5 million times, but the debug never hit. 
> > I tested with buffer sizes varying from 128 to 16K bytes (though TSO/GSO is > not implemented in IPoIB anyway). > > Thanks, > > - KK > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From rdreier at cisco.com Mon Nov 19 20:25:00 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 19 Nov 2007 20:25:00 -0800 Subject: [ofa-general] Re: [PATCH] IPoIB: Remove redundant check in xmit handler In-Reply-To: (Krishna Kumar2's message of "Tue, 20 Nov 2007 09:22:10 +0530") References: Message-ID: > qdisc_run() first checks netif_queue_stopped(dev), and then if > it can get the __LINK_STATE_QDISC_RUNNING bit, it calls > __qdisc_run() which will do the actual xmit. Subsequent calls > to xmit within __qdisc_run checks for netif_queue_stopped. > > So there is no way that xmit can be called with a stopped queue > as the core does it for every skb. And no other cpu can execute > this at the same time as the RUNNING bit is held. So this is a > completely safe removal of check for every skb. Hmm, I don't see any changes that prevent the race I originally described in http://oss.sgi.com/archives/netdev/2004-12/msg00474.html Maybe your test may just not be able to hit the race, or am I missing something? Anyway medium-term I want to move IPoIB away from LLTX so this doesn't matter that much. - R. 
From rdreier at cisco.com Mon Nov 19 20:29:36 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 19 Nov 2007 20:29:36 -0800 Subject: [ofa-general] IPoIB CQ overrun In-Reply-To: <20071115202302.GK5448@sgi.com> (akepner@sgi.com's message of "Thu, 15 Nov 2007 12:23:02 -0800") References: <20071115202302.GK5448@sgi.com> Message-ID: > The thing that all the IPoIB failures have in common seems to be > an appearance of a "CQ overrun" in syslog, e.g.: > > ib_mthca 0000:06:00.0: CQ overrun on CQN 180082 > We are using MT25204 HCAs with 1.2.0 firmware, and OFED 1.2. OFED 1.2 uses a separate CQ for send completions in connected mode. (I'm assuming you're using the OFED default of connected mode for IPoIB). I guess it would be useful to know which CQ is overrunning, ie whether it is the main IPoIB CQ or one of the CM send CQs. One way to check this would be to add a print to mthca to dump the CQN when a CQ is created, and also add prints to IPoIB just before each call to ib_create_cq() so that the CQNs can be correlated. Another thing you could try would be a 2.6.24-rc kernel (or an OFED 1.3 prerelease I guess), which has a change that moves all completions into one CQ in IPoIB. This may fix the bug by accident. - R. From krkumar2 at in.ibm.com Mon Nov 19 21:27:31 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Tue, 20 Nov 2007 10:57:31 +0530 Subject: [ofa-general] Re: [PATCH] IPoIB: Remove redundant check in xmit handler In-Reply-To: Message-ID: Hi Roland, Roland Dreier wrote on 11/20/2007 09:55:00 AM: > > qdisc_run() first checks netif_queue_stopped(dev), and then if > > it can get the __LINK_STATE_QDISC_RUNNING bit, it calls > > __qdisc_run() which will do the actual xmit. Subsequent calls > > to xmit within __qdisc_run checks for netif_queue_stopped. > > > > So there is no way that xmit can be called with a stopped queue > > as the core does it for every skb. And no other cpu can execute > > this at the same time as the RUNNING bit is held. 
So this is a > > completely safe removal of check for every skb. > > Hmm, I don't see any changes that prevent the race I originally > described in http://oss.sgi.com/archives/netdev/2004-12/msg00474.html > Maybe your test may just not be able to hit the race, or am I missing > something? (Thanks for the link, I hadn't seen this earlier) That race will not happen as CPU#2 cannot call qdisc_restart when CPU#1 is holding the RUNNING bit. In this case, CPU#2 simply queue's the skb to dev->q and returns, while CPU#1 finds this new skb in it's iteration of __qdisc_run (and ends up processing all queue'd skb after checking for stopped queue). > Anyway medium-term I want to move IPoIB away from LLTX so this doesn't > matter that much. Are you planning for 2.6.25? Thanks, - KK From krkumar2 at in.ibm.com Mon Nov 19 21:52:19 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Tue, 20 Nov 2007 11:22:19 +0530 Subject: [ofa-general] Re: [PATCH] IPoIB: Remove redundant check in xmit handler In-Reply-To: Message-ID: I forgot to mention, maybe the RUNNING bit was added as part of this bug fix, which means no LLTX driver need to really check for this condition again in their xmit handler. That might explain the problem you faced then. thanks, - KK Krishna Kumar2/India/IBM wrote on 11/20/2007 10:57:31 AM: > Hi Roland, > > Roland Dreier wrote on 11/20/2007 09:55:00 AM: > > > > qdisc_run() first checks netif_queue_stopped(dev), and then if > > > it can get the __LINK_STATE_QDISC_RUNNING bit, it calls > > > __qdisc_run() which will do the actual xmit. Subsequent calls > > > to xmit within __qdisc_run checks for netif_queue_stopped. > > > > > > So there is no way that xmit can be called with a stopped queue > > > as the core does it for every skb. And no other cpu can execute > > > this at the same time as the RUNNING bit is held. So this is a > > > completely safe removal of check for every skb. 
> > > > Hmm, I don't see any changes that prevent the race I originally > > described in http://oss.sgi.com/archives/netdev/2004-12/msg00474.html > > Maybe your test may just not be able to hit the race, or am I missing > > something? > (Thanks for the link, I hadn't seen this earlier) > > That race will not happen as CPU#2 cannot call qdisc_restart when CPU#1 > is holding the RUNNING bit. In this case, CPU#2 simply queue's the skb > to dev->q and returns, while CPU#1 finds this new skb in it's iteration > of __qdisc_run (and ends up processing all queue'd skb after checking for > stopped queue). > > > Anyway medium-term I want to move IPoIB away from LLTX so this doesn't > > matter that much. > Are you planning for 2.6.25? > > Thanks, > > - KK From vlad at dev.mellanox.co.il Mon Nov 19 23:51:39 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 20 Nov 2007 09:51:39 +0200 Subject: [ofa-general] Re: [GIT PULL] ofed-1.3 - cxgb3 rh5.1 backport In-Reply-To: <47420AB9.50306@opengridcomputing.com> References: <47420AB9.50306@opengridcomputing.com> Message-ID: <4742920B.2010600@dev.mellanox.co.il> Steve Wise wrote: > Vlad, > > I've added a RH5.1 backport for cxgb3. Please pull from: > > git://git.openfabrics.org/~swise/ofed-1.3 stevo > > Thanks, > > Steve. > > Done, Regards, Vladimir From vlad at dev.mellanox.co.il Tue Nov 20 01:02:15 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 20 Nov 2007 11:02:15 +0200 Subject: [ofa-general] ofed build errors with centos4.5 2.6.9-55.0.12.ELsmp In-Reply-To: <47420CB5.5090906@asc.hpc.mil> References: <47420CB5.5090906@asc.hpc.mil> Message-ID: <4742A297.2040204@dev.mellanox.co.il> MAHMOUD HANAFI wrote: > I have been unable to build any of the 1.2.5.x using the build.sh > script. Has any one seen this before... > > thanks, > ---- > > Hi Mahmoud, You should remove OFED RPMs that were installed with CentOS and then rerun OFED-1.2.5* installation. 
You can do this by running 'OFED-1.2.5.3/uninstall.sh'. Regards, Vladimir From vlad at dev.mellanox.co.il Tue Nov 20 01:02:15 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 20 Nov 2007 11:02:15 +0200 Subject: [ofa-general] ofed build errors with centos4.5 2.6.9-55.0.12.ELsmp In-Reply-To: <47420CB5.5090906@asc.hpc.mil> References: <47420CB5.5090906@asc.hpc.mil> Message-ID: <4742A297.2040204@dev.mellanox.co.il> MAHMOUD HANAFI wrote: > I have been unable to build any of the 1.2.5.x using the build.sh > script. Has any one seen this before... > > thanks, > ---- > > Hi Mahmoud, You should remove OFED RPMs that were installed with CentOS and then rerun OFED-1.2.5* installation. You can do this by running 'OFED-1.2.5.3/uninstall.sh'. Regards, Vladimir From vlad at dev.mellanox.co.il Tue Nov 20 01:07:45 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 20 Nov 2007 11:07:45 +0200 Subject: [ofa-general] Re: MVAPICH2 1.0.1 SRPM Available In-Reply-To: <47422438.8030902@cse.ohio-state.edu> References: <47422438.8030902@cse.ohio-state.edu> Message-ID: <4742A3E1.2030507@dev.mellanox.co.il> Jonathan Perkins wrote: > Vlad: > Hi, I recently uploaded a new SRPM for MVAPICH2 to the openfabrics > server. This is located in ~perkinjo/ofed_1_3/ and is identified by the > latest.txt file. > Hi Jonathan, build_ofed.sh script was updated with the new MVAPICH2 location. 
Regards, Vladimir From vlad at lists.openfabrics.org Tue Nov 20 02:55:32 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 20 Nov 2007 02:55:32 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071120-0200 daily build status Message-ID: <20071120105532.15B13E60831@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.23 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.12 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.17 Passed on ia64 with linux-2.6.22 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ppc64 with linux-2.6.14 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on powerpc with linux-2.6.15 Passed on x86_64 
with linux-2.6.13 Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ppc64 with linux-2.6.18-8.el5 Failed: From pk at q-leap.com Tue Nov 20 03:10:34 2007 From: pk at q-leap.com (Peter Kruse) Date: Tue, 20 Nov 2007 12:10:34 +0100 Subject: [ofa-general] RHEL5.1 support? Message-ID: <4742C0AA.9040208@q-leap.com> Hello List! I would like to draw your attention to bug report #785 (https://bugs.openfabrics.org/show_bug.cgi?id=785). The problem is that even the latest ofed 1.2.5.3 fails to compile under RHEL5.1. Do you officially intend to support RHEL 5.1? Thanks, Peter From jackm at dev.mellanox.co.il Tue Nov 20 04:42:35 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 20 Nov 2007 14:42:35 +0200 Subject: [ofa-general] mlx4: problem with resource limits > 2^20 Message-ID: <200711201442.35922.jackm@dev.mellanox.co.il> Roland, We're encountering a problem with resource profiles which have elements which exceed 1 Meg (e.g., log_num_qp=21, or log_num_mtt=21 as module options for mlx4_core) Many kernels allow kmalloc of only up to 128KB (which can support a 1 MB bitmap). If the resource max is greater than 1 Meg, the kmalloc will fail. This occurred for MTTs when allocating the buddy table -- file net/mlx4/mr.c, procedure mlx4_buddy_init(): for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); if (!buddy->bits[i]) goto err_out_free; The kmalloc here fails for max_order > 20. 
Additionally, kmalloc will fail in net/mlx4/alloc.c, procedure mlx4_bitmap_init(): /* num must be a power of 2 */ if (num != roundup_pow_of_two(num)) return -EINVAL; bitmap->last = 0; bitmap->top = 0; bitmap->max = num; bitmap->mask = mask; spin_lock_init(&bitmap->lock); bitmap->table = kzalloc(BITS_TO_LONGS(num) * sizeof (long), GFP_KERNEL); Here, num is the resource max. Thus, if we set the profile to allow log_num_qp=21, the above kzalloc will fail (also because the required bitmap is greater than 128KB). I know that we can use vmalloc here and succeed -- however, this will present a severe problem on x86 systems (really small kernel virtual-memory space). We have 3 options, as I see it: 1. Change the bitmap allocator and buddy systems to use a 2-level scheme. 2. Use vmalloc for allocations greater than 128K, and note that for x86 systems you cannot specify more than 1M for any resource in the profile. 3. Do nothing, and just note that one cannot allocate more than 1M of any resource for ANY system. My own preference is 2 (with maybe some test to determine just what the crossover point is, rather than just having 128K as a defined constant) -- or, given some time, 1 (which is a more general and scalable solution). Any suggestions? - Jack From eeb at sun.com Tue Nov 20 04:45:06 2007 From: eeb at sun.com (Eric Barton) Date: Tue, 20 Nov 2007 12:45:06 +0000 Subject: [ofa-general] Mapping non-contiguous memory in userspace Message-ID: <00c101c82b73$2de22d50$0281a8c0@ebpc> Hi! I'm trying to gauge whether there might be any support to introduce a userspace API for mapping non-contiguous memory. We (the lustre group within SUN) would find it very useful for creating much simpler RDMAs for bulk I/O in userspace. Currently, if the buffers for a bulk data RPC are not contiguous, we have to map each section separately.
This makes the RDMA descriptor which we pass over the wire to the peer who will actually perform the RDMA quite verbose since it requires one (key, address, length) tuple per buffer fragment. If we can map all the fragments together, we could have a single key for all fragments. And if the buffer fragments are page aligned whole pages (which they generally are in our usage), we can collapse the RDMA descriptor into a single contiguous (in network VM) buffer. Cheers, Eric From ilmusichiere at colonialdowns.com Tue Nov 20 04:51:50 2007 From: ilmusichiere at colonialdowns.com (ilmusichiere at colonialdowns.com) Date: Tue, 20 Nov 2007 12:51:50 +0000 Subject: [ofa-general] Trading Update Message-ID: <4742D866.1070606@colonialdowns.com> eTg U Takes Off! Stock Closes UP 25% EnerBrite Technologies Group, Inc. (e tGu) $0.01 UP 25% This one is moving hard today and investors are raking it in. We saw spikes over 50% today and it closed at over 25% increase over Friday. We see this as just the beginning. Once these results hit the street, it could explode tomorrow. There is a time for all things, and this one is now. Get on et G U first thing Tuesday. 
From kliteyn at mellanox.co.il Mon Nov 19 21:18:06 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 20 Nov 2007 07:18:06 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-20:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-19 OpenSM git rev = Mon_Nov_19_15:04:42_2007 [f575328ea80d1cd647711be8f3a22c341a501c7a] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From tziporet at dev.mellanox.co.il Tue Nov 20 06:33:50 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 20 Nov 2007 16:33:50 +0200 Subject: [ofa-general] RHEL5.1 support? In-Reply-To: <4742C0AA.9040208@q-leap.com> References: <4742C0AA.9040208@q-leap.com> Message-ID: <4742F04E.1050001@mellanox.co.il> Peter Kruse wrote: > Hello List! > > I would like to draw your attention to bug report #785 > (https://bugs.openfabrics.org/show_bug.cgi?id=785). > The problem is that even the latest ofed 1.2.5.3 fails > to compile under RHEL5.1. 
Do you officially intend > to support RHEL 5.1? > Only OFED 1.3 will support RHEL5.1 You can take the daily build or wait for the beta that should be out this week Tziporet From pk at q-leap.com Tue Nov 20 06:50:56 2007 From: pk at q-leap.com (Peter Kruse) Date: Tue, 20 Nov 2007 15:50:56 +0100 Subject: [ofa-general] RHEL5.1 support? In-Reply-To: <4742F04E.1050001@mellanox.co.il> References: <4742C0AA.9040208@q-leap.com> <4742F04E.1050001@mellanox.co.il> Message-ID: <4742F450.60401@q-leap.com> Hi, thanks for your reply. Tziporet Koren wrote: > Only OFED 1.3 will support RHEL5.1 good news! > You can take the daily build or wait for the beta that should be out > this week looking forward to that! Peter From jackm at dev.mellanox.co.il Tue Nov 20 07:44:15 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 20 Nov 2007 17:44:15 +0200 Subject: [ofa-general] [PATCH] mlx4_core: increase max number of qp's and of srq's to 128K Message-ID: <200711201744.15766.jackm@dev.mellanox.co.il> mlx4_core: increase max QPs/SRQs to 128K. Signed-off-by: Jack Morgenstein --- Roland, With the advent of large clusters which utilize multicore hosts, 64K qp's is not enough. We want to increase the default maxima for QPs and SRQs to 128K. 
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 89b3f0b..7d2064c 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -76,8 +76,8 @@ static const char mlx4_version[] __devinitdata = DRV_VERSION " (" DRV_RELDATE ")\n"; static struct mlx4_profile default_profile = { - .num_qp = 1 << 16, - .num_srq = 1 << 16, + .num_qp = 1 << 17, + .num_srq = 1 << 17, .rdmarc_per_qp = 1 << 4, .num_cq = 1 << 16, .num_mcg = 1 << 13, From tziporet at mellanox.co.il Tue Nov 20 07:59:12 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 20 Nov 2007 17:59:12 +0200 Subject: [ofa-general] Agenda for today OFED meeting Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E31E@mtlexch01.mtl.com> Agenda on OFED 1.3 beta readiness: ============================ 1. Beta open tasks: * Fix compilation problems on PPC SLES10 with 32 bits - Vlad * SPEC files should be part of each user space package - each owner should take the spec file - ongoing Our target is to have the beta release tomorrow thus I think we need to do the beta even if these issues are not closed by tomorrow. 2. Release schedule: * Beta: Nov 21, 2007- kernel base: 2.6.24-rc2 * RC1: Dec 4, 2007 * RC2: Dec 18, 2007 * RC3: Jan 8, 2008 * RC4: Jan 22, 2008 * Release: Jan 29, 2008 3. 
Open issues Done tasks for the beta: * Rebase for kernel 2.6.24-rc1 - all * Management readiness and open a branch for 1.3 - Sasha * Added SRPT to the package - Vu & Vlad * Open MPI compilation on SLES10 SP1 - Vlad * Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) * ibutils on SLES10 PPC64 (64 bits) - Vlad * Add qperf test from Qlogic - Johann (Qlogic) * Support RHEL 5 up1 - Woody & Vlad * Apply patches that fix warning of backport patches - Vlad * New MVAPICH package - Pasha & DK (OSU) * Complete RDS work - Vlad (Mellanox) * Integrate all SDP features - Jim (Mellanox) * nes - updated backport patches - Glenn (NetEffect) Tziporet Koren Software Director Mellanox Technologies mailto: tziporet at mellanox.co.il Tel +972-4-9097200, ext 380 -------------- next part -------------- An HTML attachment was scrubbed... URL: From vlad at dev.mellanox.co.il Tue Nov 20 08:08:03 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 20 Nov 2007 18:08:03 +0200 Subject: [ofa-general] Re: [PATCH] ofed-1.3: Remove hvcall.h backport file for rhel-5.1 In-Reply-To: <200711191525.59412.ossrosch@linux.vnet.ibm.com> References: <200711191525.59412.ossrosch@linux.vnet.ibm.com> Message-ID: <47430663.5020107@dev.mellanox.co.il> Stefan Roscher wrote: > This patch deletes the backport file hvcall.h for RHEL-5.1. > The backport is no longer needed because RHEL-5.1 already contains the needed hvcalls. > > regards Stefan > > Signed-off-by: Stefan Roscher > --- > Applied. 
Regards, Vladimir From vlad at dev.mellanox.co.il Tue Nov 20 08:08:29 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 20 Nov 2007 18:08:29 +0200 Subject: [ofa-general] Re: [PATCH ofed-1.3] ehca: change kmem_cache_alloc() call to atomic In-Reply-To: <200711191527.09253.ossrosch@linux.vnet.ibm.com> References: <200711191527.09253.ossrosch@linux.vnet.ibm.com> Message-ID: <4743067D.8030602@dev.mellanox.co.il> Stefan Roscher wrote: > This patch fixes a might_sleep kernel warning with using sdp + ehca device driver. > We have to change the kmem_cache_alloc() attribute from GFP_KERNEL to GFP_ATOMIC because sdp is running within a spinlock > during ah allocation. > > regards Stefan > > Signed-off-by: Stefan Roscher > --- > Applied. Regards, Vladimir From vlad at dev.mellanox.co.il Tue Nov 20 08:09:28 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Tue, 20 Nov 2007 18:09:28 +0200 Subject: [ofa-general] Re: [PATCH ofed-1.3] ehca: Fix dma_map_page() call for rhel-4.5 backport In-Reply-To: <200711191529.45675.ossrosch@linux.vnet.ibm.com> References: <200711191529.45675.ossrosch@linux.vnet.ibm.com> Message-ID: <474306B8.5040501@dev.mellanox.co.il> Stefan Roscher wrote: > This patch fixes a wrong function call in backport file for rhel-4.5. Because rhel-4.5 is not supporting dma_map_page() > for ibmebus devices we have to call ibmebus_map_single() explicitly. > > regards Stefan > > Signed-off-by: Stefan Roscher > --- Applied. Regards, Vladimir From Jeffrey.C.Becker at nasa.gov Tue Nov 20 10:12:12 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Tue, 20 Nov 2007 10:12:12 -0800 Subject: [Fwd: [ofa-general] nightly osm_sim report 2007-11-20:normal completion] Message-ID: <4743237C.9020206@nasa.gov> Test. Please ignore. -jeff -------------- next part -------------- An embedded message was scrubbed... 
From: kliteyn at mellanox.co.il Subject: [ofa-general] nightly osm_sim report 2007-11-20:normal completion Date: 20 Nov 2007 07:18:06 +0200 Size: 5534 URL: From arthur.jones at qlogic.com Tue Nov 20 10:21:59 2007 From: arthur.jones at qlogic.com (Arthur Jones) Date: Tue, 20 Nov 2007 10:21:59 -0800 Subject: [ofa-general] SDP compile error... Message-ID: <20071120182159.GG5630@bauxite.pathscale.com> hi vlad, all, i'm unable to compile SDP against the current ofed_kernel (no backports, but fixes applied -- though i get about the same errors without fixes applied). i'm guessing this is supposed to work, i get: ]$ make M=drivers/infiniband/ulp/sdp CC [M] drivers/infiniband/ulp/sdp/sdp_main.o drivers/infiniband/ulp/sdp/sdp_main.c: In function `sdp_create_socket': drivers/infiniband/ulp/sdp/sdp_main.c:2144: warning: passing arg 1 of `sk_alloc' makes pointer from integer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2144: warning: passing arg 3 of `sk_alloc' makes integer from pointer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2144: warning: passing arg 4 of `sk_alloc' makes pointer from integer without a cast drivers/infiniband/ulp/sdp/sdp_main.c: In function `sdp_proc_init': drivers/infiniband/ulp/sdp/sdp_main.c:2328: warning: passing arg 1 of `proc_net_fops_create' from incompatible pointer type drivers/infiniband/ulp/sdp/sdp_main.c:2328: warning: passing arg 2 of `proc_net_fops_create' makes pointer from integer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2328: warning: passing arg 3 of `proc_net_fops_create' makes integer from pointer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2328: error: too few arguments to function `proc_net_fops_create' drivers/infiniband/ulp/sdp/sdp_main.c:2331: warning: passing arg 1 of `proc_net_fops_create' from incompatible pointer type drivers/infiniband/ulp/sdp/sdp_main.c:2331: warning: passing arg 2 of `proc_net_fops_create' makes pointer from integer without a cast 
drivers/infiniband/ulp/sdp/sdp_main.c:2331: warning: passing arg 3 of `proc_net_fops_create' makes integer from pointer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2331: error: too few arguments to function `proc_net_fops_create' drivers/infiniband/ulp/sdp/sdp_main.c: In function `sdp_proc_unregister': drivers/infiniband/ulp/sdp/sdp_main.c:2342: warning: passing arg 1 of `proc_net_remove' from incompatible pointer type drivers/infiniband/ulp/sdp/sdp_main.c:2342: error: too few arguments to function `proc_net_remove' drivers/infiniband/ulp/sdp/sdp_main.c: At top level: drivers/infiniband/ulp/sdp/sdp_main.c:2409: warning: initialization from incompatible pointer type make[1]: *** [drivers/infiniband/ulp/sdp/sdp_main.o] Error 1 make: *** [_module_drivers/infiniband/ulp/sdp] Error 2 arthur From jimmott at austin.rr.com Tue Nov 20 10:36:15 2007 From: jimmott at austin.rr.com (Jim Mott) Date: Tue, 20 Nov 2007 12:36:15 -0600 Subject: [ofa-general] SDP compile error... In-Reply-To: <20071120182159.GG5630@bauxite.pathscale.com> References: <20071120182159.GG5630@bauxite.pathscale.com> Message-ID: <000701c82ba4$3bd6e870$b384b950$@rr.com> This is my issue. I'll get to it today or tomorrow. -----Original Message----- From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Arthur Jones Sent: Tuesday, November 20, 2007 12:22 PM To: Vladimir Sokolovsky Cc: general at lists.openfabrics.org Subject: [ofa-general] SDP compile error... hi vlad, all, i'm unable to compile SDP against the current ofed_kernel (no backports, but fixes applied -- though i get about the same errors without fixes applied). 
i'm guessing this is supposed to work, i get: ]$ make M=drivers/infiniband/ulp/sdp CC [M] drivers/infiniband/ulp/sdp/sdp_main.o drivers/infiniband/ulp/sdp/sdp_main.c: In function `sdp_create_socket': drivers/infiniband/ulp/sdp/sdp_main.c:2144: warning: passing arg 1 of `sk_alloc' makes pointer from integer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2144: warning: passing arg 3 of `sk_alloc' makes integer from pointer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2144: warning: passing arg 4 of `sk_alloc' makes pointer from integer without a cast drivers/infiniband/ulp/sdp/sdp_main.c: In function `sdp_proc_init': drivers/infiniband/ulp/sdp/sdp_main.c:2328: warning: passing arg 1 of `proc_net_fops_create' from incompatible pointer type drivers/infiniband/ulp/sdp/sdp_main.c:2328: warning: passing arg 2 of `proc_net_fops_create' makes pointer from integer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2328: warning: passing arg 3 of `proc_net_fops_create' makes integer from pointer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2328: error: too few arguments to function `proc_net_fops_create' drivers/infiniband/ulp/sdp/sdp_main.c:2331: warning: passing arg 1 of `proc_net_fops_create' from incompatible pointer type drivers/infiniband/ulp/sdp/sdp_main.c:2331: warning: passing arg 2 of `proc_net_fops_create' makes pointer from integer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2331: warning: passing arg 3 of `proc_net_fops_create' makes integer from pointer without a cast drivers/infiniband/ulp/sdp/sdp_main.c:2331: error: too few arguments to function `proc_net_fops_create' drivers/infiniband/ulp/sdp/sdp_main.c: In function `sdp_proc_unregister': drivers/infiniband/ulp/sdp/sdp_main.c:2342: warning: passing arg 1 of `proc_net_remove' from incompatible pointer type drivers/infiniband/ulp/sdp/sdp_main.c:2342: error: too few arguments to function `proc_net_remove' drivers/infiniband/ulp/sdp/sdp_main.c: At top level: 
drivers/infiniband/ulp/sdp/sdp_main.c:2409: warning: initialization from incompatible pointer type make[1]: *** [drivers/infiniband/ulp/sdp/sdp_main.o] Error 1 make: *** [_module_drivers/infiniband/ulp/sdp] Error 2 arthur _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From rdreier at cisco.com Tue Nov 20 11:06:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 11:06:04 -0800 Subject: [ofa-general] Re: [PATCH 4/4] IB/ipath - Normalize error return codes for different posts In-Reply-To: <20071116153611.13665.27415.stgit@eng-46.internal.keyresearch.com> (Arthur Jones's message of "Fri, 16 Nov 2007 07:36:11 -0800") References: <20071116153550.13665.66954.stgit@eng-46.internal.keyresearch.com> <20071116153611.13665.27415.stgit@eng-46.internal.keyresearch.com> Message-ID: thanks, applied 1-4. From rdreier at cisco.com Tue Nov 20 11:06:42 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 11:06:42 -0800 Subject: [ofa-general] Re: [PATCH] IPoIB: Remove redundant check in xmit handler In-Reply-To: (Krishna Kumar2's message of "Tue, 20 Nov 2007 11:22:19 +0530") References: Message-ID: > I forgot to mention, maybe the RUNNING bit was added as part of this bug > fix, > which means no LLTX driver need to really check for this condition again in > their xmit handler. That might explain the problem you faced then. Yes, that is the missing piece. OK, I think your patch is correct and I'll apply it if I can't get rid of LLTX for 2.6.25. - R. 
From pradeeps at linux.vnet.ibm.com Tue Nov 20 11:13:19 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Tue, 20 Nov 2007 11:13:19 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <47322E87.60409@linux.vnet.ibm.com> References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> <47311F34.6030004@linux.vnet.ibm.com> <47322E87.60409@linux.vnet.ibm.com> Message-ID: <474331CF.4080802@linux.vnet.ibm.com> Roland, You may have missed this being busy with SC'07. I provided my comments: http://lists.openfabrics.org/pipermail/general/2007-November/042865.html I can provide a patch to address these issues once we discuss them. Pradeep From jlentini at netapp.com Tue Nov 20 12:02:34 2007 From: jlentini at netapp.com (James Lentini) Date: Tue, 20 Nov 2007 15:02:34 -0500 (EST) Subject: [ofa-general] [PATCH] v2 dapltest - RHEL5.1 ia64 build problems, convert timestamps to gettimeofday In-Reply-To: References: Message-ID: On Fri, 16 Nov 2007, Davis, Arlin R wrote: > > Fix RHEL5.1 ia64 v2 dapltest build problems with get_cycles by > moving to gettimeofday. > > James, I kept the get_cycle code in with compile-time switch > (RDTSC_TIMERS) and fixed the ia64 build issue just in case some > cannot live without it. If you think we should just blow it all away > let me know and I will update the patch. This looks good to me. I see value in keeping the get_cycle code for the platforms that support it. > > - Change dapltest timing to use gettimeofday instead of get_cycles > for better portability. 
> > Signed-off by: Arlin Davis >   > diff --git a/test/dapltest/mdep/linux/dapl_mdep_user.c b/test/dapltest/mdep/linux/dapl_mdep_user.c > index 015e53c..6402623 100644 > --- a/test/dapltest/mdep/linux/dapl_mdep_user.c > +++ b/test/dapltest/mdep/linux/dapl_mdep_user.c > @@ -181,6 +181,7 @@ DT_Mdep_GetTime (void) > return tv.tv_sec * 1000 + tv.tv_usec / 1000; > } > > +#ifdef RDTSC_TIMERS > double > DT_Mdep_GetCpuMhz ( > void ) > @@ -231,6 +232,15 @@ DT_Mdep_GetCpuMhz ( > > return strtod (mhz_str, NULL) / DT_TSC_BASE; > } > +#else /* !RDTSC_TIMERS */ > + > +double > +DT_Mdep_GetCpuMhz ( > + void ) > +{ > + return 1; > +} > +#endif > > > unsigned long > diff --git a/test/dapltest/mdep/linux/dapl_mdep_user.h b/test/dapltest/mdep/linux/dapl_mdep_user.h > index 4aa25d3..153c8c1 100755 > --- a/test/dapltest/mdep/linux/dapl_mdep_user.h > +++ b/test/dapltest/mdep/linux/dapl_mdep_user.h > @@ -43,10 +43,6 @@ > #include > #include > > -#ifdef __ia64__ > -#include > -#endif > - > /* Default Device Name */ > #define DT_MdepDeviceName "ofa-v2-ib0" > > @@ -111,7 +107,7 @@ typedef struct > /* > * Timing > */ > - > +#ifdef RDTSC_TIMERS > typedef unsigned long long int DT_Mdep_TimeStamp; > > static _INLINE_ DT_Mdep_TimeStamp > @@ -122,12 +118,10 @@ DT_Mdep_GetTimeStamp ( void ) > __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); > return x; > #else > - > #ifdef __ia64__ > - unsigned long x; > - > - x = get_cycles (); > - return x; > + unsigned long ret; > + __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret)); > + return ret; > #else > #if defined(__PPC__) || defined(__PPC64__) > unsigned int tbl, tbu0, tbu1; > @@ -149,6 +143,21 @@ DT_Mdep_GetTimeStamp ( void ) > #endif > #endif > } > +#else /* !RDTSC_TIMERS */ > +/* > + * Get timestamp, microseconds, (relative to some fixed point) > + */ > +typedef double DT_Mdep_TimeStamp; > + > +static _INLINE_ DT_Mdep_TimeStamp > +DT_Mdep_GetTimeStamp ( void ) > +{ > + struct timeval tv; > + gettimeofday(&tv, NULL); > + return (tv.tv_sec * 1000000 + 
tv.tv_usec); > +} > +#endif > + > > /* > * Define long format types to be used in *printf format strings. We > diff --git a/test/dapltest/test/dapl_performance_stats.c b/test/dapltest/test/dapl_performance_stats.c > index ce29272..0b1eaf1 100644 > --- a/test/dapltest/test/dapl_performance_stats.c > +++ b/test/dapltest/test/dapl_performance_stats.c > @@ -281,7 +281,7 @@ DT_performance_stats_print ( > " Operation Type : %s\n" > " Number of Operations : %u\n" > " Segment Size : %u\n" > - " Number of Segments : %u bytes\n" > + " Number of Segments : %u \n" > " Pipeline Length : %u\n\n", > DT_PerformanceModeToString (cmd->mode), > DT_TransferTypeToString (cmd->op.transfer_type), > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From arlin.r.davis at intel.com Tue Nov 20 12:07:27 2007 From: arlin.r.davis at intel.com (Arlin Davis) Date: Tue, 20 Nov 2007 12:07:27 -0800 Subject: [ofa-general] [PATCH] uDAPL v2.0 - lower default attributes, add missing query, cleanup Message-ID: <000001c82bb0$f9a9c370$9f97070a@amr.corp.intel.com> - Lower default settings (rdma ops, inline sends) for latest iWARP/IB devices. - Add missing ia_query for max_iov_segments_per_rdma_write - Cleanup CMA code no longer supported by rdma_cm. 
Signed-off by: Arlin Davis diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c index d11aed3..4518a2b 100644 --- a/dapl/common/dapl_ep_util.c +++ b/dapl/common/dapl_ep_util.c @@ -57,7 +57,7 @@ /* * Default number of RDMA operations in progress at a time */ -#define IB_RDMA_DEFAULT 8 +#define IB_RDMA_DEFAULT 4 extern void dapli_ep_default_attrs ( IN DAPL_EP *ep_ptr ); diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c index 518a7af..e8c33f2 100755 --- a/dapl/openib_cma/dapl_ib_cm.c +++ b/dapl/openib_cma/dapl_ib_cm.c @@ -141,37 +141,7 @@ static void dapli_route_resolve(struct dapl_cm_id *conn) conn->params.private_data_len, conn->params.responder_resources, conn->params.initiator_depth ); -#if 0 - /* Get default connect request timeout values, and adjust */ - ret = rdma_get_option(conn->cm_id, RDMA_PROTO_IB, IB_CM_REQ_OPTIONS, - (void*)&req_opt, &optlen); - if (ret) { - dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_get_option failed: %s\n", - strerror(errno)); - goto bail; - } - dapl_dbg_log(DAPL_DBG_TYPE_CM, " route_resolve: " - "Set CR times - response %d to %d, retry %d to %d\n", - req_opt.remote_cm_response_timeout, - conn->hca->ib_trans.max_cm_timeout, - req_opt.max_cm_retries, - conn->hca->ib_trans.max_cm_retries); - - /* Use hca response time setting for connect requests */ - req_opt.max_cm_retries = conn->hca->ib_trans.max_cm_retries; - req_opt.remote_cm_response_timeout = - conn->hca->ib_trans.max_cm_timeout; - req_opt.local_cm_response_timeout = - req_opt.remote_cm_response_timeout; - ret = rdma_set_option(conn->cm_id, RDMA_PROTO_IB, IB_CM_REQ_OPTIONS, - (void*)&req_opt, optlen); - if (ret) { - dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_set_option failed: %s\n", - strerror(errno)); - goto bail; - } -#endif ret = rdma_connect(conn->cm_id, &conn->params); if (ret) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_connect failed: %s\n", diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c index 83b4b3a..23655b6 100755 
--- a/dapl/openib_cma/dapl_ib_util.c +++ b/dapl/openib_cma/dapl_ib_util.c @@ -481,6 +481,7 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA *hca_ptr, ia_attr->num_vendor_attr = 0; ia_attr->vendor_attr = NULL; ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge; + ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge; /* save rd_atom for peer validation during connect requests */ hca_ptr->ib_trans.max_rdma_rd_in = dev_attr.max_qp_rd_atom; hca_ptr->ib_trans.max_rdma_rd_out = dev_attr.max_qp_rd_atom; diff --git a/dapl/openib_cma/dapl_ib_util.h b/dapl/openib_cma/dapl_ib_util.h index 6f57d71..5d7d9f8 100755 --- a/dapl/openib_cma/dapl_ib_util.h +++ b/dapl/openib_cma/dapl_ib_util.h @@ -111,7 +111,7 @@ typedef struct _ib_wait_obj_handle #define IB_INVALID_HANDLE NULL /* inline send rdma threshold */ -#define INLINE_SEND_DEFAULT 128 +#define INLINE_SEND_DEFAULT 64 /* CM private data areas */ #define IB_MAX_REQ_PDATA_SIZE 48 From jlentini at netapp.com Tue Nov 20 12:10:24 2007 From: jlentini at netapp.com (James Lentini) Date: Tue, 20 Nov 2007 15:10:24 -0500 (EST) Subject: [ofa-general] Re: [PATCH] uDAPL v2.0 - lower default attributes, add missing query, cleanup In-Reply-To: <000001c82bb0$f9a9c370$9f97070a@amr.corp.intel.com> References: <000001c82bb0$f9a9c370$9f97070a@amr.corp.intel.com> Message-ID: Ack. Looks good. On Tue, 20 Nov 2007, Arlin Davis wrote: > > - Lower default settings (rdma ops, inline sends) for latest iWARP/IB devices. > - Add missing ia_query for max_iov_segments_per_rdma_write > - Cleanup CMA code no longer supported by rdma_cm. 
> > Signed-off by: Arlin Davis > > diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c > index d11aed3..4518a2b 100644 > --- a/dapl/common/dapl_ep_util.c > +++ b/dapl/common/dapl_ep_util.c > @@ -57,7 +57,7 @@ > /* > * Default number of RDMA operations in progress at a time > */ > -#define IB_RDMA_DEFAULT 8 > +#define IB_RDMA_DEFAULT 4 > > extern void dapli_ep_default_attrs ( > IN DAPL_EP *ep_ptr ); > diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c > index 518a7af..e8c33f2 100755 > --- a/dapl/openib_cma/dapl_ib_cm.c > +++ b/dapl/openib_cma/dapl_ib_cm.c > @@ -141,37 +141,7 @@ static void dapli_route_resolve(struct dapl_cm_id *conn) > conn->params.private_data_len, > conn->params.responder_resources, > conn->params.initiator_depth ); > -#if 0 > - /* Get default connect request timeout values, and adjust */ > - ret = rdma_get_option(conn->cm_id, RDMA_PROTO_IB, IB_CM_REQ_OPTIONS, > - (void*)&req_opt, &optlen); > - if (ret) { > - dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_get_option failed: %s\n", > - strerror(errno)); > - goto bail; > - } > > - dapl_dbg_log(DAPL_DBG_TYPE_CM, " route_resolve: " > - "Set CR times - response %d to %d, retry %d to %d\n", > - req_opt.remote_cm_response_timeout, > - conn->hca->ib_trans.max_cm_timeout, > - req_opt.max_cm_retries, > - conn->hca->ib_trans.max_cm_retries); > - > - /* Use hca response time setting for connect requests */ > - req_opt.max_cm_retries = conn->hca->ib_trans.max_cm_retries; > - req_opt.remote_cm_response_timeout = > - conn->hca->ib_trans.max_cm_timeout; > - req_opt.local_cm_response_timeout = > - req_opt.remote_cm_response_timeout; > - ret = rdma_set_option(conn->cm_id, RDMA_PROTO_IB, IB_CM_REQ_OPTIONS, > - (void*)&req_opt, optlen); > - if (ret) { > - dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_set_option failed: %s\n", > - strerror(errno)); > - goto bail; > - } > -#endif > ret = rdma_connect(conn->cm_id, &conn->params); > if (ret) { > dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_connect 
failed: %s\n", > diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c > index 83b4b3a..23655b6 100755 > --- a/dapl/openib_cma/dapl_ib_util.c > +++ b/dapl/openib_cma/dapl_ib_util.c > @@ -481,6 +481,7 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA *hca_ptr, > ia_attr->num_vendor_attr = 0; > ia_attr->vendor_attr = NULL; > ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge; > + ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge; > /* save rd_atom for peer validation during connect requests */ > hca_ptr->ib_trans.max_rdma_rd_in = dev_attr.max_qp_rd_atom; > hca_ptr->ib_trans.max_rdma_rd_out = dev_attr.max_qp_rd_atom; > diff --git a/dapl/openib_cma/dapl_ib_util.h b/dapl/openib_cma/dapl_ib_util.h > index 6f57d71..5d7d9f8 100755 > --- a/dapl/openib_cma/dapl_ib_util.h > +++ b/dapl/openib_cma/dapl_ib_util.h > @@ -111,7 +111,7 @@ typedef struct _ib_wait_obj_handle > #define IB_INVALID_HANDLE NULL > > /* inline send rdma threshold */ > -#define INLINE_SEND_DEFAULT 128 > +#define INLINE_SEND_DEFAULT 64 > > /* CM private data areas */ > #define IB_MAX_REQ_PDATA_SIZE 48 > From rdreier at cisco.com Tue Nov 20 13:01:36 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 13:01:36 -0800 Subject: [ofa-general] Re: [PATCH] mlx4: fix qp state bounds check for mlx4_qp_modify In-Reply-To: <200711151718.48203.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Thu, 15 Nov 2007 17:18:47 +0200") References: <200711151718.48203.jackm@dev.mellanox.co.il> Message-ID: Yikes... thanks, applied. 
From rdreier at cisco.com Tue Nov 20 13:03:18 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 13:03:18 -0800 Subject: [ofa-general] Re: mlx4: problem with resource limits > 2^20 In-Reply-To: <200711201442.35922.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Tue, 20 Nov 2007 14:42:35 +0200") References: <200711201442.35922.jackm@dev.mellanox.co.il> Message-ID: > We're encountering a problem with resource profiles which have elements which exceed 1 Meg > (e.g., log_num_qp=21, or log_num_mtt=21 as module options for mlx4_core) > > Many kernels allow kmalloc of only up to 128KB (which can support a 1 MB bitmap). If the > resource max is greater than 1 Meg, the kmalloc will fail. Yes, and mthca has the same issue too... > 1. Change the bitmap allocator and buddy systems to use a 2-level scheme. > 2. Use vmalloc for allocations greater than 128K, and note that for x86 systems you cannot specify more > than 1M for any resource in the profile. > 3. Do nothing, and just note that cannot allocate more than 1M of any resource for ANY system. > > My own preference is 2 (with maybe some test to determine just what the crossover point is, rather than > just having 128K as a defined constant) -- or, given some time, 1 (which is a more general and scalable solution). I think doing "if (something) kmalloc() else vmalloc()" is really ugly, so I would really prefer to see #1. - R. From ardavis at ichips.intel.com Tue Nov 20 13:03:01 2007 From: ardavis at ichips.intel.com (Arlin Davis) Date: Tue, 20 Nov 2007 13:03:01 -0800 Subject: [ofa-general] [ANNOUNCE] dapl-2.0.3 release Message-ID: <47434B85.4050008@ichips.intel.com> New release for uDAPL v2 (2.0.3) is available at: http://www.openfabrics.org/downloads/dapl md5sum: cba6e427a8e1adcf5d143fe2f0df3131 dapl-2.0.3.tar.gz uDAPL v1 remains unchanged at dapl-1.2.3. 
Vlad, please pull this new release into OFED 1.3 beta, using the configure options from the package spec files, and install the following: dapl-1.2.3-1 dapl-2.0.3-1 dapl-utils-2.0.3-1 dapl-devel-2.0.3-1 dapl-debuginfo-2.0.3-1 Fixes since 2.0.2 - Lower default settings (rdma ops, inline sends) for latest iWARP/IB devices - Add missing ia_query for max_iov_segments_per_rdma_write - Cleanup CMA code no longer supported by rdma_cm - Change dapltest timers to use gettimeofday instead of get_cycles for better portability. Build failure on RH5.1 ia64 See http://www.openfabrics.org/downloads/dapl/README for more details. -arlin From rdreier at cisco.com Tue Nov 20 13:04:42 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 13:04:42 -0800 Subject: [ofa-general] Mapping non-contiguous memory in userspace In-Reply-To: <00c101c82b73$2de22d50$0281a8c0@ebpc> (Eric Barton's message of "Tue, 20 Nov 2007 12:45:06 +0000") References: <00c101c82b73$2de22d50$0281a8c0@ebpc> Message-ID: > I'm trying to gauge whether there might be any support to introduce a > userspace API for mapping non-contiguous memory. I've mentioned this as a possibility before. I guess it really comes down to a tradeoff between how much it helps vs. how much it complicates the interface and the implementation. i.e. if it is a lot of cruft that gives a small benefit to only a few apps, then I'm not too interested; if it can be done cleanly and gives a big benefit, then I'm all for it. - R. 
From rdreier at cisco.com Tue Nov 20 13:13:09 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 13:13:09 -0800 Subject: [ofa-general] Re: [PATCH] libibverbs - return valid bad_wr from ibv_cmd_post_send() In-Reply-To: <1194909302.3612.74.camel@brick.pathscale.com> (Ralph Campbell's message of "Mon, 12 Nov 2007 15:15:02 -0800") References: <1194909302.3612.74.camel@brick.pathscale.com> Message-ID: OK, I fixed up ibv_post_send, ibv_post_recv and ibv_post_srq_recv and pushed out a new libibverbs tree. From rdreier at cisco.com Tue Nov 20 13:14:51 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 13:14:51 -0800 Subject: [ofa-general] Re: [PATCH 19/59] drivers/infiniband: Add missing "space" In-Reply-To: <1195523331-15303-20-git-send-email-joe@perches.com> (Joe Perches's message of "Mon, 19 Nov 2007 17:48:11 -0800") References: <1195523331-15303-1-git-send-email-joe@perches.com> <1195523331-15303-2-git-send-email-joe@perches.com> <1195523331-15303-3-git-send-email-joe@perches.com> <1195523331-15303-4-git-send-email-joe@perches.com> <1195523331-15303-5-git-send-email-joe@perches.com> <1195523331-15303-6-git-send-email-joe@perches.com> <1195523331-15303-7-git-send-email-joe@perches.com> <1195523331-15303-8-git-send-email-joe@perches.com> <1195523331-15303-9-git-send-email-joe@perches.com> <1195523331-15303-10-git-send-email-joe@perches.com> <1195523331-15303-11-git-send-email-joe@perches.com> <1195523331-15303-12-git-send-email-joe@perches.com> <1195523331-15303-13-git-send-email-joe@perches.com> <1195523331-15303-14-git-send-email-joe@perches.com> <1195523331-15303-15-git-send-email-joe@perches.com> <1195523331-15303-16-git-send-email-joe@perches.com> <1195523331-15303-17-git-send-email-joe@perches.com> <1195523331-15303-18-git-send-email-joe@perches.com> <1195523331-15303-19-git-send-email-joe@perches.com> <1195523331-15303-20-git-send-email-joe@perches.com> Message-ID: thanks, applied. 
From rdreier at cisco.com Tue Nov 20 13:16:51 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 13:16:51 -0800 Subject: [ofa-general] Re: [RFC] mlx4: Add some form of error-path debug output In-Reply-To: <200711180835.37745.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Sun, 18 Nov 2007 08:35:37 +0200") References: <200711151920.59592.jackm@dev.mellanox.co.il> <6C2C79E72C305246B504CBA17B5500C90282E2D6@mtlexch01.mtl.com> <200711180835.37745.jackm@dev.mellanox.co.il> Message-ID: > The philosophy I used below is to focus more on errors which would be > returned as -EINVAL -- to provide a bit more detail. > Also, I wanted the facility to be on-off switchable at run-time > (i.e., module-parameter controlled). Yes, this looks pretty good to me. - R. From rdreier at cisco.com Tue Nov 20 14:31:40 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 20 Nov 2007 14:31:40 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <47322E87.60409@linux.vnet.ibm.com> (Pradeep Satyanarayana's message of "Wed, 07 Nov 2007 13:30:47 -0800") References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> <47311F34.6030004@linux.vnet.ibm.com> <47322E87.60409@linux.vnet.ibm.com> Message-ID: > static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, > struct ipoib_cm_rx *rx) ... > if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, > rx->rx_ring[i].mapping)) { > ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); > ret = -ENOMEM; > This will cause an skb leak- see generic comments about ipoib_cm_alloc_rx_skb() below Thanks... I fixed the error path here so that it cleans up properly I think. 
> Generic comment about ipoib_cm_alloc_rx_skb() which is true for both the srq and non srq cases > (except in the receive wc handler): > I find that there will be skb leakage if ipoib_cm_alloc_rx_skb() fails before all the rx skbs > are allocated. We must undo those allocations and mappings. Probably we should call > ipoib_cm_dev_cleanup() and free the skbs and do the unmap in that routine. I audited all the calls to ipoib_cm_alloc_rx_skb() and I think they are all OK now. > if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { > spin_lock_irqsave(&priv->lock, flags); > list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); > ipoib_cm_start_rx_drain(priv); > > > I do not understand why we need to call ipoib_cm_start_rx_drain(). We have already received a work > completion with RX_DRAIN set. This is not new code... it is there since the original SRQ cleanup code in 518b1646. Basically after one drain completion, we need to go back and start draining any new connections that are waiting to clean up. > if (!p->rx_ring) > > > If rx_ring is indeed NULL, there is no point in continuing further, > we will probably crash sooner or later. We should insert a BUG_ON and > thus crash the system. This is actually checking whether the rx structure has an rx_ring or not -- it is an obfuscated way of checking whether we are in the SRQ case or not. I changed things so that there is an explicit "has_srq" local variable in the RX wc handler to make things clearer. > repost: > if (p->rx_ring) { > > > Shouldn't this be if(!ipoib_cm_has_srq())? It's equivalent as I mentioned above. Anyway I changed this to if (has_srq). I pushed out a new tree. Let me know what you think now. - R. From jim at mellanox.com Tue Nov 20 14:52:15 2007 From: jim at mellanox.com (Jim Mott) Date: Tue, 20 Nov 2007 14:52:15 -0800 Subject: [ofa-general] [PATCH 1/1] SDP - Support 2.6.24-rc2 Message-ID: Modifies SDP to support the updated 2.6.24-rc2 kernel APIs. 
Signed-off-by: Jim Mott --- Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== --- ofed_1_3.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-20 13:44:23.000000000 -0800 +++ ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-20 13:43:43.000000000 -0800 @@ -2122,13 +2122,16 @@ static struct proto_ops sdp_proto_ops = .sendpage = sock_no_sendpage, }; -static int sdp_create_socket(struct socket *sock, int protocol) +static int sdp_create_socket(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc; sdp_dbg(NULL, "%s: type %d protocol %d\n", __func__, sock->type, protocol); + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_STREAM) { sdp_warn(NULL, "SDP: unsupported type %d.\n", sock->type); return -ESOCKTNOSUPPORT; @@ -2140,7 +2143,7 @@ static int sdp_create_socket(struct sock return -EPROTONOSUPPORT; } - sk = sk_alloc(PF_INET_SDP, GFP_KERNEL, &sdp_proto, 1); + sk = sk_alloc(net, PF_INET_SDP, GFP_KERNEL, &sdp_proto); if (!sk) { sdp_warn(NULL, "SDP: failed to allocate socket.\n"); return -ENOMEM; @@ -2324,10 +2327,12 @@ static int __init sdp_proc_init(void) sdp_seq_afinfo.seq_fops->llseek = seq_lseek; sdp_seq_afinfo.seq_fops->release = seq_release_private; - p = proc_net_fops_create(sdp_seq_afinfo.name, S_IRUGO, sdp_seq_afinfo.seq_fops); + p = proc_net_fops_create(&init_net, sdp_seq_afinfo.name, S_IRUGO, + sdp_seq_afinfo.seq_fops); if (p) p->data = &sdp_seq_afinfo; - p = proc_net_fops_create(sdp_seq_afinfo.name, S_IRUGO, sdp_seq_afinfo.seq_fops); + p = proc_net_fops_create(&init_net, sdp_seq_afinfo.name, S_IRUGO, + sdp_seq_afinfo.seq_fops); if (p) p->data = &sdp_seq_afinfo; else @@ -2338,7 +2343,7 @@ static int __init sdp_proc_init(void) static void sdp_proc_unregister(void) { - proc_net_remove(sdp_seq_afinfo.name); + proc_net_remove(&init_net, sdp_seq_afinfo.name); memset(sdp_seq_afinfo.seq_fops, 0, sizeof(*sdp_seq_afinfo.seq_fops)); } From jim at 
mellanox.com Tue Nov 20 14:55:54 2007 From: jim at mellanox.com (Jim Mott) Date: Tue, 20 Nov 2007 14:55:54 -0800 Subject: [ofa-general] [PATCH 1/1 v2] SDP - Fix a memory leak in bzcopy Message-ID: A better fix of a potential memory leak in the new bzcopy code. Signed-off-by: Dotan Barak --- Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c =================================================================== --- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-19 16:40:26.000000000 -0600 +++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c 2007-11-19 16:40:51.000000000 -0600 @@ -1238,10 +1238,6 @@ if (!can_do_mlock()) return NULL; - bz = kzalloc(sizeof(*bz), GFP_KERNEL); - if (!bz) - return NULL; - /* * Since we use the TCP segmentation fields of the skb to map user * pages, we must make sure that everything we send in a single chunk @@ -1251,6 +1247,10 @@ if (size_goal >= MAX_SKB_FRAGS) return NULL; + bz = kzalloc(sizeof(*bz), GFP_KERNEL); + if (!bz) + return NULL; + addr = (unsigned long)base; bz->u_base = base; From ichthulin at kentlandsmidtown.com Tue Nov 20 19:04:58 2007 From: ichthulin at kentlandsmidtown.com (Roland Young) Date: Tue, 20 Nov 2007 22:04:58 -0500 Subject: [ofa-general] Autodesk 3D Studio Max 9 for XP for 149, Retails @ 6720 (You save 6590) Message-ID: <000001c82bea$75dfc500$0100007f@localhost> autodesk architectural desktop 2006 - 119 autodesk 3ds max 9.0 - 149 adobe photoshop cs2 v 9.0 - 69 autodesk building systems 2006 - 129 2008 microsoft office beta for mac - 79 intuit quicken home and business 2008 - 39 systran 6 premium translator - 159 crystal xcelsius professional v4.5 - 59 visit cheapoemsoft5. 
com in Internet Explorer From ogerlitz at voltaire.com Tue Nov 20 23:53:54 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 21 Nov 2007 09:53:54 +0200 Subject: [ofa-general] slides of the open fabrics sc07 developer conference Message-ID: <4743E412.9010609@voltaire.com> Hi Johann, Are the slides online already? Or. From ogerlitz at voltaire.com Wed Nov 21 00:09:55 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Wed, 21 Nov 2007 10:09:55 +0200 Subject: [ofa-general] [PATCH] mlx4_core: increase max number of qp's and of srq's to 128K In-Reply-To: <200711201744.15766.jackm@dev.mellanox.co.il> References: <200711201744.15766.jackm@dev.mellanox.co.il> Message-ID: <4743E7D3.8090201@voltaire.com> Jack Morgenstein wrote: > With the advent large clusters which utilize multicore hosts, > 64K qp's is not enough. > > We want to increase the default maxima for QPs and SRQs to 128K. Why you want to increase the maxima for SRQs as well? a 1:1 ratio between QPs to SRQs means a broken application design, isn't it? Or. From vlad at dev.mellanox.co.il Wed Nov 21 01:06:00 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 21 Nov 2007 11:06:00 +0200 Subject: [ofa-general] [PATCH 1/1] SDP - Support 2.6.24-rc2 In-Reply-To: References: Message-ID: <4743F4F8.4060802@dev.mellanox.co.il> Jim Mott wrote: > Modifies SDP to support the updated 2.6.24-rc2 kernel APIs. > > > Signed-off-by: Jim Mott > --- > > Index: ofed_1_3/drivers/infiniband/ulp/sdp/sdp_main.c > =================================================================== Applied, Regards, Vladimir From vlad at dev.mellanox.co.il Wed Nov 21 01:06:42 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Wed, 21 Nov 2007 11:06:42 +0200 Subject: [ofa-general] [PATCH 1/1 v2] SDP - Fix a memory leak in bzcopy In-Reply-To: References: Message-ID: <4743F522.8050901@dev.mellanox.co.il> Jim Mott wrote: > A better fix of a potential memory leak in the new bzcopy code. 
> > Signed-off-by: Dotan Barak > --- > > Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c > =================================================================== Applied, Regards, Vladimir From allenh at gosten.com Wed Nov 21 02:24:41 2007 From: allenh at gosten.com (Rochelle Castle) Date: Wed, 21 Nov 2007 06:24:41 -0400 Subject: [ofa-general] Adult Meat Guitar Water Staircase Monster Guitar Message-ID: <01c82c07$33075190$b25f6242@allenh> We are Canadian RX All of our stock is onsale Come see our closeout prices http://geocities.com/allencbv118 From allenh at gosten.com Wed Nov 21 02:24:41 2007 From: allenh at gosten.com (Rochelle Castle) Date: Wed, 21 Nov 2007 06:24:41 -0400 Subject: [ofa-general] Adult Meat Guitar Water Staircase Monster Guitar Message-ID: <01c82c07$33075190$b25f6242@allenh> We are Canadian RX All of our stock is onsale Come see our closeout prices http://geocities.com/allencbv118 From vlad at lists.openfabrics.org Wed Nov 21 02:56:13 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 21 Nov 2007 02:56:13 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071121-0200 daily build status Message-ID: <20071121105613.D994CE60873@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on x86_64 with 
linux-2.6.19 Passed on x86_64 with linux-2.6.21.1 Passed on powerpc with linux-2.6.13 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on ppc64 with linux-2.6.18 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.15 Passed on ppc64 with linux-2.6.16 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.17 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.16 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.23 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: From erezz at voltaire.com Wed Nov 21 03:11:37 2007 From: erezz at voltaire.com (Erez Zilber) Date: Wed, 21 Nov 2007 13:11:37 +0200 Subject: [ofa-general] [PATCH 1/1] IB/iSER: Add missing counter incrementation in iser_data_buf_aligned_len Message-ID: <1195643497.4132.4.camel@localhost> Roland, The following patch fixes a bug in iSER. Without it, iSER will have poor performance. Can you push it upstream for 2.6.24? 
While adding sg chaining support to iSER, a "for" loop was replaced with a "for_each_sg" loop. The "for" loop included the incrementation of 2 variables. Only one of them is incremented in the current "for_each_sg" loop. This caused iSER to think that all data is unaligned, and all data was copied to aligned buffers. This patch increments the missing counter inside the "for_each_sg" loop whenever necessary. Signed-off-by: Erez Zilber --- drivers/infiniband/ulp/iser/iser_memory.c | 5 ++++- 1 files changed, 4 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d687980..a28f552 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -310,13 +310,16 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, if (i + 1 < data->dma_nents) { next_addr = ib_sg_dma_address(ibdev, sg_next(sg)); /* are i, i+1 fragments of the same page? */ - if (end_addr == next_addr) + if (end_addr == next_addr) { + cnt++; continue; + } else if (!IS_4K_ALIGNED(end_addr)) { ret_len = cnt + 1; break; } } + cnt++; } if (i == data->dma_nents) ret_len = cnt; /* loop ended */ -- 1.5.2 From kliteyn at mellanox.co.il Tue Nov 20 21:20:03 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 21 Nov 2007 07:20:03 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-21:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-20 OpenSM git rev = Tue_Nov_20_00:04:13_2007 [6b8a7c5ebb648a6aa054c7ec69a9e804e772f416] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest 
IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From dotanb at dev.mellanox.co.il Wed Nov 21 08:00:48 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Wed, 21 Nov 2007 18:00:48 +0200 Subject: [ofa-general] the SDP module prints allot of error messages to the /var/log/messages Message-ID: <47445630.10000@dev.mellanox.co.il> In our nightly regression i noticed that the /var/log/messages is filled with the following error messages: Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -32 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42202:19010): Could not reap -2 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -29 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42204:19005): Could not reap -14 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42202:19010): Could not reap -2 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42202:19010): Could not reap -7 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -28 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -4 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -32 in-flight sends Nov 21 17:28:31 sw186 kernel: sdp_sock(42204:19005): Could not reap -14 in-flight sends Are those error messages are really necessary? 
thanks Dotan From Arkady.Kanevsky at netapp.com Wed Nov 21 08:02:25 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 21 Nov 2007 11:02:25 -0500 Subject: [ofa-general] iWARP peer-to-peer CM proposal Message-ID: Group, below is a proposal on how to resolve the peer-to-peer iWARP CM issue discovered at the interop event. The main issue is that the MPA spec (the relevant portion of IETF RFC 5044 is below) requires that the connection initiator send the first message over the established connection. Multiple MPI implementations and several other apps use a peer-to-peer model. So rather than forcing all of them to do it on their own, which will not help with interop between different implementations, the goal is to extend the lower layers to provide it. Our first idea was to leave the MPA protocol untouched and try to solve this problem in iw_cm. But there are too many complications to it. First, in order to adhere to RFC5044 the initiator must send the first FPDU and the responder must process it. But since the connection is already established, processing the FPDU involves the ULP on whose behalf the connection is created. So either the initiator sends a message which generates a completion on the responder CQ, thus visible to the ULP, or not. In the latter case, the only op which can do it is an RDMA one, which means that the responder somehow provided the initiator with an S-tag which it can use. So, this is an extension to MPA, probably using private data. And the responder, upon receiving it, destroys this S-tag. In any case this is an extension of MPA. In the former, Send is used but this requires a buffer to be posted to the CQ. But since the same CQ (or SharedCQ) can be used by other connections at the same time, it can cause the responder CM posted buffer to be consumed by another connection. This is not acceptable. So now we consider an extension to the MPA protocol. The goal is to be completely backwards compatible with the existing version 1.
In a nutshell, use a "flag" in the MPA request message which indicates that a "ready to receive" message will be sent by the requestor upon receiving an MPA response message with connection acceptance. Here are the changes to IETF RFC5044 1. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 | | + Key (16 bytes containing "MPA ID Req Frame") + 4 | (4D 50 41 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 bytes containing "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 70 20 46 72 61 6D 65) | + Or (16 bytes containing "MPA ID Rtr Frame") + 12 | (4D 50 41 20 49 44 20 52 74 72 20 46 72 61 6D 65) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 |M|C|R|S| Res | Rev | PD_Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | ~ ~ ~ Private Data ~ | | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 2. S: indicator in the Req frame of whether or not the Requestor will send an Rtr frame. In the Req frame, if set to 1 then the Rtr frame will be sent if the responder sends a Rep frame with the accept bit set. 0 indicates that the Rtr frame will not be sent. In the Rep frame, 0 means that the Responder cannot support the Rtr frame, while 1 means that it does and is waiting for it. (While my preference is to handle this as MPA protocol version matching rules, the proposed method will provide complete backwards compatibility.) Unused by the Rtr frame. That is, it is set to 0 in the Rtr frame and ignored by the responder. All other bits M,C,R and the remainder of Res are treated as in MPA ver 1. The Rtr frame adheres to the C bit as specified in the Rep frame. 3. No private data format is defined for Rtr in this version. 4. An example will be added to present the Rtr model. That is, if the S bit is not set, the current MPA ver 1 model is followed. And if the S bit is set, then the "proposed" model with the Rtr message is followed. 5. Requestor use of the Rtr frame must adhere to the S bit setting of the Rep frame.
************************************************** While the process of driving this proposal through the IETF is very, very lengthy, in order to solve this problem now, we can still use this proposal with the current version 1 of MPA. All existing implementations will still work. And if both sides support this change, then the peer-to-peer model is also provided. Comments, suggestions, and criticism requested. I especially want to know if we are missing some gotcha which was discussed by the RDDP WG when they rejected the peer-to-peer model for MPA. iWARP vendors, please comment on the feasibility of implementing this MPA extension. ********************************************************************* 7. Connection Semantics 7.1. Connection Setup MPA requires that the Consumer MUST activate MPA, and any TCP enhancements for MPA, on a TCP half connection at the same location in the octet stream at both the sender and the receiver. This is required in order for the Marker scheme to correctly locate the Markers (if enabled) and to correctly locate the first FPDU. MPA, and any TCP enhancements for MPA are enabled by the ULP in both directions at once at an endpoint. Culley, et al. Standards Track [Page 24] RFC 5044 MPA Framing for TCP October 2007 This can be accomplished several ways, and is left up to DDP's ULP: * DDP's ULP MAY require DDP on MPA startup immediately after TCP connection setup. This has the advantage that no streaming mode negotiation is needed. An example of such a protocol is shown in Figure 10: Example Immediate Startup negotiation. This may be accomplished by using a well-known port, or a service locator protocol to locate an appropriate port on which DDP on MPA is expected to operate. * DDP's ULP MAY negotiate the start of DDP on MPA sometime after a normal TCP startup, using TCP streaming data exchanges on the same connection.
The exchange establishes that DDP on MPA (as well as other ULPs) will be used, and exactly locates the point in the octet stream where MPA is to begin operation. Note that such a negotiation protocol is outside the scope of this specification. A simplified example of such a protocol is shown in Figure 9: Example Delayed Startup negotiation on page 33. An MPA endpoint operates in two distinct phases. The Startup Phase is used to verify correct MPA setup, exchange CRC and Marker configuration, and optionally pass Private Data between endpoints prior to completing a DDP connection. During this phase, specifically formatted frames are exchanged as TCP byte streams without using CRCs or Markers. During this phase a DDP endpoint need not be "bound" to the MPA connection. In fact, the choice of DDP endpoint and its operating parameters may not be known until the Consumer supplied Private Data (if any) has been examined by the Consumer. The second distinct phase is Full Operation during which FPDUs are sent using all the rules that pertain (CRCs, Markers, MULPDU restrictions, etc.). A DDP endpoint MUST be "bound" to the MPA connection at entry to this phase. When Private Data is passed between ULPs in the Startup Phase, the ULP is responsible for interpreting that data, and then placing MPA into Full Operation. Note: The following text differentiates the two endpoints by calling them Initiator and Responder. This is quite arbitrary and is NOT related to the TCP startup (SYN, SYN/ACK sequence). The Initiator is the side that sends first in the MPA startup sequence (the MPA Request Frame). Culley, et al. Standards Track [Page 25] RFC 5044 MPA Framing for TCP October 2007 Note: The possibility that both endpoints would be allowed to make a connection at the same time, sometimes called an active/active connection, was considered by the work group and rejected. There were several motivations for this decision. 
One was that applications needing this facility were few (none other than theoretical at the time of this document). Another was that the facility created some implementation difficulties, particularly with the "dual stack" designs described later on. A last issue was that dealing with rejected connections at startup would have required at least an additional frame type, and more recovery actions, complicating the protocol. While none of these issues was overwhelming, the group and implementers were not motivated to do the work to resolve these issues. The protocol includes a method of detecting these active/active startup attempts so that they can be rejected and an error reported. The ULP is responsible for determining which side is Initiator or Responder. For client/server type ULPs, this is easy. For peer-peer ULPs (which might utilize a TCP style active/active startup), some mechanism (not defined by this specification) must be established, or some streaming mode data exchanged prior to MPA startup to determine which side starts in Initiator and which starts in Responder MPA mode. 7.1.1 MPA Request and Reply Frame Format 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 | | + Key (16 bytes containing "MPA ID Req Frame") + 4 | (4D 50 41 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 bytes containing "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 70 20 46 72 61 6D 65) | + + 12 | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 |M|C|R| Res | Rev | PD_Length | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | ~ ~ ~ Private Data ~ | | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Figure 8: MPA Request/Reply Frame Culley, et al. Standards Track [Page 26] RFC 5044 MPA Framing for TCP October 2007 Key: This field contains the "key" used to validate that the sender is an MPA sender. 
Initiator mode senders MUST set this field to the fixed value "MPA ID Req Frame" or (in byte order) 4D 50 41 20 49 44 20 52 65 71 20 46 72 61 6D 65 (in hexadecimal). Responder mode receivers MUST check this field for the same value, and close the connection and report an error locally if any other value is detected. Responder mode senders MUST set this field to the fixed value "MPA ID Rep Frame" or (in byte order) 4D 50 41 20 49 44 20 52 65 70 20 46 72 61 6D 65 (in hexadecimal). Initiator mode receivers MUST check this field for the same value, and close the connection and report an error locally if any other value is detected. M: This bit declares an endpoint's REQUIRED Marker usage. When this bit is '1' in an MPA Request Frame, the Initiator declares that Markers are REQUIRED in FPDUs sent from the Responder. When set to '1' in an MPA Reply Frame, this bit declares that Markers are REQUIRED in FPDUs sent from the Initiator. When in a received MPA Request Frame or MPA Reply Frame and the value is '0', Markers MUST NOT be added to the data stream by that endpoint. When '1' Markers MUST be added as described in Section 4.3 , MPA Markers. C: This bit declares an endpoint's preferred CRC usage. When this field is '0' in the MPA Request Frame and the MPA Reply Frame, CRCs MUST not be checked and need not be generated by either endpoint. When this bit is '1' in either the MPA Request Frame or MPA Reply Frame, CRCs MUST be generated and checked by both endpoints. Note that even when not in use, the CRC field remains present in the FPDU. When CRCs are not in use, the CRC field MUST be considered valid for FPDU checking regardless of its contents. R: This bit is set to zero, and not checked on reception in the MPA Request Frame. In the MPA Reply Frame, this bit is the Rejected Connection bit, set by the Responders ULP to indicate acceptance '0', or rejection '1', of the connection parameters provided in the Private Data. Res: This field is reserved for future use. 
It MUST be set to zero when sending, and not checked on reception. Culley, et al. Standards Track [Page 27] RFC 5044 MPA Framing for TCP October 2007 Rev: This field contains the revision of MPA. For this version of the specification, senders MUST set this field to one. MPA receivers compliant with this version of the specification MUST check this field. If the MPA receiver cannot interoperate with the received version, then it MUST close the connection and report an error locally. Otherwise, the MPA receiver should report the received version to the ULP. PD_Length: This field MUST contain the length in octets of the Private Data field. A value of zero indicates that there is no Private Data field present at all. If the receiver detects that the PD_Length field does not match the length of the Private Data field, or if the length of the Private Data field exceeds 512 octets, the receiver MUST close the connection and report an error locally. Otherwise, the MPA receiver should pass the PD_Length value and Private Data to the ULP. Private Data: This field may contain any value defined by ULPs or may not be present. The Private Data field MUST be between 0 and 512 octets in length. ULPs define how to size, set, and validate this field within these limits. Private Data usage is further discussed in Section 7.1.4 . 7.1.2. Connection Startup Rules The following rules apply to MPA connection Startup Phase: 1. When MPA is started in the Initiator mode, the MPA implementation MUST send a valid MPA Request Frame. The MPA Request Frame MAY include ULP-supplied Private Data. 2. When MPA is started in the Responder mode, the MPA implementation MUST wait until an MPA Request Frame is received and validated before entering Full MPA/DDP Operation. If the MPA Request Frame is improperly formatted, the implementation MUST close the TCP connection and exit MPA. 
If the MPA Request Frame is properly formatted but the Private Data is not acceptable, the implementation SHOULD return an MPA Reply Frame with the Rejected Connection bit set to '1'; the MPA Reply Frame MAY include ULP-supplied Private Data; the implementation MUST exit MPA, leaving the TCP connection open. The ULP may close TCP or use the connection for other purposes. If the MPA Request Frame is properly formatted and the Private Data is acceptable, the implementation SHOULD return an MPA Reply Frame with the Rejected Connection bit set to '0'; the MPA Reply Culley, et al. Standards Track [Page 28] RFC 5044 MPA Framing for TCP October 2007 Frame MAY include ULP-supplied Private Data; and the Responder SHOULD prepare to interpret any data received as FPDUs and pass any received ULPDUs to DDP. Note: Since the receiver's ability to deal with Markers is unknown until the Request and Reply Frames have been received, sending FPDUs before this occurs is not possible. Note: The requirement to wait on a Request Frame before sending a Reply Frame is a design choice. It makes for a well-ordered sequence of events at each end, and avoids having to specify how to deal with situations where both ends start at the same time. 3. MPA Initiator mode implementations MUST receive and validate an MPA Reply Frame. If the MPA Reply Frame is improperly formatted, the implementation MUST close the TCP connection and exit MPA. If the MPA Reply Frame is properly formatted but is the Private Data is not acceptable, or if the Rejected Connection bit is set to '1', the implementation MUST exit MPA, leaving the TCP connection open. The ULP may close TCP or use the connection for other purposes. If the MPA Reply Frame is properly formatted and the Private Data is acceptable, and the Reject Connection bit is set to '0', the implementation SHOULD enter Full MPA/DDP Operation Phase; interpreting any received data as FPDUs and sending DDP ULPDUs as FPDUs. 4. 
MPA Responder mode implementations MUST receive and validate at least one FPDU before sending any FPDUs or Markers. Note: This requirement is present to allow the Initiator time to get its receiver into Full Operation before an FPDU arrives, avoiding potential race conditions at the Initiator. This was also subject to some debate in the work group before rough consensus was reached. Eliminating this requirement would allow faster startup in some types of applications. However, that would also make certain implementations (particularly "dual stack") much harder. 5. If a received "Key" does not match the expected value (see Section 7.1.1 , MPA Request and Reply Frame Format) the TCP/DDP connection MUST be closed, and an error returned to the ULP. Culley, et al. Standards Track [Page 29] RFC 5044 MPA Framing for TCP October 2007 6. The received Private Data fields may be used by Consumers at either end to further validate the connection and set up DDP or other ULP parameters. The Initiator ULP MAY close the TCP/MPA/DDP connection as a result of validating the Private Data fields. The Responder SHOULD return an MPA Reply Frame with the "Reject Connection" bit set to '1' if the validation of the Private Data is not acceptable to the ULP. 7. When the first FPDU is to be sent, then if Markers are enabled, the first octets sent are the special Marker 0x00000000, followed by the start of the FPDU (the FPDU's ULPDU Length field). If Markers are not enabled, the first octets sent are the start of the FPDU (the FPDU's ULPDU Length field). 8. MPA implementations MUST use the difference between the MPA Request Frame and the MPA Reply Frame to check for incorrect "Initiator/Initiator" startups. Implementations SHOULD put a timeout on waiting for the MPA Request Frame when started in Responder mode, to detect incorrect "Responder/Responder" startups. 9. MPA implementations MUST validate the PD_Length field. 
The buffer that receives the Private Data field MUST be large enough to receive that data; the amount of Private Data MUST not exceed the PD_Length or the application buffer. If any of the above fails, the startup frame MUST be considered improperly formatted. 10. MPA implementations SHOULD implement a reasonable timeout while waiting for the entire set of startup frames; this prevents certain denial-of-service attacks. ULPs SHOULD implement a reasonable timeout while waiting for FPDUs, ULPDUs, and application level messages to guard against application failures and certain denial-of-service attacks. 7.1.3. Example Delayed Startup Sequence A variety of startup sequences are possible when using MPA on TCP. Following is an example of an MPA/DDP startup that occurs after TCP has been running for a while and has exchanged some amount of streaming data. This example does not use any Private Data (an example that does is shown later in Section 7.1.4.2 , Example Immediate Startup Using Private Data), although it is perfectly legal to include the Private Data. Note that since the example does not use any Private Data, there are no ULP interactions shown between receiving "startup frames" and putting MPA into Full Operation. Culley, et al. Standards Track [Page 30] RFC 5044 MPA Framing for TCP October 2007 Initiator Responder +---------------------------+ |ULP streaming mode | | request to | | transition to DDP/MPA | +---------------------------+ | mode (optional). | --------> |ULP gets request; | +---------------------------+ | enables MPA Responder | | mode with last (optional)| | streaming mode | | for MPA to | | send. | +---------------------------+ |MPA waits for incoming | |ULP receives streaming | <-------- | . | | ; | +---------------------------+ |Enters MPA Initiator mode; | |MPA sends | | ; | |MPA waits for incoming | +---------------------------+ | . | - - - - > |MPA receives | +---------------------------+ | . 
| |Consumer binds DDP to MPA; | |MPA sends the | | . | |DDP/MPA enables FPDU | +---------------------------+ | decoding, but does not | |MPA receives the | < - - - - | send any FPDUs. | | | +---------------------------+ |Consumer binds DDP to MPA; | |DDP/MPA begins Full | | Operation. | |MPA sends first FPDU (as | +---------------------------+ | DDP ULPDUs become | ========> |MPA receives first FPDU. | | available). | |MPA sends first FPDU (as | +---------------------------+ | DDP ULPDUs become | <====== | available). | +---------------------------+ Figure 9: Example Delayed Startup Negotiation Culley, et al. Standards Track [Page 31] RFC 5044 MPA Framing for TCP October 2007 An example Delayed Startup sequence is described below: * Active and passive sides start up a TCP connection in the usual fashion, probably using sockets APIs. They exchange some amount of streaming mode data. At some point, one side (the MPA Initiator) sends streaming mode data that effectively says "Hello, let's go into MPA/DDP mode". * When the remote side (the MPA Responder) gets this streaming mode message, the Consumer would send a last streaming mode message that effectively says "I acknowledge your Hello, and am now in MPA Responder mode". The exchange of these messages establishes the exact point in the TCP stream where MPA is enabled. The Responding Consumer enables MPA in the Responder mode and waits for the initial MPA startup message. * The Initiating Consumer would enable MPA startup in the Initiator mode which then sends the MPA Request Frame. It is assumed that no Private Data messages are needed for this example, although it is possible to do so. The Initiating MPA (and Consumer) would also wait for the MPA connection to be accepted. * The Responding MPA would receive the initial MPA Request Frame and would inform the Consumer that this message arrived. The Consumer can then accept the MPA/DDP connection or close the TCP connection. 
* To accept the connection request, the Responding Consumer would use an appropriate API to bind the TCP/MPA connections to a DDP endpoint, thus enabling MPA/DDP into Full Operation. In the process of going to Full Operation, MPA sends the MPA Reply Frame. MPA/DDP waits for the first incoming FPDU before sending any FPDUs. * If the initial TCP data was not a properly formatted MPA Request Frame, MPA will close or reset the TCP connection immediately. * The Initiating MPA would receive the MPA Reply Frame and would report this message to the Consumer. The Consumer can then accept the MPA/DDP connection, or close or reset the TCP connection to abort the process. * On determining that the connection is acceptable, the Initiating Consumer would use an appropriate API to bind the TCP/MPA connections to a DDP endpoint thus enabling MPA/DDP into Full Operation. MPA/DDP would begin sending DDP messages as MPA FPDUs. Culley, et al. Standards Track [Page 32] RFC 5044 MPA Framing for TCP October 2007 7.1.4. Use of Private Data This section is advisory in nature, in that it suggests a method by which a ULP can deal with pre-DDP connection information exchange. 7.1.4.1. Motivation Prior RDMA protocols have been developed that provide Private Data via out-of-band mechanisms. As a result, many applications now expect some form of Private Data to be available for application use prior to setting up the DDP/RDMA connection. Following are some examples of the use of Private Data. An RDMA endpoint (referred to as a Queue Pair, or QP, in InfiniBand and the [VERBS-RDMA ]) must be associated with a Protection Domain. No receive operations may be posted to the endpoint before it is associated with a Protection Domain. Indeed under both the InfiniBand and proposed RDMA/DDP verbs [VERBS-RDMA ] an endpoint/QP is created within a Protection Domain. There are some applications where the choice of Protection Domain is dependent upon the identity of the remote ULP client. 
For example, if a user session requires multiple connections, it is highly desirable for all of those connections to use a single Protection Domain. Note: Use of Protection Domains is further discussed in [RDMASEC ]. InfiniBand, the DAT APIs [DAT-API ], and the IT-API [IT-API ] all provide for the active-side ULP to provide Private Data when requesting a connection. This data is passed to the ULP to allow it to determine whether to accept the connection, and if so with which endpoint (and implicitly which Protection Domain). The Private Data can also be used to ensure that both ends of the connection have configured their RDMA endpoints compatibly on such matters as the RDMA Read capacity (see [RDMAP ]). Further ULP- specific uses are also presumed, such as establishing the identity of the client. Private Data is also allowed for when accepting the connection, to allow completion of any negotiation on RDMA resources and for other ULP reasons. There are several potential ways to exchange this Private Data. For example, the InfiniBand specification includes a connection management protocol that allows a small amount of Private Data to be exchanged using datagrams before actually starting the RDMA connection. Culley, et al. Standards Track [Page 33] RFC 5044 MPA Framing for TCP October 2007 This document allows for small amounts of Private Data to be exchanged as part of the MPA startup sequence. The actual Private Data fields are carried in the MPA Request Frame and the MPA Reply Frame. If larger amounts of Private Data or more negotiation is necessary, TCP streaming mode messages may be exchanged prior to enabling MPA. Culley, et al. Standards Track [Page 34] RFC 5044 MPA Framing for TCP October 2007 7.1.4.2. Example Immediate Startup Using Private Data Initiator Responder +---------------------------+ |TCP SYN sent. | +--------------------------+ +---------------------------+ --------> |TCP gets SYN packet; | +---------------------------+ | sends SYN-Ack. 
| |TCP gets SYN-Ack | <-------- +--------------------------+ | sends Ack. | +---------------------------+ --------> +--------------------------+ +---------------------------+ |Consumer enables MPA | |Consumer enables MPA | |Responder mode, waits for | |Initiator mode with | | . | |Private Data; MPA sends | +--------------------------+ | ; | |MPA waits for incoming | +--------------------------+ | . | - - - - > |MPA receives | +---------------------------+ | . | |Consumer examines Private | |Data, provides MPA with | |return Private Data, | |binds DDP to MPA, and | |enables MPA to send an | | . | |DDP/MPA enables FPDU | +---------------------------+ |decoding, but does not | |MPA receives the | < - - - - |send any FPDUs. | | . | +--------------------------+ |Consumer examines Private | |Data, binds DDP to MPA, | |and enables DDP/MPA to | |begin Full Operation. | |MPA sends first FPDU (as | +--------------------------+ |DDP ULPDUs become | ========> |MPA receives first FPDU. | |available). | |MPA sends first FPDU (as | +---------------------------+ |DDP ULPDUs become | <====== |available). | +--------------------------+ Figure 10: Example Immediate Startup Negotiation Note: The exact order of when MPA is started in the TCP connection sequence is implementation dependent; the above diagram shows one possible sequence. Also, the Initiator "Ack" to the Responder's "SYN-Ack" may be combined into the same TCP segment containing the MPA Request Frame (as is allowed by TCP RFCs). Culley, et al. Standards Track [Page 35] RFC 5044 MPA Framing for TCP October 2007 The example immediate startup sequence is described below: * The passive side (Responding Consumer) would listen on the TCP destination port, to indicate its readiness to accept a connection. * The active side (Initiating Consumer) would request a connection from a TCP endpoint (that expected to upgrade to MPA/DDP/RDMA and expected the Private Data) to a destination address and port. 
* The Initiating Consumer would initiate a TCP connection to the destination port. Acceptance/rejection of the connection would proceed as per normal TCP connection establishment. * The passive side (Responding Consumer) would receive the TCP connection request as usual allowing normal TCP gatekeepers, such as INETD and TCPserver, to exercise their normal safeguard/logging functions. On acceptance of the TCP connection, the Responding Consumer would enable MPA in the Responder mode and wait for the initial MPA startup message. * The Initiating Consumer would enable MPA startup in the Initiator mode to send an initial MPA Request Frame with its included Private Data message to send. The Initiating MPA (and Consumer) would also wait for the MPA connection to be accepted, and any returned Private Data. * The Responding MPA would receive the initial MPA Request Frame with the Private Data message and would pass the Private Data through to the Consumer. The Consumer can then accept the MPA/DDP connection, close the TCP connection, or reject the MPA connection with a return message. * To accept the connection request, the Responding Consumer would use an appropriate API to bind the TCP/MPA connections to a DDP endpoint, thus enabling MPA/DDP into Full Operation. In the process of going to Full Operation, MPA sends the MPA Reply Frame, which includes the Consumer-supplied Private Data containing any appropriate Consumer response. MPA/DDP waits for the first incoming FPDU before sending any FPDUs. * If the initial TCP data was not a properly formatted MPA Request Frame, MPA will close or reset the TCP connection immediately. Culley, et al. Standards Track [Page 36] RFC 5044 MPA Framing for TCP October 2007 * To reject the MPA connection request, the Responding Consumer would send an MPA Reply Frame with any ULP-supplied Private Data (with reason for rejection), with the "Rejected Connection" bit set to '1', and may close the TCP connection. 
* The Initiating MPA would receive the MPA Reply Frame with the Private Data message and would report this message to the Consumer, including the supplied Private Data. If the "Rejected Connection" bit is set to a '1', MPA will close the TCP connection and exit. If the "Rejected Connection" bit is set to a '0', and on determining from the MPA Reply Frame Private Data that the connection is acceptable, the Initiating Consumer would use an appropriate API to bind the TCP/MPA connections to a DDP endpoint thus enabling MPA/DDP into Full Operation. MPA/DDP would begin sending DDP messages as MPA FPDUs. 7.1.5. "Dual Stack" Implementations MPA/DDP implementations are commonly expected to be implemented as part of a "dual stack" architecture. One stack is the traditional TCP stack, usually with a sockets interface API (Application Programming Interface). The second stack is the MPA/DDP stack with its own API, and potentially separate code or hardware to deal with the MPA/DDP data. Of course, implementations may vary, so the following comments are of an advisory nature only. The use of the two stacks offers advantages: TCP connection setup is usually done with the TCP stack. This allows use of the usual naming and addressing mechanisms. It also means that any mechanisms used to "harden" the connection setup against security threats are also used when starting MPA/DDP. Some applications may have been originally designed for TCP, but are "enhanced" to utilize MPA/DDP after a negotiation reveals the capability to do so. The negotiation process takes place in TCP's streaming mode, using the usual TCP APIs. Some new applications, designed for RDMA or DDP, still need to exchange some data prior to starting MPA/DDP. This exchange can be of arbitrary length or complexity, but often consists of only a small amount of Private Data, perhaps only a single message. Using the TCP streaming mode for this exchange allows this to be done using well-understood methods. Culley, et al. 
Standards Track [Page 37] RFC 5044 MPA Framing for TCP October 2007 The main disadvantage of using two stacks is the conversion of an active TCP connection between them. This process must be done with care to prevent loss of data. To avoid some of the problems when using a "dual stack" architecture, the following additional restrictions may be required by the implementation: 1. Enabling the DDP/MPA stack SHOULD be done only when no incoming stream data is expected. This is typically managed by the ULP protocol. When following the recommended startup sequence, the Responder side enters DDP/MPA mode, sends the last streaming mode data, and then waits for the MPA Request Frame. No additional streaming mode data is expected. The Initiator side ULP receives the last streaming mode data, and then enters DDP/MPA mode. Again, no additional streaming mode data is expected. 2. The DDP/MPA MAY provide the ability to send a "last streaming message" as part of its Responder DDP/MPA enable function. This allows the DDP/MPA stack to more easily manage the conversion to DDP/MPA mode (and avoid problems with a very fast return of the MPA Request Frame from the Initiator side). Note: Regardless of the "stack" architecture used, TCP's rules MUST be followed. For example, if network data is lost, re-segmented, or re-ordered, TCP MUST recover appropriately even when this occurs while switching stacks. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From jim at mellanox.com Wed Nov 21 08:22:21 2007 From: jim at mellanox.com (Jim Mott) Date: Wed, 21 Nov 2007 08:22:21 -0800 Subject: [ofa-general] RE: the SDP module prints allot of error messages to the /var/log/messages In-Reply-To: <47445630.10000@dev.mellanox.co.il> References: <47445630.10000@dev.mellanox.co.il> Message-ID: Hi, These messages indicate real errors in zero copy bcopy operations. They mean that when sdp_sendmsg() returned to user space, the SDP driver thought that there were in-flight IB send operations pointing to user pages. Bad things can happen in this case. I have never seen these in simple testing by hand. The patch I sent you yesterday for review adds this test to the error recovery path. If you hit ^C (for example) during a transfer, you may see these messages but no crash. That patch is not in the current code yet because I need to understand why cleanup is not coordinated with completions and is thus generating these messages. Were the regressions running with the test patch or did you see these in normal operation? If normal operation, please let me know how to reproduce. Thanks, Jim Jim Mott Mellanox Technologies Ltd.
mail: jim at mellanox.com Phone: 512-294-5481 -----Original Message----- From: Dotan Barak [mailto:dotanb at dev.mellanox.co.il] Sent: Wednesday, November 21, 2007 10:01 AM To: Jim Mott; openib-general Subject: the SDP module prints allot of error messages to the /var/log/messages In our nightly regression I noticed that the /var/log/messages is filled with the following error messages: Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -32 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42202:19010): Could not reap -2 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -29 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42204:19005): Could not reap -14 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42202:19010): Could not reap -2 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42202:19010): Could not reap -7 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -28 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -4 in-flight sends Nov 21 17:28:30 sw186 kernel: sdp_sock(42203:19000): Could not reap -32 in-flight sends Nov 21 17:28:31 sw186 kernel: sdp_sock(42204:19005): Could not reap -14 in-flight sends Are those error messages really necessary? thanks Dotan From swise at opengridcomputing.com Wed Nov 21 10:07:25 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 21 Nov 2007 12:07:25 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: References: Message-ID: <474473DD.3050507@opengridcomputing.com> Comments in-line below... Kanevsky, Arkady wrote: > > Group, > > > below is proposal on how to resolve peer-to-peer iWARP CM issue > discovered at interop event. > > > The main issue is that MPA spec (relevant portion of IETF RFC 5044 > is below) requires that > > > connection initiator send first message over the established connection.
> > > Multiple MPI implementations and several other apps use peer-to-peer > model. > > > So rather than forcing all of them to do it on their own, which will > not help with > > > interop between different implementations, the goal is to extend > lower layers to provide it. > > > > > > Our first idea was to leave MPA protocol untouched and try to solve > this problem > > > in iw_cm. But there are too many complications to it. First, in > order to adhere to RFC5044 > > > initiator must send first FPDU and responder process it. But since > the connection is already > > > established processing FPDU involves ULP on whose behalf the > connection is created. > > > So either initiator sends a message which generates completion on > responder CQ, thus visible > > > to ULP, or not. > In the latter case, the only op which can do it is > RDMA one, which means > > > that responder somehow provided initiator S-tag which it can use. > So, this is an extension > > > to MPA, probably using private data. And that responder upon > receiving it destroy this S-tag. > > > In any case this is an extension of MPA. > This stag exchange isn't needed if this RDMA op is a 0B READ. The responder waits for that 0B read and only indicates the rdma connection is established to its ULP when it replies to the 0B read. In this scenario, the responder/server side doesn't consume any CQ resources. But it would require an IRD of at least 1 to be configured on the QP. The initiator still requires an SQ entry, and possibly a CQ entry, for initiating the 0B read and handling completion. But it's perhaps a little less painful than doing a SEND/RECV exchange. The read wr could be unsignaled so that it won't generate a CQE. But it still consumes an SQ WR slot so the SQ would have to be sized to allow this extra WR. And I guess the CQ would also need to be sized accordingly in case the read failed. > > In the former, Send is used but this requires a buffer to be posted > to CQ.
But since > > > the same CQ (or SharedCQ) can be used by other connections at the > same time it can cause > > > the responder CM posted buffer to be consumed by other connection. > This is not acceptable. > > > > > > So now we consider an extension to MPA protocol. > > > The goal is to be completely backwards compatible to existing version 1. > > > In a nutshell, use a "flag" in the MPA request message which > indicates that > > > "ready to receive" message will be sent by requestor upon receiving > > > MPA response message with connection acceptance. > > > > > > here are the changes to IETF RFC5044 > > > > > > 1. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 > 9 0 1 > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 > | | + Key (16 bytes containing "MPA ID Req Frame") + 4 | (4D 50 41 > 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 bytes containing > "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 70 20 46 72 61 > 6D 65) | + Or (16 bytes containing "MPA ID Rtr Frame") + 12 | (4D 50 > 41 20 49 44 20 52 74 72 20 46 72 61 6D 65) | + > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 > |M|C|R|S| Res | Rev | PD_Length | > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | > | ~ ~ ~ Private Data ~ | | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > > > > > > 2. S: indicator in the Req frame whether or not Requestor will send > Rtr frame. > > > In Req frame, if set to 1 then Rtr frame will be sent if responder > > > sends Rep frame with accept bit set. 0 indicates that Rtr frame > > > will not be sent. > > > In Rep frame, 0 means that Responder cannot support Rtr frame, > > > while 1 means that it can and is waiting for it. > > > (While my preference is to handle this as MPA protocol version > matching rules, > > > proposed method will provide complete backwards compatibility) > > > Unused by Rtr frame. That is set to 0 in Rtr frame and ignored > > > by responder.
> > > > > > All other bits M,C,R and remainder of Res treated as in MPA ver 1. > > > > > > Rtr frame adheres to C bit as specified in Rep frame > > First, the RTR frame _must_ be an FPDU for this to work. Thus it violates the DDP/RDMAP specs because it is an unknown DDP/RDMAP opcode. Second, assuming the RTR frame is sent as an FPDU, then this won't work with existing RNIC HW. The HW will post an async error because the incoming DDP/RDMAP opcode is unknown. The only way I see that we can fix this for the existing rnic HW is to come up with some way to send a valid RDMAP message from the initiator to the responder under the covers -and- have the responder only indicate that the connection is established when that FPDU is received. Chelsio cannot support this hack via a 0B write, but they could support a 0B read or send/recv exchange. But as you indicate, this is very painful and perhaps impossible to do without impacting the ULP and breaking verbs semantics. (that's why we punted on this a year ago :) Steve. From johann.george at qlogic.com Wed Nov 21 10:47:16 2007 From: johann.george at qlogic.com (Johann George) Date: Wed, 21 Nov 2007 10:47:16 -0800 Subject: [ofa-general] Presentations from OpenFabrics Developer's Summit Message-ID: <20071121184716.GE762@cuprite.pathscale.com> You can find a copy of the slides used at the OpenFabrics Developer's Summit at the following link: http://www.openfabrics.org/archives/nov2007sc.htm courtesy of Jeff Becker who collected the presentations and put together the web page. Thanks to Jeff and everyone else who participated.
Johann From pradeeps at linux.vnet.ibm.com Wed Nov 21 11:16:42 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Wed, 21 Nov 2007 11:16:42 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> <47311F34.6030004@linux.vnet.ibm.com> <47322E87.60409@linux.vnet.ibm.com> Message-ID: <4744841A.2090801@linux.vnet.ibm.com> I downloaded the latest tree and reviewed the code and it looks good. For some reason I have not been able to actually run the tests. I will do that next week and confirm that all is OK. On another note, along with the main non srq patch I had submitted a secondary patch that would switch to UD mode if no connected mode QPs are available. I have not updated that patch in a while now. If there is any interest in that please let me know and I can submit an updated patch. Pradeep From unenvied at amazing-porn.com Wed Nov 21 15:10:13 2007 From: unenvied at amazing-porn.com (David Griffin) Date: Thu, 22 Nov 2007 02:10:13 +0300 Subject: [ofa-general] Adobe Font Folio 11 MAC/XP/Vista for 189, Retails @ 2599 (You save 2409) Message-ID: <000001c82c92$99f6e100$0100007f@localhost> borland developer studio 2006 - 149 ulead mediastudio pro v8.0 with extras - 79 webeasy pro 6.0 - 39 ms windows 2003 enterprise server - 69 adobe creative suite 3 master collection for win - 299 readiris pro 11.5 for mac - 39 cyberlink powerdvd ultra deluxe 7 - 29 steinberg nuendo 3.1 - 99 type cheapxpsoft8. 
com in Internet Explorer From Arkady.Kanevsky at netapp.com Wed Nov 21 15:30:29 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 21 Nov 2007 18:30:29 -0500 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474473DD.3050507@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com> Message-ID: Very good points. Thanks Steve. If we can do unsignalled 0-size RDMA Read with "bogus" S-tag this may work better. Yes, it will require IRD not to be 0 set at Responder. Ditto ORD of at least 1 on Responder. There is no need to have extra CQ entry on either side for it. It is only needed for error path. So this will only be needed if Sender posted the full queue of sends. But it can not post anything because CM will not let it know that connection is established. Happy Thanksgiving, Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Steve Wise [mailto:swise at opengridcomputing.com] > Sent: Wednesday, November 21, 2007 1:07 PM > To: Kanevsky, Arkady > Cc: Glenn Grundstrom; Leonid Grossman; openib-general at openib.org > Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal > > Comments in-line below... > > > Kanevsky, Arkady wrote: > > > > Group, > > > > > > below is proposal on how to resolve peer-to-peer iWARP CM issue > > discovered at interop event. > > > > > > The main issue is that MPA spec (relevant portion of > IETF RFC 5044 > > is below) require that > > > > > > connection initiator send first message over the > established connection. > > > > > > Multiple MPI implementations and several other apps use > peer-to-peer > > model. > > > > > > So rather then forcing all of them to do it on their > own, which will > > not help with > > > > > > interop between different implementations, the goal is to extend > > lower layers to provide it. 
> > > > > > > > > > > > Our first idea was to leave MPA protocol untouched and > try to solve > > this problem > > > > > > in iw_cm. But there are too many complications to it. First, in > > order to adhere to RFC5044 > > > > > > initiator must send first FPDU and responder process > it. But since > > the connection is already > > > > > > established processing FPDU involves ULP on whose behalf the > > connection is created. > > > > > > So either initiator sends a message which generates > completion on > > responder CQ, thus visible > > > > > > to ULP, or not. > > > > > In the later case, the only op which can do it is > > RDMA one, which means > > > > > > that responder somehow provided initiator S-tag which > it can use. > > So, this is an extension > > > > > > to MPA, probably using private data. And that responder upon > > receiving it destroy this S-tag. > > > > > > In any case this is an extension of MPA. > > > > > This stag exchange isn't needed if this RDMA op is a 0B READ. > The responder waits for that 0B read and only indicates the > rdma connection is established to its ULP when it replies to > the 0B read. In this scenario, the responder/server side > doesn't consume any CQ resources. > But it would require an IRD of at least 1 to be configured on the QP. > The initiator still requires an SQ entry, and possibly a CQ > entry, for initiating the 0B read and handling completion. > But its perhaps a little less painful than doing a SEND/RECV > exchange. The read wr could be unsignaled so that it won't > generate a CQE. But it still consumes an SQ WR slot so the > SQ would have to be sized to allow this extra WR. And I guess > the CQ would also need to be sized accordingly in case the > read failed. > > > > > In the former, Send is used but this requires a buffer > to be posted > > to CQ. 
But since > > > > > > the same CQ (or SharedCQ) can be used by other > connections at the > > same time it can cause > > > > > > the responder CM posted buffer to be consumed by other > connection. > > This is not acceptable. > > > > > > > > > > > > So new we consider extension to MPA protocol. > > > > > > The goal is to be completely backwards compatible to > existing version 1. > > > > > > In a nutshell, use a "flag" in the MPA request message which > > indicates that > > > > > > "ready to receive" message will be send by requestor upon > > receiving > > > > > > MPA response message with connection acceptance. > > > > > > > > > > > > here are the changes to IETF RFC5044 > > > > > > > > > > > > 1. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 > 2 3 4 5 6 7 8 > > 9 0 1 > > > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 > > | | + Key (16 bytes containing "MPA ID Req Frame") + 4 > | (4D 50 41 > > 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 > bytes containing > > "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 > 70 20 46 72 61 > > 6D 65) | + Or (16 bytes containing "MPA ID Rtr Frame") > + 12 | (4D 50 > > 41 20 49 44 20 52 74 52 20 46 72 61 6D 65) | + > > > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 > > |M|C|R|S| Res | Rev | PD_Length | > > > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | > > | ~ ~ ~ Private Data ~ | | | > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | > > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > > > > > > > > > > > > 2. S: indicator in the Req frame whether or not > Requestor will send > > Rtr frame. > > > > > > In Req frame, if set to 1 then Rtr frame will be sent if > > responder > > > > > > sends Rep frame with accept bit set. 0 indicate > that Rtr frame > > > > > > will not be sent. > > > > > > In Rep frame, 0 means that Responder cannot support > Rtr frame, > > > > > > while 1 that it is and is waiting for it. 
> > > > > > (While my preference is to handle this as MPA > protocol version > > matching rules, > > > > > > proposed method will provide complete backwards > compatibility) > > > > > > Unused by Rtr frame. That is set to 0 in Rtr frame > and ignored > > > > > > by responder. > > > > > > > > > > > > All other bits M,C,R and remainder of Res treated > as in MPA ver 1. > > > > > > > > > > > > Rtr frame adhere to C bit as specified in Rep frame > > > > > > First, the RTR frame _must_ be an FPDU for this to work. > Thus it violates the DDP/RDMAP specs because it is an known > DDP/RDMAP opcode. > > Second, assuming the RTR frame is sent as an FPDU, then this > won't work with existing RNIC HW. The HW will post an async > error because the incoming DDP/RDMAP opcode is unknown. > > The only way I see that we can fix this for the existing rnic > HW is to come up with some way to send a valid RDMAP message > from the initiator to the responder under the covers -and- > have the responder only indicate that the connection is > established when that FPDU is received. > > Chelsio cannot support this hack via a 0B write, but the > could support a 0B read or send/recv exchange. But as you > indicate, this is very painful and perhaps impossible to do > without impacting the ULP and breaking verbs semantics. > > (that's why we punted on this a year ago :) > > > Steve. 
> > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From outswagger at togoco.com Wed Nov 21 15:40:10 2007 From: outswagger at togoco.com (Kamel Dickinson) Date: Wed, 21 Nov 2007 20:40:10 -0300 Subject: [ofa-general] Adobe Creative Suite 3 MAC/XP/Vista for 269, Retails @ 1799 (You save 1529) Message-ID: <000001c82c96$f0661e80$0100007f@localhost> cakewalk project 5 - 59 adobe after effects cs3 - 69 adobe flash cs3 professional - 59 adobe illustrator cs2 - 59 cadlink signlab vinyl 7.1 - 69 ms xp professional with sp2 - 49 final draft 7 - 39 avid xpress pro 5.7 - 119 visit cheapoemsoft6. com in Internet Explorer From kliteyn at mellanox.co.il Wed Nov 21 21:14:57 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 22 Nov 2007 07:14:57 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-22:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-21 OpenSM git rev = Tue_Nov_20_00:04:13_2007 [6b8a7c5ebb648a6aa054c7ec69a9e804e772f416] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 
12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From fenkes at de.ibm.com Thu Nov 22 02:26:26 2007 From: fenkes at de.ibm.com (Joachim Fenkes) Date: Thu, 22 Nov 2007 12:26:26 +0200 Subject: [ofa-general] [PATCH] IB/ehca: Fix static rate regression Message-ID: <200711221126.27465.fenkes@de.ibm.com> Wrong choice of port number caused modify_qp() to fail -- fixed. Signed-off-by: Joachim Fenkes --- Please apply this for 2.6.24-rc4 as it fixes a regression introduced in rc3. Thanks! drivers/infiniband/hw/ehca/ehca_qp.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 2e3e654..dd12668 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1203,7 +1203,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, attr->ah_attr.static_rate, &mqpcb->max_static_rate)) { ret = -EINVAL; @@ -1302,7 +1302,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; mqpcb->service_level_al = attr->alt_ah_attr.sl; - if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, attr->alt_ah_attr.static_rate, &mqpcb->max_static_rate_al)) { ret = -EINVAL; -- 1.5.2 From vlad at lists.openfabrics.org Thu Nov 22 02:55:59 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Thu, 22 Nov 2007 02:55:59 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071122-0200 daily build status Message-ID: <20071122105559.53526E60875@openfabrics.org> This email was 
generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.15 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.19 Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.22 Passed on ppc64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.17 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.18 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.12 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.20 Passed on ia64 with linux-2.6.16 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.13 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with 
linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.16.21-0.8-default Failed: From tonic at reefkeep.com Thu Nov 22 03:19:22 2007 From: tonic at reefkeep.com (James Adams) Date: Thu, 22 Nov 2007 06:19:22 -0500 Subject: [ofa-general] Autodesk 3D Studio Max 9 for XP for 149, Retails @ 6720 (You save 6590) Message-ID: <000001c82cf8$b594d280$0100007f@localhost> adobe audition 2.0 - 49 crystal xcelsius professional v4.5 - 59 adobe atmosphere 1.0 - 29 realize voice 3.51 - 29 sony sound forge 9.0 - 49 cyberlink powerdvd ultra deluxe 7 - 29 discreet 3d studio max 6.0 with character studio - 129 adobe photoshop cs3 extended - 89 visit cheapoemsoft4. com in Internet Explorer From vlad at dev.mellanox.co.il Thu Nov 22 03:40:43 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Thu, 22 Nov 2007 13:40:43 +0200 Subject: [ofa-general] [ANNOUNCE] ofed_1_3/linux-2.6.git updated to 2.6.24-rc3 Message-ID: <47456ABB.1060701@dev.mellanox.co.il> FYI, git://git.openfabrics.org/ofed_1_3/linux-2.6.git ofed_kernel I've merged in 2.6.24-rc3. Regards, Vladimir From akstcfuturovenezuelamnsdgs at futurovenezuela.org Thu Nov 22 04:39:46 2007 From: akstcfuturovenezuelamnsdgs at futurovenezuela.org (Randolph Gustafson) Date: Thu, 22 Nov 2007 19:39:46 +0700 Subject: [ofa-general] =?koi8-r?b?8PLv4uzl7fkg/O7l8ufv7+Ll8/Dl/uXu6fEg8PLl?= =?koi8-r?b?5PDy8fTp8Q==?= Message-ID: <910356105.30140347891127@futurovenezuela.org> An HTML attachment was scrubbed... 
URL: From xhejtman at ics.muni.cz Thu Nov 22 06:05:54 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Thu, 22 Nov 2007 15:05:54 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package Message-ID: <20071122140554.GB13609@ics.muni.cz> Hello, is the mentioned driver suppose to work under Xen in Dom0 which is basically same as real HW? Ofed 1.2 driver works. Ofed 1.3a does not. this is what I got in dmesg: ib_mthca 0000:08:00.0: NOP command failed to generate interrupt (IRQ 16), aborti ng. ib_mthca 0000:08:00.0: BIOS or ACPI interrupt routing problem? ib_mthca: probe of 0000:08:00.0 failed with error -16 -- Lukáš Hejtmánek From teepsnqeesl at book.com Thu Nov 22 07:25:46 2007 From: teepsnqeesl at book.com (Fredrick Bland) Date: Fri, 23 Nov 2007 00:25:46 +0900 Subject: [ofa-general] Ever made love to a virgin? Try Personal Puss! to fuck a virgin pussy. Message-ID: <01c82d67$63fe9810$49668e3a@teepsnqeesl> If you have never had a virgin or just want to experience a virgin pussy one more time, order your Personal Pussy! The Personal Pussy is a virgin pussy and you'll break a hymen enveloping this pussy on your first penetration. It is specially designed and made of soft and stretchable materials to ensure the feel of real pussy. With the Personal Puss! you can make a nice fuck any day, any time. Ordering your Personal Pussy you'll receive a virgin pussy and you will tear the hymen on your first penetration. http://greysay.com Personal Puss! 
is the best sex toy to fulfill all your fantasies From tziporet at mellanox.co.il Thu Nov 22 07:45:30 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Thu, 22 Nov 2007 17:45:30 +0200 Subject: [ofa-general] OFED 1.3 Beta release is available Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> Hi, OFED 1.3 Beta release is available on http://www.openfabrics.org/downloads/OFED/ofed-1.3/OFED-1.3-beta2.tgz To get BUILD_ID run ofed_info Please report any issues in bugzilla https://bugs.openfabrics.org/ The RC1 release is expected on December 5 Tziporet & Vlad ======================================================================== Release information: -------------------- OS support: Novell: - SLES10 - SLES10 SP1 and up1 Redhat: - Redhat EL4 up4 and up5 - Redhat EL5 and up1 kernel.org: - 2.6.23 and 2.6.24-rc2 Systems: * x86_64 * x86 * ia64 * ppc64* Main Changes from OFED 1.3-alpha ================================ * Kernel code based on 2.6.24-rc2 * New packages: * SRP target * qperf test from Qlogic * ibsim package * uDAPL 2.0 library (1.0 & 2.0 are coexist) * New OSes Support: * RHEL 5 up1 * SLES10 SP1 up1 * Compilation issues resolved: * Open MPI compilation on SLES10 SP1 * ibutils compiles on SLES10 PPC64 (64 bits) * Apply patches that fix warning of backport patches * Prefix is now supported properly * RDS implementation for API version 2 was updated form 1.2.5 branch * Fix binary compatibility of libibverbs caused by XRC implementation * Uninstall is now working properly * ib-bonding update to release 19 * MPI packages update: * mvapich-1.0.0-1625.src.rpm * mvapich2-1.0.1-1.src.rpm * openmpi-1.2.4-1.src.rpm Mlx4 driver specific changes: * Enable changing the default of HCA resource limits with module parameters * Default number of maximum QPs is now 128K (was 64K) * Fixing max_cqe's (not adding an extra cqe) * Fix state check in mlx4_qp_modify * Sanity check userspace send queue sizes * Several bug fixes in XRC Tasks that should be 
completed for the beta release: ==================================================== 1. 32-bit libraries to be supported on SLES10 SP1 Update1. 2. Fix SDP stability issues 3. IPoIB performance improvements for small messages 4. Fix bugs -------------- next part -------------- An HTML attachment was scrubbed... URL: From kliteyn at mellanox.co.il Thu Nov 22 21:11:42 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 23 Nov 2007 07:11:42 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-23:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-22 OpenSM git rev = Tue_Nov_20_00:04:13_2007 [6b8a7c5ebb648a6aa054c7ec69a9e804e772f416] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=480 Pass=480 Fail=0 Pass: 36 Stability IS1-16.topo 36 Pkey IS1-16.topo 36 OsmTest IS1-16.topo 36 OsmStress IS1-16.topo 36 Multicast IS1-16.topo 36 LidMgr IS1-16.topo 12 Stability IS3-loop.topo 12 Stability IS3-128.topo 12 Pkey IS3-128.topo 12 OsmTest IS3-loop.topo 12 OsmTest IS3-128.topo 12 OsmStress IS3-128.topo 12 Multicast IS3-loop.topo 12 Multicast IS3-128.topo 12 LidMgr IS3-128.topo 12 FatTree merge-roots-4-ary-2-tree.topo 12 FatTree merge-root-4-ary-3-tree.topo 12 FatTree gnu-stallion-64.topo 12 FatTree blend-4-ary-2-tree.topo 12 FatTree RhinoDDR.topo 12 FatTree FullGnu.topo 12 FatTree 4-ary-2-tree.topo 12 FatTree 2-ary-4-tree.topo 12 FatTree 12-node-spaced.topo 12 FTreeFail 4-ary-2-tree-missing-sw-link.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 12 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From keshetti85-student at yahoo.co.in Fri Nov 23 02:35:43 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Fri, 23 Nov 2007 16:05:43 +0530 Subject: [ofa-general] Re: LASH routing algorithm in openSM In-Reply-To: 
<51504.85.164.64.55.1195807306.squirrel@webmail.uio.no> References: <829ded920711212055t772a605ev5307e2c15a7bda5d@mail.gmail.com> <47454B3F.7000300@simula.no> <829ded920711220259s5a2e74f8t5048c033dddd5a35@mail.gmail.com> <829ded920711230022he95da4fy946a5c35968f5f16@mail.gmail.com> <829ded920711230023m127bdc9ah2c78ebf8a06cd163@mail.gmail.com> <51504.85.164.64.55.1195807306.squirrel@webmail.uio.no> Message-ID: <829ded920711230235r51dd1f6di5cf1004bdf3cb4f@mail.gmail.com> > I believe that LMC>0 will be (is being implemented) but i am not sure of > the situation. My colleague, might be able to give you a quick reply on > this. > > We are not heavily involved in openSM. We were invited to implement the > LASH algorithm in OFED 1.2 / 1.3 and have not had any interaction with > OpenFabrics since then. > > I suggest that if Sven-Arne doesn't have any more information than I do > then you should send a question to the openSM mailing list. I believe they > are very active and answer quickly. > > - Tom > Is there any work going on in OFED community to support LMC>0 with LASH routing algorithm in openSM currently ? 
-Mahesh From vlad at lists.openfabrics.org Fri Nov 23 02:56:43 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 23 Nov 2007 02:56:43 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071123-0200 daily build status Message-ID: <20071123105643.39767E60861@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.20 Passed on ia64 with linux-2.6.18 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on ppc64 with linux-2.6.15 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.17 Passed on ppc64 with linux-2.6.13 Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.21.1 Passed 
on ia64 with linux-2.6.12 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ppc64 with linux-2.6.17 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-53.el5 Failed: From swise at opengridcomputing.com Fri Nov 23 07:35:37 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 23 Nov 2007 09:35:37 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: References: <474473DD.3050507@opengridcomputing.com> Message-ID: <4746F349.3040606@opengridcomputing.com> Kanevsky, Arkady wrote: > Very good points. > Thanks Steve. > > If we can do unsignalled 0-size RDMA Read with "bogus" S-tag this may > work better. > Yes, it will require IRD not to be 0 set at Responder. > Ditto ORD of at least 1 on Responder. > There is no need to have extra CQ entry on either side for it. > It is only needed for error path. > So this will only be needed if Sender posted the full queue of sends. > But it can not post anything because CM will not let it know that > connection is established. > > Well, actually, I think the ULP _can_ post before establishing the connection. But I guess we can define the semantics such that applications using the rdma-cm interface must adhere to whatever we need to make this hack work. Q: are there apps using the rdma-cm out there today that pre-post SQ WRs before getting a ESTABLISHED event? Steve. > Happy Thanksgiving, > > Arkady Kanevsky email: arkady at netapp.com > Network Appliance Inc. phone: 781-768-5395 > 1601 Trapelo Rd. - Suite 16. 
Fax: 781-895-1195 > Waltham, MA 02451 central phone: 781-768-5300 > > > >> -----Original Message----- >> From: Steve Wise [mailto:swise at opengridcomputing.com] >> Sent: Wednesday, November 21, 2007 1:07 PM >> To: Kanevsky, Arkady >> Cc: Glenn Grundstrom; Leonid Grossman; openib-general at openib.org >> Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal >> >> Comments in-line below... >> >> >> Kanevsky, Arkady wrote: >> >>> Group, >>> >>> >>> below is proposal on how to resolve peer-to-peer iWARP CM issue >>> discovered at interop event. >>> >>> >>> The main issue is that MPA spec (relevant portion of >>> >> IETF RFC 5044 >> >>> is below) require that >>> >>> >>> connection initiator send first message over the >>> >> established connection. >> >>> Multiple MPI implementations and several other apps use >>> >> peer-to-peer >> >>> model. >>> >>> >>> So rather then forcing all of them to do it on their >>> >> own, which will >> >>> not help with >>> >>> >>> interop between different implementations, the goal is to extend >>> lower layers to provide it. >>> >>> >>> >>> >>> >>> Our first idea was to leave MPA protocol untouched and >>> >> try to solve >> >>> this problem >>> >>> >>> in iw_cm. But there are too many complications to it. First, in >>> order to adhere to RFC5044 >>> >>> >>> initiator must send first FPDU and responder process >>> >> it. But since >> >>> the connection is already >>> >>> >>> established processing FPDU involves ULP on whose behalf the >>> connection is created. >>> >>> >>> So either initiator sends a message which generates >>> >> completion on >> >>> responder CQ, thus visible >>> >>> >>> to ULP, or not. >>> >> >> >>> In the later case, the only op which can do it is >>> RDMA one, which means >>> >>> >>> that responder somehow provided initiator S-tag which >>> >> it can use. >> >>> So, this is an extension >>> >>> >>> to MPA, probably using private data. And that responder upon >>> receiving it destroy this S-tag. 
>>> >>> >>> In any case this is an extension of MPA. >>> >>> >> This stag exchange isn't needed if this RDMA op is a 0B READ. >> The responder waits for that 0B read and only indicates the >> rdma connection is established to its ULP when it replies to >> the 0B read. In this scenario, the responder/server side >> doesn't consume any CQ resources. >> But it would require an IRD of at least 1 to be configured on the QP. >> The initiator still requires an SQ entry, and possibly a CQ >> entry, for initiating the 0B read and handling completion. >> But its perhaps a little less painful than doing a SEND/RECV >> exchange. The read wr could be unsignaled so that it won't >> generate a CQE. But it still consumes an SQ WR slot so the >> SQ would have to be sized to allow this extra WR. And I guess >> the CQ would also need to be sized accordingly in case the >> read failed. >> >> >>> In the former, Send is used but this requires a buffer >>> >> to be posted >> >>> to CQ. But since >>> >>> >>> the same CQ (or SharedCQ) can be used by other >>> >> connections at the >> >>> same time it can cause >>> >>> >>> the responder CM posted buffer to be consumed by other >>> >> connection. >> >>> This is not acceptable. >>> >>> >>> >>> >>> >>> So new we consider extension to MPA protocol. >>> >>> >>> The goal is to be completely backwards compatible to >>> >> existing version 1. >> >>> In a nutshell, use a "flag" in the MPA request message which >>> indicates that >>> >>> >>> "ready to receive" message will be send by requestor upon >>> receiving >>> >>> >>> MPA response message with connection acceptance. >>> >>> >>> >>> >>> >>> here are the changes to IETF RFC5044 >>> >>> >>> >>> >>> >>> 1. 
0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 >>> >> 2 3 4 5 6 7 8 >> >>> 9 0 1 >>> >>> >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 >> >>> | | + Key (16 bytes containing "MPA ID Req Frame") + 4 >>> >> | (4D 50 41 >> >>> 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 >>> >> bytes containing >> >>> "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 >>> >> 70 20 46 72 61 >> >>> 6D 65) | + Or (16 bytes containing "MPA ID Rtr Frame") >>> >> + 12 | (4D 50 >> >>> 41 20 49 44 20 52 74 52 20 46 72 61 6D 65) | + >>> >>> >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 >> >>> |M|C|R|S| Res | Rev | PD_Length | >>> >>> >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | >> >>> | ~ ~ ~ Private Data ~ | | | >>> >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | >> >>> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ >>> >>> >>> >>> >>> >>> 2. S: indicator in the Req frame whether or not >>> >> Requestor will send >> >>> Rtr frame. >>> >>> >>> In Req frame, if set to 1 then Rtr frame will be sent if >>> responder >>> >>> >>> sends Rep frame with accept bit set. 0 indicate >>> >> that Rtr frame >> >>> will not be sent. >>> >>> >>> In Rep frame, 0 means that Responder cannot support >>> >> Rtr frame, >> >>> while 1 that it is and is waiting for it. >>> >>> >>> (While my preference is to handle this as MPA >>> >> protocol version >> >>> matching rules, >>> >>> >>> proposed method will provide complete backwards >>> >> compatibility) >> >>> Unused by Rtr frame. That is set to 0 in Rtr frame >>> >> and ignored >> >>> by responder. >>> >>> >>> >>> >>> >>> All other bits M,C,R and remainder of Res treated >>> >> as in MPA ver 1. >> >>> >>> >>> >>> Rtr frame adhere to C bit as specified in Rep frame >>> >>> >>> >> First, the RTR frame _must_ be an FPDU for this to work. >> Thus it violates the DDP/RDMAP specs because it is an known >> DDP/RDMAP opcode. 
>> >> Second, assuming the RTR frame is sent as an FPDU, then this >> won't work with existing RNIC HW. The HW will post an async >> error because the incoming DDP/RDMAP opcode is unknown. >> >> The only way I see that we can fix this for the existing rnic >> HW is to come up with some way to send a valid RDMAP message >> from the initiator to the responder under the covers -and- >> have the responder only indicate that the connection is >> established when that FPDU is received. >> >> Chelsio cannot support this hack via a 0B write, but the >> could support a 0B read or send/recv exchange. But as you >> indicate, this is very painful and perhaps impossible to do >> without impacting the ULP and breaking verbs semantics. >> >> (that's why we punted on this a year ago :) >> >> >> Steve. >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general >> >> From pelagial at stormg.com Fri Nov 23 12:46:22 2007 From: pelagial at stormg.com (Nichael Jackson) Date: Fri, 23 Nov 2007 14:46:22 -0600 Subject: [ofa-general] Adobe Acrobat Professional 8 MAC/XP/Vista for 79, Retails @ 599 (You Save 520) Message-ID: <000001c82e10$f97afb00$0100007f@localhost> steinberg nuendo 3.1 - 99 sonic scenarist 3.0 - 49 microsoft frontpage 2003 - 29 microsoft vista ultimate - 89 microsoft exchange server enterprise 2003 - 59 corel wordperfect office standard edition 12 - 49 adobe indesign cs2 - 59 adobe after effects cs3 - 69 visit cheapoemsoft7. 
com in Internet Explorer From alivincular at whyihatedelta.com Fri Nov 23 14:20:18 2007 From: alivincular at whyihatedelta.com (Shahid Olsen) Date: Sat, 24 Nov 2007 01:20:18 +0300 Subject: [ofa-general] Adobe Font Folio 11 MAC/XP/Vista for 189, Retails @ 2599 (You save 2409) Message-ID: <000001c82e1e$49ac8a00$0100007f@localhost> acronis true image enterprise server 9.1.3666 - 79 discreet combustion 4.0 for windows - 69 microsoft money home & business 7 - 39 autodesk architectural studio 3.0 - 39 microsoft expression studio 1.0 - 79 sony sound forge 9.0 - 49 avid liquid pro 7 - 69 adobe framemaker 8.0 - 69 type cheapxpsoft2. com in Internet Explorer From betty.h at adelphia.net Fri Nov 23 18:29:47 2007 From: betty.h at adelphia.net (Cancun Lottery) Date: Fri, 23 Nov 2007 18:29:47 -0800 Subject: [ofa-general] 2007 End Of Year Promo Message-ID: <2969413.1195871387270.JavaMail.root@web29> WINNER We wish to congratulate and inform you on the selection of your email coupon number which was selected among the 6 lucky consolation prizewinners. Your email ID identified with coupon No. CAN:9728939934.withentries from the 25,000,000 different email addresses enrolled for the E-game.Your email ID was included among the 25,000,000 different email addresses submitted by our partner international email provider companies. No tickets were sold.You have won a prize money of Two million Five Hundred Thousand Great Britain Pounds Sterlings for the Month of November, 2007, You are to contact the claims agent with the following details for the release of your winnings. Claims Requirements: 1.Name in full: 2.Home Address: 3.Age: 4.Occupation: 5.Phone Number: 6.Present Country: 7.Sex: 8.Marital Status: NOTE: Please choose one of the most preferrable options of receiving winning claims below: 1 BANK TO BANK WIRE TRANSFER 2 COURIER DELIVERY SERVICE Contact: Mr. williams Nicolas Tel:+447031898452 or +4470301898599 E-mail: cancunpayofficer05 at yahoo.co.uk Regards, Mrs. 
Sandra Crouch (Group Coordinator) From kliteyn at mellanox.co.il Fri Nov 23 21:08:46 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 24 Nov 2007 07:08:46 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-24:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-23 OpenSM git rev = Tue_Nov_20_00:04:13_2007 [6b8a7c5ebb648a6aa054c7ec69a9e804e772f416] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=480 Pass=480 Fail=0 Pass: 36 Stability IS1-16.topo 36 Pkey IS1-16.topo 36 OsmTest IS1-16.topo 36 OsmStress IS1-16.topo 36 Multicast IS1-16.topo 36 LidMgr IS1-16.topo 12 Stability IS3-loop.topo 12 Stability IS3-128.topo 12 Pkey IS3-128.topo 12 OsmTest IS3-loop.topo 12 OsmTest IS3-128.topo 12 OsmStress IS3-128.topo 12 Multicast IS3-loop.topo 12 Multicast IS3-128.topo 12 LidMgr IS3-128.topo 12 FatTree merge-roots-4-ary-2-tree.topo 12 FatTree merge-root-4-ary-3-tree.topo 12 FatTree gnu-stallion-64.topo 12 FatTree blend-4-ary-2-tree.topo 12 FatTree RhinoDDR.topo 12 FatTree FullGnu.topo 12 FatTree 4-ary-2-tree.topo 12 FatTree 2-ary-4-tree.topo 12 FatTree 12-node-spaced.topo 12 FTreeFail 4-ary-2-tree-missing-sw-link.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 12 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From vlad at lists.openfabrics.org Sat Nov 24 02:54:54 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sat, 24 Nov 2007 02:54:54 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071124-0200 daily build status Message-ID: <20071124105454.9501BE608B4@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod 
--with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.18 Passed on powerpc with linux-2.6.12 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on ia64 with linux-2.6.23 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.17 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ppc64 with linux-2.6.19 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with 
linux-2.6.18-1.2798.fc6 Passed on ppc64 with linux-2.6.18-8.el5 Failed: From rpearson at systemfabricworks.com Sat Nov 24 09:43:56 2007 From: rpearson at systemfabricworks.com (Robert Pearson) Date: Sat, 24 Nov 2007 11:43:56 -0600 Subject: [ofa-general] libipath crash Message-ID: <5p5klh$2fqer4@rrcs-agw-01.hrndva.rr.com> The following test case will cause system crash on 1.2.5.1 if run a few times with an ipath device but not with an mthca device. I noticed some recent fixes to ibv_resize_cq but did not see anything about ibv_create_cq. #include <stdio.h> #include <infiniband/verbs.h> int main(int argc, char *argv[]) { int num_hca; int ret; int cqe; int max_cqe; int i; struct ibv_device **device_list; struct ibv_context *context; struct ibv_device_attr device_attr; struct ibv_cq *cq; srandom((unsigned int)time(NULL)); if ((device_list = ibv_get_device_list(&num_hca)) == NULL) { printf("unable to get device list\n"); return 1; } if (num_hca == 0) { printf("no HCAs\n"); return 1; } if ((context = ibv_open_device(device_list[0])) == NULL) { printf("unable to open device\n"); return 1; } if ((ret = ibv_query_device(context, &device_attr)) != 0) { printf("unable to query device\n"); return 1; } max_cqe = 100000; if (max_cqe > device_attr.max_cqe) max_cqe = device_attr.max_cqe; for (i = 0; i < 1000; i++) { cqe = ((int)random() % max_cqe) + 1; printf("cqe = %d\n", cqe); fflush(stdout); if ((cq = ibv_create_cq(context, cqe, NULL, NULL, 0)) == NULL) { printf("unable to create cq\n"); return 1; } if ((ret = ibv_destroy_cq(cq)) != 0) { printf("unable to destroy cq\n"); return 1; } } printf("passed\n"); return 0; } Bob Pearson From rdreier at cisco.com Sat Nov 24 13:42:32 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 24 Nov 2007 13:42:32 -0800 Subject: [ofa-general] libipath crash In-Reply-To: <5p5klh$2fqer4@rrcs-agw-01.hrndva.rr.com> (Robert Pearson's message of "Sat, 24 Nov 2007 11:43:56 -0600") References: <5p5klh$2fqer4@rrcs-agw-01.hrndva.rr.com> Message-ID: > The following test case will
cause system crash on 1.2.5.1 if run a few > times with an ipath device but not with an mthca device. I noticed some > recent fixes to ibv_resize_cq but did not see anything about ibv_create_cq. I can't reproduce here with Linus's latest git tree (v2.6.24-rc3-19-g2ffbb83). What are the kernel messages from the crash? What kernel are you running on? - R. From rdreier at cisco.com Sat Nov 24 13:47:07 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 24 Nov 2007 13:47:07 -0800 Subject: [ofa-general] Re: MTHCA driver from OFED 1.3a package In-Reply-To: <20071122140554.GB13609@ics.muni.cz> (Lukas Hejtmanek's message of "Thu, 22 Nov 2007 15:05:54 +0100") References: <20071122140554.GB13609@ics.muni.cz> Message-ID: > is the mentioned driver suppose to work under Xen in Dom0 which is basically > same as real HW? Ofed 1.2 driver works. Ofed 1.3a does not. I don't think OFED "supports" Xen, but it should work, and I would like to try to have at least the upstream kernel's mthca and other IB drivers work under Xen. I'm not sure what code is in OFED 1.3a exactly, but I don't see too many recent changes to mthca that look like they could cause your issue: > ib_mthca 0000:08:00.0: NOP command failed to generate interrupt (IRQ 16), aborting. > ib_mthca 0000:08:00.0: BIOS or ACPI interrupt routing problem? > ib_mthca: probe of 0000:08:00.0 failed with error -16 Maybe the patch to try to enable MSI-X by default (which went in after 2.6.23 I think) is causing problems, although it looks like you are not on the MSI-X path. (Or were there other mthca messages too?) What kernel are you running in dom0? Is the change from OFED 1.2 to 1.3a the only difference between the working and non-working cases, or did you change anything else in your setup? - R. 
From rdreier at cisco.com Sat Nov 24 13:48:13 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 24 Nov 2007 13:48:13 -0800 Subject: [ofa-general] Re: [PATCH] IB/ehca: Fix static rate regression In-Reply-To: <200711221126.27465.fenkes@de.ibm.com> (Joachim Fenkes's message of "Thu, 22 Nov 2007 12:26:26 +0200") References: <200711221126.27465.fenkes@de.ibm.com> Message-ID: thanks, applied. From rdreier at cisco.com Sat Nov 24 13:54:35 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 24 Nov 2007 13:54:35 -0800 Subject: [ofa-general] Re: [PATCH 1/1] IB/iSER: Add missing counter incrementation in iser_data_buf_aligned_len In-Reply-To: <1195643497.4132.4.camel@localhost> (Erez Zilber's message of "Wed, 21 Nov 2007 13:11:37 +0200") References: <1195643497.4132.4.camel@localhost> Message-ID: thanks, applied. > The following patch fixes a bug in iSER. Without it, iSER will have poor > performance. Can you push it upstream for 2.6.24? Please put commentary like this (that should not go into the kernel changelog) after the '---' line in your email, so that it is automatically dropped when I import the patch into git. > diff --git a/drivers/infiniband/ulp/iser/iser_memory.c > b/drivers/infiniband/ulp/iser/iser_memory.c > index d687980..a28f552 100644 > --- a/drivers/infiniband/ulp/iser/iser_memory.c Something line-wrapped your email too... > + if (end_addr == next_addr) { > + cnt++; > continue; > + } > else if (!IS_4K_ALIGNED(end_addr)) { For this type of change, please put the '}' on the same line as the 'else' (I fixed it by hand). Also, I never noticed this before, but the use of IS_4K_ALIGNED() seems pretty suspicious here. Why are you hard-coding 4K? What would be the correct thing on a system with 64K pages and an HCA that can handle 512-byte aligned memory regions? - R. 
From rdreier at cisco.com Sat Nov 24 14:56:22 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 24 Nov 2007 14:56:22 -0800 Subject: [ofa-general] Re: MTHCA driver from OFED 1.3a package In-Reply-To: <20071124223117.GA4265@ics.muni.cz> (Lukas Hejtmanek's message of "Sat, 24 Nov 2007 23:31:17 +0100") References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> Message-ID: > I've already resolved this issue. For some reason, kernel drivers (2.6.18) > works correctly under Xen Dom0. I was confused about OFED 1.2 which works but > it contained patch to work under Xen. After applying similar patch to the OFED > 1.3a, it works now as well. What is the patch that you need? > There is still outstanding problem discussed in July (if you remember) > regarding wrong usage of DMA API and assumption of coherent mapping - pages > returned by kernel allocator do not form contiguos area under Xen (with vanila > kernel it is mostly contiguous but it is not guaranteed). Yes, I remember. Actually the problem is in using dma_sync_single on something not mapped with dma_map_single. > I hoped that driver evolved in this case. You disagreed to use coherent > allocator instead of standard. Did you do any progress? No, it is still on my list of things to address, but not a very high priority. I thought this is only an issue in domU though, not dom0? - R. 
From rpearson at systemfabricworks.com Sat Nov 24 16:38:41 2007 From: rpearson at systemfabricworks.com (Robert Pearson) Date: Sat, 24 Nov 2007 18:38:41 -0600 Subject: [ofa-general] libipath crash In-Reply-To: Message-ID: <5p5klh$2ft8d8@rrcs-agw-01.hrndva.rr.com> Roland, [root at client1 src]# uname -a Linux client1 2.6.18-8.1.15.el5 #1 SMP Mon Oct 22 08:32:28 EDT 2007 x86_64 x86_64 x86_64 GNU/Linux [root at client1 src]# /etc/infiniband/info prefix=/usr Kernel=2.6.18-8.1.15.el5 Configure options: --with-cxgb3-mod --with-ipath_inf-mod --with-ipoib-mod --with-iser-mod --with-mthca-mod --with-sdp-mo d --with-srp-mod --with-core-mod --with-user_mad-mod --with-user_access-mod --with-addr_trans-mod --with-mlx4-mod --with -rds-mod [root at client1 src]# ibstat CA 'ipath0' CA type: InfiniPath_QLE7140 Number of ports: 1 Firmware version: Hardware version: 1 Node GUID: 0x00117500006870d1 System image GUID: 0x00117500006870d1 Port 1: State: Active Physical state: LinkUp Rate: 10 Base lid: 12 LMC: 0 SM lid: 12 Capability mask: 0x02010802 Port GUID: 0x00117500006870d1 [root at client1 src]# grep INF /boot/config-2.6.18-8.1.15.el5 CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=y CONFIG_IPV6_ROUTE_INFO=y # CONFIG_INFTL is not set CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_ADDR_TRANS=y CONFIG_INFINIBAND_MTHCA=m CONFIG_INFINIBAND_MTHCA_DEBUG=y CONFIG_INFINIBAND_IPATH=m CONFIG_INFINIBAND_IPOIB=m CONFIG_INFINIBAND_IPOIB_DEBUG=y # CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set CONFIG_INFINIBAND_SRP=m CONFIG_INFINIBAND_ISER=m CONFIG_INFINIBAND_SDP=m # CONFIG_INFINIBAND_SDP_DEBUG is not set CONFIG_INFINIBAND_MADEYE=m CONFIG_DEBUG_INFO=y CONFIG_ZLIB_INFLATE=y last interesting message Nov 24 12:56:35 client1 kernel: ----------- [cut here ] --------- [please bite here ] --------- Nov 24 12:56:35 client1 kernel: Kernel BUG at mm/slab.c:2649 Nov 24 12:56:35 client1 kernel: invalid opcode: 0000 [1] SMP Nov 24 12:56:35 client1 kernel: last sysfs 
file: /class/infiniband/ipath0/ports/1/pkeys/3 Nov 24 12:56:35 client1 kernel: CPU 1 Nov 24 12:56:35 client1 kernel: Modules linked in: autofs4 hidp rfcomm l2cap bluetooth sunrpc rdma_ucm(U) ib_srp(U) ib_s dp(U) rdma_cm(U) iw_cm(U) ib_addr(U) ib_uverbs(U) ib_umad(U) ib_mthca(U) ib_ipoib(U) ib_cm(U) ib_sa(U) ib_mad(U) ip_conn track_netbios_ns ipt_REJECT xt_state ip_conntrack nfnetlink iptable_filter ip_tables ip6t_REJECT xt_tcpudp ip6table_filt er ip6_tables x_tables ipv6 dm_mirror dm_mod video sbs i2c_ec i2c_core button battery asus_acpi acpi_memhotplug ac parpo rt_pc lp parport sg ib_ipath(U) ib_core(U) ide_cd shpchp cdrom bnx2 serio_raw pcspkr mptsas mptscsih mptbase scsi_transp ort_sas sd_mod scsi_mod ext3 jbd ehci_hcd ohci_hcd uhci_hcd Nov 24 12:56:35 client1 kernel: Pid: 5163, comm: test Not tainted 2.6.18-8.1.15.el5 #1 Nov 24 12:56:35 client1 kernel: RIP: 0010:[] [] cache_grow+0x1e/0x395 Nov 24 12:56:35 client1 kernel: RSP: 0018:ffff81001f37dc08 EFLAGS: 00010006 Nov 24 12:56:35 client1 kernel: RAX: 0000000000000000 RBX: 00000000000080d0 RCX: 00000000ffffffff Nov 24 12:56:35 client1 kernel: RDX: 0000000000000000 RSI: 00000000000080d0 RDI: ffff810037e70040 Nov 24 12:56:35 client1 kernel: RBP: ffff81003ffa0d60 R08: ffff810037e26c00 R09: ffff810002013000 Nov 24 12:56:35 client1 kernel: R10: 0000000000000000 R11: 000000d000000001 R12: ffff810037e70040 Nov 24 12:56:35 client1 kernel: R13: ffff81003ffa0d40 R14: 0000000000000000 R15: ffff810037e70040 Nov 24 12:56:35 client1 kernel: FS: 00002aaaaaad7460(0000) GS:ffff810037e117c0(0000) knlGS:0000000000000000 Nov 24 12:56:35 client1 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b Nov 24 12:56:35 client1 kernel: CR2: 0000000000404000 CR3: 000000002ef7a000 CR4: 00000000000006e0 Nov 24 12:56:35 client1 kernel: Process test (pid: 5163, threadinfo ffff81001f37c000, task ffff81003f18b0c0) Nov 24 12:56:35 client1 kernel: Stack: ffff81003ffa04c0 ffffffff8000ee4e 00000010000200d0 ffff8100016bc7a0 Nov 24 12:56:35 client1 
kernel: ffff81000000ec10 00000000ffffffff ffff81003ffa0d60 ffff810037e26c00 Nov 24 12:56:35 client1 kernel: ffff81003ffa0d40 000000000000003c ffff810037e70040 ffffffff8005a5ce Nov 24 12:56:35 client1 kernel: Call Trace: Nov 24 12:56:35 client1 kernel: [] __alloc_pages+0x65/0x2b2 Nov 24 12:56:35 client1 kernel: [] cache_alloc_refill+0x136/0x186 Nov 24 12:56:35 client1 kernel: [] kmem_cache_alloc_node+0x98/0xb2 Nov 24 12:56:35 client1 kernel: [] __vmalloc_area_node+0x62/0x153 Nov 24 12:56:35 client1 kernel: [] vmalloc_user+0x15/0x50 Nov 24 12:56:35 client1 kernel: [] :ib_ipath:ipath_create_qp+0x171/0x5e2 Nov 24 12:56:35 client1 kernel: [] avc_has_perm+0x43/0x55 Nov 24 12:56:35 client1 kernel: [] :ib_uverbs:__idr_get_uobj+0x33/0x45 Nov 24 12:56:35 client1 kernel: [] :ib_uverbs:ib_uverbs_create_qp+0x251/0x467 Nov 24 12:56:35 client1 kernel: [] :ib_uverbs:ib_uverbs_qp_event_handler+0x0/0x21 Nov 24 12:56:35 client1 kernel: [] :ib_uverbs:ib_uverbs_write+0x93/0xa9 Nov 24 12:56:35 client1 kernel: [] selinux_file_permission+0x9f/0xb6 Nov 24 12:56:35 client1 kernel: [] vfs_write+0xce/0x174 Nov 24 12:56:35 client1 kernel: [] sys_write+0x45/0x6e Nov 24 12:56:35 client1 kernel: [] tracesys+0xd1/0xdc Nov 24 12:56:35 client1 kernel: Nov 24 12:56:35 client1 kernel: Nov 24 12:56:35 client1 kernel: Code: 0f 0b 68 4a 38 28 80 c2 59 0a f6 c7 20 0f 85 53 03 00 00 89 Nov 24 12:56:35 client1 kernel: RIP [] cache_grow+0x1e/0x395 Nov 24 12:56:35 client1 kernel: RSP Nov 24 12:56:35 client1 kernel: <0>Kernel panic - not syncing: Fatal exception restarted after this Thanks, Bob -----Original Message----- From: Roland Dreier [mailto:rdreier at cisco.com] Sent: Saturday, November 24, 2007 3:43 PM To: Robert Pearson Cc: openib-general at openib.org; Arthur Jones Subject: Re: [ofa-general] libipath crash > The following test case will cause system crash on 1.2.5.1 if run a few > times with an ipath device but not with an mthca device. 
I noticed some > recent fixes to ibv_resize_cq but did not see anything about ibv_create_cq. I can't reproduce here with Linus's latest git tree (v2.6.24-rc3-19-g2ffbb83). What are the kernel messages from the crash? What kernel are you running on? - R. From rdreier at cisco.com Sat Nov 24 20:53:43 2007 From: rdreier at cisco.com (Roland Dreier) Date: Sat, 24 Nov 2007 20:53:43 -0800 Subject: [ofa-general] libipath crash In-Reply-To: <5p5klh$2ft8d8@rrcs-agw-01.hrndva.rr.com> (Robert Pearson's message of "Sat, 24 Nov 2007 18:38:41 -0600") References: <5p5klh$2ft8d8@rrcs-agw-01.hrndva.rr.com> Message-ID: > [root at client1 src]# uname -a > Linux client1 2.6.18-8.1.15.el5 #1 SMP Mon Oct 22 08:32:28 EDT 2007 x86_64 x86_64 x86_64 GNU/Linux OK... your test case doesn't crash on my system and I don't have much interest in trying to figure out what pile of backport patches OFED has added on top of the already old kernel sources it started with, but maybe someone at Qlogic does. On the other hand, it's pretty odd that your stack trace seems to be coming from the create QP path: > Nov 24 12:56:35 client1 kernel: [] :ib_ipath:ipath_create_qp+0x171/0x5e2 > Nov 24 12:56:35 client1 kernel: [] :ib_uverbs:ib_uverbs_create_qp+0x251/0x467 but the test case you sent does only create CQ. Are you positive that the code you sent is sufficient to trigger the crash you see? - R. 
From kliteyn at mellanox.co.il Sat Nov 24 21:21:06 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 25 Nov 2007 07:21:06 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-25:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-24 OpenSM git rev = Tue_Nov_20_00:04:13_2007 [6b8a7c5ebb648a6aa054c7ec69a9e804e772f416] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=480 Pass=479 Fail=1 Pass: 36 Stability IS1-16.topo 36 Pkey IS1-16.topo 36 OsmTest IS1-16.topo 36 OsmStress IS1-16.topo 36 Multicast IS1-16.topo 36 LidMgr IS1-16.topo 12 Stability IS3-loop.topo 12 Stability IS3-128.topo 12 Pkey IS3-128.topo 12 OsmTest IS3-loop.topo 12 OsmTest IS3-128.topo 12 OsmStress IS3-128.topo 12 Multicast IS3-loop.topo 12 Multicast IS3-128.topo 12 FatTree merge-roots-4-ary-2-tree.topo 12 FatTree merge-root-4-ary-3-tree.topo 12 FatTree gnu-stallion-64.topo 12 FatTree blend-4-ary-2-tree.topo 12 FatTree RhinoDDR.topo 12 FatTree FullGnu.topo 12 FatTree 4-ary-2-tree.topo 12 FatTree 2-ary-4-tree.topo 12 FatTree 12-node-spaced.topo 12 FTreeFail 4-ary-2-tree-missing-sw-link.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 12 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 11 LidMgr IS3-128.topo Failures: 1 LidMgr IS3-128.topo From moshek at voltaire.com Sat Nov 24 22:16:07 2007 From: moshek at voltaire.com (Moshe Kazir) Date: Sun, 25 Nov 2007 08:16:07 +0200 Subject: [ofa-general] OFED 1.3 Beta release is available In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> Message-ID: <39C75744D164D948A170E9792AF8E7CA4D2C6A@exil.voltaire.com> Are backport patches for SLES9 SP4 and RHEL 5.1 included in this version ? 
Moshe ____________________________________________________________ Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) Voltaire - The Grid Backbone www.voltaire.com -----Original Message----- From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Tziporet Koren Sent: Thursday, November 22, 2007 5:46 PM To: ewg at lists.openfabrics.org Cc: general at lists.openfabrics.org Subject: [ofa-general] OFED 1.3 Beta release is available Hi, OFED 1.3 Beta release is available on http://www.openfabrics.org/downloads/OFED/ofed-1.3/OFED-1.3-beta2.tgz To get BUILD_ID run ofed_info Please report any issues in bugzilla https://bugs.openfabrics.org/ The RC1 release is expected on December 5 Tziporet & Vlad ======================================================================== Release information: -------------------- OS support: Novell: - SLES10 - SLES10 SP1 and up1 Redhat: - Redhat EL4 up4 and up5 - Redhat EL5 and up1 kernel.org: - 2.6.23 and 2.6.24-rc2 Systems: * x86_64 * x86 * ia64 * ppc64* Main Changes from OFED 1.3-alpha ================================ * Kernel code based on 2.6.24-rc2 * New packages: * SRP target * qperf test from Qlogic * ibsim package * uDAPL 2.0 library (1.0 & 2.0 are coexist) * New OSes Support: * RHEL 5 up1 * SLES10 SP1 up1 * Compilation issues resolved: * Open MPI compilation on SLES10 SP1 * ibutils compiles on SLES10 PPC64 (64 bits) * Apply patches that fix warning of backport patches * Prefix is now supported properly * RDS implementation for API version 2 was updated form 1.2.5 branch * Fix binary compatibility of libibverbs caused by XRC implementation * Uninstall is now working properly * ib-bonding update to release 19 * MPI packages update: * mvapich-1.0.0-1625.src.rpm * mvapich2-1.0.1-1.src.rpm * openmpi-1.2.4-1.src.rpm Mlx4 driver specific changes: * Enable changing the default of HCA resource limits with module parameters * Default number of maximum QPs is now 128K (was 64K) * 
Fixing max_cqe's (not adding an extra cqe) * Fix state check in mlx4_qp_modify * Sanity check userspace send queue sizes * Several bug fixes in XRC Tasks that should be completed for the beta release: ==================================================== 1. 32-bit libraries to be supported on SLES10 SP1 Update1. 2. Fix SDP stability issues 3. IPoIB performance improvements for small messages 4. Fix bugs -------------- next part -------------- An HTML attachment was scrubbed... URL: From moshek at voltaire.com Sat Nov 24 23:33:01 2007 From: moshek at voltaire.com (Moshe Kazir) Date: Sun, 25 Nov 2007 09:33:01 +0200 Subject: [ofa-general] RHEL5.1 support? In-Reply-To: <4742F450.60401@q-leap.com> Message-ID: <39C75744D164D948A170E9792AF8E7CA4D2C6B@exil.voltaire.com> The attached patches do the work on RHEL 5.1. OFED_1.2.5_RHEL_5.1_configure.diff includes the change required in the configure file. OFED_1.2.5_RHEL_5.1_backport.diff includes the added files required in the kernel_addons and kernel_patches directories. Moshe ____________________________________________________________ Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) Voltaire - The Grid Backbone www.voltaire.com -----Original Message----- From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Peter Kruse Sent: Tuesday, November 20, 2007 4:51 PM To: general at lists.openfabrics.org Subject: Re: [ofa-general] RHEL5.1 support? Hi, thanks for your reply. Tziporet Koren wrote: > Only OFED 1.3 will support RHEL5.1 good news! > You can take the daily build or wait for the beta that should be out > this week looking forward to that! 
Peter _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general -------------- next part -------------- A non-text attachment was scrubbed... Name: OFED_1.2.5_RHEL_5.1_configure.diff Type: application/octet-stream Size: 504 bytes Desc: OFED_1.2.5_RHEL_5.1_configure.diff URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: OFED_1.2.5_RHEL_5.1_backport.diff Type: application/octet-stream Size: 81700 bytes Desc: OFED_1.2.5_RHEL_5.1_backport.diff URL: From vlad at dev.mellanox.co.il Sat Nov 24 23:56:56 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 25 Nov 2007 09:56:56 +0200 Subject: [ofa-general] RHEL5.1 support? In-Reply-To: <39C75744D164D948A170E9792AF8E7CA4D2C6B@exil.voltaire.com> References: <39C75744D164D948A170E9792AF8E7CA4D2C6B@exil.voltaire.com> Message-ID: <47492AC8.2030907@dev.mellanox.co.il> Moshe Kazir wrote: > The attached patches do the work on RHEL 5.1. > > OFED-1.2.5_RHEL_5.1_configure.diff includes the change requiered in the > configure file . > OFED-1.2.5_RHEL_5.1_backport.diff include the added files requiered in > the kernel_addons and kernel_patches directories. > > > Moshe > > ____________________________________________________________ > Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) > > Voltaire - The Grid Backbone > > www.voltaire.com > Applied to ofed_1_2/linux-2.6.git ofed_1_2_c Regards, Vladimir From moshek at voltaire.com Sat Nov 24 23:59:26 2007 From: moshek at voltaire.com (Moshe Kazir) Date: Sun, 25 Nov 2007 09:59:26 +0200 Subject: [ofa-general] OFED-1.2.5 backport patches for SLES9 SP4 In-Reply-To: <4742F450.60401@q-leap.com> Message-ID: <39C75744D164D948A170E9792AF8E7CA4D2C6D@exil.voltaire.com> The attached files do the work. 
OFED_1.2.5_sles9_sp4_configure.diff includes the changes in the configure file. OFED_1.2.5_sles9_sp4_backport.diff includes the changes required in the kernel_patches and kernel_addons directories. Moshe ____________________________________________________________ Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) Voltaire - The Grid Backbone www.voltaire.com -------------- next part -------------- A non-text attachment was scrubbed... Name: OFED_1.2.5_sles9_sp4_configure.diff Type: application/octet-stream Size: 450 bytes Desc: OFED_1.2.5_sles9_sp4_configure.diff URL: -------------- next part -------------- A non-text attachment was scrubbed... Name: OFED_1.2.5_sles9_sp4_backport.diff Type: application/octet-stream Size: 270488 bytes Desc: OFED_1.2.5_sles9_sp4_backport.diff URL: From tziporet at dev.mellanox.co.il Sun Nov 25 00:12:14 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Sun, 25 Nov 2007 10:12:14 +0200 Subject: [ewg] RE: [ofa-general] OFED 1.3 Beta release is available In-Reply-To: <39C75744D164D948A170E9792AF8E7CA4D2C6A@exil.voltaire.com> References: <39C75744D164D948A170E9792AF8E7CA4D2C6A@exil.voltaire.com> Message-ID: <47492E5E.5040000@mellanox.co.il> Moshe Kazir wrote: > Are backport patches for SLES9 SP4 and RHEL 5.1 included in this > version ? 
> > > As you can see here: RHEL5.1 - yes SLES9 (any SP) - No > > OS support: > Novell: > - SLES10 > - SLES10 SP1 and up1 > Redhat: > - Redhat EL4 up4 and up5 > - Redhat EL5 and up1 > kernel.org: > - 2.6.23 and 2.6.24-rc2 > > Systems: > * x86_64 > * x86 > * ia64 > * ppc64* > > From vlad at dev.mellanox.co.il Sun Nov 25 00:40:27 2007 From: vlad at dev.mellanox.co.il (Vladimir Sokolovsky) Date: Sun, 25 Nov 2007 10:40:27 +0200 Subject: [ofa-general] OFED-1.2.5 backport patches for SLES9 SP4 In-Reply-To: <39C75744D164D948A170E9792AF8E7CA4D2C6D@exil.voltaire.com> References: <39C75744D164D948A170E9792AF8E7CA4D2C6D@exil.voltaire.com> Message-ID: <474934FB.8000808@dev.mellanox.co.il> Moshe Kazir wrote: > The attached files do the work. > > OFED_1.2.5_sles9_sp4_configure.diff include the changes in the > configure file. > OFED_1.2.5_sles9_sp4_backport.diff include the canges requiered in the > kernel_patche and kernel_addons directories. > > Moshe > ____________________________________________________________ > Moshe Katzir | +972-9971-8639 (o) | +972-52-860-6042 (m) > > Voltaire - The Grid Backbone > > www.voltaire.com > > Applied. Regards, Vladimir From a-abbas at aajeurope.com Sun Nov 25 02:53:22 2007 From: a-abbas at aajeurope.com (Roland Dick) Date: , 25 Nov 2007 11:53:22 +0100 Subject: [ofa-general] Where have you been? Message-ID: <01c82f59$c7501c10$deebc7d5@a-abbas> Hello! I am bored this evening. I am nice girl that would like to chat with you. Email me at eyt at TheGlowPuppy.info only, because I am writing not from my personal email. 
I will reply with my pics From vlad at lists.openfabrics.org Sun Nov 25 02:52:52 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Sun, 25 Nov 2007 02:52:52 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071125-0200 daily build status Message-ID: <20071125105252.4F2CAE60976@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.15 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.20 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.19 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.15 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.14 Passed on ia64 with linux-2.6.13 Passed on ia64 with linux-2.6.12 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.14 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.14 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 
with linux-2.6.13 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Failed: From xhejtman at ics.muni.cz Sat Nov 24 14:31:17 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Sat, 24 Nov 2007 23:31:17 +0100 Subject: [ofa-general] ***SPAM*** Re: MTHCA driver from OFED 1.3a package In-Reply-To: References: <20071122140554.GB13609@ics.muni.cz> Message-ID: <20071124223117.GA4265@ics.muni.cz> Roland, On Sat, Nov 24, 2007 at 01:47:07PM -0800, Roland Dreier wrote: > Maybe the patch to try to enable MSI-X by default (which went in after > 2.6.23 I think) is causing problems, although it looks like you are > not on the MSI-X path. (Or were there other mthca messages too?) > > What kernel are you running in dom0? Is the change from OFED 1.2 to > 1.3a the only difference between the working and non-working cases, or > did you change anything else in your setup? I've already resolved this issue. For some reason, kernel drivers (2.6.18) works correctly under Xen Dom0. I was confused about OFED 1.2 which works but it contained patch to work under Xen. After applying similar patch to the OFED 1.3a, it works now as well. There is still outstanding problem discussed in July (if you remember) regarding wrong usage of DMA API and assumption of coherent mapping - pages returned by kernel allocator do not form contiguos area under Xen (with vanila kernel it is mostly contiguous but it is not guaranteed). 
I hoped that driver evolved in this case. You disagreed to use coherent allocator instead of standard. Did you do any progress? -- Lukáš Hejtmánek From xhejtman at ics.muni.cz Sat Nov 24 15:02:06 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Sun, 25 Nov 2007 00:02:06 +0100 Subject: [ofa-general] ***SPAM*** Re: MTHCA driver from OFED 1.3a package In-Reply-To: References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> Message-ID: <20071124230206.GB4265@ics.muni.cz> On Sat, Nov 24, 2007 at 02:56:22PM -0800, Roland Dreier wrote: > What is the patch that you need? force the memory allocation to be always coherent. > > I hoped that driver evolved in this case. You disagreed to use coherent > > allocator instead of standard. Did you do any progress? > > No, it is still on my list of things to address, but not a very high > priority. I thought this is only an issue in domU though, not dom0? not exactly. The non-contiguous problem happens in domU just more often than in dom0. -- Lukáš Hejtmánek From rpearson at systemfabricworks.com Sun Nov 25 08:48:33 2007 From: rpearson at systemfabricworks.com (Robert Pearson) Date: Sun, 25 Nov 2007 10:48:33 -0600 Subject: [ofa-general] libipath crash In-Reply-To: Message-ID: <5p5klh$2g3bil@rrcs-agw-01.hrndva.rr.com> Roland, I may have found the wrong crash. I'll try to reproduce and send to qlogic. Thanks, Bob -----Original Message----- From: Roland Dreier [mailto:rdreier at cisco.com] Sent: Saturday, November 24, 2007 10:54 PM To: Robert Pearson Cc: openib-general at openib.org; 'Arthur Jones' Subject: Re: [ofa-general] libipath crash > [root at client1 src]# uname -a > Linux client1 2.6.18-8.1.15.el5 #1 SMP Mon Oct 22 08:32:28 EDT 2007 x86_64 x86_64 x86_64 GNU/Linux OK... 
your test case doesn't crash on my system and I don't have much interest in trying to figure out what pile of backport patches OFED has added on top of the already old kernel sources it started with, but maybe someone at Qlogic does. On the other hand, it's pretty odd that your stack trace seems to be coming from the create QP path: > Nov 24 12:56:35 client1 kernel: [] :ib_ipath:ipath_create_qp+0x171/0x5e2 > Nov 24 12:56:35 client1 kernel: [] :ib_uverbs:ib_uverbs_create_qp+0x251/0x467 but the test case you sent does only create CQ. Are you positive that the code you sent is sufficient to trigger the crash you see? - R. From cgdadejesus at gmail.com Sun Nov 25 09:53:36 2007 From: cgdadejesus at gmail.com (jose morales) Date: Sun, 25 Nov 2007 18:53:36 +0100 Subject: [ofa-general] LA VERDAD Message-ID: SOY YO. -------------- next part -------------- An HTML attachment was scrubbed... URL: From ggrundstrom at NetEffect.com Sun Nov 25 17:59:40 2007 From: ggrundstrom at NetEffect.com (Glenn Grundstrom) Date: Sun, 25 Nov 2007 19:59:40 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <4746F349.3040606@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> > > Kanevsky, Arkady wrote: > > Very good points. > > Thanks Steve. > > > > If we can do unsignalled 0-size RDMA Read with "bogus" > S-tag this may > > work better. > > Yes, it will require IRD not to be 0 set at Responder. > > Ditto ORD of at least 1 on Responder. > > There is no need to have extra CQ entry on either side for it. > > It is only needed for error path. > > So this will only be needed if Sender posted the full queue > of sends. > > But it can not post anything because CM will not let it know that > > connection is established. > > > > > Well, actually, I think the ULP _can_ post before establishing the > connection. 
But I guess we can define the semantics such that > applications using the rdma-cm interface must adhere to > whatever we need > to make this hack work. > > Q: are there apps using the rdma-cm out there today that > pre-post SQ WRs > before getting a ESTABLISHED event? > > Steve. ULPs are allowed to post prior to establishing the connection, but I can't name any that operate this way. Prohibiting applications that use the rdma_cm directly from pre-posting is okay, but what about ULP's over other ULP's (i.e. MPI over uDAPL). How can/will this be handled? Glenn. > > Happy Thanksgiving, > > > > Arkady Kanevsky email: arkady at netapp.com > > Network Appliance Inc. phone: 781-768-5395 > > 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 > > Waltham, MA 02451 central phone: 781-768-5300 > > > > > > > >> -----Original Message----- > >> From: Steve Wise [mailto:swise at opengridcomputing.com] > >> Sent: Wednesday, November 21, 2007 1:07 PM > >> To: Kanevsky, Arkady > >> Cc: Glenn Grundstrom; Leonid Grossman; openib-general at openib.org > >> Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal > >> > >> Comments in-line below... > >> > >> > >> Kanevsky, Arkady wrote: > >> > >>> Group, > >>> > >>> > >>> below is proposal on how to resolve peer-to-peer > iWARP CM issue > >>> discovered at interop event. > >>> > >>> > >>> The main issue is that MPA spec (relevant portion of > >>> > >> IETF RFC 5044 > >> > >>> is below) require that > >>> > >>> > >>> connection initiator send first message over the > >>> > >> established connection. > >> > >>> Multiple MPI implementations and several other apps use > >>> > >> peer-to-peer > >> > >>> model. > >>> > >>> > >>> So rather then forcing all of them to do it on their > >>> > >> own, which will > >> > >>> not help with > >>> > >>> > >>> interop between different implementations, the goal > is to extend > >>> lower layers to provide it. 
> >>> > >>> > >>> > >>> > >>> > >>> Our first idea was to leave MPA protocol untouched and > >>> > >> try to solve > >> > >>> this problem > >>> > >>> > >>> in iw_cm. But there are too many complications to it. > First, in > >>> order to adhere to RFC5044 > >>> > >>> > >>> initiator must send first FPDU and responder process > >>> > >> it. But since > >> > >>> the connection is already > >>> > >>> > >>> established processing FPDU involves ULP on whose behalf the > >>> connection is created. > >>> > >>> > >>> So either initiator sends a message which generates > >>> > >> completion on > >> > >>> responder CQ, thus visible > >>> > >>> > >>> to ULP, or not. > >>> > >> > >> > >>> In the later case, the only op which can do it is > >>> RDMA one, which means > >>> > >>> > >>> that responder somehow provided initiator S-tag which > >>> > >> it can use. > >> > >>> So, this is an extension > >>> > >>> > >>> to MPA, probably using private data. And that responder upon > >>> receiving it destroy this S-tag. > >>> > >>> > >>> In any case this is an extension of MPA. > >>> > >>> > >> This stag exchange isn't needed if this RDMA op is a 0B READ. > >> The responder waits for that 0B read and only indicates the > >> rdma connection is established to its ULP when it replies to > >> the 0B read. In this scenario, the responder/server side > >> doesn't consume any CQ resources. > >> But it would require an IRD of at least 1 to be configured > on the QP. > >> The initiator still requires an SQ entry, and possibly a CQ > >> entry, for initiating the 0B read and handling completion. > >> But its perhaps a little less painful than doing a SEND/RECV > >> exchange. The read wr could be unsignaled so that it won't > >> generate a CQE. But it still consumes an SQ WR slot so the > >> SQ would have to be sized to allow this extra WR. And I guess > >> the CQ would also need to be sized accordingly in case the > >> read failed. 
> >> > >> > >>> In the former, Send is used but this requires a buffer > >>> > >> to be posted > >> > >>> to CQ. But since > >>> > >>> > >>> the same CQ (or SharedCQ) can be used by other > >>> > >> connections at the > >> > >>> same time it can cause > >>> > >>> > >>> the responder CM posted buffer to be consumed by other > >>> > >> connection. > >> > >>> This is not acceptable. > >>> > >>> > >>> > >>> > >>> > >>> So new we consider extension to MPA protocol. > >>> > >>> > >>> The goal is to be completely backwards compatible to > >>> > >> existing version 1. > >> > >>> In a nutshell, use a "flag" in the MPA request message which > >>> indicates that > >>> > >>> > >>> "ready to receive" message will be send by requestor upon > >>> receiving > >>> > >>> > >>> MPA response message with connection acceptance. > >>> > >>> > >>> > >>> > >>> > >>> here are the changes to IETF RFC5044 > >>> > >>> > >>> > >>> > >>> > >>> 1. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 > >>> > >> 2 3 4 5 6 7 8 > >> > >>> 9 0 1 > >>> > >>> > >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 > >> > >>> | | + Key (16 bytes containing "MPA ID Req Frame") + 4 > >>> > >> | (4D 50 41 > >> > >>> 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 > >>> > >> bytes containing > >> > >>> "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 > >>> > >> 70 20 46 72 61 > >> > >>> 6D 65) | + Or (16 bytes containing "MPA ID Rtr Frame") > >>> > >> + 12 | (4D 50 > >> > >>> 41 20 49 44 20 52 74 52 20 46 72 61 6D 65) | + > >>> > >>> > >> > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 > >> > >>> |M|C|R|S| Res | Rev | PD_Length | > >>> > >>> > >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | > >> > >>> | ~ ~ ~ Private Data ~ | | | > >>> > >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | > >> > >>> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > >>> > >>> > >>> > >>> > >>> > >>> 2. 
S: indicator in the Req frame whether or not > >>> > >> Requestor will send > >> > >>> Rtr frame. > >>> > >>> > >>> In Req frame, if set to 1 then Rtr frame will > be sent if > >>> responder > >>> > >>> > >>> sends Rep frame with accept bit set. 0 indicate > >>> > >> that Rtr frame > >> > >>> will not be sent. > >>> > >>> > >>> In Rep frame, 0 means that Responder cannot support > >>> > >> Rtr frame, > >> > >>> while 1 that it is and is waiting for it. > >>> > >>> > >>> (While my preference is to handle this as MPA > >>> > >> protocol version > >> > >>> matching rules, > >>> > >>> > >>> proposed method will provide complete backwards > >>> > >> compatibility) > >> > >>> Unused by Rtr frame. That is set to 0 in Rtr frame > >>> > >> and ignored > >> > >>> by responder. > >>> > >>> > >>> > >>> > >>> > >>> All other bits M,C,R and remainder of Res treated > >>> > >> as in MPA ver 1. > >> > >>> > >>> > >>> > >>> Rtr frame adhere to C bit as specified in Rep frame > >>> > >>> > >>> > >> First, the RTR frame _must_ be an FPDU for this to work. > >> Thus it violates the DDP/RDMAP specs because it is an known > >> DDP/RDMAP opcode. > >> > >> Second, assuming the RTR frame is sent as an FPDU, then this > >> won't work with existing RNIC HW. The HW will post an async > >> error because the incoming DDP/RDMAP opcode is unknown. > >> > >> The only way I see that we can fix this for the existing rnic > >> HW is to come up with some way to send a valid RDMAP message > >> from the initiator to the responder under the covers -and- > >> have the responder only indicate that the connection is > >> established when that FPDU is received. > >> > >> Chelsio cannot support this hack via a 0B write, but the > >> could support a 0B read or send/recv exchange. But as you > >> indicate, this is very painful and perhaps impossible to do > >> without impacting the ULP and breaking verbs semantics. > >> > >> (that's why we punted on this a year ago :) > >> > >> > >> Steve. 
> >> > >> _______________________________________________ > >> general mailing list > >> general at lists.openfabrics.org > >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >> > >> To unsubscribe, please visit > >> http://openib.org/mailman/listinfo/openib-general > >> > >> > From kliteyn at mellanox.co.il Sun Nov 25 21:13:38 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 26 Nov 2007 07:13:38 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-26:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-25 OpenSM git rev = Tue_Nov_20_00:04:13_2007 [6b8a7c5ebb648a6aa054c7ec69a9e804e772f416] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From He.Huang at Sun.COM Sun Nov 25 21:13:50 2007 From: He.Huang at Sun.COM (Isaac Huang) Date: Mon, 26 Nov 2007 13:13:50 +0800 Subject: [ofa-general] IB_WC_WR_FLUSH_ERR on receive requests Message-ID: <20071126051350.GB326@sun.com> Hello, Recently we observed a handful of IB_WC_WR_FLUSH_ERR errors 
on receive requests. I grepped through OFED 1.2.5.1 code and found that this error was mostly set in low level hw drivers. It also seemed that some ULPs didn't treat IB_WC_WR_FLUSH_ERR as a critical error, e.g. in ulp/ipoib/ipoib_cm.c:ipoib_ib_handle_rx_wc(): if (unlikely(wc->status != IB_WC_SUCCESS)) { if (wc->status != IB_WC_WR_FLUSH_ERR) ipoib_warn(priv, "failed recv event " "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); In what situations does this IB_WC_WR_FLUSH_ERR usually happen? Does it indicate any issue in the HCA hardware? Thanks, Isaac From keshetti85-student at yahoo.co.in Sun Nov 25 21:55:22 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Mon, 26 Nov 2007 11:25:22 +0530 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release Message-ID: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> Hi, I got lot of error messages like, ibsim.c:296: error: `IB_PORT_GID_PREFIX_F' undeclared (first use in this function) ibsim.c:296: error: (Each undeclared identifier is reported only once while installing ibsim-0.4 on a machine running OFED-1.2. With which release of OFED does ibsim-0.4 work properly? Also can anyone of you tell me where can I get ibsim working properly with OFED-1.2 release? -Mahesh From sweitzen at cisco.com Sun Nov 25 22:18:55 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Sun, 25 Nov 2007 22:18:55 -0800 Subject: [ofa-general] OFED 1.3 Beta release is available In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> Message-ID: I have added version 1.3beta2 to the OF Bugzilla. 
Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems ________________________________ From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Tziporet Koren Sent: Thursday, November 22, 2007 7:46 AM To: ewg at lists.openfabrics.org Cc: general at lists.openfabrics.org Subject: [ofa-general] OFED 1.3 Beta release is available Hi, OFED 1.3 Beta release is available on http://www.openfabrics.org/downloads/OFED/ofed-1.3/OFED-1.3-beta2.tgz To get BUILD_ID run ofed_info Please report any issues in bugzilla https://bugs.openfabrics.org/ The RC1 release is expected on December 5 Tziporet & Vlad ======================================================================== Release information: -------------------- OS support: Novell: - SLES10 - SLES10 SP1 and up1 Redhat: - Redhat EL4 up4 and up5 - Redhat EL5 and up1 kernel.org: - 2.6.23 and 2.6.24-rc2 Systems: * x86_64 * x86 * ia64 * ppc64* Main Changes from OFED 1.3-alpha ================================ * Kernel code based on 2.6.24-rc2 * New packages: * SRP target * qperf test from Qlogic * ibsim package * uDAPL 2.0 library (1.0 & 2.0 are coexist) * New OSes Support: * RHEL 5 up1 * SLES10 SP1 up1 * Compilation issues resolved: * Open MPI compilation on SLES10 SP1 * ibutils compiles on SLES10 PPC64 (64 bits) * Apply patches that fix warning of backport patches * Prefix is now supported properly * RDS implementation for API version 2 was updated form 1.2.5 branch * Fix binary compatibility of libibverbs caused by XRC implementation * Uninstall is now working properly * ib-bonding update to release 19 * MPI packages update: * mvapich-1.0.0-1625.src.rpm * mvapich2-1.0.1-1.src.rpm * openmpi-1.2.4-1.src.rpm Mlx4 driver specific changes: * Enable changing the default of HCA resource limits with module parameters * Default number of maximum QPs is now 128K (was 64K) * Fixing max_cqe's (not adding an extra cqe) * Fix state check in 
mlx4_qp_modify * Sanity check userspace send queue sizes * Several bug fixes in XRC Tasks that should be completed for the beta release: ==================================================== 1. 32-bit libraries to be supported on SLES10 SP1 Update1. 2. Fix SDP stability issues 3. IPoIB performance improvements for small messages 4. Fix bugs -------------- next part -------------- An HTML attachment was scrubbed... URL: From jackm at dev.mellanox.co.il Mon Nov 26 00:41:19 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Mon, 26 Nov 2007 10:41:19 +0200 Subject: [ofa-general] [PATCH] ipoib: fix kernel Oops resulting from xmit when priv->broadcast is NULL Message-ID: <200711261041.19402.jackm@dev.mellanox.co.il> IPoIB: Fix kernel Oops resulting from xmit following dev_down. If a port goes down, ipoib_ib_dev_down is invoked -- which flushed the mcasts (clearing priv->broadcast) and clearing the path record cache. If ipoib_start_xmit is then invoked (before the port is upped), a kernel Oops results from attempting to access priv->broadcast. Returning NULL if priv->broadcast is NULL is a harmless way of bypassing the problem -- the offending packet is simply discarded "without prejudice". Signed-off-by: Jack Morgenstein --- Roland, This fix needs to make it into 2.6.24. The Oops occurred during our testing. 
- Jack diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a03a65e..c9f6077 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -460,6 +460,9 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; + if (!priv->broadcast) + return NULL; + path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; From eli at dev.mellanox.co.il Mon Nov 26 02:43:51 2007 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Mon, 26 Nov 2007 12:43:51 +0200 Subject: [ofa-general] IB_WC_WR_FLUSH_ERR on receive requests In-Reply-To: <20071126051350.GB326@sun.com> References: <20071126051350.GB326@sun.com> Message-ID: <1196073831.21753.86.camel@mtls03> On Mon, 2007-11-26 at 13:13 +0800, Isaac Huang wrote: > Hello, > > Recently we observed a handful of IB_WC_WR_FLUSH_ERR errors on receive > requests. I grepped through OFED 1.2.5.1 code and found that this > error was mostly set in low level hw drivers. It also seemed that some > ULPs didn't treat IB_WC_WR_FLUSH_ERR as a critical error, e.g. in > ulp/ipoib/ipoib_cm.c:ipoib_ib_handle_rx_wc(): > if (unlikely(wc->status != IB_WC_SUCCESS)) { > if (wc->status != IB_WC_WR_FLUSH_ERR) > ipoib_warn(priv, "failed recv event " > "(status=%d, wrid=%d vend_err %x)\n", > wc->status, wr_id, wc->vendor_err); > > In what situations does this IB_WC_WR_FLUSH_ERR usually happen? Does it > indicate any issue in the HCA hardware? This flow does not necessarily indicate a problem. When the a QP is closed, e.g. when the interface goes down, the QP is modified to error and any pending receive WRs are completed with this error. 
From vlad at lists.openfabrics.org Mon Nov 26 02:56:58 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Mon, 26 Nov 2007 02:56:58 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071126-0200 daily build status Message-ID: <20071126105659.06DDDE6080D@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.18 Passed on ppc64 with linux-2.6.12 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.18 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.13 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.12 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.22 Passed on ppc64 with linux-2.6.14 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.12 Passed on x86_64 with linux-2.6.21.1 Passed on ppc64 with linux-2.6.17 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.17 Passed on ia64 with linux-2.6.14 Passed on ia64 
with linux-2.6.22 Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.15 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on ia64 with linux-2.6.23 Passed on ppc64 with linux-2.6.13 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.18-8.el5 Failed: From or.gerlitz at gmail.com Mon Nov 26 03:22:39 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 26 Nov 2007 13:22:39 +0200 Subject: [ofa-general] ***SPAM*** Re: MTHCA driver from OFED 1.3a package In-Reply-To: <20071124223117.GA4265@ics.muni.cz> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> Message-ID: <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> On 11/25/07, Lukas Hejtmanek wrote: > I've already resolved this issue. For some reason, kernel drivers (2.6.18) > works correctly under Xen Dom0. I was confused about OFED 1.2 which works but > it contained patch to work under Xen. After applying similar patch to the OFED > 1.3a, it works now as well. Can you please clarify what is the patch in OFED 1.2 which once deployed to OFED 1.3 makes it work for you? Looking on the 1.2 sources under ofa_kernel-1.2/kernel_patches/fixes I see three patches that contain the word coherent, but neither of them seemed to me what you are referring to, am I wrong? The patches are: mthca_3_alloc_consistent.patch mthca_4_dma_align_reserved_mtts.patch mthca_5_roland_fix_icm_coherent.patch Or. change-log for mthca_3_alloc_consistent.patch Fix non-cache-coherent CPUs with memfree HCAs. 
We allocate the MTT table with alloc_pages() and then do pci_map_sg(), so we must call pci_dma_sync_sg after the CPU writes to the MTT table (this works since device never writes the MTTs on memfree). For MPTs, both the device and CPU might write there, so we must allocate dma coherent memory for these. Signed-off-by: Michael S. Tsirkin change-log for mthca_4_dma_align_reserved_mtts.patch [PATCHv3 5 of 5] IB/mthca: reserved MTTs and memory alignment issues This fixes several issues related to reserved MTTs and memory alignment. 1. MTTs are allocated in non-cache-coherent memory, so we must give reserved MTTs their own cache line, to prevent both device and CPU from writing into the same cache line at the same time. 2. reserved_mtts field has different meaning in Tavor and Arbel, so we are wasting mtt entries on memfree. fix the Arbel case to match Tavor semantics. Signed-off-by: Michael S. Tsirkin change-log for mthca_5_roland_fix_icm_coherent.patch commit 1c7c25e1a36ea2aa053373dfb363fd646677edac Author: Roland Dreier Date: Wed Feb 14 08:45:17 2007 -0800 IB/mthca: Fix allocation of ICM chunks in coherent memory The change to allow allocating ICM chunks from coherent memory did not increment the count of sg entries properly, so a chunk that required more than allocation would not be mapped properly by the HCA. Fix this by adding the missing increment of chunk->nsg. 
Signed-off-by: Roland Dreier From xhejtman at ics.muni.cz Mon Nov 26 05:16:37 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Mon, 26 Nov 2007 14:16:37 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> Message-ID: <20071126131637.GC4296@ics.muni.cz> On Mon, Nov 26, 2007 at 01:22:39PM +0200, Or Gerlitz wrote: > Can you please clarify what is the patch in OFED 1.2 which once > deployed to OFED 1.3 makes it work for you? Looking on the 1.2 sources > under > ofa_kernel-1.2/kernel_patches/fixes I see three patches that contain > the word coherent, but neither of them seemed to me what you are > referring to, am I wrong? The patches are: > mthca_3_alloc_consistent.patch > mthca_4_dma_align_reserved_mtts.patch > mthca_5_roland_fix_icm_coherent.patch none of them. 
I did my personal patch: struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, gfp_t gfp_mask, int coherent) { struct mthca_icm *icm; struct mthca_icm_chunk *chunk = NULL; int cur_order; int ret; + coherent = 1; /* We use sg_set_buf for coherent allocs, which assumes low memory */ + //BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); -- Lukáš Hejtmánek From or.gerlitz at gmail.com Mon Nov 26 05:37:33 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Mon, 26 Nov 2007 15:37:33 +0200 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <20071126131637.GC4296@ics.muni.cz> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> Message-ID: <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> On 11/26/07, Lukas Hejtmanek wrote: > On Mon, Nov 26, 2007 at 01:22:39PM +0200, Or Gerlitz wrote: > > Can you please clarify what is the patch in OFED 1.2 which once > > deployed to OFED 1.3 makes it work for you? > none of them. I did my personal patch > struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, > gfp_t gfp_mask, int coherent) > { > struct mthca_icm *icm; > struct mthca_icm_chunk *chunk = NULL; > int cur_order; > int ret; > > + coherent = 1; > /* We use sg_set_buf for coherent allocs, which assumes low memory */ > + //BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM)); OK, I see. So what's needed in order to fix the problem for the upstream code, it seems the code is ready to take the path of using mthca_alloc_icm_coherent, and your patch just forces it to use this practice, correct? Why its correct to remove this assertion? Or. 
From xhejtman at ics.muni.cz Mon Nov 26 05:47:13 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Mon, 26 Nov 2007 14:47:13 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> Message-ID: <20071126134713.GD4296@ics.muni.cz> On Mon, Nov 26, 2007 at 03:37:33PM +0200, Or Gerlitz wrote: > OK, I see. So what's needed in order to fix the problem for the > upstream code, it seems the code is ready to take the path of using > mthca_alloc_icm_coherent, and your patch just forces it to use this > practice, correct? Why its correct to remove this assertion? Roland believes that using coherent allocation is not the right way. It works on X86_64 though. But on some other architectures, you can run out of coherent memory shortly, which is the main argument against it. The right approach is to use the correct DMA API (which means not to use dma_sync_single on memory not mapped with dma_map_single); Roland said he has it on his long-term TODO list. The assertion is applied as allocator is called with coherent=0 and gfp_mask set to __GFP_HIGHMEM. On X86_64 this is not an issue so the assertion is removed. The correct way should be with coherent=1 and gfp_mask &= ~__GFP_HIGHMEM. 
-- Lukáš Hejtmánek From tziporet at mellanox.co.il Mon Nov 26 05:54:56 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Mon, 26 Nov 2007 15:54:56 +0200 Subject: [ofa-general] OFED Nov 20 meeting summary on release schedule In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E31E@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C90282E31E@mtlexch01.mtl.com> Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E389@mtlexch01.mtl.com> OFED Nov 20 meeting summary on release schedule =============================================== Was a short meeting * Reviewed open tasks and decided there are not blockers for the beta release. Release schedule: * Beta: Nov 21, 2007- kernel base: 2.6.24-rc2 * RC1: Dec 4, 2007 * RC2: Dec 18, 2007 * RC3: Jan 8, 2008 * RC4: Jan 22, 2008 * Release: Jan 29, 2008 Done tasks for the beta: * Rebase for kernel 2.6.24-rc1 - all * Management readiness and open a branch for 1.3 - Sasha * Added SRPT to the package - Vu & Vlad * Open MPI compilation on SLES10 SP1 - Vlad * Multiple uDAPL libs (1.0 & 2.0) - Vlad and Arlin (Intel) * ibutils on SLES10 PPC64 (64 bits) - Vlad * Add qperf test from Qlogic - Johann (Qlogic) * Support RHEL 5 up1 - Woody & Vlad * Apply patches that fix warning of backport patches - Vlad * New MVAPICH package - Pasha & DK (OSU) * Complete RDS work - Vlad (Mellanox) * Integrate all SDP features - Jim (Mellanox) * nes - updated backport patches - Glenn (NetEffect) From ianbrn at gmail.com Mon Nov 26 06:57:35 2007 From: ianbrn at gmail.com (Ian Brown) Date: Mon, 26 Nov 2007 16:57:35 +0200 Subject: [ofa-general] 1GB ethernet or 10 GB and non infiniband nics using RDMA - newbie Message-ID: Hello, I am a newbie on this. I am looking for the code for Linux Network drivers for 1Gb ethernet or 10 Gb and non infiniband nics which use RDMA. (I think they are called iWARP) I can not find any under the drivers/net tree (or maybe there are and I don't know where). Are these drivers in the infiniband tree? 
Regards, Ian From rdreier at cisco.com Mon Nov 26 07:03:58 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 26 Nov 2007 07:03:58 -0800 Subject: [ofa-general] 1GB ethernet or 10 GB and non infiniband nics using RDMA - newbie In-Reply-To: (Ian Brown's message of "Mon, 26 Nov 2007 16:57:35 +0200") References: Message-ID: > Are this drivers in the infiniband tree? Yes, drivers/infiniband/hw/amso1100 and drivers/infiniband/hw/cxgb3. I would have thought that text like This is an iWARP/RDMA driver for the Chelsio T3 1GbE and 10GbE adapters. in the Kconfig help for INFINIBAND_CXGB3 would have been a clue.... - R. From ianbrn at gmail.com Mon Nov 26 07:28:55 2007 From: ianbrn at gmail.com (Ian Brown) Date: Mon, 26 Nov 2007 17:28:55 +0200 Subject: [ofa-general] 1GB ethernet or 10 GB and non infiniband nics using RDMA - newbie In-Reply-To: References: Message-ID: Roland , Thanks. Last one: do I understand correctly that currently these two drivers are the **only** (non infiniband) NICs drivers in the kernel tree which support RDMA ? Rgs, Ian On Nov 26, 2007 5:03 PM, Roland Dreier wrote: > > Are this drivers in the infiniband tree? > > Yes, drivers/infiniband/hw/amso1100 and drivers/infiniband/hw/cxgb3. > I would have thought that text like > > This is an iWARP/RDMA driver for the Chelsio T3 1GbE and > 10GbE adapters. > > in the Kconfig help for INFINIBAND_CXGB3 would have been a clue.... > > - R. 
> From hrosenstock at xsigo.com Mon Nov 26 07:33:24 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 26 Nov 2007 07:33:24 -0800 Subject: [ofa-general] [PATCH] libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed Enabled/Supported Message-ID: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed Enabled/Supported Based on email from Jason Gunthorpe Signed-off-by: Hal Rosenstock diff --git a/libibmad/src/dump.c b/libibmad/src/dump.c index 9628eba..05efdca 100644 --- a/libibmad/src/dump.c +++ b/libibmad/src/dump.c @@ -227,39 +227,45 @@ mad_dump_linkwidth(char *buf, int bufsz, void *val, int valsz) } } +static void +dump_linkwidth(char *buf, int bufsz, int width) +{ + char *s = buf, *e = s + bufsz; + + if (width & 0x1) + s += snprintf(s, e - s, "1X or "); + if (s < e && (width & 0x2)) + s += snprintf(s, e - s, "4X or "); + if (s < e && (width & 0x4)) + s += snprintf(s, e - s, "8X or "); + if (s < e && (width & 0x8)) + s += snprintf(s, e - s, "12X or "); + + if ((width >> 4) || s == buf) + s += snprintf(s, e - s, "?(%d)", width); + else + s[-3] = 0; +} + void mad_dump_linkwidthsup(char *buf, int bufsz, void *val, int valsz) { int width = *(int *)val; - switch (width) { + dump_linkwidth(buf, bufsz, width); + + switch(width) { case 1: - snprintf(buf, bufsz, "1X"); - break; - case 2: - snprintf(buf, bufsz, "4X (IBA extension)"); - break; case 3: - snprintf(buf, bufsz, "1X or 4X"); - break; - case 4: - snprintf(buf, bufsz, "8X (IBA extension)"); - break; case 7: - snprintf(buf, bufsz, "1X or 4X or 8X"); - break; - case 8: - snprintf(buf, bufsz, "12X (IBA extension)"); - break; case 11: - snprintf(buf, bufsz, "1X or 4X or 12X"); - break; case 15: - snprintf(buf, bufsz, "1X or 4X or 8X or 12X"); break; + default: - IBWARN("bad width %d", width); - buf[0] = 0; + snprintf(buf + strlen(buf), bufsz - strlen(buf), + "(IBA extension)"); + break; } } @@ -267,21 +273,8 @@ void 
mad_dump_linkwidthen(char *buf, int bufsz, void *val, int valsz) { int width = *(int *)val; - char *s = buf, *e = s + bufsz; - if (width & 0x1) - s += snprintf(s, e - s, "1X or "); - if (s < e && (width & 0x2)) - s += snprintf(s, e - s, "4X or "); - if (s < e && (width & 0x4)) - s += snprintf(s, e - s, "8X or "); - if (s < e && (width & 0x8)) - s += snprintf(s, e - s, "12X or "); - - if ((width >> 4) || s == buf) - s += snprintf(s, e - s, "?(%d)", width); - else - s[-3] = 0; + dump_linkwidth(buf, bufsz, width); } void @@ -305,70 +298,49 @@ mad_dump_linkspeed(char *buf, int bufsz, void *val, int valsz) } } -void -mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) +static void +dump_linkspeed(char *buf, int bufsz, int speed) { - int speed = *(int *)val; + char *s = buf, *e = s + bufsz; + + if (speed & 0x1) + s += snprintf(s, e - s, "2.5 Gbps or "); + if (s < e && (speed & 0x2)) + s += snprintf(s, e - s, "5.0 Gbps or "); + if (s < e && (speed & 0x4)) + s += snprintf(s, e - s, "10.0 Gbps or "); + + if ((speed >> 4) || s == buf) + s += snprintf(s, e - s, "?(%d)", speed); + else + s[-3] = 0; switch (speed) { case 1: - snprintf(buf, bufsz, "2.5 Gbps"); - break; - case 2: - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); - break; case 3: - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); - break; - case 4: - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); - break; case 5: - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); - break; case 7: - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); break; default: - snprintf(buf, bufsz, "?(%d)", speed); + snprintf(s, e - s, "(IBA extension)"); break; } } void +mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) +{ + int speed = *(int *)val; + + dump_linkspeed(buf, bufsz, speed); +} + +void mad_dump_linkspeeden(char *buf, int bufsz, void *val, int valsz) { int speed = *(int *)val; - switch (speed) { - case 1: - snprintf(buf, bufsz, "2.5 Gbps"); - break; - case 2: - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); 
- break; - case 3: - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); - break; - case 4: - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); - break; - case 5: - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); - break; - case 6: - snprintf(buf, bufsz, "5.0 or 10.0 Gbps"); - break; - case 7: - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); - break; - case 15: - snprintf(buf, bufsz, "SpeedSupported"); - break; - default: - snprintf(buf, bufsz, "?(%d)", speed); - break; - } + dump_linkspeed(buf, bufsz, speed); } void From hrosenstock at xsigo.com Mon Nov 26 08:12:10 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 26 Nov 2007 08:12:10 -0800 Subject: [ofa-general] [PATCH][TRIVIAL] OpenSM/libvendor/osm_vendor_ibumad.c: Make error code in osm_log message unique Message-ID: <1196093530.26651.192.camel@hrosenstock-ws.xsigo.com> OpenSM/libvendor/osm_vendor_ibumad.c: Make error code in osm_log message unique Signed-off-by: Hal Rosenstock diff --git a/opensm/libvendor/osm_vendor_ibumad.c b/opensm/libvendor/osm_vendor_ibumad.c index 462ad9c..571ec93 100644 --- a/opensm/libvendor/osm_vendor_ibumad.c +++ b/opensm/libvendor/osm_vendor_ibumad.c @@ -791,7 +791,7 @@ osm_vendor_bind(IN osm_vendor_t * const p_vend, p_vend->issm_path, sizeof(p_vend->issm_path)) < 0) { osm_log(p_vend->p_log, OSM_LOG_ERROR, - "osm_vendor_bind: ERR 5424: " + "osm_vendor_bind: ERR 542E: " "Cannot resolve issm path for port %s:%u\n", p_vend->umad_port.ca_name, p_vend->umad_port.portnum); goto Exit; From rdreier at cisco.com Mon Nov 26 08:29:29 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 26 Nov 2007 08:29:29 -0800 Subject: [ofa-general] 1GB ethernet or 10 GB and non infiniband nics using RDMA - newbie In-Reply-To: (Ian Brown's message of "Mon, 26 Nov 2007 17:28:55 +0200") References: Message-ID: > Last one: do I understand correctly that currently these two drivers > are the **only** (non infiniband) NICs drivers in the kernel tree > which support RDMA ? Yes, as far as I know. 
From swise at opengridcomputing.com Mon Nov 26 09:24:57 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 26 Nov 2007 11:24:57 -0600 Subject: [ofa-general] [PATCH 2.6.25 0/2] RDMA/cxgb3 patches for 2.6.25 Message-ID: <20071126172457.22792.62583.stgit@dell3.ogc.int> Hey roland, Please pull these two iw_cxgb3 patches for 2.6.25. The 5.0 firmware change must be committed along with the cxgb3 NIC changes submitted here: http://lkml.org/lkml/2007/11/16/224 and merged by Jeff here: http://lkml.org/lkml/2007/11/23/180 Shortlog: RDMA/cxgb3: Hold rtnl_lock() around ethtool get_drvinfo call. RDMA/cxgb3: Support 5.0 firmware. --- Steve. From swise at opengridcomputing.com Mon Nov 26 09:28:46 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 26 Nov 2007 11:28:46 -0600 Subject: [ofa-general] [PATCH 2.6.25 2/2] RDMA/cxgb3: Support 5.0 firmware. In-Reply-To: <20071126172457.22792.62583.stgit@dell3.ogc.int> References: <20071126172457.22792.62583.stgit@dell3.ogc.int> Message-ID: <20071126172846.22792.8854.stgit@dell3.ogc.int> RDMA/cxgb3: Support 5.0 firmware. The 5.0 firmware now supports translating sgls in recv wrs, so remove the host driver logic currently doing the translation. Note: this change requires 5.0 firmware. 
Signed-off-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch_qp.c | 21 ++------------------- 1 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index dd89b6b..9bb8112 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -208,36 +208,19 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, struct ib_recv_wr *wr) { - int i, err = 0; - u32 pbl_addr[4]; - u8 page_size[4]; + int i; if (wr->num_sge > T3_MAX_SGE) return -EINVAL; - err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % - (1UL << (12 + page_size[i]))); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); + wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); } for (; i < T3_MAX_SGE; i++) { wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; } return 0; } From swise at opengridcomputing.com Mon Nov 26 09:28:44 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Mon, 26 Nov 2007 11:28:44 -0600 Subject: [ofa-general] [PATCH 2.6.25 1/2] RDMA/cxgb3: Hold rtnl_lock() around ethtool get_drvinfo call. 
In-Reply-To: <20071126172457.22792.62583.stgit@dell3.ogc.int> References: <20071126172457.22792.62583.stgit@dell3.ogc.int> Message-ID: <20071126172844.22792.46023.stgit@dell3.ogc.int> RDMA/cxgb3: Hold rtnl_lock() around ethtool get_drvinfo call. Currently the call into cxgb3 to get the driver info is not serialized. The iw_cxgb3 module needs to hold the rtnl_lock around the ethtool ops call like dev_ioctl() does. Signed-off-by: Steve Wise --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b5436ca..69b1204 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1053,7 +1054,9 @@ static ssize_t show_fw_ver(struct class_device *cdev, char *buf) struct net_device *lldev = dev->rdev.t3cdev_p->lldev; PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.fw_version); } @@ -1065,7 +1068,9 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) struct net_device *lldev = dev->rdev.t3cdev_p->lldev; PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); + rtnl_unlock(); return sprintf(buf, "%s\n", info.driver); } From rpearson at systemfabricworks.com Mon Nov 26 09:37:46 2007 From: rpearson at systemfabricworks.com (Robert Pearson) Date: Mon, 26 Nov 2007 11:37:46 -0600 Subject: [ofa-general] ipath crash Message-ID: <5p5klh$2gfnks@rrcs-agw-01.hrndva.rr.com> Here is the right crash ----------- [cut here ] --------- [please bite here ] --------- Kernel BUG at mm/slab.c:2649 invalid opcode: 0000 [1] SMP last sysfs file: /class/infiniband/ipath0/node_type CPU 7 Modules linked in: autofs4 hidp rfcomm l2cap bluetooth sunrpc 
rdma_ucm(U) ib_srp(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_add r(U) ib_uverbs(U) ib_umad(U) ib_mthca(U) ib_ipoib(U) ib_cm(U) ib_sa(U) ib_mad(U) ip_conntrack_netbios_ns ipt_REJECT xt_s tate ip_conntrack nfnetlink iptable_filter ip_tables ip6t_REJECT xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 dm_m irror dm_mod video sbs i2c_ec i2c_core button battery asus_acpi acpi_memhotplug ac parport_pc lp parport sg ib_ipath(U) ide_cd ib_core(U) serio_raw cdrom bnx2 shpchp pcspkr mptsas mptscsih mptbase scsi_transport_sas sd_mod scsi_mod ext3 jbd ehci_hcd ohci_hcd uhci_hcd Pid: 8101, comm: fragment Not tainted 2.6.18-8.1.15.el5 #1 RIP: 0010:[] [] cache_grow+0x1e/0x395 RSP: 0018:ffff810010c3dcb8 EFLAGS: 00010006 RAX: 0000000000000000 RBX: 00000000000080d0 RCX: 00000000ffffffff RDX: 0000000000000000 RSI: 00000000000080d0 RDI: ffff810037ff43c0 RBP: ffff81003ffa06e0 R08: ffff8100020bc280 R09: ffff810037e64400 R10: ffff810010c3de68 R11: 000000000000555c R12: ffff810037ff43c0 R13: ffff81003ffa06c0 R14: 0000000000000000 R15: ffff810037ff43c0 FS: 00002aaaaaad7440(0000) GS:ffff8100020bf340(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00002aaaaaaac000 CR3: 0000000011a7f000 CR4: 00000000000006e0 Process fragment (pid: 8101, threadinfo ffff810010c3c000, task ffff81002cdd3820) Stack: 0000000000000000 0000000000000001 0000000000000296 0000000000000001 ffff810010c3dd18 00000000ffffffff ffff81003ffa06e0 ffff8100020bc280 ffff81003ffa06c0 000000000000000c ffff810037ff43c0 ffffffff8005a5ce Call Trace: [] cache_alloc_refill+0x136/0x186 [] kmem_cache_alloc_node+0x98/0xb2 [] __vmalloc_area_node+0x62/0x153 [] vmalloc_user+0x15/0x50 [] :ib_ipath:ipath_create_cq+0x67/0x1d6 [] __down_write_nested+0x12/0x92 [] :ib_uverbs:ib_uverbs_create_cq+0x143/0x259 [] :ib_uverbs:ib_uverbs_write+0x93/0xa9 [] selinux_file_permission+0x9f/0xb6 [] vfs_write+0xce/0x174 [] sys_write+0x45/0x6e [] tracesys+0xd1/0xdc The last one was from an older crash that I picked up by mistake. 
Bob -------------- next part -------------- An HTML attachment was scrubbed... URL: From sashak at voltaire.com Mon Nov 26 11:03:12 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 19:03:12 +0000 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> References: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> Message-ID: <20071126190312.GC14894@sashak.voltaire.com> Hi, On 11:25 Mon 26 Nov , Keshetti Mahesh wrote: > > I got lot of error messages like, > > ibsim.c:296: error: `IB_PORT_GID_PREFIX_F' undeclared (first use in > this function) > ibsim.c:296: error: (Each undeclared identifier is reported only once Are you sure that the ibsim build finds the libibmad header files? IB_PORT_GID_PREFIX_F has been defined there for a couple of years. > while installing ibsim-0.4 on a machine running OFED-1.2. With which > release of OFED > does ibsim-0.4 work properly? OFED-1.3 > Also can anyone of you tell me where can I get ibsim working properly with > OFED-1.2 release? Basically, you can check out a 3-8 week old version of ibsim directly from the git repo. But as noted above, I don't think that is the problem here. Sasha From mshefty at ichips.intel.com Mon Nov 26 11:11:12 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Mon, 26 Nov 2007 11:11:12 -0800 Subject: [ofa-general] [PATCH 0/6] nes: Cosmetic changes; support virtual WQs and PPC In-Reply-To: <20071114221453.3ADD5E609F0@openfabrics.org> References: <20071114221453.3ADD5E609F0@openfabrics.org> Message-ID: <474B1A50.9080205@ichips.intel.com> Glenn Grundstrom (NetEffect) wrote: > Updated code for the NetEffect NE020 adapter. > > Updates include: > - Support for userspace/virtual WQs.
> - PowerPC > - Support for multiple debugging levels > - Many, many cosmetic changes inline with kernel.org standards It would be possible to review this if each change were self contained, rather than grouped together with patches based on which files were affected. - Sean From ralph.campbell at qlogic.com Mon Nov 26 11:34:53 2007 From: ralph.campbell at qlogic.com (Ralph Campbell) Date: Mon, 26 Nov 2007 11:34:53 -0800 Subject: [ofa-general] ipath crash In-Reply-To: <5p5klh$2gfnks@rrcs-agw-01.hrndva.rr.com> References: <5p5klh$2gfnks@rrcs-agw-01.hrndva.rr.com> Message-ID: <1196105693.30674.8.camel@brick.pathscale.com> 2.6.18 has a bug in the vmalloc_user() code which causes this. The thing to do is use a new version of the kernel (2.6.20+ I think). On Mon, 2007-11-26 at 11:37 -0600, Robert Pearson wrote: > Here is the right crash > > > > ----------- [cut here ] --------- [please bite here ] --------- > > Kernel BUG at mm/slab.c:2649 > > invalid opcode: 0000 [1] SMP > > last sysfs file: /class/infiniband/ipath0/node_type > > CPU 7 > > Modules linked in: autofs4 hidp rfcomm l2cap bluetooth sunrpc > rdma_ucm(U) ib_srp(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_add > > r(U) ib_uverbs(U) ib_umad(U) ib_mthca(U) ib_ipoib(U) ib_cm(U) ib_sa(U) > ib_mad(U) ip_conntrack_netbios_ns ipt_REJECT xt_s > > tate ip_conntrack nfnetlink iptable_filter ip_tables ip6t_REJECT > xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 dm_m > > irror dm_mod video sbs i2c_ec i2c_core button battery asus_acpi > acpi_memhotplug ac parport_pc lp parport sg ib_ipath(U) > > ide_cd ib_core(U) serio_raw cdrom bnx2 shpchp pcspkr mptsas mptscsih > mptbase scsi_transport_sas sd_mod scsi_mod ext3 jbd > > ehci_hcd ohci_hcd uhci_hcd > > Pid: 8101, comm: fragment Not tainted 2.6.18-8.1.15.el5 #1 > > RIP: 0010:[] [] cache_grow > +0x1e/0x395 > > RSP: 0018:ffff810010c3dcb8 EFLAGS: 00010006 > > RAX: 0000000000000000 RBX: 00000000000080d0 RCX: 00000000ffffffff > > RDX: 0000000000000000 RSI: 00000000000080d0 RDI: 
ffff810037ff43c0 > > RBP: ffff81003ffa06e0 R08: ffff8100020bc280 R09: ffff810037e64400 > > R10: ffff810010c3de68 R11: 000000000000555c R12: ffff810037ff43c0 > > R13: ffff81003ffa06c0 R14: 0000000000000000 R15: ffff810037ff43c0 > > FS: 00002aaaaaad7440(0000) GS:ffff8100020bf340(0000) > knlGS:0000000000000000 > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 00002aaaaaaac000 CR3: 0000000011a7f000 CR4: 00000000000006e0 > > Process fragment (pid: 8101, threadinfo ffff810010c3c000, task > ffff81002cdd3820) > > Stack: 0000000000000000 0000000000000001 0000000000000296 > 0000000000000001 > > ffff810010c3dd18 00000000ffffffff ffff81003ffa06e0 ffff8100020bc280 > > ffff81003ffa06c0 000000000000000c ffff810037ff43c0 ffffffff8005a5ce > > Call Trace: > > [] cache_alloc_refill+0x136/0x186 > > [] kmem_cache_alloc_node+0x98/0xb2 > > [] __vmalloc_area_node+0x62/0x153 > > [] vmalloc_user+0x15/0x50 > > [] :ib_ipath:ipath_create_cq+0x67/0x1d6 > > [] __down_write_nested+0x12/0x92 > > [] :ib_uverbs:ib_uverbs_create_cq+0x143/0x259 > > [] :ib_uverbs:ib_uverbs_write+0x93/0xa9 > > [] selinux_file_permission+0x9f/0xb6 > > [] vfs_write+0xce/0x174 > > [] sys_write+0x45/0x6e > > [] tracesys+0xd1/0xdc > > > > The last one was from an older crash that I picked up by mistake. 
> > > > Bob > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sashak at voltaire.com Mon Nov 26 11:54:35 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 19:54:35 +0000 Subject: [ofa-general] Re: [PATCH][TRIVIAL] OpenSM/libvendor/osm_vendor_ibumad.c: Make error code in osm_log message unique In-Reply-To: <1196093530.26651.192.camel@hrosenstock-ws.xsigo.com> References: <1196093530.26651.192.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071126195435.GE14894@sashak.voltaire.com> On 08:12 Mon 26 Nov , Hal Rosenstock wrote: > OpenSM/libvendor/osm_vendor_ibumad.c: Make error code in osm_log message > unique > > Signed-off-by: Hal Rosenstock Applied. Thanks. Sasha From jim at mellanox.com Mon Nov 26 11:49:53 2007 From: jim at mellanox.com (Jim Mott) Date: Mon, 26 Nov 2007 11:49:53 -0800 Subject: [ofa-general] [PATCH 1/1] SDP - bug793: skbuff changes in 2.6.22+ Message-ID: In recent 64 bit kernels the struct sk_buff fields 'tail' and 'end' have changed from pointers to offsets. This fix for SDP supports the change and fixes a "kernel BUG at net/core/skbuff.c:95!" problem. 
Signed-off-by: Jim Mott --- Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c =================================================================== --- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_bcopy.c 2007-11-26 11:49:30.000000000 -0600 +++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c 2007-11-26 11:55:48.000000000 -0600 @@ -633,7 +633,11 @@ static void sdp_handle_wc(struct sdp_soc else skb->data_len = 0; skb->data = skb->head; +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->tail = skb_headlen(skb); +#else skb->tail = skb->head + skb_headlen(skb); +#endif h = (struct sdp_bsdh *)skb->data; skb_reset_transport_header(skb); ssk->mseq_ack = ntohl(h->mseq); From sashak at voltaire.com Mon Nov 26 12:11:05 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 20:11:05 +0000 Subject: [ofa-general] Re: LASH routing algorithm in openSM In-Reply-To: <829ded920711230235r51dd1f6di5cf1004bdf3cb4f@mail.gmail.com> References: <829ded920711212055t772a605ev5307e2c15a7bda5d@mail.gmail.com> <47454B3F.7000300@simula.no> <829ded920711220259s5a2e74f8t5048c033dddd5a35@mail.gmail.com> <829ded920711230022he95da4fy946a5c35968f5f16@mail.gmail.com> <829ded920711230023m127bdc9ah2c78ebf8a06cd163@mail.gmail.com> <51504.85.164.64.55.1195807306.squirrel@webmail.uio.no> <829ded920711230235r51dd1f6di5cf1004bdf3cb4f@mail.gmail.com> Message-ID: <20071126201105.GG14894@sashak.voltaire.com> On 16:05 Fri 23 Nov , Keshetti Mahesh wrote: > > I believe that LMC>0 will be (is being implemented) but i am not sure of > > the situation. My colleague, might be able to give you a quick reply on > > this. > > > > We are not heavily involved in openSM. We were invited to implement the > > LASH algorithm in OFED 1.2 / 1.3 and have not had any interaction with > > OpenFabrics since then. > > > > I suggest that if Sven-Arne doesn't have any more information than I do > > then you should send a question to the openSM mailing list. 
I believe they > > are very active and answer quickly. > > > > - Tom > > > > Is there any work going on in OFED community to support LMC>0 with > LASH routing algorithm in openSM currently ? At least I don't know about this. Sasha From sashak at voltaire.com Mon Nov 26 12:46:13 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 20:46:13 +0000 Subject: [ofa-general] Re: [PATCH] libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed Enabled/Supported In-Reply-To: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> References: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071126204613.GI14894@sashak.voltaire.com> On 07:33 Mon 26 Nov , Hal Rosenstock wrote: > libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed > Enabled/Supported > > Based on email from Jason Gunthorpe > > Signed-off-by: Hal Rosenstock > > diff --git a/libibmad/src/dump.c b/libibmad/src/dump.c > index 9628eba..05efdca 100644 > --- a/libibmad/src/dump.c > +++ b/libibmad/src/dump.c > @@ -227,39 +227,45 @@ mad_dump_linkwidth(char *buf, int bufsz, void *val, int valsz) > } > } > > +static void > +dump_linkwidth(char *buf, int bufsz, int width) > +{ > + char *s = buf, *e = s + bufsz; > + > + if (width & 0x1) > + s += snprintf(s, e - s, "1X or "); > + if (s < e && (width & 0x2)) > + s += snprintf(s, e - s, "4X or "); > + if (s < e && (width & 0x4)) > + s += snprintf(s, e - s, "8X or "); > + if (s < e && (width & 0x8)) > + s += snprintf(s, e - s, "12X or "); > + > + if ((width >> 4) || s == buf) > + s += snprintf(s, e - s, "?(%d)", width); > + else > + s[-3] = 0; > +} > + > void > mad_dump_linkwidthsup(char *buf, int bufsz, void *val, int valsz) > { > int width = *(int *)val; > > - switch (width) { > + dump_linkwidth(buf, bufsz, width); > + > + switch(width) { > case 1: > - snprintf(buf, bufsz, "1X"); > - break; > - case 2: > - snprintf(buf, bufsz, "4X (IBA extension)"); > - break; > case 3: > - snprintf(buf, bufsz, "1X or 4X"); > - 
break; > - case 4: > - snprintf(buf, bufsz, "8X (IBA extension)"); > - break; > case 7: > - snprintf(buf, bufsz, "1X or 4X or 8X"); > - break; > - case 8: > - snprintf(buf, bufsz, "12X (IBA extension)"); > - break; > case 11: > - snprintf(buf, bufsz, "1X or 4X or 12X"); > - break; > case 15: > - snprintf(buf, bufsz, "1X or 4X or 8X or 12X"); > break; > + > default: > - IBWARN("bad width %d", width); > - buf[0] = 0; > + snprintf(buf + strlen(buf), bufsz - strlen(buf), > + "(IBA extension)"); > + break; > } > } > > @@ -267,21 +273,8 @@ void > mad_dump_linkwidthen(char *buf, int bufsz, void *val, int valsz) > { > int width = *(int *)val; > - char *s = buf, *e = s + bufsz; > > - if (width & 0x1) > - s += snprintf(s, e - s, "1X or "); > - if (s < e && (width & 0x2)) > - s += snprintf(s, e - s, "4X or "); > - if (s < e && (width & 0x4)) > - s += snprintf(s, e - s, "8X or "); > - if (s < e && (width & 0x8)) > - s += snprintf(s, e - s, "12X or "); > - > - if ((width >> 4) || s == buf) > - s += snprintf(s, e - s, "?(%d)", width); > - else > - s[-3] = 0; > + dump_linkwidth(buf, bufsz, width); > } > > void > @@ -305,70 +298,49 @@ mad_dump_linkspeed(char *buf, int bufsz, void *val, int valsz) > } > } > > -void > -mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > +static void > +dump_linkspeed(char *buf, int bufsz, int speed) > { > - int speed = *(int *)val; > + char *s = buf, *e = s + bufsz; > + > + if (speed & 0x1) > + s += snprintf(s, e - s, "2.5 Gbps or "); > + if (s < e && (speed & 0x2)) > + s += snprintf(s, e - s, "5.0 Gbps or "); > + if (s < e && (speed & 0x4)) > + s += snprintf(s, e - s, "10.0 Gbps or "); > + > + if ((speed >> 4) || s == buf) > + s += snprintf(s, e - s, "?(%d)", speed); > + else > + s[-3] = 0; What should be if something like speed = 9 is passed? Probably you meant (speed >> 3) here? 
Sasha > > switch (speed) { > case 1: > - snprintf(buf, bufsz, "2.5 Gbps"); > - break; > - case 2: > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > - break; > case 3: > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > - break; > - case 4: > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > - break; > case 5: > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > - break; > case 7: > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > break; > default: > - snprintf(buf, bufsz, "?(%d)", speed); > + snprintf(s, e - s, "(IBA extension)"); > break; > } > } > > void > +mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > +{ > + int speed = *(int *)val; > + > + dump_linkspeed(buf, bufsz, speed); > +} > + > +void > mad_dump_linkspeeden(char *buf, int bufsz, void *val, int valsz) > { > int speed = *(int *)val; > > - switch (speed) { > - case 1: > - snprintf(buf, bufsz, "2.5 Gbps"); > - break; > - case 2: > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > - break; > - case 3: > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > - break; > - case 4: > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > - break; > - case 5: > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > - break; > - case 6: > - snprintf(buf, bufsz, "5.0 or 10.0 Gbps"); > - break; > - case 7: > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > - break; > - case 15: > - snprintf(buf, bufsz, "SpeedSupported"); > - break; > - default: > - snprintf(buf, bufsz, "?(%d)", speed); > - break; > - } > + dump_linkspeed(buf, bufsz, speed); > } > > void > From sashak at voltaire.com Mon Nov 26 13:07:20 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 21:07:20 +0000 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <20071117204111.GC32058@obsidianresearch.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> <20071117204111.GC32058@obsidianresearch.com> Message-ID: 
<20071126210720.GK14894@sashak.voltaire.com> On 13:41 Sat 17 Nov , Jason Gunthorpe wrote: > On Thu, Nov 15, 2007 at 06:30:17AM -0800, Hal Rosenstock wrote: > > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > > lid output format unification is needed. Print LIDs as decimal in > > > ibtracert. > > > > I'd prefer to see this done as some sort of option. Also, I think hex is > > better for MLIDs. > > FWIW, we consistently use hex notation in our switch products in the > format '0x10/16' which specifies both the LMC and the LID in a compact > manner. > > Since both GIDs, GUIDs and MACs are printed in hex, choosing decimal for lid > seems like an inconsistent choice to me. It also makes it harder to > read out the LMC bits. > > Also, this same kind of unification is needed for GID's. They should > always be printed and accepted in IPv6 format, not 128 bit > decimal. Not sure this was discussed explicitly (probably it should be). My feeling based on the feedback from people (emails, opened bugs, etc) was that desired *IDs formats are decimal for LIDs, LMC and hexadecimal for GUIDs, MLIDs. I don't remember that GID was discussed at all and I think that Jason's proposition about ipv6 format is first here. Any other opinions? Should *IDs formats be formalized at all? Thoughts? 
Sasha From weiny2 at llnl.gov Mon Nov 26 13:07:14 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Mon, 26 Nov 2007 13:07:14 -0800 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <20071126210720.GK14894@sashak.voltaire.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> <20071117204111.GC32058@obsidianresearch.com> <20071126210720.GK14894@sashak.voltaire.com> Message-ID: <20071126130714.60993dd7.weiny2@llnl.gov> On Mon, 26 Nov 2007 21:07:20 +0000 Sasha Khapyorsky wrote: > On 13:41 Sat 17 Nov , Jason Gunthorpe wrote: > > > > Also, this same kind of unification is needed for GID's. They should > > always be printed and accepted in IPv6 format, not 128 bit > > decimal. > > Not sure this was discussed explicitly (probably it should be). My > feeling based on the feedback from people (emails, opened bugs, etc) > was that desired *IDs formats are decimal for LIDs, LMC and hexadecimal > for GUIDs, MLIDs. This sounds good. Add comma-separated decimal for directed paths (for example, from opensm.log output: "Initial path: 0,1,2"). > > I don't remember that GID was discussed at all and I think that Jason's > proposition about ipv6 format is first here. > > Any other opinions? Should *IDs formats be formalized at all? Thoughts? > Yes, I believe IDs should be formalized.
Ira From hrosenstock at xsigo.com Mon Nov 26 13:13:45 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 26 Nov 2007 13:13:45 -0800 Subject: [ofa-general] Re: [PATCH] libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed Enabled/Supported In-Reply-To: <20071126204613.GI14894@sashak.voltaire.com> References: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> <20071126204613.GI14894@sashak.voltaire.com> Message-ID: <1196111625.26651.224.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-26 at 20:46 +0000, Sasha Khapyorsky wrote: > On 07:33 Mon 26 Nov , Hal Rosenstock wrote: > > libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed > > Enabled/Supported > > > > Based on email from Jason Gunthorpe > > > > Signed-off-by: Hal Rosenstock > > > > diff --git a/libibmad/src/dump.c b/libibmad/src/dump.c > > index 9628eba..05efdca 100644 > > --- a/libibmad/src/dump.c > > +++ b/libibmad/src/dump.c > > @@ -227,39 +227,45 @@ mad_dump_linkwidth(char *buf, int bufsz, void *val, int valsz) > > } > > } > > > > +static void > > +dump_linkwidth(char *buf, int bufsz, int width) > > +{ > > + char *s = buf, *e = s + bufsz; > > + > > + if (width & 0x1) > > + s += snprintf(s, e - s, "1X or "); > > + if (s < e && (width & 0x2)) > > + s += snprintf(s, e - s, "4X or "); > > + if (s < e && (width & 0x4)) > > + s += snprintf(s, e - s, "8X or "); > > + if (s < e && (width & 0x8)) > > + s += snprintf(s, e - s, "12X or "); > > + > > + if ((width >> 4) || s == buf) > > + s += snprintf(s, e - s, "?(%d)", width); > > + else > > + s[-3] = 0; > > +} > > + > > void > > mad_dump_linkwidthsup(char *buf, int bufsz, void *val, int valsz) > > { > > int width = *(int *)val; > > > > - switch (width) { > > + dump_linkwidth(buf, bufsz, width); > > + > > + switch(width) { > > case 1: > > - snprintf(buf, bufsz, "1X"); > > - break; > > - case 2: > > - snprintf(buf, bufsz, "4X (IBA extension)"); > > - break; > > case 3: > > - snprintf(buf, bufsz, "1X or 4X"); > > - break; > > - 
case 4: > > - snprintf(buf, bufsz, "8X (IBA extension)"); > > - break; > > case 7: > > - snprintf(buf, bufsz, "1X or 4X or 8X"); > > - break; > > - case 8: > > - snprintf(buf, bufsz, "12X (IBA extension)"); > > - break; > > case 11: > > - snprintf(buf, bufsz, "1X or 4X or 12X"); > > - break; > > case 15: > > - snprintf(buf, bufsz, "1X or 4X or 8X or 12X"); > > break; > > + > > default: > > - IBWARN("bad width %d", width); > > - buf[0] = 0; > > + snprintf(buf + strlen(buf), bufsz - strlen(buf), > > + "(IBA extension)"); > > + break; > > } > > } > > > > @@ -267,21 +273,8 @@ void > > mad_dump_linkwidthen(char *buf, int bufsz, void *val, int valsz) > > { > > int width = *(int *)val; > > - char *s = buf, *e = s + bufsz; > > > > - if (width & 0x1) > > - s += snprintf(s, e - s, "1X or "); > > - if (s < e && (width & 0x2)) > > - s += snprintf(s, e - s, "4X or "); > > - if (s < e && (width & 0x4)) > > - s += snprintf(s, e - s, "8X or "); > > - if (s < e && (width & 0x8)) > > - s += snprintf(s, e - s, "12X or "); > > - > > - if ((width >> 4) || s == buf) > > - s += snprintf(s, e - s, "?(%d)", width); > > - else > > - s[-3] = 0; > > + dump_linkwidth(buf, bufsz, width); > > } > > > > void > > @@ -305,70 +298,49 @@ mad_dump_linkspeed(char *buf, int bufsz, void *val, int valsz) > > } > > } > > > > -void > > -mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > > +static void > > +dump_linkspeed(char *buf, int bufsz, int speed) > > { > > - int speed = *(int *)val; > > + char *s = buf, *e = s + bufsz; > > + > > + if (speed & 0x1) > > + s += snprintf(s, e - s, "2.5 Gbps or "); > > + if (s < e && (speed & 0x2)) > > + s += snprintf(s, e - s, "5.0 Gbps or "); > > + if (s < e && (speed & 0x4)) > > + s += snprintf(s, e - s, "10.0 Gbps or "); > > + > > + if ((speed >> 4) || s == buf) > > + s += snprintf(s, e - s, "?(%d)", speed); > > + else > > + s[-3] = 0; > > What should be if something like speed = 9 is passed? Probably you meant > (speed >> 3) here? 
Yes, this was a cut and paste error. Do you want a new patch or will you just fix this up ? -- Hal > > Sasha > > > > > switch (speed) { > > case 1: > > - snprintf(buf, bufsz, "2.5 Gbps"); > > - break; > > - case 2: > > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > > - break; > > case 3: > > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > > - break; > > - case 4: > > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > > - break; > > case 5: > > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > > - break; > > case 7: > > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > > break; > > default: > > - snprintf(buf, bufsz, "?(%d)", speed); > > + snprintf(s, e - s, "(IBA extension)"); > > break; > > } > > } > > > > void > > +mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > > +{ > > + int speed = *(int *)val; > > + > > + dump_linkspeed(buf, bufsz, speed); > > +} > > + > > +void > > mad_dump_linkspeeden(char *buf, int bufsz, void *val, int valsz) > > { > > int speed = *(int *)val; > > > > - switch (speed) { > > - case 1: > > - snprintf(buf, bufsz, "2.5 Gbps"); > > - break; > > - case 2: > > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > > - break; > > - case 3: > > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > > - break; > > - case 4: > > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > > - break; > > - case 5: > > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > > - break; > > - case 6: > > - snprintf(buf, bufsz, "5.0 or 10.0 Gbps"); > > - break; > > - case 7: > > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > > - break; > > - case 15: > > - snprintf(buf, bufsz, "SpeedSupported"); > > - break; > > - default: > > - snprintf(buf, bufsz, "?(%d)", speed); > > - break; > > - } > > + dump_linkspeed(buf, bufsz, speed); > > } > > > > void > > From sashak at voltaire.com Mon Nov 26 13:28:48 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 21:28:48 +0000 Subject: [ofa-general] OpenSM error codes in a log file 
Message-ID: <20071126212848.GM14894@sashak.voltaire.com> Hi! When logging an error, OpenSM puts an error code in the log message, like this: osm_log(p_log, OSM_LOG_ERROR, "__updn_bfs_by_node (less) ERR AA01: " ... ^^^^ This is the old convention: the first two digits (AA in this example) are unique per source file, and the next two are the sequential number of the error within that file. I wanted to know if anybody uses those error codes. If not, are there any plans to use them? IOW, should we spend the time to maintain this? Sasha From hrosenstock at xsigo.com Mon Nov 26 13:20:51 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 26 Nov 2007 13:20:51 -0800 Subject: [ofa-general] Re: [PATCH] infiniband-diags/ibtracert: print lids in decimal form In-Reply-To: <20071126210720.GK14894@sashak.voltaire.com> References: <20071115103754.GM17237@sashak.voltaire.com> <1195137017.24101.65.camel@hrosenstock-ws.xsigo.com> <20071117204111.GC32058@obsidianresearch.com> <20071126210720.GK14894@sashak.voltaire.com> Message-ID: <1196112052.26651.232.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-26 at 21:07 +0000, Sasha Khapyorsky wrote: > On 13:41 Sat 17 Nov , Jason Gunthorpe wrote: > > On Thu, Nov 15, 2007 at 06:30:17AM -0800, Hal Rosenstock wrote: > > > On Thu, 2007-11-15 at 12:37 +0200, Sasha Khapyorsky wrote: > > > > As stated in bug#504 (https://bugs.openfabrics.org/show_bug.cgi?id=504) > > > > lid output format unification is needed. Print LIDs as decimal in > > > > ibtracert. > > > > > > I'd prefer to see this done as some sort of option. Also, I think hex is > > > better for MLIDs. > > > > FWIW, we consistently use hex notation in our switch products in the > > format '0x10/16' which specifies both the LMC and the LID in a compact > > manner. > > > > Since both GIDs, GUIDs and MACs are printed in hex, choosing decimal for lid > > seems like an inconsistent choice to me. It also makes it harder to > > read out the LMC bits.
> > > > Also, this same kind of unification is needed for GID's. They should > > always be printed and accepted in IPv6 format, not 128 bit > > decimal. > > Not sure this was discussed explicitly (probably it should be). My > feeling based on the feedback from people (emails, opened bugs, etc) > was that desired *IDs formats are decimal for LIDs, LMC and hexadecimal > for GUIDs, MLIDs. Not sure about decimal for LMC (as it is a mask to apply to LID). I also think that some prefer hex and would rather see some option for decimal/hex. Also, as I said before, I think that unicast LIDs need to be consistent with the SM. > I don't remember that GID was discussed at all and I think that Jason's > proposition about ipv6 format is first here. > > Any other opinions? Should *IDs formats be formalized at all? Thoughts? ipv6 format for GIDs makes sense to me (also affects SM too). -- Hal > Sasha From sashak at voltaire.com Mon Nov 26 13:33:47 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 21:33:47 +0000 Subject: [ofa-general] Re: [PATCH] libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed Enabled/Supported In-Reply-To: <1196111625.26651.224.camel@hrosenstock-ws.xsigo.com> References: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> <20071126204613.GI14894@sashak.voltaire.com> <1196111625.26651.224.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071126213347.GN14894@sashak.voltaire.com> On 13:13 Mon 26 Nov , Hal Rosenstock wrote: > On Mon, 2007-11-26 at 20:46 +0000, Sasha Khapyorsky wrote: > > On 07:33 Mon 26 Nov , Hal Rosenstock wrote: > > > libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed > > > Enabled/Supported > > > > > > Based on email from Jason Gunthorpe > > > > > > Signed-off-by: Hal Rosenstock > > > > > > diff --git a/libibmad/src/dump.c b/libibmad/src/dump.c > > > index 9628eba..05efdca 100644 > > > --- a/libibmad/src/dump.c > > > +++ b/libibmad/src/dump.c > > > @@ -227,39 +227,45 @@ 
mad_dump_linkwidth(char *buf, int bufsz, void *val, int valsz) > > > } > > > } > > > > > > +static void > > > +dump_linkwidth(char *buf, int bufsz, int width) > > > +{ > > > + char *s = buf, *e = s + bufsz; > > > + > > > + if (width & 0x1) > > > + s += snprintf(s, e - s, "1X or "); > > > + if (s < e && (width & 0x2)) > > > + s += snprintf(s, e - s, "4X or "); > > > + if (s < e && (width & 0x4)) > > > + s += snprintf(s, e - s, "8X or "); > > > + if (s < e && (width & 0x8)) > > > + s += snprintf(s, e - s, "12X or "); > > > + > > > + if ((width >> 4) || s == buf) > > > + s += snprintf(s, e - s, "?(%d)", width); > > > + else > > > + s[-3] = 0; > > > +} > > > + > > > void > > > mad_dump_linkwidthsup(char *buf, int bufsz, void *val, int valsz) > > > { > > > int width = *(int *)val; > > > > > > - switch (width) { > > > + dump_linkwidth(buf, bufsz, width); > > > + > > > + switch(width) { > > > case 1: > > > - snprintf(buf, bufsz, "1X"); > > > - break; > > > - case 2: > > > - snprintf(buf, bufsz, "4X (IBA extension)"); > > > - break; > > > case 3: > > > - snprintf(buf, bufsz, "1X or 4X"); > > > - break; > > > - case 4: > > > - snprintf(buf, bufsz, "8X (IBA extension)"); > > > - break; > > > case 7: > > > - snprintf(buf, bufsz, "1X or 4X or 8X"); > > > - break; > > > - case 8: > > > - snprintf(buf, bufsz, "12X (IBA extension)"); > > > - break; > > > case 11: > > > - snprintf(buf, bufsz, "1X or 4X or 12X"); > > > - break; > > > case 15: > > > - snprintf(buf, bufsz, "1X or 4X or 8X or 12X"); > > > break; > > > + > > > default: > > > - IBWARN("bad width %d", width); > > > - buf[0] = 0; > > > + snprintf(buf + strlen(buf), bufsz - strlen(buf), > > > + "(IBA extension)"); > > > + break; > > > } > > > } > > > > > > @@ -267,21 +273,8 @@ void > > > mad_dump_linkwidthen(char *buf, int bufsz, void *val, int valsz) > > > { > > > int width = *(int *)val; > > > - char *s = buf, *e = s + bufsz; > > > > > > - if (width & 0x1) > > > - s += snprintf(s, e - s, "1X or "); > > > - if (s < e && 
(width & 0x2)) > > > - s += snprintf(s, e - s, "4X or "); > > > - if (s < e && (width & 0x4)) > > > - s += snprintf(s, e - s, "8X or "); > > > - if (s < e && (width & 0x8)) > > > - s += snprintf(s, e - s, "12X or "); > > > - > > > - if ((width >> 4) || s == buf) > > > - s += snprintf(s, e - s, "?(%d)", width); > > > - else > > > - s[-3] = 0; > > > + dump_linkwidth(buf, bufsz, width); > > > } > > > > > > void > > > @@ -305,70 +298,49 @@ mad_dump_linkspeed(char *buf, int bufsz, void *val, int valsz) > > > } > > > } > > > > > > -void > > > -mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > > > +static void > > > +dump_linkspeed(char *buf, int bufsz, int speed) > > > { > > > - int speed = *(int *)val; > > > + char *s = buf, *e = s + bufsz; > > > + > > > + if (speed & 0x1) > > > + s += snprintf(s, e - s, "2.5 Gbps or "); > > > + if (s < e && (speed & 0x2)) > > > + s += snprintf(s, e - s, "5.0 Gbps or "); > > > + if (s < e && (speed & 0x4)) > > > + s += snprintf(s, e - s, "10.0 Gbps or "); > > > + > > > + if ((speed >> 4) || s == buf) > > > + s += snprintf(s, e - s, "?(%d)", speed); > > > + else > > > + s[-3] = 0; > > > > What should be if something like speed = 9 is passed? Probably you meant > > (speed >> 3) here? > > Yes, this was a cut and paste error. Do you want a new patch or will you > just fix this up ? Will fix. Sasha From hrosenstock at xsigo.com Mon Nov 26 13:23:09 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 26 Nov 2007 13:23:09 -0800 Subject: [ofa-general] Re: OpenSM error codes in a log file In-Reply-To: <20071126212848.GM14894@sashak.voltaire.com> References: <20071126212848.GM14894@sashak.voltaire.com> Message-ID: <1196112189.26651.235.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-26 at 21:28 +0000, Sasha Khapyorsky wrote: > Hi! > > OpenSM when logging an errors uses error code in a log message. Like > this: > > osm_log(p_log, OSM_LOG_ERROR, > "__updn_bfs_by_node (less) ERR AA01: " ... 
> ^^^^ > This is the old convention, first two digits (AA in this example) are > unique per source file, the next two are sequential number of an error > in this file. > > I wanted to know if anybody uses those error codes. If not, are there > any plans for using this? > > IOW should we spend the time and maintain this? FWIW, even if no one comes forward with a log parser, I think they are useful and should continue to be maintained. -- Hal > > Sasha From sashak at voltaire.com Mon Nov 26 13:44:52 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Mon, 26 Nov 2007 21:44:52 +0000 Subject: [ofa-general] Re: [PATCH] libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed Enabled/Supported In-Reply-To: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> References: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> Message-ID: <20071126214452.GO14894@sashak.voltaire.com> On 07:33 Mon 26 Nov , Hal Rosenstock wrote: > libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed > Enabled/Supported Another note... 
> > Based on email from Jason Gunthorpe > > Signed-off-by: Hal Rosenstock > > diff --git a/libibmad/src/dump.c b/libibmad/src/dump.c > index 9628eba..05efdca 100644 > --- a/libibmad/src/dump.c > +++ b/libibmad/src/dump.c > @@ -227,39 +227,45 @@ mad_dump_linkwidth(char *buf, int bufsz, void *val, int valsz) > } > } > > +static void > +dump_linkwidth(char *buf, int bufsz, int width) > +{ > + char *s = buf, *e = s + bufsz; > + > + if (width & 0x1) > + s += snprintf(s, e - s, "1X or "); > + if (s < e && (width & 0x2)) > + s += snprintf(s, e - s, "4X or "); > + if (s < e && (width & 0x4)) > + s += snprintf(s, e - s, "8X or "); > + if (s < e && (width & 0x8)) > + s += snprintf(s, e - s, "12X or "); > + > + if ((width >> 4) || s == buf) > + s += snprintf(s, e - s, "?(%d)", width); This prints "?(99)" > + else > + s[-3] = 0; and this leaves the blank at end - "SX " > +} > + > void > mad_dump_linkwidthsup(char *buf, int bufsz, void *val, int valsz) > { > int width = *(int *)val; > > - switch (width) { > + dump_linkwidth(buf, bufsz, width); > + > + switch(width) { > case 1: > - snprintf(buf, bufsz, "1X"); > - break; > - case 2: > - snprintf(buf, bufsz, "4X (IBA extension)"); > - break; > case 3: > - snprintf(buf, bufsz, "1X or 4X"); > - break; > - case 4: > - snprintf(buf, bufsz, "8X (IBA extension)"); > - break; > case 7: > - snprintf(buf, bufsz, "1X or 4X or 8X"); > - break; > - case 8: > - snprintf(buf, bufsz, "12X (IBA extension)"); > - break; > case 11: > - snprintf(buf, bufsz, "1X or 4X or 12X"); > - break; > case 15: > - snprintf(buf, bufsz, "1X or 4X or 8X or 12X"); > break; > + > default: > - IBWARN("bad width %d", width); > - buf[0] = 0; > + snprintf(buf + strlen(buf), bufsz - strlen(buf), > + "(IBA extension)"); > + break; Here "(IBA extension)" is printed without a leading blank. So in the case of unknown width code it will be "?(99)(IBA extension)". Right? If so, isn't it better to drop the blank at end of dump_linkwidth() and add it here unconditionally? 
Also wouldn't it be better to print "(IBA extension)" only for known codes? Sasha > } > } > > @@ -267,21 +273,8 @@ void > mad_dump_linkwidthen(char *buf, int bufsz, void *val, int valsz) > { > int width = *(int *)val; > - char *s = buf, *e = s + bufsz; > > - if (width & 0x1) > - s += snprintf(s, e - s, "1X or "); > - if (s < e && (width & 0x2)) > - s += snprintf(s, e - s, "4X or "); > - if (s < e && (width & 0x4)) > - s += snprintf(s, e - s, "8X or "); > - if (s < e && (width & 0x8)) > - s += snprintf(s, e - s, "12X or "); > - > - if ((width >> 4) || s == buf) > - s += snprintf(s, e - s, "?(%d)", width); > - else > - s[-3] = 0; > + dump_linkwidth(buf, bufsz, width); > } > > void > @@ -305,70 +298,49 @@ mad_dump_linkspeed(char *buf, int bufsz, void *val, int valsz) > } > } > > -void > -mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > +static void > +dump_linkspeed(char *buf, int bufsz, int speed) > { > - int speed = *(int *)val; > + char *s = buf, *e = s + bufsz; > + > + if (speed & 0x1) > + s += snprintf(s, e - s, "2.5 Gbps or "); > + if (s < e && (speed & 0x2)) > + s += snprintf(s, e - s, "5.0 Gbps or "); > + if (s < e && (speed & 0x4)) > + s += snprintf(s, e - s, "10.0 Gbps or "); > + > + if ((speed >> 4) || s == buf) > + s += snprintf(s, e - s, "?(%d)", speed); > + else > + s[-3] = 0; > > switch (speed) { > case 1: > - snprintf(buf, bufsz, "2.5 Gbps"); > - break; > - case 2: > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > - break; > case 3: > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > - break; > - case 4: > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > - break; > case 5: > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > - break; > case 7: > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > break; > default: > - snprintf(buf, bufsz, "?(%d)", speed); > + snprintf(s, e - s, "(IBA extension)"); > break; > } > } > > void > +mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > +{ > + int speed = *(int *)val; > + > 
+ dump_linkspeed(buf, bufsz, speed); > +} > + > +void > mad_dump_linkspeeden(char *buf, int bufsz, void *val, int valsz) > { > int speed = *(int *)val; > > - switch (speed) { > - case 1: > - snprintf(buf, bufsz, "2.5 Gbps"); > - break; > - case 2: > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > - break; > - case 3: > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > - break; > - case 4: > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > - break; > - case 5: > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > - break; > - case 6: > - snprintf(buf, bufsz, "5.0 or 10.0 Gbps"); > - break; > - case 7: > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > - break; > - case 15: > - snprintf(buf, bufsz, "SpeedSupported"); > - break; > - default: > - snprintf(buf, bufsz, "?(%d)", speed); > - break; > - } > + dump_linkspeed(buf, bufsz, speed); > } > > void > From kliteyn at mellanox.co.il Mon Nov 26 13:34:36 2007 From: kliteyn at mellanox.co.il (Yevgeny Kliteynik) Date: Mon, 26 Nov 2007 23:34:36 +0200 Subject: [ofa-general] Re: OpenSM error codes in a log file In-Reply-To: <1196112189.26651.235.camel@hrosenstock-ws.xsigo.com> References: <20071126212848.GM14894@sashak.voltaire.com> <1196112189.26651.235.camel@hrosenstock-ws.xsigo.com> Message-ID: <474B3BEC.7040509@mellanox.co.il> Hal Rosenstock wrote: > On Mon, 2007-11-26 at 21:28 +0000, Sasha Khapyorsky wrote: > >> Hi! >> >> OpenSM when logging an errors uses error code in a log message. Like >> this: >> >> osm_log(p_log, OSM_LOG_ERROR, >> "__updn_bfs_by_node (less) ERR AA01: " ... >> ^^^^ >> This is the old convention, first two digits (AA in this example) are >> unique per source file, the next two are sequential number of an error >> in this file. >> >> I wanted to know if anybody uses those error codes. If not, are there >> any plans for using this? >> >> IOW should we spend the time and maintain this? 
>> > > FWIW, even if no one comes forward with a log parser, I think they are > useful and should continue to be maintained. > Having a unique error code is a very useful thing - helps me find the problems reported by users. Besides, in the OpenSM simulation testing there is a kind of log parser. It's not exactly parsing the log, but it's monitoring it for "ERR" messages and printing them to the test log. Again, a unique error code is helpful here. -- Yevgeny > -- Hal > > >> Sasha >> > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > From hrosenstock at xsigo.com Mon Nov 26 13:42:46 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Mon, 26 Nov 2007 13:42:46 -0800 Subject: [ofa-general] Re: [PATCH] libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed Enabled/Supported In-Reply-To: <20071126214452.GO14894@sashak.voltaire.com> References: <1196091205.26651.180.camel@hrosenstock-ws.xsigo.com> <20071126214452.GO14894@sashak.voltaire.com> Message-ID: <1196113366.26651.239.camel@hrosenstock-ws.xsigo.com> On Mon, 2007-11-26 at 21:44 +0000, Sasha Khapyorsky wrote: > On 07:33 Mon 26 Nov , Hal Rosenstock wrote: > > libibmad/dump.c: Use bit mask approach to decoding LinkWidth/Speed > > Enabled/Supported > > Another note... 
> > > > > Based on email from Jason Gunthorpe > > > > Signed-off-by: Hal Rosenstock > > > > diff --git a/libibmad/src/dump.c b/libibmad/src/dump.c > > index 9628eba..05efdca 100644 > > --- a/libibmad/src/dump.c > > +++ b/libibmad/src/dump.c > > @@ -227,39 +227,45 @@ mad_dump_linkwidth(char *buf, int bufsz, void *val, int valsz) > > } > > } > > > > +static void > > +dump_linkwidth(char *buf, int bufsz, int width) > > +{ > > + char *s = buf, *e = s + bufsz; > > + > > + if (width & 0x1) > > + s += snprintf(s, e - s, "1X or "); > > + if (s < e && (width & 0x2)) > > + s += snprintf(s, e - s, "4X or "); > > + if (s < e && (width & 0x4)) > > + s += snprintf(s, e - s, "8X or "); > > + if (s < e && (width & 0x8)) > > + s += snprintf(s, e - s, "12X or "); > > + > > + if ((width >> 4) || s == buf) > > + s += snprintf(s, e - s, "?(%d)", width); > > This prints "?(99)" when width is 99. > > + else > > + s[-3] = 0; > > and this leaves the blank at end - "SX " Yes. > > +} > > + > > void > > mad_dump_linkwidthsup(char *buf, int bufsz, void *val, int valsz) > > { > > int width = *(int *)val; > > > > - switch (width) { > > + dump_linkwidth(buf, bufsz, width); > > + > > + switch(width) { > > case 1: > > - snprintf(buf, bufsz, "1X"); > > - break; > > - case 2: > > - snprintf(buf, bufsz, "4X (IBA extension)"); > > - break; > > case 3: > > - snprintf(buf, bufsz, "1X or 4X"); > > - break; > > - case 4: > > - snprintf(buf, bufsz, "8X (IBA extension)"); > > - break; > > case 7: > > - snprintf(buf, bufsz, "1X or 4X or 8X"); > > - break; > > - case 8: > > - snprintf(buf, bufsz, "12X (IBA extension)"); > > - break; > > case 11: > > - snprintf(buf, bufsz, "1X or 4X or 12X"); > > - break; > > case 15: > > - snprintf(buf, bufsz, "1X or 4X or 8X or 12X"); > > break; > > + > > default: > > - IBWARN("bad width %d", width); > > - buf[0] = 0; > > + snprintf(buf + strlen(buf), bufsz - strlen(buf), > > + "(IBA extension)"); > > + break; > > Here "(IBA extention)" is printed w/out leading blank. 
So in the case of > unknown width code it will be "?(99)(IBA extension)". Right? Right. > If so isn't it better to drop the blank at end of dump_linkwidth() and > add it here unconditionally? Sure. > Also wouldn't it be better to print "(IBA extension)" only for known > codes? Sure. Similarly for link speed. -- Hal > Sasha > > > } > > } > > > > @@ -267,21 +273,8 @@ void > > mad_dump_linkwidthen(char *buf, int bufsz, void *val, int valsz) > > { > > int width = *(int *)val; > > - char *s = buf, *e = s + bufsz; > > > > - if (width & 0x1) > > - s += snprintf(s, e - s, "1X or "); > > - if (s < e && (width & 0x2)) > > - s += snprintf(s, e - s, "4X or "); > > - if (s < e && (width & 0x4)) > > - s += snprintf(s, e - s, "8X or "); > > - if (s < e && (width & 0x8)) > > - s += snprintf(s, e - s, "12X or "); > > - > > - if ((width >> 4) || s == buf) > > - s += snprintf(s, e - s, "?(%d)", width); > > - else > > - s[-3] = 0; > > + dump_linkwidth(buf, bufsz, width); > > } > > > > void > > @@ -305,70 +298,49 @@ mad_dump_linkspeed(char *buf, int bufsz, void *val, int valsz) > > } > > } > > > > -void > > -mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > > +static void > > +dump_linkspeed(char *buf, int bufsz, int speed) > > { > > - int speed = *(int *)val; > > + char *s = buf, *e = s + bufsz; > > + > > + if (speed & 0x1) > > + s += snprintf(s, e - s, "2.5 Gbps or "); > > + if (s < e && (speed & 0x2)) > > + s += snprintf(s, e - s, "5.0 Gbps or "); > > + if (s < e && (speed & 0x4)) > > + s += snprintf(s, e - s, "10.0 Gbps or "); > > + > > + if ((speed >> 4) || s == buf) > > + s += snprintf(s, e - s, "?(%d)", speed); > > + else > > + s[-3] = 0; > > > > switch (speed) { > > case 1: > > - snprintf(buf, bufsz, "2.5 Gbps"); > > - break; > > - case 2: > > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > > - break; > > case 3: > > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > > - break; > > - case 4: > > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > > - break; 
> > case 5: > > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > > - break; > > case 7: > > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > > break; > > default: > > - snprintf(buf, bufsz, "?(%d)", speed); > > + snprintf(s, e - s, "(IBA extension)"); > > break; > > } > > } > > > > void > > +mad_dump_linkspeedsup(char *buf, int bufsz, void *val, int valsz) > > +{ > > + int speed = *(int *)val; > > + > > + dump_linkspeed(buf, bufsz, speed); > > +} > > + > > +void > > mad_dump_linkspeeden(char *buf, int bufsz, void *val, int valsz) > > { > > int speed = *(int *)val; > > > > - switch (speed) { > > - case 1: > > - snprintf(buf, bufsz, "2.5 Gbps"); > > - break; > > - case 2: > > - snprintf(buf, bufsz, "5.0 Gbps (IBA extension)"); > > - break; > > - case 3: > > - snprintf(buf, bufsz, "2.5 or 5.0 Gbps"); > > - break; > > - case 4: > > - snprintf(buf, bufsz, "10.0 Gbps (IBA extension)"); > > - break; > > - case 5: > > - snprintf(buf, bufsz, "2.5 or 10.0 Gbps"); > > - break; > > - case 6: > > - snprintf(buf, bufsz, "5.0 or 10.0 Gbps"); > > - break; > > - case 7: > > - snprintf(buf, bufsz, "2.5 or 5.0 or 10.0 Gbps"); > > - break; > > - case 15: > > - snprintf(buf, bufsz, "SpeedSupported"); > > - break; > > - default: > > - snprintf(buf, bufsz, "?(%d)", speed); > > - break; > > - } > > + dump_linkspeed(buf, bufsz, speed); > > } > > > > void > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From sean.hefty at intel.com Mon Nov 26 17:02:18 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 26 Nov 2007 17:02:18 -0800 Subject: [ofa-general] [PATCH 0/3] patch series for 2.6.25 Message-ID: <000001c83091$2859bce0$9c98070a@amr.corp.intel.com> Roland, I'd like to include the following patches in 2.6.25: ib/multicast: report errors on multicast groups if pkeys change 
ib/mad: report number of times a mad was retried ib/cm: add basic performance counters Patches follow for easier review, but are also available at: git://git.openfabrics.org/~shefty/rdma-dev.git for-roland All patches have previously been posted to the list. - Sean From sean.hefty at intel.com Mon Nov 26 17:05:33 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 26 Nov 2007 17:05:33 -0800 Subject: [ofa-general] [PATCH 1/3] ib/multicast: report errors on multicast groups if pkeys change In-Reply-To: <000001c83091$2859bce0$9c98070a@amr.corp.intel.com> References: <000001c83091$2859bce0$9c98070a@amr.corp.intel.com> Message-ID: <000101c83091$9c9c0180$9c98070a@amr.corp.intel.com> Pkey changes can invalidate multicast groups. Report errors on any multicast group affected by a pkey change. Signed-off-by: Sean Hefty --- A brief discussion of this patch is here: http://lists.openfabrics.org/pipermail/general/2007-October/042271.html drivers/infiniband/core/multicast.c | 55 +++++++++++++++++++++++++++++------ 1 files changed, 45 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 1bc1fe6..107f170 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -73,11 +73,20 @@ struct mcast_device { }; enum mcast_state { - MCAST_IDLE, MCAST_JOINING, MCAST_MEMBER, + MCAST_ERROR, +}; + +enum mcast_group_state { + MCAST_IDLE, MCAST_BUSY, - MCAST_ERROR + MCAST_GROUP_ERROR, + MCAST_PKEY_EVENT +}; + +enum { + MCAST_INVALID_PKEY_INDEX = 0xFFFF }; struct mcast_member; @@ -93,9 +102,10 @@ struct mcast_group { struct mcast_member *last_join; int members[3]; atomic_t refcount; - enum mcast_state state; + enum mcast_group_state state; struct ib_sa_query *query; int query_id; + u16 pkey_index; }; struct mcast_member { @@ -378,9 +388,19 @@ static int fail_join(struct mcast_group *group, struct mcast_member *member, static void process_group_error(struct mcast_group *group) { struct 
mcast_member *member; - int ret; + int ret = 0; + u16 pkey_index; + + if (group->state == MCAST_PKEY_EVENT) + ret = ib_find_pkey(group->port->dev->device, + group->port->port_num, + be16_to_cpu(group->rec.pkey), &pkey_index); spin_lock_irq(&group->lock); + if (group->state == MCAST_PKEY_EVENT && !ret && + group->pkey_index == pkey_index) + goto out; + while (!list_empty(&group->active_list)) { member = list_entry(group->active_list.next, struct mcast_member, list); @@ -399,6 +419,7 @@ static void process_group_error(struct mcast_group *group) } group->rec.join_state = 0; +out: group->state = MCAST_BUSY; spin_unlock_irq(&group->lock); } @@ -415,9 +436,9 @@ static void mcast_work_handler(struct work_struct *work) retest: spin_lock_irq(&group->lock); while (!list_empty(&group->pending_list) || - (group->state == MCAST_ERROR)) { + (group->state != MCAST_BUSY)) { - if (group->state == MCAST_ERROR) { + if (group->state != MCAST_BUSY) { spin_unlock_irq(&group->lock); process_group_error(group); goto retest; @@ -494,12 +515,19 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec, void *context) { struct mcast_group *group = context; + u16 pkey_index = MCAST_INVALID_PKEY_INDEX; if (status) process_join_error(group, status); else { + ib_find_pkey(group->port->dev->device, group->port->port_num, + be16_to_cpu(rec->pkey), &pkey_index); + spin_lock_irq(&group->port->lock); group->rec = *rec; + if (group->state == MCAST_BUSY && + group->pkey_index == MCAST_INVALID_PKEY_INDEX) + group->pkey_index = pkey_index; if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) { rb_erase(&group->node, &group->port->table); mcast_insert(group->port, group, 1); @@ -539,6 +567,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port, group->port = port; group->rec.mgid = *mgid; + group->pkey_index = MCAST_INVALID_PKEY_INDEX; INIT_LIST_HEAD(&group->pending_list); INIT_LIST_HEAD(&group->active_list); INIT_WORK(&group->work, mcast_work_handler); @@ -707,7 +736,8 @@ int 
ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, } EXPORT_SYMBOL(ib_init_ah_from_mcmember); -static void mcast_groups_lost(struct mcast_port *port) +static void mcast_groups_event(struct mcast_port *port, + enum mcast_group_state state) { struct mcast_group *group; struct rb_node *node; @@ -721,7 +751,8 @@ static void mcast_groups_lost(struct mcast_port *port) atomic_inc(&group->refcount); queue_work(mcast_wq, &group->work); } - group->state = MCAST_ERROR; + if (group->state != MCAST_GROUP_ERROR) + group->state = state; spin_unlock(&group->lock); } spin_unlock_irqrestore(&port->lock, flags); @@ -731,16 +762,20 @@ static void mcast_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct mcast_device *dev; + int index; dev = container_of(handler, struct mcast_device, event_handler); + index = event->element.port_num - dev->start_port; switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: - mcast_groups_lost(&dev->port[event->element.port_num - - dev->start_port]); + mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); + break; + case IB_EVENT_PKEY_CHANGE: + mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT); break; default: break; From sean.hefty at intel.com Mon Nov 26 17:08:59 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 26 Nov 2007 17:08:59 -0800 Subject: [ofa-general] [PATCH 2/3] ib/mad: report number of times a mad was retried In-Reply-To: <000001c83091$2859bce0$9c98070a@amr.corp.intel.com> References: <000001c83091$2859bce0$9c98070a@amr.corp.intel.com> Message-ID: <000201c83092$173fe140$9c98070a@amr.corp.intel.com> To allow ULPs to tune timeout values and capture retry statistics, report the number of times that a mad send operation was retried. For RMPP mads, report the total number of times that any portion (send window) of the send operation was retried. 
Signed-off-by: Sean Hefty --- This patch will be used to export IB CM performance counters. For more details, see threads starting at: http://lists.openfabrics.org/pipermail/general/2007-September/040659.html http://lists.openfabrics.org/pipermail/general/2007-September/041230.html drivers/infiniband/core/mad.c | 9 +++++++-- drivers/infiniband/core/mad_priv.h | 3 ++- drivers/infiniband/core/mad_rmpp.c | 2 +- include/rdma/ib_mad.h | 4 +++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 6f42877..91e62c3 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1100,7 +1100,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); - mad_send_wr->retries = send_buf->retries; + mad_send_wr->max_retries = send_buf->retries; + mad_send_wr->retries_left = send_buf->retries; + send_buf->retries = 0; /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; @@ -2445,9 +2447,12 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) { int ret; - if (!mad_send_wr->retries--) + if (!mad_send_wr->retries_left) return -ETIMEDOUT; + mad_send_wr->retries_left--; + mad_send_wr->send_buf.retries++; + mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 9be5cc0..8b75010 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -131,7 +131,8 @@ struct ib_mad_send_wr_private { struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; - int retries; + int max_retries; + int retries_left; 
int retry; int refcount; enum ib_wc_status status; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index d43bc62..a5e2a31 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -684,7 +684,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent, if (seg_num > mad_send_wr->last_ack) { adjust_last_ack(mad_send_wr, seg_num); - mad_send_wr->retries = mad_send_wr->send_buf.retries; + mad_send_wr->retries_left = mad_send_wr->max_retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 8ec3799..7228c05 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -230,7 +230,9 @@ struct ib_class_port_info * @seg_count: The number of RMPP segments allocated for this send. * @seg_size: Size of each RMPP segment. * @timeout_ms: Time to wait for a response. - * @retries: Number of times to retry a request for a response. + * @retries: Number of times to retry a request for a response. For MADs + * using RMPP, this applies per window. On completion, returns the number + * of retries needed to complete the transfer. * * Users are responsible for initializing the MAD buffer itself, with the * exception of any RMPP header. Additional segment buffer space allocated From sean.hefty at intel.com Mon Nov 26 17:15:26 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Mon, 26 Nov 2007 17:15:26 -0800 Subject: [ofa-general] [PATCH 3/3] ib/cm: add basic performance counters In-Reply-To: <000001c83091$2859bce0$9c98070a@amr.corp.intel.com> References: <000001c83091$2859bce0$9c98070a@amr.corp.intel.com> Message-ID: <000301c83092$fe2f99b0$9c98070a@amr.corp.intel.com> Add performance/debug counters to track sent/received messages, retries, and duplicates. Counters are tracked per CM message type, per port. The counters are always enabled, so intrusive state tracking is not done. 
Signed-off-by: Sean Hefty --- drivers/infiniband/core/cm.c | 294 +++++++++++++++++++++++++++++++++++++++-- drivers/infiniband/core/ucm.c | 37 ++--- 2 files changed, 296 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2e39236..790149e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. @@ -37,12 +37,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include @@ -78,15 +80,92 @@ static struct ib_cm { struct workqueue_struct *wq; } cm; +/* Counter indexes ordered by attribute ID */ +enum { + CM_REQ_COUNTER, + CM_MRA_COUNTER, + CM_REJ_COUNTER, + CM_REP_COUNTER, + CM_RTU_COUNTER, + CM_DREQ_COUNTER, + CM_DREP_COUNTER, + CM_SIDR_REQ_COUNTER, + CM_SIDR_REP_COUNTER, + CM_LAP_COUNTER, + CM_APR_COUNTER, + CM_ATTR_COUNT, + CM_ATTR_ID_OFFSET = 0x0010, +}; + +enum { + CM_XMIT, + CM_XMIT_RETRIES, + CM_RECV, + CM_RECV_DUPLICATES, + CM_COUNTER_GROUPS +}; + +static char const counter_group_names[CM_COUNTER_GROUPS] + [sizeof("cm_rx_duplicates")] = { + "cm_tx_msgs", "cm_tx_retries", + "cm_rx_msgs", "cm_rx_duplicates" +}; + +struct cm_counter_group { + struct kobject obj; + atomic_long_t counter[CM_ATTR_COUNT]; +}; + +struct cm_counter_attribute { + struct attribute attr; + int index; +}; + +#define CM_COUNTER_ATTR(_name, _index) \ +struct cm_counter_attribute cm_##_name##_counter_attr = { \ + .attr = {.name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE}, \ + .index = _index \ +} + +static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); +static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); +static 
CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); +static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); +static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); +static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); +static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); +static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); +static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); +static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); +static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); + +static struct attribute *cm_counter_default_attrs[] = { + &cm_req_counter_attr.attr, + &cm_mra_counter_attr.attr, + &cm_rej_counter_attr.attr, + &cm_rep_counter_attr.attr, + &cm_rtu_counter_attr.attr, + &cm_dreq_counter_attr.attr, + &cm_drep_counter_attr.attr, + &cm_sidr_req_counter_attr.attr, + &cm_sidr_rep_counter_attr.attr, + &cm_lap_counter_attr.attr, + &cm_apr_counter_attr.attr, + NULL +}; + struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; + struct kobject port_obj; u8 port_num; + struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { struct list_head list; struct ib_device *device; + struct kobject dev_obj; u8 ack_delay; struct cm_port port[0]; }; @@ -1270,6 +1349,9 @@ static void cm_dup_req_handler(struct cm_work *work, struct ib_mad_send_buf *msg = NULL; int ret; + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_REQ_COUNTER]); + /* Quick state check to discard duplicate REQs. */ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) return; @@ -1616,6 +1698,8 @@ static void cm_dup_rep_handler(struct cm_work *work) if (!cm_id_priv) return; + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) goto deref; @@ -1781,6 +1865,8 @@ static int cm_rtu_handler(struct cm_work *work) if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_RTU_COUNTER]); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; @@ -1958,6 +2044,8 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); if (!cm_id_priv) { + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; } @@ -1977,6 +2065,8 @@ static int cm_dreq_handler(struct cm_work *work) case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -1988,6 +2078,10 @@ static int cm_dreq_handler(struct cm_work *work) if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; + case IB_CM_DREQ_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_DREQ_COUNTER]); + goto unlock; default: goto unlock; } @@ -2339,10 +2433,20 @@ static int cm_mra_handler(struct cm_work *work) if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + cm_id_priv->msg, timeout)) { + if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) + atomic_long_inc(&work->port-> + counter_group[CM_RECV_DUPLICATES]. 
+ counter[CM_MRA_COUNTER]); goto out; + } cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; + case IB_CM_MRA_REQ_RCVD: + case IB_CM_MRA_REP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_MRA_COUNTER]); + /* fall through */ default: goto out; } @@ -2502,6 +2606,8 @@ static int cm_lap_handler(struct cm_work *work) case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_LAP_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; @@ -2515,6 +2621,10 @@ static int cm_lap_handler(struct cm_work *work) if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; + case IB_CM_LAP_RCVD: + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_LAP_COUNTER]); + goto unlock; default: goto unlock; } @@ -2796,6 +2906,8 @@ static int cm_sidr_req_handler(struct cm_work *work) cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { spin_unlock_irq(&cm.lock); + atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. + counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; @@ -2990,6 +3102,27 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct cm_port *port; + u16 attr_index; + + port = mad_agent->context; + attr_index = be16_to_cpu(((struct ib_mad_hdr *) + msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; + + /* + * If the send was in response to a received message (context[0] is not + * set to a cm_id), and is not a REJ, then it is a send that was + * manually retried. 
+ */ + if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) + msg->retries = 1; + + atomic_long_add(1 + msg->retries, + &port->counter_group[CM_XMIT].counter[attr_index]); + if (msg->retries) + atomic_long_add(msg->retries, + &port->counter_group[CM_XMIT_RETRIES]. + counter[attr_index]); switch (mad_send_wc->status) { case IB_WC_SUCCESS: @@ -3148,8 +3281,10 @@ EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_recv_wc *mad_recv_wc) { + struct cm_port *port = mad_agent->context; struct cm_work *work; enum ib_cm_event_type event; + u16 attr_id; int paths = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { @@ -3194,6 +3329,10 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, return; } + attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); + atomic_long_inc(&port->counter_group[CM_RECV]. + counter[attr_id - CM_ATTR_ID_OFFSET]); + work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, GFP_KERNEL); if (!work) { @@ -3204,7 +3343,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent, INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; - work->port = (struct cm_port *)mad_agent->context; + work->port = port; queue_delayed_work(cm.wq, &work->work, 0); } @@ -3379,6 +3518,110 @@ static void cm_get_ack_delay(struct cm_device *cm_dev) cm_dev->ack_delay = attr.local_ca_ack_delay; } +static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, + char *buf) +{ + struct cm_counter_group *group; + struct cm_counter_attribute *cm_attr; + + group = container_of(obj, struct cm_counter_group, obj); + cm_attr = container_of(attr, struct cm_counter_attribute, attr); + + return sprintf(buf, "%ld\n", + atomic_long_read(&group->counter[cm_attr->index])); +} + +static struct sysfs_ops cm_counter_ops = { + .show = cm_show_counter +}; + +static struct kobj_type cm_counter_obj_type = { + .sysfs_ops = 
&cm_counter_ops, + .default_attrs = cm_counter_default_attrs +}; + +static void cm_release_dev_obj(struct kobject *obj) +{ + struct cm_device *cm_dev; + + cm_dev = container_of(obj, struct cm_device, dev_obj); + kfree(cm_dev); +} + +static struct kobj_type cm_dev_obj_type = { + .release = cm_release_dev_obj +}; + +static struct class cm_class = { + .name = "infiniband_cm", +}; +EXPORT_SYMBOL(cm_class); + +static int cm_add_fs_obj(struct kobject *obj, struct kobject *parent, + struct kobj_type *type, const char *name) +{ + int ret; + + ret = kobject_set_name(obj, "%s", name); + if (ret) + return ret; + + obj->ktype = type; + obj->parent = kobject_get(parent); + if (!obj->parent) + return -EBUSY; + + ret = kobject_register(obj); + if (ret) + kobject_put(parent); + + return ret; +} + +static void cm_remove_fs_obj(struct kobject *obj) +{ + kobject_put(obj->parent); + kobject_unregister(obj); +} + +static int cm_create_port_fs(struct cm_port *port) +{ + char port_name[8]; + int i, ret; + + snprintf(port_name, sizeof port_name, "%d", port->port_num); + ret = cm_add_fs_obj(&port->port_obj, &port->cm_dev->dev_obj, + NULL, port_name); + if (ret) + return ret; + + for (i = 0; i < CM_COUNTER_GROUPS; i++) { + ret = cm_add_fs_obj(&port->counter_group[i].obj, &port->port_obj, + &cm_counter_obj_type, counter_group_names[i]); + if (ret) + goto error; + } + + return 0; + +error: + while (i--) + cm_remove_fs_obj(&port->counter_group[i].obj); + cm_remove_fs_obj(&port->port_obj); + return ret; + +} + +static void cm_remove_port_fs(struct cm_port *port) +{ + int i; + + for (i = 0; i < CM_COUNTER_GROUPS; i++) + cm_remove_fs_obj(&port->counter_group[i].obj); + + cm_remove_fs_obj(&port->port_obj); +} + static void cm_add_one(struct ib_device *device) { struct cm_device *cm_dev; @@ -3397,7 +3640,7 @@ static void cm_add_one(struct ib_device *device) if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) return; - cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * + cm_dev = 
kzalloc(sizeof(*cm_dev) + sizeof(*port) * device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; @@ -3405,11 +3648,23 @@ static void cm_add_one(struct ib_device *device) cm_dev->device = device; cm_get_ack_delay(cm_dev); + ret = cm_add_fs_obj(&cm_dev->dev_obj, &cm_class.subsys.kobj, + &cm_dev_obj_type, device->name); + if (ret) { + kfree(cm_dev); + return; + } + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= device->phys_port_cnt; i++) { port = &cm_dev->port[i-1]; port->cm_dev = cm_dev; port->port_num = i; + + ret = cm_create_port_fs(port); + if (ret) + goto error1; + port->mad_agent = ib_register_mad_agent(device, i, IB_QPT_GSI, &reg_req, @@ -3418,11 +3673,11 @@ static void cm_add_one(struct ib_device *device) cm_recv_handler, port); if (IS_ERR(port->mad_agent)) - goto error1; + goto error2; ret = ib_modify_port(device, i, 0, &port_modify); if (ret) - goto error2; + goto error3; } ib_set_client_data(device, &cm_client, cm_dev); @@ -3431,8 +3686,10 @@ static void cm_add_one(struct ib_device *device) write_unlock_irqrestore(&cm.device_lock, flags); return; -error2: +error3: ib_unregister_mad_agent(port->mad_agent); +error2: + cm_remove_port_fs(port); error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; @@ -3440,8 +3697,9 @@ error1: port = &cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + cm_remove_port_fs(port); } - kfree(cm_dev); + cm_remove_fs_obj(&cm_dev->dev_obj); } static void cm_remove_one(struct ib_device *device) @@ -3466,8 +3724,9 @@ static void cm_remove_one(struct ib_device *device) port = &cm_dev->port[i-1]; ib_modify_port(device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); + cm_remove_port_fs(port); } - kfree(cm_dev); + cm_remove_fs_obj(&cm_dev->dev_obj); } static int __init ib_cm_init(void) @@ -3488,17 +3747,25 @@ static int __init ib_cm_init(void) idr_pre_get(&cm.local_id_table, GFP_KERNEL); 
INIT_LIST_HEAD(&cm.timewait_list); - cm.wq = create_workqueue("ib_cm"); - if (!cm.wq) + ret = class_register(&cm_class); + if (ret) return -ENOMEM; + cm.wq = create_workqueue("ib_cm"); + if (!cm.wq) { + ret = -ENOMEM; + goto error1; + } + ret = ib_register_client(&cm_client); if (ret) - goto error; + goto error2; return 0; -error: +error2: destroy_workqueue(cm.wq); +error1: + class_unregister(&cm_class); return ret; } @@ -3519,6 +3786,7 @@ static void __exit ib_cm_cleanup(void) } ib_unregister_client(&cm_client); + class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 424983f..4291ab4 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -106,6 +106,9 @@ enum { IB_UCM_MAX_DEVICES = 32 }; +/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ +extern struct class cm_class; + #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); @@ -1199,7 +1202,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp) return 0; } -static void ib_ucm_release_class_dev(struct class_device *class_dev) +static void ucm_release_class_dev(struct class_device *class_dev) { struct ib_ucm_device *dev; @@ -1217,11 +1220,6 @@ static const struct file_operations ucm_fops = { .poll = ib_ucm_poll, }; -static struct class ucm_class = { - .name = "infiniband_cm", - .release = ib_ucm_release_class_dev -}; - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_ucm_device *dev; @@ -1257,9 +1255,10 @@ static void ib_ucm_add_one(struct ib_device *device) if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1)) goto err; - ucm_dev->class_dev.class = &ucm_class; + ucm_dev->class_dev.class = &cm_class; ucm_dev->class_dev.dev = device->dma_device; ucm_dev->class_dev.devt = ucm_dev->dev.dev; + ucm_dev->class_dev.release = ucm_release_class_dev; 
snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d", ucm_dev->devnum); if (class_device_register(&ucm_dev->class_dev)) @@ -1306,40 +1305,34 @@ static int __init ib_ucm_init(void) "infiniband_cm"); if (ret) { printk(KERN_ERR "ucm: couldn't register device number\n"); - goto err; + goto error1; } - ret = class_register(&ucm_class); - if (ret) { - printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n"); - goto err_chrdev; - } - - ret = class_create_file(&ucm_class, &class_attr_abi_version); + ret = class_create_file(&cm_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "ucm: couldn't create abi_version attribute\n"); - goto err_class; + goto error2; } ret = ib_register_client(&ucm_client); if (ret) { printk(KERN_ERR "ucm: couldn't register client\n"); - goto err_class; + goto error3; } return 0; -err_class: - class_unregister(&ucm_class); -err_chrdev: +error3: + class_remove_file(&cm_class, &class_attr_abi_version); +error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); -err: +error1: return ret; } static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); - class_unregister(&ucm_class); + class_remove_file(&cm_class, &class_attr_abi_version); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } From keshetti85-student at yahoo.co.in Mon Nov 26 20:18:53 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Tue, 27 Nov 2007 09:48:53 +0530 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <20071126190312.GC14894@sashak.voltaire.com> References: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> <20071126190312.GC14894@sashak.voltaire.com> Message-ID: <829ded920711262018o6f216d8ei69e103813f90ccef@mail.gmail.com> > Basically you can checkout 3-8 weeks old version of ibsim directly from > the git repo. But as noted above I don't think it is the problem here. 
> Thanks, now I am able to install and run ibsim properly with OFED-1.2. Can anyone tell me who maintains ibdmchk and the related utilities? I have some queries related to ibdmchk. Recently I simulated an irregular network topology with ibsim and ran opensm with the LASH routing algorithm enabled. Later, when I ran ibdmchk on the simulator, it found credit loops. Is there a problem in the LASH implementation or in the ibdmchk implementation? -Mahesh From kliteyn at mellanox.co.il Mon Nov 26 21:52:17 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 27 Nov 2007 07:52:17 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-27:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-26 OpenSM git rev = Tue_Nov_20_00:04:13_2007 [6b8a7c5ebb648a6aa054c7ec69a9e804e772f416] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From rdreier at cisco.com Mon Nov 26 22:21:26 2007 From: rdreier at cisco.com (Roland Dreier) Date: Mon, 26 Nov 2007 22:21:26 -0800 Subject: 
[ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will pull some small fixes for 2.6.24: Erez Zilber (1): IB/iser: Add missing counter increment in iser_data_buf_aligned_len() Jack Morgenstein (1): mlx4_core: Fix state check in mlx4_qp_modify() Joachim Fenkes (1): IB/ehca: Fix static rate regression Ralph Campbell (4): IB/ipath: Fix offset returned to ibv_resize_cq() IB/ipath: Fix error path in QP creation IB/ipath: Fix offset returned to ibv_modify_srq() IB/ipath: Normalize error return codes for posting work requests drivers/infiniband/hw/ehca/ehca_qp.c | 4 +- drivers/infiniband/hw/ipath/ipath_cq.c | 19 +++++++++--- drivers/infiniband/hw/ipath/ipath_qp.c | 15 ++++++---- drivers/infiniband/hw/ipath/ipath_srq.c | 44 +++++++++++++++++------------ drivers/infiniband/hw/ipath/ipath_verbs.c | 8 +++-- drivers/infiniband/ulp/iser/iser_memory.c | 6 ++- drivers/net/mlx4/qp.c | 2 +- 7 files changed, 61 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 2e3e654..dd12668 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1203,7 +1203,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->service_level = attr->ah_attr.sl; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, attr->ah_attr.static_rate, &mqpcb->max_static_rate)) { ret = -EINVAL; @@ -1302,7 +1302,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; mqpcb->service_level_al = attr->alt_ah_attr.sl; - if (ehca_calc_ipd(shca, my_qp->init_attr.port_num, + if 
(ehca_calc_ipd(shca, mqpcb->alt_phys_port, attr->alt_ah_attr.static_rate, &mqpcb->max_static_rate_al)) { ret = -EINVAL; diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 08d8ae1..d1380c7 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -395,12 +395,9 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail; } - /* - * Return the address of the WC as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. */ if (udata && udata->outlen >= sizeof(__u64)) { - __u64 offset = (__u64) wc; + __u64 offset = 0; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); if (ret) @@ -450,6 +447,18 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) struct ipath_mmap_info *ip = cq->ip; ipath_update_mmap_info(dev, ip, sz, wc); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, &dev->pending_mmaps); diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 6a41fdb..b997ff8 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -835,7 +835,8 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, init_attr->qp_type); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + vfree(qp->r_rq.wq); + goto bail_qp; } qp->ip = NULL; ipath_reset_qp(qp); @@ -863,7 +864,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, sizeof(offset)); if (err) { ret = ERR_PTR(err); - goto bail_rwq; + goto bail_ip; } } else { u32 s = sizeof(struct ipath_rwq) + @@ -875,7 +876,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->r_rq.wq); if (!qp->ip) { ret = 
ERR_PTR(-ENOMEM); - goto bail_rwq; + goto bail_ip; } err = ib_copy_to_udata(udata, &(qp->ip->offset), @@ -907,9 +908,11 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail; bail_ip: - kfree(qp->ip); -bail_rwq: - vfree(qp->r_rq.wq); + if (qp->ip) + kref_put(&qp->ip->ref, ipath_release_mmap_info); + else + vfree(qp->r_rq.wq); + ipath_free_qp(&dev->qp_table, qp); bail_qp: kfree(qp); bail_swq: diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index 40c36ec..2fef36f 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -59,7 +59,7 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > srq->rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } @@ -211,11 +211,11 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct ib_udata *udata) { struct ipath_srq *srq = to_isrq(ibsrq); + struct ipath_rwq *wq; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { struct ipath_rwq *owq; - struct ipath_rwq *wq; struct ipath_rwqe *p; u32 sz, size, n, head, tail; @@ -236,27 +236,20 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail; } - /* - * Return the address of the RWQ as the offset to mmap. - * See ipath_mmap() for details. - */ + /* Check that we can write the offset to mmap. 
*/ if (udata && udata->inlen >= sizeof(__u64)) { __u64 offset_addr; - __u64 offset = (__u64) wq; + __u64 offset = 0; ret = ib_copy_from_udata(&offset_addr, udata, sizeof(offset_addr)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; udata->outbuf = (void __user *) offset_addr; ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); - if (ret) { - vfree(wq); - goto bail; - } + if (ret) + goto bail_free; } spin_lock_irq(&srq->rq.lock); @@ -277,10 +270,8 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, else n -= tail; if (size <= n) { - spin_unlock_irq(&srq->rq.lock); - vfree(wq); ret = -EINVAL; - goto bail; + goto bail_unlock; } n = 0; p = wq->wq; @@ -314,6 +305,18 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, u32 s = sizeof(struct ipath_rwq) + size * sz; ipath_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See ipath_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + spin_lock_irq(&dev->pending_lock); if (list_empty(&ip->pending_mmaps)) list_add(&ip->pending_mmaps, @@ -328,7 +331,12 @@ int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, srq->limit = attr->srq_limit; spin_unlock_irq(&srq->rq.lock); } + goto bail; +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); bail: return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 74f77e7..c4c9984 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -302,8 +302,10 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) next = qp->s_head + 1; if (next >= qp->s_size) next = 0; - if (next == qp->s_last) - goto bail_inval; + if (next == qp->s_last) { + ret = -ENOMEM; + goto bail; + } wqe = get_swqe_ptr(qp, qp->s_head); wqe->wr = *wr; @@ 
-404,7 +406,7 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { *bad_wr = wr; - ret = -ENOMEM; + ret = -EINVAL; goto bail; } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d687980..4a17743 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -310,13 +310,15 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data, if (i + 1 < data->dma_nents) { next_addr = ib_sg_dma_address(ibdev, sg_next(sg)); /* are i, i+1 fragments of the same page? */ - if (end_addr == next_addr) + if (end_addr == next_addr) { + cnt++; continue; - else if (!IS_4K_ALIGNED(end_addr)) { + } else if (!IS_4K_ALIGNED(end_addr)) { ret_len = cnt + 1; break; } } + cnt++; } if (i == data->dma_nents) ret_len = cnt; /* loop ended */ diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index 42b4763..fa24e65 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -113,7 +113,7 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_cmd_mailbox *mailbox; int ret = 0; - if (cur_state >= MLX4_QP_NUM_STATE || cur_state >= MLX4_QP_NUM_STATE || + if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || !op[cur_state][new_state]) return -EINVAL; From rpearson at systemfabricworks.com Mon Nov 26 23:17:26 2007 From: rpearson at systemfabricworks.com (Robert Pearson) Date: Tue, 27 Nov 2007 01:17:26 -0600 Subject: [ofa-general] ipath crash In-Reply-To: <1196105693.30674.8.camel@brick.pathscale.com> Message-ID: <5p5klh$2gorsm@rrcs-agw-01.hrndva.rr.com> Good to know. The system was a RHEL 5 based system. 
-----Original Message----- From: Ralph Campbell [mailto:ralph.campbell at qlogic.com] Sent: Monday, November 26, 2007 1:35 PM To: Robert Pearson Cc: openib-general at openib.org; 'Arthur Jones' Subject: Re: [ofa-general] ipath crash 2.6.18 has a bug in the vmalloc_user() code which causes this. The thing to do is use a new version of the kernel (2.6.20+ I think). On Mon, 2007-11-26 at 11:37 -0600, Robert Pearson wrote: > Here is the right crash > > > > ----------- [cut here ] --------- [please bite here ] --------- > > Kernel BUG at mm/slab.c:2649 > > invalid opcode: 0000 [1] SMP > > last sysfs file: /class/infiniband/ipath0/node_type > > CPU 7 > > Modules linked in: autofs4 hidp rfcomm l2cap bluetooth sunrpc > rdma_ucm(U) ib_srp(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_add > > r(U) ib_uverbs(U) ib_umad(U) ib_mthca(U) ib_ipoib(U) ib_cm(U) ib_sa(U) > ib_mad(U) ip_conntrack_netbios_ns ipt_REJECT xt_s > > tate ip_conntrack nfnetlink iptable_filter ip_tables ip6t_REJECT > xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 dm_m > > irror dm_mod video sbs i2c_ec i2c_core button battery asus_acpi > acpi_memhotplug ac parport_pc lp parport sg ib_ipath(U) > > ide_cd ib_core(U) serio_raw cdrom bnx2 shpchp pcspkr mptsas mptscsih > mptbase scsi_transport_sas sd_mod scsi_mod ext3 jbd > > ehci_hcd ohci_hcd uhci_hcd > > Pid: 8101, comm: fragment Not tainted 2.6.18-8.1.15.el5 #1 > > RIP: 0010:[] [] cache_grow > +0x1e/0x395 > > RSP: 0018:ffff810010c3dcb8 EFLAGS: 00010006 > > RAX: 0000000000000000 RBX: 00000000000080d0 RCX: 00000000ffffffff > > RDX: 0000000000000000 RSI: 00000000000080d0 RDI: ffff810037ff43c0 > > RBP: ffff81003ffa06e0 R08: ffff8100020bc280 R09: ffff810037e64400 > > R10: ffff810010c3de68 R11: 000000000000555c R12: ffff810037ff43c0 > > R13: ffff81003ffa06c0 R14: 0000000000000000 R15: ffff810037ff43c0 > > FS: 00002aaaaaad7440(0000) GS:ffff8100020bf340(0000) > knlGS:0000000000000000 > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 00002aaaaaaac000 CR3: 
0000000011a7f000 CR4: 00000000000006e0 > > Process fragment (pid: 8101, threadinfo ffff810010c3c000, task > ffff81002cdd3820) > > Stack: 0000000000000000 0000000000000001 0000000000000296 > 0000000000000001 > > ffff810010c3dd18 00000000ffffffff ffff81003ffa06e0 ffff8100020bc280 > > ffff81003ffa06c0 000000000000000c ffff810037ff43c0 ffffffff8005a5ce > > Call Trace: > > [] cache_alloc_refill+0x136/0x186 > > [] kmem_cache_alloc_node+0x98/0xb2 > > [] __vmalloc_area_node+0x62/0x153 > > [] vmalloc_user+0x15/0x50 > > [] :ib_ipath:ipath_create_cq+0x67/0x1d6 > > [] __down_write_nested+0x12/0x92 > > [] :ib_uverbs:ib_uverbs_create_cq+0x143/0x259 > > [] :ib_uverbs:ib_uverbs_write+0x93/0xa9 > > [] selinux_file_permission+0x9f/0xb6 > > [] vfs_write+0xce/0x174 > > [] sys_write+0x45/0x6e > > [] tracesys+0xd1/0xdc > > > > The last one was from an older crash that I picked up by mistake. > > > > Bob > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From eli at mellanox.co.il Mon Nov 26 23:19:56 2007 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 27 Nov 2007 09:19:56 +0200 Subject: [ofa-general] [PATCH] ipoib: Bug fix in ipoib_poll - resend Message-ID: <1196147996.21753.127.camel@mtls03> Bug fix in ipoib_poll ipoib_poll may return negative values in case of errors and cause data corruption. In addition if it returns 0 we can break the loop. 
Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 5063dd5..d98980c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -296,6 +296,8 @@ poll_more: t = min(IPOIB_NUM_WC, max); n = ib_poll_cq(priv->cq, t, priv->ibwc); + if (unlikely(n < 1)) + break; for (i = 0; i < n; i++) { struct ib_wc *wc = priv->ibwc + i; -- 1.5.3.6 From jackm at dev.mellanox.co.il Tue Nov 27 00:59:48 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Tue, 27 Nov 2007 10:59:48 +0200 Subject: [ofa-general] [GIT PULL] please pull infiniband.git In-Reply-To: References: Message-ID: <200711271059.48891.jackm@dev.mellanox.co.il> On Tuesday 27 November 2007 08:21, Roland Dreier wrote: > Linus, please pull from > > master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus > > > Jack Morgenstein (1): > mlx4_core: Fix state check in mlx4_qp_modify() > MUST also enter the patch I send yesterday: [PATCH] ipoib: fix kernel Oops resulting from xmit when priv->broadcast is NULL. (critical bug fix -- will get kernel Oopses whenever ports on the network go down!). (patch given again below) - Jack ====================================================================================================================== IPoIB: Fix kernel Oops resulting from xmit following dev_down. If a port goes down, ipoib_ib_dev_down is invoked -- which flushed the mcasts (clearing priv->broadcast) and clearing the path record cache. If ipoib_start_xmit is then invoked (before the port is upped), a kernel Oops results from attempting to access priv->broadcast. Returning NULL if priv->broadcast is NULL is a harmless way of bypassing the problem -- the offending packet is simply discarded "without prejudice". 
Signed-off-by: Jack Morgenstein diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a03a65e..c9f6077 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -460,6 +460,9 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; + if (!priv->broadcast) + return NULL; + path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; From dotanb at dev.mellanox.co.il Tue Nov 27 01:24:07 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Tue, 27 Nov 2007 11:24:07 +0200 Subject: [ofa-general] i got kernel oops in ib_umad when executing ULPs tests Message-ID: <474BE237.8050602@dev.mellanox.co.il> Hi. When executing SDP tests (stress_connect) i got a kernel oops in my machine in ib_umad: Here are the machine props: ************************************************************* Host Name : sw112/3 Host Architecture : x86_64 Linux Distribution: SUSE Linux Enterprise Server 10 (x86_64) VERSION = 10 Kernel Version : 2.6.16.21-0.8-smp GCC Version : gcc (GCC) 4.1.0 (SUSE Linux) Memory size : 4049452 kB Number of CPUs : 4 cpu MHz : 3192.308 MST Version : 4.4.3 Driver Version : ofa_1_3_dev-20071126-0855 HCA ID(s) : mlx4_0 HCA model(s) : 25418 Board(s) : MT_04A0110002 ************************************************************* Here is the dump of the /var/log/messages: Nov 27 09:26:32 sw112 OpenSM[24713]: Exiting SM Nov 27 09:26:32 sw112 kernel: general protection fault: 0000 [1] SMP Nov 27 09:26:32 sw112 kernel: last sysfs file: /class/net/ib0/address Nov 27 09:26:32 sw112 kernel: CPU 2 Nov 27 09:26:32 sw112 kernel: Modules linked in: mst_pciconf mst_pci rdma_ucm rds ib_sdp rdma_cm iw_cm ib_addr ib_ipoib ib_c m ib_sa ib_uverbs ib_umad mlx4_ib mlx4_core ib_mthca ib_mad ib_core memtrack autofs4 ipv6 nfs lockd nfs_acl sunrpc af_packet button battery ac apparmor aamatch_pcre 
loop dm_mod ide_cd uhci_hcd ehci_hcd cdrom shpchp pci_hotplug hw_random i8xx_tco us bcore e1000 ext3 jbd edd fan thermal processor sg mptspi mptscsih mptbase scsi_transport_spi piix sd_mod scsi_mod ide_disk i de_core Nov 27 09:26:32 sw112 kernel: Pid: 24713, comm: opensm Tainted: PF U 2.6.16.21-0.8-smp #1 Nov 27 09:26:32 sw112 kernel: RIP: 0010:[] {:ib_umad:dequeue_send+26} Nov 27 09:26:32 sw112 kernel: RSP: 0018:ffff8100c0d9fde8 EFLAGS: 00010046 Nov 27 09:26:32 sw112 kernel: RAX: ffff8100c1a95658 RBX: 3f40a6f32b5a2004 RCX: 3f40a6f32b5a2014 Nov 27 09:26:32 sw112 kernel: RDX: ffff8100c0d9fe58 RSI: 3f40a6f32b5a2004 RDI: ffff81007401ac3c Nov 27 09:26:32 sw112 kernel: RBP: 3f40a6f32b5a2004 R08: 0000000000000206 R09: 00000000000007d7 Nov 27 09:26:32 sw112 kernel: R10: 0000000000000000 R11: 0000000000000246 R12: ffff81007401ac00 Nov 27 09:26:32 sw112 kernel: R13: ffff81007401a210 R14: 0000000000000005 R15: 0000000000000000 Nov 27 09:26:32 sw112 kernel: FS: 00002b13822edef0(0000) GS:ffff81012bd6b340(0000) knlGS:0000000000000000 Nov 27 09:26:32 sw112 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b Nov 27 09:26:32 sw112 kernel: CR2: 00000000005d99c0 CR3: 0000000037079000 CR4: 00000000000006e0 Nov 27 09:26:32 sw112 kernel: Process opensm (pid: 24713, threadinfo ffff8100c0d9e000, task ffff8100cd8047d0) Nov 27 09:26:32 sw112 kernel: Stack: ffff81012d706b10 ffff8100c0d9fe68 ffff81007401ac00 ffffffff8837d4b1 Nov 27 09:26:32 sw112 kernel: 0000000000000296 ffff8100c0d9fe40 ffff81007401a210 ffff81007401a200 Nov 27 09:26:32 sw112 kernel: 0000000000000005 ffffffff8827261e Nov 27 09:26:32 sw112 kernel: Call Trace: {:ib_umad:send_handler+38} Nov 27 09:26:32 sw112 kernel: {:ib_mad:ib_unregister_mad_agent+359} Nov 27 09:26:32 sw112 kernel: {:ib_umad:ib_umad_unreg_agent+121} Nov 27 09:26:32 sw112 kernel: {:ib_umad:ib_umad_ioctl+74} {do_ioctl+33} Nov 27 09:26:32 sw112 kernel: {vfs_ioctl+584} {__up_write+33} Nov 27 09:26:32 sw112 kernel: {sys_ioctl+98} {system_call+126} Nov 27 
09:26:32 sw112 kernel: Nov 27 09:26:32 sw112 kernel: Code: 48 8b 53 10 48 8b 41 08 48 89 42 08 48 89 10 48 c7 41 08 00 Nov 27 09:26:32 sw112 kernel: RIP {:ib_umad:dequeue_send+26} RSP Here is the dump of /var/log/opensm.log: Nov 27 09:26:44 546327 [D6AC7EF0] 0x03 -> OpenSM 3.1.7 Nov 27 09:26:44 546407 [D6AC7EF0] 0x80 -> OpenSM 3.1.7 Nov 27 09:26:44 547422 [D6AC7EF0] 0x02 -> osm_vendor_bind: Binding to port 0x4025 Nov 27 09:26:44 673957 [D6AC7EF0] 0x01 -> osm_vendor_bind: ERR 5426: Unable to register class 129 version 1 Nov 27 09:26:44 674032 [D6AC7EF0] 0x01 -> osm_sm_mad_ctrl_bind: ERR 3118: Vendor specific bind failed Nov 27 09:26:44 674057 [D6AC7EF0] 0x01 -> osm_sm_bind: ERR 2E10: SM MAD Controller bind failed (IB_ERROR) Nov 27 09:26:44 674089 [D6AC7EF0] 0x01 -> osm_sa_mad_ctrl_unbind: ERR 1A11: No previous bind Nov 27 09:26:44 675165 [D6AC7EF0] 0x80 -> Exiting SM can you check this issue? thanks Dotan From josephoska04 at o2.pl Tue Nov 27 01:48:11 2007 From: josephoska04 at o2.pl (=?UTF-8?Q?josephoska04?=) Date: Tue, 27 Nov 2007 10:48:11 +0100 Subject: [ofa-general] THANKS BE TO GOD Message-ID: <4823d0b0.18f5aa29.474be7db.f989@o2.pl> Dear Friend, I did not forgot your past effort and attemps to assist me, now I'm happy to inform you that i have suceeded in getting those funds transferred under the cooperation of a new partner from Japan. Now Contact my secretary ask him for($1.200.000.00)for your compensation his,name is NAME :MR. Favour Chinedu and his E-mail: (fv_chinedu01 at yahoo.dk) 1,Your Full Name.......... 2,Delivery address........ 3,phone number............ 4,email address........... REGARDS MR. 
joseph oska From krkumar2 at in.ibm.com Tue Nov 27 01:53:01 2007 From: krkumar2 at in.ibm.com (Krishna Kumar2) Date: Tue, 27 Nov 2007 15:23:01 +0530 Subject: [ofa-general] [PATCH] ipoib: Bug fix in ipoib_poll - resend In-Reply-To: <1196147996.21753.127.camel@mtls03> Message-ID: Hi Eli, > > t = min(IPOIB_NUM_WC, max); > n = ib_poll_cq(priv->cq, t, priv->ibwc); > + if (unlikely(n < 1)) > + break; > > for (i = 0; i < n; i++) { > struct ib_wc *wc = priv->ibwc + i; The 'for' loop (followed by the "if (n != t) break" check) should take care of this, isn't it? Thanks, - KK From eli at mellanox.co.il Tue Nov 27 02:17:26 2007 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 27 Nov 2007 12:17:26 +0200 Subject: [ofa-general] [PATCH] ipoib: Bug fix in ipoib_poll - resend In-Reply-To: References: Message-ID: <1196158646.21753.152.camel@mtls03> On Tue, 2007-11-27 at 15:23 +0530, Krishna Kumar2 wrote: > Hi Eli, > > > > > t = min(IPOIB_NUM_WC, max); > > n = ib_poll_cq(priv->cq, t, priv->ibwc); > > + if (unlikely(n < 1)) > > + break; > > > > for (i = 0; i < n; i++) { > > struct ib_wc *wc = priv->ibwc + i; > > The 'for' loop (followed by the "if (n != t) break" check) > should take care of this, isn't it? > Oh you're saying that since this is signed arithmetic we should never enter the for loop. I saw a problem when I worked on some patch and that seemed to solve the problem... 
From vlad at lists.openfabrics.org Tue Nov 27 02:58:05 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Tue, 27 Nov 2007 02:58:05 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071127-0200 daily build status Message-ID: <20071127105805.83AF8E60849@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.12 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on x86_64 with linux-2.6.19 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.16 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.18 Passed on ppc64 with linux-2.6.16 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.12 Passed on ia64 with linux-2.6.19 Passed on ppc64 with linux-2.6.15 Passed on ia64 with linux-2.6.13 Passed on ppc64 with linux-2.6.17 Passed on powerpc with linux-2.6.15 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.12 Passed on ia64 with linux-2.6.14 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.16 Passed on 
x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.15 Passed on x86_64 with linux-2.6.13 Passed on ia64 with linux-2.6.22 Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.14 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ppc64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Failed: From or.gerlitz at gmail.com Tue Nov 27 04:35:39 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Tue, 27 Nov 2007 14:35:39 +0200 Subject: [ofa-general] Re: [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com> <4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com> <47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> Message-ID: <15ddcffd0711270435t12a18dc3waac2596b3884ac72@mail.gmail.com> On 11/1/07, Sean Hefty wrote: > Fix a couple of errors in the man page documentation and add > infiniband specific text about QP configuration settings. This > is in response to user questions about various settings based > on feedback from Or. > Signed-off-by: Sean Hefty > man/rdma_accept.3 | 25 +++++++++++++++---------- > man/rdma_connect.3 | 23 +++++++++++++++++------ > man/rdma_get_cm_event.3 | 4 ++-- > man/rdma_resolve_addr.3 | 6 +++++- > man/rdma_resolve_route.3 | 4 +++- > 5 files changed, 42 insertions(+), 20 deletions(-) Hi Sean, Following a question from a librdmacm user, I realized this is not committed yet... please save me from having to explain all of it again... can you commit? Or.
From Arkady.Kanevsky at netapp.com Tue Nov 27 06:54:05 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Tue, 27 Nov 2007 09:54:05 -0500 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> References: <474473DD.3050507@opengridcomputing.com><4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> Message-ID: ULP can post recvs before connection is established but not to send queue prior to connection establishment. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Glenn Grundstrom [mailto:ggrundstrom at NetEffect.com] > Sent: Sunday, November 25, 2007 9:00 PM > To: Steve Wise; Kanevsky, Arkady > Cc: Leonid Grossman; openib-general at openib.org > Subject: RE: [ofa-general] Re: iWARP peer-to-peer CM proposal > > > > > Kanevsky, Arkady wrote: > > > Very good points. > > > Thanks Steve. > > > > > > If we can do unsignalled 0-size RDMA Read with "bogus" > > S-tag this may > > > work better. > > > Yes, it will require IRD not to be 0 set at Responder. > > > Ditto ORD of at least 1 on Responder. > > > There is no need to have extra CQ entry on either side for it. > > > It is only needed for error path. > > > So this will only be needed if Sender posted the full queue > > of sends. > > > But it can not post anything because CM will not let it know that > > > connection is established. > > > > > > > > Well, actually, I think the ULP _can_ post before establishing the > > connection. But I guess we can define the semantics such that > > applications using the rdma-cm interface must adhere to whatever we > > need to make this hack work. > > > > Q: are there apps using the rdma-cm out there today that > pre-post SQ > > WRs before getting a ESTABLISHED event? > > > > Steve. 
> > ULPs are allowed to post prior to establishing the > connection, but I can't name any that operate this way. > Prohibiting applications that use the rdma_cm directly from > pre-posting is okay, but what about ULP's over other ULP's > (i.e. MPI over uDAPL). How can/will this be handled? > > Glenn. > > > > > Happy Thanksgiving, > > > > > > Arkady Kanevsky email: arkady at netapp.com > > > Network Appliance Inc. phone: 781-768-5395 > > > 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 > > > Waltham, MA 02451 central phone: 781-768-5300 > > > > > > > > > > > >> -----Original Message----- > > >> From: Steve Wise [mailto:swise at opengridcomputing.com] > > >> Sent: Wednesday, November 21, 2007 1:07 PM > > >> To: Kanevsky, Arkady > > >> Cc: Glenn Grundstrom; Leonid Grossman; openib-general at openib.org > > >> Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal > > >> > > >> Comments in-line below... > > >> > > >> > > >> Kanevsky, Arkady wrote: > > >> > > >>> Group, > > >>> > > >>> > > >>> below is proposal on how to resolve peer-to-peer > > iWARP CM issue > > >>> discovered at interop event. > > >>> > > >>> > > >>> The main issue is that MPA spec (relevant portion of > > >>> > > >> IETF RFC 5044 > > >> > > >>> is below) require that > > >>> > > >>> > > >>> connection initiator send first message over the > > >>> > > >> established connection. > > >> > > >>> Multiple MPI implementations and several other apps use > > >>> > > >> peer-to-peer > > >> > > >>> model. > > >>> > > >>> > > >>> So rather then forcing all of them to do it on their > > >>> > > >> own, which will > > >> > > >>> not help with > > >>> > > >>> > > >>> interop between different implementations, the goal > > is to extend > > >>> lower layers to provide it. > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> Our first idea was to leave MPA protocol untouched and > > >>> > > >> try to solve > > >> > > >>> this problem > > >>> > > >>> > > >>> in iw_cm. But there are too many complications to it. 
> > First, in > > >>> order to adhere to RFC5044 > > >>> > > >>> > > >>> initiator must send first FPDU and responder process > > >>> > > >> it. But since > > >> > > >>> the connection is already > > >>> > > >>> > > >>> established processing FPDU involves ULP on whose behalf the > > >>> connection is created. > > >>> > > >>> > > >>> So either initiator sends a message which generates > > >>> > > >> completion on > > >> > > >>> responder CQ, thus visible > > >>> > > >>> > > >>> to ULP, or not. > > >>> > > >> > > >> > > >>> In the later case, the only op which can do it is > > >>> RDMA one, which means > > >>> > > >>> > > >>> that responder somehow provided initiator S-tag which > > >>> > > >> it can use. > > >> > > >>> So, this is an extension > > >>> > > >>> > > >>> to MPA, probably using private data. And that responder upon > > >>> receiving it destroy this S-tag. > > >>> > > >>> > > >>> In any case this is an extension of MPA. > > >>> > > >>> > > >> This stag exchange isn't needed if this RDMA op is a 0B READ. > > >> The responder waits for that 0B read and only indicates > the rdma > > >> connection is established to its ULP when it replies to the 0B > > >> read. In this scenario, the responder/server side > doesn't consume > > >> any CQ resources. > > >> But it would require an IRD of at least 1 to be configured > > on the QP. > > >> The initiator still requires an SQ entry, and possibly a > CQ entry, > > >> for initiating the 0B read and handling completion. > > >> But its perhaps a little less painful than doing a SEND/RECV > > >> exchange. The read wr could be unsignaled so that it won't > > >> generate a CQE. But it still consumes an SQ WR slot so the SQ > > >> would have to be sized to allow this extra WR. And I > guess the CQ > > >> would also need to be sized accordingly in case the read failed. > > >> > > >> > > >>> In the former, Send is used but this requires a buffer > > >>> > > >> to be posted > > >> > > >>> to CQ. 
But since > > >>> > > >>> > > >>> the same CQ (or SharedCQ) can be used by other > > >>> > > >> connections at the > > >> > > >>> same time it can cause > > >>> > > >>> > > >>> the responder CM posted buffer to be consumed by other > > >>> > > >> connection. > > >> > > >>> This is not acceptable. > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> So new we consider extension to MPA protocol. > > >>> > > >>> > > >>> The goal is to be completely backwards compatible to > > >>> > > >> existing version 1. > > >> > > >>> In a nutshell, use a "flag" in the MPA request message which > > >>> indicates that > > >>> > > >>> > > >>> "ready to receive" message will be send by requestor upon > > >>> receiving > > >>> > > >>> > > >>> MPA response message with connection acceptance. > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> here are the changes to IETF RFC5044 > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> 1. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 > > >>> > > >> 2 3 4 5 6 7 8 > > >> > > >>> 9 0 1 > > >>> > > >>> > > >> > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 > > >> > > >>> | | + Key (16 bytes containing "MPA ID Req Frame") + 4 > > >>> > > >> | (4D 50 41 > > >> > > >>> 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 > > >>> > > >> bytes containing > > >> > > >>> "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 > > >>> > > >> 70 20 46 72 61 > > >> > > >>> 6D 65) | + Or (16 bytes containing "MPA ID Rtr Frame") > > >>> > > >> + 12 | (4D 50 > > >> > > >>> 41 20 49 44 20 52 74 52 20 46 72 61 6D 65) | + > > >>> > > >>> > > >> > > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 > > >> > > >>> |M|C|R|S| Res | Rev | PD_Length | > > >>> > > >>> > > >> > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | > > >> > > >>> | ~ ~ ~ Private Data ~ | | | > > >>> > > >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | > > >> > > >>> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> 2. 
S: indicator in the Req frame whether or not > > >>> > > >> Requestor will send > > >> > > >>> Rtr frame. > > >>> > > >>> > > >>> In Req frame, if set to 1 then Rtr frame will > > be sent if > > >>> responder > > >>> > > >>> > > >>> sends Rep frame with accept bit set. 0 indicate > > >>> > > >> that Rtr frame > > >> > > >>> will not be sent. > > >>> > > >>> > > >>> In Rep frame, 0 means that Responder cannot support > > >>> > > >> Rtr frame, > > >> > > >>> while 1 that it is and is waiting for it. > > >>> > > >>> > > >>> (While my preference is to handle this as MPA > > >>> > > >> protocol version > > >> > > >>> matching rules, > > >>> > > >>> > > >>> proposed method will provide complete backwards > > >>> > > >> compatibility) > > >> > > >>> Unused by Rtr frame. That is set to 0 in Rtr frame > > >>> > > >> and ignored > > >> > > >>> by responder. > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> All other bits M,C,R and remainder of Res treated > > >>> > > >> as in MPA ver 1. > > >> > > >>> > > >>> > > >>> > > >>> Rtr frame adhere to C bit as specified in Rep frame > > >>> > > >>> > > >>> > > >> First, the RTR frame _must_ be an FPDU for this to work. > > >> Thus it violates the DDP/RDMAP specs because it is an known > > >> DDP/RDMAP opcode. > > >> > > >> Second, assuming the RTR frame is sent as an FPDU, then > this won't > > >> work with existing RNIC HW. The HW will post an async error > > >> because the incoming DDP/RDMAP opcode is unknown. > > >> > > >> The only way I see that we can fix this for the existing > rnic HW is > > >> to come up with some way to send a valid RDMAP message from the > > >> initiator to the responder under the covers -and- have the > > >> responder only indicate that the connection is established when > > >> that FPDU is received. > > >> > > >> Chelsio cannot support this hack via a 0B write, but the could > > >> support a 0B read or send/recv exchange. 
But as you > indicate, this > > >> is very painful and perhaps impossible to do without > impacting the > > >> ULP and breaking verbs semantics. > > >> > > >> (that's why we punted on this a year ago :) > > >> > > >> > > >> Steve. > > >> > > >> _______________________________________________ > > >> general mailing list > > >> general at lists.openfabrics.org > > >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > >> > > >> To unsubscribe, please visit > > >> http://openib.org/mailman/listinfo/openib-general > > >> > > >> > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From rdreier at cisco.com Tue Nov 27 07:32:06 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 27 Nov 2007 07:32:06 -0800 Subject: [ofa-general] [PATCH] ipoib: Bug fix in ipoib_poll - resend In-Reply-To: <1196158646.21753.152.camel@mtls03> (Eli Cohen's message of "Tue, 27 Nov 2007 12:17:26 +0200") References: <1196158646.21753.152.camel@mtls03> Message-ID: > > > + if (unlikely(n < 1)) > > > + break; > > > > > > for (i = 0; i < n; i++) { > > > struct ib_wc *wc = priv->ibwc + i; > > > The 'for' loop (followed by the "if (n != t) break" check) > > should take care of this, isn't it? > > Oh you're saying that since this is signed arithmetic we should never > enter the for loop. I saw a problem when I worked on some patch and that > seemed to solve the problem... What does signed arithmetic have to do with anything? I don't see how your patch changes the behavior at all: if the condition (n < 1) ever tests as true then the for loop cannot be entered, and since t is always at least 1, the condition (n != t) will be true and we'll break out of the while loop anyway. - R. 
From tziporet at mellanox.co.il Tue Nov 27 07:35:50 2007 From: tziporet at mellanox.co.il (Tziporet Koren) Date: Tue, 27 Nov 2007 17:35:50 +0200 Subject: [ofa-general] Agenda for the OFED meeting today Message-ID: <6C2C79E72C305246B504CBA17B5500C90282E3BB@mtlexch01.mtl.com> Agenda for OFED meeting today: 1. Beta testing status 2. Modules progress toward RC1 next week 3. Open issues Tziporet From sashak at voltaire.com Tue Nov 27 07:51:30 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Tue, 27 Nov 2007 15:51:30 +0000 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <829ded920711262018o6f216d8ei69e103813f90ccef@mail.gmail.com> References: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> <20071126190312.GC14894@sashak.voltaire.com> <829ded920711262018o6f216d8ei69e103813f90ccef@mail.gmail.com> Message-ID: <20071127155130.GC26160@sashak.voltaire.com> On 09:48 Tue 27 Nov , Keshetti Mahesh wrote: > > Can anyone tell me who is the maintainer of ibdmchk etc. utilities ? It is part of ibutils. > I have some queries related to ibdmchk. > Recently I have simulated an irregular network topology with the ibsim > and ran opensm with LASH routing algorithm enabled. Later when I run ibdmchk > on the simulator it found credit loops. Is there any problem in LASH > implementation > or in the ibdmchk implementation? LASH resolves credit loops by using different VLs, I don't think ibdmchk takes this into account, but don't know for sure. Sasha From swise at opengridcomputing.com Tue Nov 27 07:45:53 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 27 Nov 2007 09:45:53 -0600 Subject: [ofa-general] Re: [ewg] Agenda for the OFED meeting today In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E3BB@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C90282E3BB@mtlexch01.mtl.com> Message-ID: <474C3BB1.1060004@opengridcomputing.com> I have a conflicting meeting and cannot attend today's call. 
cxgb3 status: There is a series of patches merged upstream that we need to pull into ofed-1.2.5.3 and ofed-1.3. I'm working on these still. Should have them by the end of the week. These are key chelsio. I'll regression test beta this week also. Thanks, Steve. Tziporet Koren wrote: > Agenda for OFED meeting today: > > 1. Beta testing status > 2. Modules progress toward RC1 next week > 3. Open issues > > Tziporet > > > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg From hrosenstock at xsigo.com Tue Nov 27 07:52:06 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Tue, 27 Nov 2007 07:52:06 -0800 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <20071127155130.GC26160@sashak.voltaire.com> References: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> <20071126190312.GC14894@sashak.voltaire.com> <829ded920711262018o6f216d8ei69e103813f90ccef@mail.gmail.com> <20071127155130.GC26160@sashak.voltaire.com> Message-ID: <1196178726.26651.299.camel@hrosenstock-ws.xsigo.com> On Tue, 2007-11-27 at 15:51 +0000, Sasha Khapyorsky wrote: > On 09:48 Tue 27 Nov , Keshetti Mahesh wrote: > > > > Can anyone tell me who is the maintainer of ibdmchk etc. utilities ? > > It is part of ibutils. ibutils maintainer is Oren Kladnitsky orenk at dev.mellanox.co.il Not sure if he monitors this list. > > I have some queries related to ibdmchk. > > Recently I have simulated an irregular network topology with the ibsim > > and ran opensm with LASH routing algorithm enabled. Later when I run ibdmchk > > on the simulator it found credit loops. Is there any problem in LASH > > implementation > > or in the ibdmchk implementation? > > LASH resolves credit loops by using different VLs, I don't think ibdmchk > takes this into account, but don't know for sure. I also think ibdmchk needs some support to handle LASH. 
I don't think it is currently supported by it (although that is not documented AFAIK). -- Hal > Sasha > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From xhejtman at ics.muni.cz Tue Nov 27 08:08:03 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Tue, 27 Nov 2007 17:08:03 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> Message-ID: <20071127160803.GD4365@ics.muni.cz> Hello, just found, that OFED 1.3a with 2.6.23 kernel runs at 2/3 speed compared to 2.6.23 kernel with built in driver. Any reason for this? -- Lukáš Hejtmánek From eli at mellanox.co.il Tue Nov 27 08:17:32 2007 From: eli at mellanox.co.il (Eli Cohen) Date: Tue, 27 Nov 2007 18:17:32 +0200 Subject: [ofa-general] [PATCH] ipoib: Bug fix in ipoib_poll - resend In-Reply-To: References: <1196158646.21753.152.camel@mtls03> Message-ID: <1196180252.21753.181.camel@mtls03> On Tue, 2007-11-27 at 07:32 -0800, Roland Dreier wrote: > > > > + if (unlikely(n < 1)) > > > > + break; > > > > > > > > for (i = 0; i < n; i++) { > > > > struct ib_wc *wc = priv->ibwc + i; > > > > > The 'for' loop (followed by the "if (n != t) break" check) > > > should take care of this, isn't it? > > > > Oh you're saying that since this is signed arithmetic we should never > > enter the for loop. I saw a problem when I worked on some patch and that > > seemed to solve the problem... > > What does signed arithmetic have to do with anything? 
I don't see how > your patch changes the behavior at all: if the condition (n < 1) ever > tests as true then the for loop cannot be entered, and since t is > always at least 1, the condition (n != t) will be true and we'll break > out of the while loop anyway. > I mean that I agree that the patch does not fix anything since even if poll_cq returns a negative number, "i < n" in the for loop will cause the block to not be executed (since "0 < negative number" is false). From fzago at systemfabricworks.com Tue Nov 27 08:34:00 2007 From: fzago at systemfabricworks.com (frank zago) Date: Tue, 27 Nov 2007 10:34:00 -0600 Subject: [ofa-general] [PATCH] manpage for ibv_get_cq_event Message-ID: <474C46F8.4090303@systemfabricworks.com> Hello, The code sample in ibv_get_cq_event is missing the case where no wc is available. Patch attached. Regards, Frank. -------------- next part -------------- A non-text attachment was scrubbed... Name: ibv_get_cq_event.diff Type: text/x-patch Size: 399 bytes Desc: not available URL: From tziporet at dev.mellanox.co.il Tue Nov 27 08:51:48 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 27 Nov 2007 18:51:48 +0200 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <20071127160803.GD4365@ics.muni.cz> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071127160803.GD4365@ics.muni.cz> Message-ID: <474C4B24.4080809@mellanox.co.il> Lukas Hejtmanek wrote: > Hello, > > just found, that OFED 1.3a with 2.6.23 kernel runs at 2/3 speed compared to > 2.6.23 kernel with built in driver. Any reason for this? > Which benchmark? Which HCA? Is it the same with ofed beta release? 
Thanks, Tziporet From xhejtman at ics.muni.cz Tue Nov 27 09:17:25 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Tue, 27 Nov 2007 18:17:25 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <474C4B24.4080809@mellanox.co.il> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071127160803.GD4365@ics.muni.cz> <474C4B24.4080809@mellanox.co.il> Message-ID: <20071127171725.GE4365@ics.muni.cz> On Tue, Nov 27, 2007 at 06:51:48PM +0200, Tziporet Koren wrote: >> just found, that OFED 1.3a with 2.6.23 kernel runs at 2/3 speed compared to >> 2.6.23 kernel with built in driver. Any reason for this? >> > Which benchmark? ib_rdma_bw ib_send_bw ibv_uc_pingpong > Which HCA? Mellanox InfiniBand HCA, HCA.Cheetah-DDR.20. > Is it the same with ofed beta release? Did you mean 1.3b? I have not tried it. -- Lukáš Hejtmánek From sean.hefty at intel.com Tue Nov 27 09:22:30 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Tue, 27 Nov 2007 09:22:30 -0800 Subject: [ofa-general] RE: [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <15ddcffd0711270435t12a18dc3waac2596b3884ac72@mail.gmail.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com> <4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com> <47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> <15ddcffd0711270435t12a18dc3waac2596b3884ac72@mail.gmail.com> Message-ID: <000001c8311a$176cdbe0$63248686@amr.corp.intel.com> >Following a question from a librdmacm user, I relalized this is not >commited yet... please same me from having to explain all of it >again... can you commit? These have been committed to master branch. 
- Sean From tziporet at dev.mellanox.co.il Tue Nov 27 09:24:55 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 27 Nov 2007 19:24:55 +0200 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <20071127171725.GE4365@ics.muni.cz> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071127160803.GD4365@ics.muni.cz> <474C4B24.4080809@mellanox.co.il> <20071127171725.GE4365@ics.muni.cz> Message-ID: <474C52E7.5040703@mellanox.co.il> Lukas Hejtmanek wrote: > On Tue, Nov 27, 2007 at 06:51:48PM +0200, Tziporet Koren wrote: > >>> just found, that OFED 1.3a with 2.6.23 kernel runs at 2/3 speed compared to >>> 2.6.23 kernel with built in driver. Any reason for this? >>> >>> >> Which benchmark? >> > > ib_rdma_bw > ib_send_bw > ibv_uc_pingpong > > >> Which HCA? >> > > Mellanox InfiniBand HCA, HCA.Cheetah-DDR.20. > > >> Is it the same with ofed beta release? >> > > Did you mean 1.3b? I have not tried it. > Is this issue related to regular Linux? 
(I ask since the first mail thread started with Xen issues) Tziporet From xhejtman at ics.muni.cz Tue Nov 27 09:30:24 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Tue, 27 Nov 2007 18:30:24 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <474C52E7.5040703@mellanox.co.il> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071127160803.GD4365@ics.muni.cz> <474C4B24.4080809@mellanox.co.il> <20071127171725.GE4365@ics.muni.cz> <474C52E7.5040703@mellanox.co.il> Message-ID: <20071127173024.GF4365@ics.muni.cz> On Tue, Nov 27, 2007 at 07:24:55PM +0200, Tziporet Koren wrote: > Is this issue related to regular Linux? (I ask since the first mail thread > started with Xen issues) Yes, first, we suspected 2.6.18 Xen kernel, then we found that 2.6.18 non-Xen kernel is the same and finally, we found out that 2.6.23 kernel with ofed drivers is slow compared to the same kernel with built in drivers (i.e., the drivers shipped with vanilla kernel). 
-- Lukáš Hejtmánek From tziporet at dev.mellanox.co.il Tue Nov 27 12:12:33 2007 From: tziporet at dev.mellanox.co.il (Tziporet Koren) Date: Tue, 27 Nov 2007 22:12:33 +0200 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <20071127171725.GE4365@ics.muni.cz> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071127160803.GD4365@ics.muni.cz> <474C4B24.4080809@mellanox.co.il> <20071127171725.GE4365@ics.muni.cz> Message-ID: <474C7A31.8050903@mellanox.co.il> Hi, Need to know why the performance on OFED is worse than the vanilla kernel. Jack - please check this. Sagi - do you have performance results of OFED 1.3 with Sinai DDR? Can we compare them to OFED 1.2.5? thanks, Tziporet Lukas Hejtmanek wrote: > On Tue, Nov 27, 2007 at 06:51:48PM +0200, Tziporet Koren wrote: > >>> just found, that OFED 1.3a with 2.6.23 kernel runs at 2/3 speed compared to >>> 2.6.23 kernel with built in driver. Any reason for this? >>> >>> >> Which benchmark? >> > > ib_rdma_bw > ib_send_bw > ibv_uc_pingpong > > >> Which HCA? >> > > Mellanox InfiniBand HCA, HCA.Cheetah-DDR.20. > > >> Is it the same with ofed beta release? >> > > Did you mean 1.3b? I have not tried it. 
> > From pradeeps at linux.vnet.ibm.com Tue Nov 27 14:45:18 2007 From: pradeeps at linux.vnet.ibm.com (Pradeep Satyanarayana) Date: Tue, 27 Nov 2007 14:45:18 -0800 Subject: [ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs In-Reply-To: <4744841A.2090801@linux.vnet.ibm.com> References: <200710261533.UlO70kYhcNvuPmut@cisco.com> <4727B27D.2070207@linux.vnet.ibm.com> <4727C18F.3010509@linux.vnet.ibm.com> <472A5351.5020106@linux.vnet.ibm.com> <472B5567.1020301@linux.vnet.ibm.com> <472B61F1.5060305@linux.vnet.ibm.com> <472BD7F2.70200@linux.vnet.ibm.com> <47311F34.6030004@linux.vnet.ibm.com> <47322E87.60409@linux.vnet.ibm.com> <4744841A.2090801@linux.vnet.ibm.com> Message-ID: <474C9DFE.6020606@linux.vnet.ibm.com> Pradeep Satyanarayana wrote: > I downloaded the latest tree and reviewed the code and it looks good. For > some reason I have not been able to actually run the tests. I will do that > next week and confirm that all is OK. I ran several iterations of netperf and it looks good. > > On another note, along with the main non srq patch I had submitted a > secondary patch that would switch to UD mode if no connected mode QPs > are available. I have not updated that patch in a while now. If there is > any interest in that please let me know and I can submit an updated patch. Please let me know if there is any interest in this patch. 
Pradeep From caitlin.bestler at gmail.com Tue Nov 27 15:11:00 2007 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Tue, 27 Nov 2007 15:11:00 -0800 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> Message-ID: <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> On Nov 27, 2007 6:54 AM, Kanevsky, Arkady wrote: > ULP can post recvs before connection is established but not to send > queue > prior to connection establishment. > ULP can post sends only after it is notified that the connection is established. The issue is when the iWARP layer can issue this notification. If the MPA layer implements fencing on its own, then the notification can be provided immediately after the MPA Request/Response exchange. If not, it must wait for the first MPA frame. The problem is that implementations that adhere too closely to the RDMAC verbs can obtain no information about the connection unless there is a CQE producing event. From swise at opengridcomputing.com Tue Nov 27 15:13:31 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 27 Nov 2007 17:13:31 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> Message-ID: <474CA49B.3080806@opengridcomputing.com> Caitlin Bestler wrote: > On Nov 27, 2007 6:54 AM, Kanevsky, Arkady wrote: >> ULP can post recvs before connection is established but not to send >> queue >> prior to connection establishment. >> > > > ULP can post sends only after it is notified that the connection is established. > > The issue is when the iWARP layer can issue this notification. 
> > If the MPA layer implements fencing on its own, then the notification can > be provided immediately after the MPA Request/Response exchange. > > If not, it must wait for the first MPA frame. The problem is that > implementations > that adhere to closely to the RDMAC verbs can obtain no information about > the connection unless there is a CQE producing event. The idea for this "hack" is that the passive side (the side that sends the MPA response) will hold off posting the ESTABLISHED event to the rdma-cm ULP until after it receives this 0B Read Request from the client... From rdreier at cisco.com Tue Nov 27 15:28:08 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 27 Nov 2007 15:28:08 -0800 Subject: [ofa-general] MT25418 In-Reply-To: <200711092141.51243.bs@q-leap.de> (Bernd Schubert's message of "Fri, 9 Nov 2007 21:41:50 +0100") References: <200711082141.53113.bs@q-leap.de> <200711092115.51939.bs@q-leap.de> <200711092141.51243.bs@q-leap.de> Message-ID: > Yes exactly and reproducable on all 6 nodes with connectX presently here in > our test lab. > Just by accident I first always had connected port 2. Shortly before I already > thought it doesn't work at all, I tried the other port... Sorry for the slow reply. I just swapped cables on one of my test systems, and IPoIB works fine for me on port 2 of my ConnectX HCA. This is with the kernel around 2.6.24-rc1 that happens to be running there. So I'm not sure what's different about your system. I would suggest raising this with your HCA vendor since perhaps you have a bad batch of HCAs or old firmware or something. - R. 
From rdreier at cisco.com Tue Nov 27 15:40:28 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 27 Nov 2007 15:40:28 -0800 Subject: [ofa-general] Re: [PATCH] ipoib: fix kernel Oops resulting from xmit when priv->broadcast is NULL In-Reply-To: <200711261041.19402.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Mon, 26 Nov 2007 10:41:19 +0200") References: <200711261041.19402.jackm@dev.mellanox.co.il> Message-ID: thanks, applied From mshefty at ichips.intel.com Tue Nov 27 15:41:19 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Tue, 27 Nov 2007 15:41:19 -0800 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474CA49B.3080806@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> Message-ID: <474CAB1F.20506@ichips.intel.com> > The idea for this "hack" is that the passive side (the side that sends > the MPA response) will hold off posting the ESTABLISHED event to the > rdma-cm ULP until after it receives this 0B Read Request from the client... What is notifying the passive side that the active side has completed a read request, and that it's okay to start sending? Also, at least with IB, a QP can be configured on creation to always generate a CQ entry for all WRs posted to the send queue. I don't know if iWarp follows this same model. 
- Sean From caitlin.bestler at gmail.com Tue Nov 27 15:41:33 2007 From: caitlin.bestler at gmail.com (Caitlin Bestler) Date: Tue, 27 Nov 2007 15:41:33 -0800 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474CA49B.3080806@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> Message-ID: <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> On Nov 27, 2007 3:13 PM, Steve Wise wrote: > > Caitlin Bestler wrote: > > On Nov 27, 2007 6:54 AM, Kanevsky, Arkady wrote: > >> ULP can post recvs before connection is established but not to send > >> queue prior to connection establishment. > >> > > > > > > ULP can post sends only after it is notified that the connection is established. > > > > The issue is when the iWARP layer can issue this notification. > > > > If the MPA layer implements fencing on its own, then the notification can > > be provided immediately after the MPA Request/Response exchange. > > > > If not, it must wait for the first MPA frame. The problem is that > > implementations that adhere to closely to the RDMAC verbs can obtain > > no information about the connection unless there is a CQE producing event. > > The idea for this "hack" is that the passive side (the side that sends > the MPA response) will hold off posting the ESTABLISHED event to the > rdma-cm ULP until after it receives this 0B Read Request from the client... > The problem is that this solution is being applied at the wrong layer. MPA is not the source of the problem, but rather the RDMAC layer verbs. The solution needs to be a verb-layer solution, not an MPA layer solution. 
Steve's last comment states the problem well: we are trying to enable the Verbs layer on the Passive side to generate the Established event, and if at all possible to do so in a way that places no requirements on the application layer. I believe it is possible to do so without making any modifications to MPA. The MPA protocol requirement is a safeguard against receiving an MPA Frame before the MPA Response frame. MPA does not have or need an RTR message, because the MPA RFC allows *any* MPA frame from the active side to effectively acknowledge receipt of the MPA Response. That includes a zero-length RDMA Write. An iWARP implementation can (perhaps SHOULD) implement an "MPA Fenced" state on the passive side that is cleared on receipt of any MPA frame. With such a "MPA Fence" feature, the CM layer can generate the "Connection Established" event as soon as it sends the MPA Response and the Passive-side ULP will be able to post to the SQ, the messages just won't go the wire until something is received. Meanwhile the Active Side must ensure that *some* MPA frame is sent immediately after the MPA Response is received. If it has traffic ready to go it can simply send that. If it does not, it can use a zero-length write. A zero-length write is totally transparent to the ULP at both ends. But that will only work for *some* implementations. On others a zero length RDMA Read is needed to unjam things. That's almost transparent, but not totally so since it temporarily uses an RDMA Read credit. And while nobody has spoken up to say *they* have that problem, I would not be surprised if there are implementations where nothing less than a full ULP "nop" message will suffice. So keeping the fix at the verbs layer, and allowing the minimal extra effort to be controlled by the Passive layer itself, suggests that the Passive side simply encode its MPA-unjam-action-required in the OFA standardized portion of the Private Data. 
Encodings would include: - Any MPA Frame, including a zero-length RDMA Write will unjam the passive side SendQ. - An untagged message or a zero-length RDMA Read will work. - Only an untagged message will work. In the latter cases the middleware will have to play games with standin receive WQEs and only posting the actual receive WQEs to the QP after the MPA fence has been unjammed. That isn't pretty, but if your hardware is fixed then it's either that or make the application deal with the problem. I have a hunch that the MPI developers would not like that option at all. How this differs from what Arkady proposed is that it avoids making any changes to MPA, but instead only makes use of the OFA defined portion of the Private Data. Further it allows use of a zero-length RDMA Write when that is sufficient to break the MPA logjam. A zero-length RDMA Write, unlike a zero-length RDMA Read, is *totally* transparent to the ULP. From swise at opengridcomputing.com Tue Nov 27 15:47:56 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 27 Nov 2007 17:47:56 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474CAB1F.20506@ichips.intel.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <474CAB1F.20506@ichips.intel.com> Message-ID: <474CACAC.80104@opengridcomputing.com> Sean Hefty wrote: >> The idea for this "hack" is that the passive side (the side that sends >> the MPA response) will hold off posting the ESTABLISHED event to the >> rdma-cm ULP until after it receives this 0B Read Request from the >> client... > > What is notifying the passive side that the active side has completed a > read request, and that it's okay to start sending? > The iwarp provider driver will only post the IW_CM_ESTABLISHED event after receiving the read request. 
For the Chelsio provider, this will require changes to the rnic firmware and the driver/library to support all this. I haven't thought through exactly how this should be implemented. For instance, the provider library poll function needs to deal with this 0B read completion and note that it is this special connection setup 0B read and thus hide the completion from the user call poll()... > Also, at least with IB, a QP be configured on creation to always > generate a CQ entry for all WRs posted to the send queue. I don't know > if iWarp follows this same model. After thinking about this more, I think we do want to make this 0B read signaled. Then we can post the IW_CM_ESTABLISHED event on the client side when the read request completes. So from the RDMA application's perspective, the connection never gets setup until this 0B read is completed, and that's really what we want... Steve. From rdreier at cisco.com Tue Nov 27 15:48:06 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 27 Nov 2007 15:48:06 -0800 Subject: [ofa-general] i got kernel oops in ib_umad when executing ULPs tests In-Reply-To: <474BE237.8050602@dev.mellanox.co.il> (Dotan Barak's message of "Tue, 27 Nov 2007 11:24:07 +0200") References: <474BE237.8050602@dev.mellanox.co.il> Message-ID: So the oops is in RIP: 0010:[] {:ib_umad:dequeue_send+26} ie removing a send from the list of pending userspace sends in list_del(). The address in RBX, 3f40a6f32b5a2004, looks totally corrupt. This is happening from the send_handler() called back from ib_unregister_mad_agent()... is it possible that there's something bogus leading to a double callback or something? I recall we already saw some bugs in this area before. - R. 
From swise at opengridcomputing.com Tue Nov 27 15:58:33 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 27 Nov 2007 17:58:33 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> Message-ID: <474CAF29.1020106@opengridcomputing.com> Caitlin Bestler wrote: > On Nov 27, 2007 3:13 PM, Steve Wise wrote: >> Caitlin Bestler wrote: >>> On Nov 27, 2007 6:54 AM, Kanevsky, Arkady wrote: >>>> ULP can post recvs before connection is established but not to send >>>> queue prior to connection establishment. >>>> >>> >>> ULP can post sends only after it is notified that the connection is established. >>> >>> The issue is when the iWARP layer can issue this notification. >>> >>> If the MPA layer implements fencing on its own, then the notification can >>> be provided immediately after the MPA Request/Response exchange. >>> >>> If not, it must wait for the first MPA frame. The problem is that >>> implementations that adhere to closely to the RDMAC verbs can obtain >>> no information about the connection unless there is a CQE producing event. >> The idea for this "hack" is that the passive side (the side that sends >> the MPA response) will hold off posting the ESTABLISHED event to the >> rdma-cm ULP until after it receives this 0B Read Request from the client... >> > > The problem is that this solution is being applied at the wrong layer. > > MPA is not the source of the problem, but rather the RDMAC layer verbs. > The solution needs to be a verb-layer solution, not an MPA layer solution. > This isn't being solved at the MPA layer. 
It is being solved as a protocol exchange done after the MPA exchanges (and after the connections are transitioned into FPDU mode). Remember: This is a _hack_ to get our current generation of rnics to support peer-to-peer _without_ impacting the rdma applications (like IMPI and OMPI). > Steve's last comment states the problem well: we are trying to enable the > Verbs layer on the Passive side to generate the Established event, and > if at all possible to do so in a way that places no requirements on the > application layer. > > I believe it is possible to do so without making any modifications to MPA. > Yes. > The MPA protocol requirement is a safeguard against receiving an MPA > Frame before the MPA Response frame. MPA does not have or need an > RTR message, because the MPA RFC allows *any* MPA frame from the > active side to effectively acknowledge receipt of the MPA Response. > Yes, but it puts the onus on the ULP to deal with this. In our current implementation model, that ULP is the top end application. > That includes a zero-length RDMA Write. > > An iWARP implementation can (perhaps SHOULD) implement an "MPA > Fenced" state on the passive side that is cleared on receipt of any MPA > frame. With such a "MPA Fence" feature, the CM layer can generate the > "Connection Established" event as soon as it sends the MPA Response > and the Passive-side ULP will be able to post to the SQ, the messages > just won't go the wire until something is received. > > Meanwhile the Active Side must ensure that *some* MPA frame is sent > immediately after the MPA Response is received. If it has traffic ready to > go it can simply send that. If it does not, it can use a zero-length write. > A zero-length write is totally transparent to the ULP at both ends. > > But that will only work for *some* implementations. On others a zero > length RDMA Read is needed to unjam things. That's almost transparent, > but not totally so since it temporarily uses an RDMA Read credit. > Right. 
Chelsio needs a Read vs a Write because the FW and driver don't detect the incoming 0B write so they cannot drive the ESTABLISHED event on that. > And while nobody has spoken up to say *they* have that problem, I would > not be surprised if there are implementations where nothing less than a full > ULP "nop" message will suffice. > > So keeping the fix at the verbs layer, and allowing the minimal extra > effort to be controlled by the Passive layer itself, suggests that the > Passive side simply encode its MPA-unjam-action-required in the > OFA standardized portion of the Private Data. Encodings would > include: > > - Any MPA Frame, including a zero-length RDMA Write will unjam > the passive side SendQ. > - An untagged message or a zero-length RDMA Read will work. > - Only an untagged message will work. > So you're advocating adding a standardized header to the private data to indicate what the passive side needs. While we're at it, lets add in ORD/IRD ;-) > In the latter cases the middleware will have to play games with standin > receive WQEs and only posting the actual receive WQEs to the QP > after the MPA fence has been unjammed. That isn't pretty, but if your > hardware is fixed then it's either that or make the application deal with > the problem. I have a hunch that the MPI developers would not like that > option at all. > > How this differs from what Arkady proposed is that it avoids making any > changes to MPA, but instead only makes use of the OFA defined portion > of the Private Data. Further it allows use of a zero-length RDMA Write > when that is sufficient to break the MPA logjam. A zero-length RDMA > Write, unlike a zero-length RDMA Read, is *totally* transparent to the ULP. For the short term, I claim we just implement this as part of linux iwarp connection setup (mandating a 0B read be sent from the active side). 
Your proposal to add meta-data to the private data requires a standards change anyway and is, IMO, the 2nd phase of this whole enchilada... Steve. From rdreier at cisco.com Tue Nov 27 15:59:05 2007 From: rdreier at cisco.com (Roland Dreier) Date: Tue, 27 Nov 2007 15:59:05 -0800 Subject: [ofa-general] [PATCH 0/6] nes: Cosmetic changes; support virtual WQs and PPC In-Reply-To: <20071114221453.3ADD5E609F0@openfabrics.org> (Glenn Grundstrom's message of "Wed, 14 Nov 2007 14:14:53 -0800 (PST)") References: <20071114221453.3ADD5E609F0@openfabrics.org> Message-ID: Arghh... these don't apply to my "neteffect" branch, so you've lost the cleanup work that I did (eg trailing whitespace removal, formatting fixes, etc). I thought we agreed that I would pull the driver into my tree for merging into 2.6.25 and we would work on it there. Anyway. I'll reimport the nes driver into a new "neteffect" branch in my tree (and maybe recreate the work I did). In the future please post updates as incremental patches to that tree. And when I say incremental patches I mean reviewable things that are split up conceptually and not by which files they touch. (That is, patches like "Fix types of workqueue work functions" rather than "Fix nes_cm.c") Thanks... 
From caitlin.bestler at neterion.com Tue Nov 27 16:20:52 2007 From: caitlin.bestler at neterion.com (Caitlin Bestler) Date: Tue, 27 Nov 2007 16:20:52 -0800 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474CAF29.1020106@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com> Message-ID: <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> On Nov 27, 2007 3:58 PM, Steve Wise wrote: > > For the short term, I claim we just implement this as part of linux > iwarp connection setup (mandating a 0B read be sent from the active > side). Your proposal to add meta-data to the private data requires a > standards change anyway and is, IMO, the 2nd phase of this whole > enchilada... > > Steve. > I don't see how you can have any solution here that does not require meta-data. For non-peer-to-peer connections neither a zero length RDMA Read or Write should be sent. An extraneous RDMA Read is particularly onerous for a short lived connection that fits the classic active/passive model. So *something* is telling the CMA layer that this connection may need an MPA unjam action. If that isn't meta-data, what is it? Further, the RDMA Read solution is adequate whenever the RDMA Write solution would have been (although at an unnecessary extra cost), but as near as I can determine it is not a complete solution. If the passive side needs an untagged message completion then *something* needs to send it. How can the CM layer (or, I suppose, the ULP itself) know that this untagged NOP message must be sent without meta-data? 
As I see it, if we want to do the minimum that is required, but be certain that it is adequate, we need a per-connection setup meta-data exchange. From swise at opengridcomputing.com Tue Nov 27 16:47:44 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Tue, 27 Nov 2007 18:47:44 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com> <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> Message-ID: <474CBAB0.8020208@opengridcomputing.com> Caitlin Bestler wrote: > On Nov 27, 2007 3:58 PM, Steve Wise wrote: > >> For the short term, I claim we just implement this as part of linux >> iwarp connection setup (mandating a 0B read be sent from the active >> side). Your proposal to add meta-data to the private data requires a >> standards change anyway and is, IMO, the 2nd phase of this whole >> enchilada... >> >> Steve. >> > > I don't see how you can have any solution here that does not require meta-data. > For non-peer-to-peer connections neither a zero length RDMA Read or Write > should be sent. An extraneous RDMA Read is particularly onerous for a short > lived connection that fits the classic active/passive model. So *something* > is telling the CMA layer that this connection may need an MPA unjam action. > If that isn't meta-data, what is it? I assumed the 0B read would _always_ be sent as part of establishing an iWARP connection using linux and the rdma-cm. 
> > Further, the RDMA Read solution is adequate whenever the RDMA Write > solution would have been (although at an unnecessary extra cost), but > as near as I can determine it is not a complete solution. If the passive > side needs an untagged message completion then *something* needs > to send it. How can the CM layer (or, I suppose, the ULP itself) know > that this untagged NOP message must be sent without meta-data? I believe at Reno we had the current rnic vendors all saying a SEND or 0B read will work. So: If someone has current iwarp HW that will _not_ handle this problem by doing the 0B read hack, please speak up now. > > As I see it, if we want to do the minimum that is required, but be certain > that it is adequate, we need a per-connection setup meta-data exchange. Are you going to prototype this? Steve. From ggrundstrom at NetEffect.com Tue Nov 27 17:39:59 2007 From: ggrundstrom at NetEffect.com (Glenn Grundstrom) Date: Tue, 27 Nov 2007 19:39:59 -0600 Subject: [ofa-general] [PATCH 0/6] nes: Cosmetic changes; support virtual WQs and PPC In-Reply-To: References: <20071114221453.3ADD5E609F0@openfabrics.org> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC07A57572@venom2> Or you said I could submit patches to you. One problem with only using your tree is that I need to supply updates and backports for OFED builds. I've cloned Vlad's tree for that. How about this for the future: Let me know when you've made changes and I'll pull and merge your changes into my code. That way I can still provide everything for OFED and patches I create should apply cleanly in your branch. I assume you could also pull from my tree as well, right? If you have a better way to satisfy all we can discuss it. The last set of patches were 99% cosmetic changes and I shouldn't need to submit something that unwieldy again. On that note, I've got another set of patches almost ready to go and I've split them by issue/fix. 
Maybe you should wait to modify anything in the nes directory until I've pushed my changes to the OFA servers. Sorry for the mixup. Thanks, Glenn. > > Arghh... these don't apply to my "neteffect" branch, so you've lost > the cleanup work that I did (eg trailing whitespace removal, > formatting fixes, etc). I thought we agreed that I would pull the > driver into my tree for merging into 2.6.25 and we would work on it > there. > > Anyway. I'll reimport the nes driver into a new "neteffect" branch in > my tree (and maybe recreate the work I did). In the future please > post updates as incremental patches to that tree. And when I say > incremental patches I mean reviewable things that are split up > conceptually and not by which files they touch. (That is, patches > like "Fix types of workqueue work functions" rather than "Fix > nes_cm.c") > > Thanks... > From kliteyn at mellanox.co.il Tue Nov 27 21:51:41 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 28 Nov 2007 07:51:41 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-28:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-27 OpenSM git rev = Mon_Nov_26_08:12:10_2007 [b989216e1ae91e0049ec3d4980cb8e2bdad8ed49] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=520 Pass=520 Fail=0 Pass: 39 Stability IS1-16.topo 39 Pkey IS1-16.topo 39 OsmTest IS1-16.topo 39 OsmStress IS1-16.topo 39 Multicast IS1-16.topo 39 LidMgr IS1-16.topo 13 Stability IS3-loop.topo 13 Stability IS3-128.topo 13 Pkey IS3-128.topo 13 OsmTest IS3-loop.topo 13 OsmTest IS3-128.topo 13 OsmStress IS3-128.topo 13 Multicast IS3-loop.topo 13 Multicast IS3-128.topo 13 LidMgr IS3-128.topo 13 FatTree merge-roots-4-ary-2-tree.topo 13 FatTree merge-root-4-ary-3-tree.topo 13 FatTree gnu-stallion-64.topo 13 FatTree blend-4-ary-2-tree.topo 13 FatTree RhinoDDR.topo 13 FatTree FullGnu.topo 13 FatTree 4-ary-2-tree.topo 
13 FatTree 2-ary-4-tree.topo 13 FatTree 12-node-spaced.topo 13 FTreeFail 4-ary-2-tree-missing-sw-link.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 13 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 13 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From keshetti85-student at yahoo.co.in Tue Nov 27 23:20:13 2007 From: keshetti85-student at yahoo.co.in (Keshetti Mahesh) Date: Wed, 28 Nov 2007 12:50:13 +0530 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <1196178726.26651.299.camel@hrosenstock-ws.xsigo.com> References: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> <20071126190312.GC14894@sashak.voltaire.com> <829ded920711262018o6f216d8ei69e103813f90ccef@mail.gmail.com> <20071127155130.GC26160@sashak.voltaire.com> <1196178726.26651.299.camel@hrosenstock-ws.xsigo.com> Message-ID: <829ded920711272320g64f5992emf2c1e9d16283d22e@mail.gmail.com> > ibutils maintainer is Oren Kladnitsky orenk at dev.mellanox.co.il > Not sure if he monitors this list. Sorry, I actually wanted to know who the developers of the ibadm group of utilities are. > > LASH resolves credit loops by using different VLs, I don't think ibdmchk > > takes this into account, but don't know for sure. Yes, I have verified in ibdmchk that it considers only one VL while checking for credit loops. > I also think ibdmchk needs some support to handle LASH. I don't think it > is currently supported by it (although that is not documented AFAIK). > Is anyone currently working on this part (adding support to ibdmchk to handle LASH) in the OFED community? -Mahesh From monis at voltaire.com Tue Nov 27 23:20:44 2007 From: monis at voltaire.com (Moni Shoua) Date: Wed, 28 Nov 2007 09:20:44 +0200 Subject: [ofa-general] [PATCH ofed-1.3] IB/IPoIB: Restore support for interface statistics Message-ID: <474D16CC.9010807@voltaire.com> While moving to kernel 2.6.24 in OFED, the function for getting interface statistics got lost. 
This is a backport patch to re-enable net device statistics for kernels that do not have the struct net_device_stats in struct netdevice. This patch fixes bug 790. Signed-off-by: Moni Shoua --- diff --git a/kernel_patches/backport/2.6.11/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.11/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.11/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.11_FC4/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.11_FC4/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.11_FC4/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 
15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.12/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.12/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.12/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = 
ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.13/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.13/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.13/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.13_suse10_0_u/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.13_suse10_0_u/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.13_suse10_0_u/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 
15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.14/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.14/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.14/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git 
a/kernel_patches/backport/2.6.15/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.15/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.15/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.15_ubuntu606/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.15_ubuntu606/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.15_ubuntu606/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static 
struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.16/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.16/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.16/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.16_sles10/ipoib_0110_restore_get_stats.patch 
b/kernel_patches/backport/2.6.16_sles10/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.16_sles10/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.16_sles10_sp1/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.16_sles10_sp1/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.16_sles10_sp1/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct 
net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.17/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.17/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.17/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.18-EL5.1/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.18-EL5.1/ipoib_0110_restore_get_stats.patch new file 
mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.18-EL5.1/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.18/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.18/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.18/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void 
ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.18_FC6/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.18_FC6/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.18_FC6/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.18_suse10_2/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.18_suse10_2/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ 
b/kernel_patches/backport/2.6.18_suse10_2/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.19/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.19/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.19/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct 
ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.20/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.20/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.20/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.9_U4/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.9_U4/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U4/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: 
ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; diff --git a/kernel_patches/backport/2.6.9_U5/ipoib_0110_restore_get_stats.patch b/kernel_patches/backport/2.6.9_U5/ipoib_0110_restore_get_stats.patch new file mode 100644 index 0000000..62359a8 --- /dev/null +++ b/kernel_patches/backport/2.6.9_U5/ipoib_0110_restore_get_stats.patch @@ -0,0 +1,26 @@ +Index: ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c +=================================================================== +--- ofa_kernel-1.3.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:16:06.000000000 +0200 ++++ ofa_kernel-1.3/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-11-27 15:27:59.401366315 +0200 +@@ -790,6 +790,13 @@ out: + return NETDEV_TX_OK; + } + ++static struct net_device_stats *ipoib_get_stats(struct net_device *dev) ++{ ++ struct ipoib_dev_priv *priv = netdev_priv(dev); ++ ++ return &priv->stats; ++} ++ + static void ipoib_timeout(struct net_device *dev) + { + struct ipoib_dev_priv *priv = netdev_priv(dev); +@@ -1015,6 +1022,7 @@ static void ipoib_setup(struct 
net_devic + dev->stop = ipoib_stop; + dev->change_mtu = ipoib_change_mtu; + dev->hard_start_xmit = ipoib_start_xmit; ++ dev->get_stats = ipoib_get_stats; + dev->tx_timeout = ipoib_timeout; + dev->hard_header = ipoib_hard_header; + dev->set_multicast_list = ipoib_set_mcast_list; From eli at mellanox.co.il Tue Nov 27 23:43:07 2007 From: eli at mellanox.co.il (Eli Cohen) Date: Wed, 28 Nov 2007 09:43:07 +0200 Subject: [ofa-general] Re: [PATCH ofed-1.3] IB/IPoIB: Restore support for interface statistics In-Reply-To: <474D16CC.9010807@voltaire.com> References: <474D16CC.9010807@voltaire.com> Message-ID: <1196235787.21753.184.camel@mtls03> On Wed, 2007-11-28 at 09:20 +0200, Moni Shoua wrote: > While moving to kernel 2.6.24 in OFED, the function for getting interface > statistics got lost. This is a backport patch to re-enable net device > statistics for kernels that do not have the struct net_device_stats > in struct netdevice. > > This patch fixes bug 790. > > Signed-off-by: Moni Shoua > --- Thanks Moni, we'll push this to ofed. From jackm at dev.mellanox.co.il Wed Nov 28 00:08:03 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 28 Nov 2007 10:08:03 +0200 Subject: [ofa-general] [PATCH] mlx4_core: increase max number of qp's and =?iso-8859-1?q?of=09srq=27s_to?= 128K In-Reply-To: <4743E7D3.8090201@voltaire.com> References: <200711201744.15766.jackm@dev.mellanox.co.il> <4743E7D3.8090201@voltaire.com> Message-ID: <200711281008.03739.jackm@dev.mellanox.co.il> On Wednesday 21 November 2007 10:09, Or Gerlitz wrote: > Why you want to increase the maxima for SRQs as well? a 1:1 ratio > between QPs to SRQs means a broken application design, isn't it? > Not really, for the new XRC qp type. In this case, we will have one XRC connection per multi-process application per host, with a larger number of XRC_SRQs (one per process per host). However, the XRC SRQs act more like RD qps, so we really don't need to increase the default max SRQs. 
I'll post V2 of the patch now. - Jack From jackm at dev.mellanox.co.il Wed Nov 28 00:08:10 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 28 Nov 2007 10:08:10 +0200 Subject: [ofa-general] [PATCH V2] mlx4_core: increase max number of qp's to 128K Message-ID: <200711281008.10521.jackm@dev.mellanox.co.il> mlx4_core: increase max QPs to 128K. With the advent of large clusters which utilize multicore hosts, 64K QPs are not enough. We want to increase the default maxima for QPs to 128K. Signed-off-by: Jack Morgenstein Index: ofa_1_3_dev_kernel/drivers/net/mlx4/main.c =================================================================== --- ofa_1_3_dev_kernel.orig/drivers/net/mlx4/main.c 2007-11-21 17:51:56.000000000 +0200 +++ ofa_1_3_dev_kernel/drivers/net/mlx4/main.c 2007-11-22 10:26:04.000000000 +0200 @@ -76,7 +76,7 @@ static const char mlx4_version[] __devin DRV_VERSION " (" DRV_RELDATE ")\n"; static struct mlx4_profile default_profile = { - .num_qp = 1 << 16, + .num_qp = 1 << 17, .num_srq = 1 << 16, .rdmarc_per_qp = 1 << 4, .num_cq = 1 << 16, From a-alisoj at academyofhealth.com Wed Nov 28 02:33:41 2007 From: a-alisoj at academyofhealth.com (Pamela Kilgore) Date: Wed, 28 Nov 2007 04:33:41 -0600 Subject: [ofa-general] Chatting online Message-ID: <01c83177$da403790$8ab2b05a@a-alisoj> Hello! I am bored this afternoon. I am nice girl that would like to chat with you. Email me at v at TheGlowPuppy.info only, because I am writing not from my personal email. To see some pictures of me. From jackm at dev.mellanox.co.il Wed Nov 28 02:44:20 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 28 Nov 2007 12:44:20 +0200 Subject: [ofa-general] [PATCH] libmlx4: max_recv_wr must be non-zero for non-SRQ QPs Message-ID: <200711281244.20552.jackm@dev.mellanox.co.il> max_recv_wr must also be non-zero for QPs which are not associated with an SRQ.
Signed-off-by: Jack Morgenstein --- Roland, Without this patch, if the userspace caller specifies max_recv_wr = 0 for a non-srq QP, the creation will be rejected in kernel space in file infiniband/hw/mlx4/qp.c, procedure set_rq_size: } else { /* HW requires >= 1 RQ entry with >= 1 gather entry */ ==> NOTE: if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) return -EINVAL; You've set max_recv_sge size to 1, but not max_recv_wr. Jack diff --git a/src/verbs.c b/src/verbs.c index 4e7beff..ec4c6a5 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -367,8 +367,12 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) if (attr->srq) attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0; - else if (attr->cap.max_recv_sge < 1) - attr->cap.max_recv_sge = 1; + else { + if (attr->cap.max_recv_sge < 1) + attr->cap.max_recv_sge = 1; + if (attr->cap.max_recv_wr < 1) + attr->cap.max_recv_wr = 1; + } if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp)) goto err; From vlad at lists.openfabrics.org Wed Nov 28 02:55:39 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Wed, 28 Nov 2007 02:55:39 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071128-0200 daily build status Message-ID: <20071128105540.1660BE60895@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with 2.6.15-23-server Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on i686 with 
linux-2.6.14 Passed on i686 with linux-2.6.13 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16 Passed on x86_64 with linux-2.6.17 Passed on ia64 with linux-2.6.18 Passed on x86_64 with linux-2.6.19 Passed on ppc64 with linux-2.6.14 Passed on powerpc with linux-2.6.13 Passed on ia64 with linux-2.6.17 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.14 Passed on powerpc with linux-2.6.12 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.19 Passed on ia64 with linux-2.6.19 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on ppc64 with linux-2.6.18 Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on x86_64 with linux-2.6.14 Passed on x86_64 with linux-2.6.13 Passed on x86_64 with linux-2.6.22 Passed on ia64 with linux-2.6.13 Passed on powerpc with linux-2.6.15 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.18-53.el5 Passed on ia64 with linux-2.6.16 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on ia64 with linux-2.6.22 Passed on ia64 with linux-2.6.23 Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.15 Passed on ia64 with linux-2.6.21.1 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on x86_64 with linux-2.6.18-8.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on ppc64 with linux-2.6.18-8.el5 Failed: From or.gerlitz at gmail.com Wed Nov 28 03:07:25 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Wed, 28 Nov 2007 13:07:25 +0200 Subject: [ofa-general] Re: [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <000001c8311a$176cdbe0$63248686@amr.corp.intel.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com> <4726EEAC.3070105@voltaire.com> 
<472755C4.10600@ichips.intel.com> <47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> <15ddcffd0711270435t12a18dc3waac2596b3884ac72@mail.gmail.com> <000001c8311a$176cdbe0$63248686@amr.corp.intel.com> Message-ID: <15ddcffd0711280307u7a89c6c2q2854b071f74d9123@mail.gmail.com> On 11/27/07, Sean Hefty wrote: > These have been committed to master branch. OK, got it. Some users have approached me and said that it's unclear from the man pages for some values of the connection param structure what their legal values are. Reviewing this a little, I think we should add the maximum values for the retry_count and rnr_retry_count under the infiniband specific section of the rdma_connect and rdma_accept pages. Also, what about pushing all these documentation changes as a release to OFED 1.3? Or. From Arkady.Kanevsky at netapp.com Wed Nov 28 05:13:04 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 28 Nov 2007 08:13:04 -0500 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> References: <474473DD.3050507@opengridcomputing.com><4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> Message-ID: Any posting to SQ prior to connection establishment will complete "immediately" with the "flushed" status. Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Glenn Grundstrom [mailto:ggrundstrom at NetEffect.com] > Sent: Sunday, November 25, 2007 9:00 PM > To: Steve Wise; Kanevsky, Arkady > Cc: Leonid Grossman; openib-general at openib.org > Subject: RE: [ofa-general] Re: iWARP peer-to-peer CM proposal > > > > > Kanevsky, Arkady wrote: > > > Very good points. > > > Thanks Steve.
> > > > > > If we can do unsignalled 0-size RDMA Read with "bogus" > > S-tag this may > > > work better. > > > Yes, it will require IRD not to be 0 set at Responder. > > > Ditto ORD of at least 1 on Responder. > > > There is no need to have extra CQ entry on either side for it. > > > It is only needed for error path. > > > So this will only be needed if Sender posted the full queue > > of sends. > > > But it can not post anything because CM will not let it know that > > > connection is established. > > > > > > > > Well, actually, I think the ULP _can_ post before establishing the > > connection. But I guess we can define the semantics such that > > applications using the rdma-cm interface must adhere to whatever we > > need to make this hack work. > > > > Q: are there apps using the rdma-cm out there today that > pre-post SQ > > WRs before getting a ESTABLISHED event? > > > > Steve. > > ULPs are allowed to post prior to establishing the > connection, but I can't name any that operate this way. > Prohibiting applications that use the rdma_cm directly from > pre-posting is okay, but what about ULP's over other ULP's > (i.e. MPI over uDAPL). How can/will this be handled? > > Glenn. > > > > > Happy Thanksgiving, > > > > > > Arkady Kanevsky email: arkady at netapp.com > > > Network Appliance Inc. phone: 781-768-5395 > > > 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 > > > Waltham, MA 02451 central phone: 781-768-5300 > > > > > > > > > > > >> -----Original Message----- > > >> From: Steve Wise [mailto:swise at opengridcomputing.com] > > >> Sent: Wednesday, November 21, 2007 1:07 PM > > >> To: Kanevsky, Arkady > > >> Cc: Glenn Grundstrom; Leonid Grossman; openib-general at openib.org > > >> Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal > > >> > > >> Comments in-line below... 
> > >> > > >> > > >> Kanevsky, Arkady wrote: > > >> > > >>> Group, > > >>> > > >>> > > >>> below is proposal on how to resolve peer-to-peer > > iWARP CM issue > > >>> discovered at interop event. > > >>> > > >>> > > >>> The main issue is that MPA spec (relevant portion of > > >>> > > >> IETF RFC 5044 > > >> > > >>> is below) require that > > >>> > > >>> > > >>> connection initiator send first message over the > > >>> > > >> established connection. > > >> > > >>> Multiple MPI implementations and several other apps use > > >>> > > >> peer-to-peer > > >> > > >>> model. > > >>> > > >>> > > >>> So rather then forcing all of them to do it on their > > >>> > > >> own, which will > > >> > > >>> not help with > > >>> > > >>> > > >>> interop between different implementations, the goal > > is to extend > > >>> lower layers to provide it. > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> Our first idea was to leave MPA protocol untouched and > > >>> > > >> try to solve > > >> > > >>> this problem > > >>> > > >>> > > >>> in iw_cm. But there are too many complications to it. > > First, in > > >>> order to adhere to RFC5044 > > >>> > > >>> > > >>> initiator must send first FPDU and responder process > > >>> > > >> it. But since > > >> > > >>> the connection is already > > >>> > > >>> > > >>> established processing FPDU involves ULP on whose behalf the > > >>> connection is created. > > >>> > > >>> > > >>> So either initiator sends a message which generates > > >>> > > >> completion on > > >> > > >>> responder CQ, thus visible > > >>> > > >>> > > >>> to ULP, or not. > > >>> > > >> > > >> > > >>> In the later case, the only op which can do it is > > >>> RDMA one, which means > > >>> > > >>> > > >>> that responder somehow provided initiator S-tag which > > >>> > > >> it can use. > > >> > > >>> So, this is an extension > > >>> > > >>> > > >>> to MPA, probably using private data. And that responder upon > > >>> receiving it destroy this S-tag. 
> > >>> > > >>> > > >>> In any case this is an extension of MPA. > > >>> > > >>> > > >> This stag exchange isn't needed if this RDMA op is a 0B READ. > > >> The responder waits for that 0B read and only indicates > the rdma > > >> connection is established to its ULP when it replies to the 0B > > >> read. In this scenario, the responder/server side > doesn't consume > > >> any CQ resources. > > >> But it would require an IRD of at least 1 to be configured > > on the QP. > > >> The initiator still requires an SQ entry, and possibly a > CQ entry, > > >> for initiating the 0B read and handling completion. > > >> But its perhaps a little less painful than doing a SEND/RECV > > >> exchange. The read wr could be unsignaled so that it won't > > >> generate a CQE. But it still consumes an SQ WR slot so the SQ > > >> would have to be sized to allow this extra WR. And I > guess the CQ > > >> would also need to be sized accordingly in case the read failed. > > >> > > >> > > >>> In the former, Send is used but this requires a buffer > > >>> > > >> to be posted > > >> > > >>> to CQ. But since > > >>> > > >>> > > >>> the same CQ (or SharedCQ) can be used by other > > >>> > > >> connections at the > > >> > > >>> same time it can cause > > >>> > > >>> > > >>> the responder CM posted buffer to be consumed by other > > >>> > > >> connection. > > >> > > >>> This is not acceptable. > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> So new we consider extension to MPA protocol. > > >>> > > >>> > > >>> The goal is to be completely backwards compatible to > > >>> > > >> existing version 1. > > >> > > >>> In a nutshell, use a "flag" in the MPA request message which > > >>> indicates that > > >>> > > >>> > > >>> "ready to receive" message will be send by requestor upon > > >>> receiving > > >>> > > >>> > > >>> MPA response message with connection acceptance. > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> here are the changes to IETF RFC5044 > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> 1. 
0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 > > >>> > > >> 2 3 4 5 6 7 8 > > >> > > >>> 9 0 1 > > >>> > > >>> > > >> > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 0 > > >> > > >>> | | + Key (16 bytes containing "MPA ID Req Frame") + 4 > > >>> > > >> | (4D 50 41 > > >> > > >>> 20 49 44 20 52 65 71 20 46 72 61 6D 65) | + Or (16 > > >>> > > >> bytes containing > > >> > > >>> "MPA ID Rep Frame") + 8 | (4D 50 41 20 49 44 20 52 65 > > >>> > > >> 70 20 46 72 61 > > >> > > >>> 6D 65) | + Or (16 bytes containing "MPA ID Rtr Frame") > > >>> > > >> + 12 | (4D 50 > > >> > > >>> 41 20 49 44 20 52 74 52 20 46 72 61 6D 65) | + > > >>> > > >>> > > >> > > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 16 > > >> > > >>> |M|C|R|S| Res | Rev | PD_Length | > > >>> > > >>> > > >> > +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | > > >> > > >>> | ~ ~ ~ Private Data ~ | | | > > >>> > > >> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | > > >> > > >>> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ > > >>> > > >>> > > >>> > > >>> > > >>> > > >>> 2. S: indicator in the Req frame whether or not > > >>> > > >> Requestor will send > > >> > > >>> Rtr frame. > > >>> > > >>> > > >>> In Req frame, if set to 1 then Rtr frame will > > be sent if > > >>> responder > > >>> > > >>> > > >>> sends Rep frame with accept bit set. 0 indicate > > >>> > > >> that Rtr frame > > >> > > >>> will not be sent. > > >>> > > >>> > > >>> In Rep frame, 0 means that Responder cannot support > > >>> > > >> Rtr frame, > > >> > > >>> while 1 that it is and is waiting for it. > > >>> > > >>> > > >>> (While my preference is to handle this as MPA > > >>> > > >> protocol version > > >> > > >>> matching rules, > > >>> > > >>> > > >>> proposed method will provide complete backwards > > >>> > > >> compatibility) > > >> > > >>> Unused by Rtr frame. That is set to 0 in Rtr frame > > >>> > > >> and ignored > > >> > > >>> by responder. 
> > >>> > > >>> > > >>> > > >>> > > >>> > > >>> All other bits M,C,R and remainder of Res treated > > >>> > > >> as in MPA ver 1. > > >> > > >>> > > >>> > > >>> > > >>> Rtr frame adhere to C bit as specified in Rep frame > > >>> > > >>> > > >>> > > >> First, the RTR frame _must_ be an FPDU for this to work. > > >> Thus it violates the DDP/RDMAP specs because it is an known > > >> DDP/RDMAP opcode. > > >> > > >> Second, assuming the RTR frame is sent as an FPDU, then > this won't > > >> work with existing RNIC HW. The HW will post an async error > > >> because the incoming DDP/RDMAP opcode is unknown. > > >> > > >> The only way I see that we can fix this for the existing > rnic HW is > > >> to come up with some way to send a valid RDMAP message from the > > >> initiator to the responder under the covers -and- have the > > >> responder only indicate that the connection is established when > > >> that FPDU is received. > > >> > > >> Chelsio cannot support this hack via a 0B write, but the could > > >> support a 0B read or send/recv exchange. But as you > indicate, this > > >> is very painful and perhaps impossible to do without > impacting the > > >> ULP and breaking verbs semantics. > > >> > > >> (that's why we punted on this a year ago :) > > >> > > >> > > >> Steve. 
> > >> > > >> _______________________________________________ > > >> general mailing list > > >> general at lists.openfabrics.org > > >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > >> > > >> To unsubscribe, please visit > > >> http://openib.org/mailman/listinfo/openib-general > > >> > > >> > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From Arkady.Kanevsky at netapp.com Wed Nov 28 05:29:30 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 28 Nov 2007 08:29:30 -0500 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474CBAB0.8020208@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com> <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> <474CBAB0.8020208@opengridcomputing.com> Message-ID: Agree with initiator/client sending signalled 0B RDMA Read. This will handle client side. Still not 100% clear on passive/server side. Two issues which bothers me. 1. Is "bogus" S-tag allowed for incomming RDMA ops? I do not recall that RDDP requires that length is checked before S-tag. 2. How is "verb" layer on server side knows that RDMA Read op came and was done? Is it some back door to vendor FW? Will this be kicked for all incoming RDMA Read ops? Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. 
Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Steve Wise [mailto:swise at opengridcomputing.com] > Sent: Tuesday, November 27, 2007 7:48 PM > To: Caitlin Bestler > Cc: Kanevsky, Arkady; Glenn Grundstrom; Leonid Grossman; > openib-general at openib.org > Subject: Re: [ofa-general] Re: iWARP peer-to-peer CM proposal > > Caitlin Bestler wrote: > > On Nov 27, 2007 3:58 PM, Steve Wise > wrote: > > > >> For the short term, I claim we just implement this as part > of linux > >> iwarp connection setup (mandating a 0B read be sent from > the active > >> side). Your proposal to add meta-data to the private data > requires a > >> standards change anyway and is, IMO, the 2nd phase of this whole > >> enchilada... > >> > >> Steve. > >> > > > > I don't see how you can have any solution here that does > not require meta-data. > > For non-peer-to-peer connections neither a zero length RDMA Read or > > Write should be sent. An extraneous RDMA Read is > particularly onerous > > for a short lived connection that fits the classic active/passive > > model. So *something* is telling the CMA layer that this > connection may need an MPA unjam action. > > If that isn't meta-data, what is it? > > I assumed the 0B read would _always_ be sent as part of > establishing an iWARP connection using linux and the rdma-cm. > > > > > Further, the RDMA Read solution is adequate whenever the RDMA Write > > solution would have been (although at an unnecessary extra > cost), but > > as near as I can determine it is not a complete solution. If the > > passive side needs an untagged message completion then *something* > > needs to send it. How can the CM layer (or, I suppose, the > ULP itself) > > know that this untagged NOP message must be sent without meta-data? > > I believe at Reno we had the current rnic vendors all saying > a SEND or 0B read will work. 
So: If someone has current > iwarp HW that will _not_ > handle this problem by doing the 0B read hack, please speak up now. > > > > > As I see it, if we want to do the minimum that is required, but be > > certain that it is adequate, we need a per-connection setup > meta-data exchange. > > Are you going to prototype this? > > > Steve. > > > From fenkes at de.ibm.com Wed Nov 28 05:46:28 2007 From: fenkes at de.ibm.com (Joachim Fenkes) Date: Wed, 28 Nov 2007 15:46:28 +0200 Subject: [ofa-general] [PATCH] IB/ehca: Fix static rate if path faster than link Message-ID: <200711281446.29085.fenkes@de.ibm.com> The formula would yield -1 for this, which is wrong in a bad way (max throttling). Clamp to 0, which is the correct value. Signed-off-by: Joachim Fenkes --- This fixes another regression introduced in rc3. Please review and apply for 2.6.24-rc4. Thanks! drivers/infiniband/hw/ehca/ehca_av.c | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 453eb99..f7782c8 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -76,8 +76,12 @@ int ehca_calc_ipd(struct ehca_shca *shca, int port, link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; - /* IPD = round((link / path) - 1) */ - *ipd = ((link + (path >> 1)) / path) - 1; + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; return 0; } -- 1.5.2 From felix at chelsio.com Wed Nov 28 06:13:07 2007 From: felix at chelsio.com (Felix Marti) Date: Wed, 28 Nov 2007 06:13:07 -0800 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> 
<474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com><469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com><474CBAB0.8020208@opengridcomputing.com> Message-ID: <8A71B368A89016469F72CD08050AD33401D14785@maui.asicdesigners.com> > -----Original Message----- > From: general-bounces at lists.openfabrics.org [mailto:general- > bounces at lists.openfabrics.org] On Behalf Of Kanevsky, Arkady > Sent: Wednesday, November 28, 2007 5:30 AM > To: Steve Wise; Caitlin Bestler > Cc: Leonid Grossman; openib-general at openib.org > Subject: RE: [ofa-general] Re: iWARP peer-to-peer CM proposal > > Agree with initiator/client sending signalled 0B RDMA Read. > This will handle client side. > > Still not 100% clear on passive/server side. > Two issues which bothers me. > 1. Is "bogus" S-tag allowed for incomming RDMA ops? > I do not recall that RDDP requires that length is checked before > S-tag. > > 2. How is "verb" layer on server side knows that RDMA Read op > came and was done? Is it some back door to vendor FW? > Will this be kicked for all incoming RDMA Read ops? As you point out, the server Verbs layer is not aware of an incoming 0B RDMA Read (or Write for that matter). Hence some kind of magic must happen in the adapter where we vendors will have a choice: a) just 'unjam' the SQ in the adapter (which means that the CM layer works as today and the server can post SQ ops before the 'unjam' is received but they won't make it to the wire) or b) send a back-door command to the CM which can then move the state machine to established only after the 'unjam' is received. Whatever is done, it cannot happen for all zero-length RDMA Read (or Write for that matter). 
Hence the adapter must be informed that the next zero-length is the 'unjam' message (which also means that the server side could, in theory, omit sending the RDMA Read Response, because the RDMA Read Request was really an 'unjam'... not that I would be pushing for such an 'optimization' to avoid an extra wire message). > > Arkady Kanevsky email: arkady at netapp.com > Network Appliance Inc. phone: 781-768-5395 > 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 > Waltham, MA 02451 central phone: 781-768-5300 > > > > -----Original Message----- > > From: Steve Wise [mailto:swise at opengridcomputing.com] > > Sent: Tuesday, November 27, 2007 7:48 PM > > To: Caitlin Bestler > > Cc: Kanevsky, Arkady; Glenn Grundstrom; Leonid Grossman; > > openib-general at openib.org > > Subject: Re: [ofa-general] Re: iWARP peer-to-peer CM proposal > > > > Caitlin Bestler wrote: > > > On Nov 27, 2007 3:58 PM, Steve Wise > > wrote: > > > > > >> For the short term, I claim we just implement this as part > > of linux > > >> iwarp connection setup (mandating a 0B read be sent from > > the active > > >> side). Your proposal to add meta-data to the private data > > requires a > > >> standards change anyway and is, IMO, the 2nd phase of this whole > > >> enchilada... > > >> > > >> Steve. > > >> > > > > > > I don't see how you can have any solution here that does > > not require meta-data. > > > For non-peer-to-peer connections neither a zero length RDMA Read or > > > Write should be sent. An extraneous RDMA Read is > > particularly onerous > > > for a short lived connection that fits the classic active/passive > > > model. So *something* is telling the CMA layer that this > > connection may need an MPA unjam action. > > > If that isn't meta-data, what is it? > > > > I assumed the 0B read would _always_ be sent as part of > > establishing an iWARP connection using linux and the rdma-cm.
> > > > > > > > Further, the RDMA Read solution is adequate whenever the RDMA Write > > > solution would have been (although at an unnecessary extra > > cost), but > > > as near as I can determine it is not a complete solution. If the > > > passive side needs an untagged message completion then *something* > > > needs to send it. How can the CM layer (or, I suppose, the > > ULP itself) > > > know that this untagged NOP message must be sent without meta-data? > > > > I believe at Reno we had the current rnic vendors all saying > > a SEND or 0B read will work. So: If someone has current > > iwarp HW that will _not_ > > handle this problem by doing the 0B read hack, please speak up now. > > > > > > > > As I see it, if we want to do the minimum that is required, but be > > > certain that it is adequate, we need a per-connection setup > > meta-data exchange. > > > > Are you going to prototype this? > > > > > > Steve. > > > > > > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib- > general From stijn.desmet at intec.ugent.be Wed Nov 28 06:43:18 2007 From: stijn.desmet at intec.ugent.be (Stijn De Smet) Date: Wed, 28 Nov 2007 15:43:18 +0100 Subject: [ofa-general] DDR vs SDR performance Message-ID: <474D7E86.9050702@intec.ugent.be> Hello, I have a problem with the DDR performance: Configuration: 2 servers (IBM x3755, equiped with 4 dualcore opteron and 16GB RAM) 3 HCA's installed (2 Cisco DDR(Cheetah) and 1 Cisco dual SDR(LionMini), all PCI-e x8), all DDR HCA's at newest Cisco Firmware v1.2.917 build 3.2.0.149, with label 'HCA.Cheetah-DDR.20' The DDR's are connected with a cable, and s3n1 is running a SM. The SDR boards are connected over a Cisco SFS-7000D, but the DDR performance is +- the same over this SFS-7000D Both servers are running SLES10-SP1 with Ofed 1.2.5. 
s3n1:~ # ibstatus Infiniband device 'mthca0' port 1 status: < -- DDR board #1, not connected default gid: fe80:0000:0000:0000:0005:ad00:000b:cb39 base lid: 0x0 sm lid: 0x0 state: 1: DOWN phys state: 2: Polling rate: 10 Gb/sec (4X) Infiniband device 'mthca1' port 1 status: <--- DDR board #2, connected with cable default gid: fe80:0000:0000:0000:0005:ad00:000b:cb31 base lid: 0x16 sm lid: 0x16 state: 4: ACTIVE phys state: 5: LinkUp rate: 20 Gb/sec (4X DDR) Infiniband device 'mthca2' port 1 status: <--- SDR board, only port 1 connected to the SFS-7000D default gid: fe80:0000:0000:0000:0005:ad00:0008:a8d9 base lid: 0x3 sm lid: 0x2 state: 4: ACTIVE phys state: 5: LinkUp rate: 10 Gb/sec (4X) Infiniband device 'mthca2' port 2 status: default gid: fe80:0000:0000:0000:0005:ad00:0008:a8da base lid: 0x0 sm lid: 0x0 state: 1: DOWN phys state: 2: Polling rate: 10 Gb/sec (4X) RDMA test of : -- SDR: s3n2:~ # ib_rdma_bw -d mthca2 gpfs3n1 7190: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | iters=1000 | duplex=0 | cma=0 | 7190: Local address: LID 0x05, QPN 0x0408, PSN 0xf10f03 RKey 0x003b00 VAddr 0x002ba7b9943000 7190: Remote address: LID 0x03, QPN 0x040a, PSN 0xa9cf5c, RKey 0x003e00 VAddr 0x002adb2f3bb000 7190: Bandwidth peak (#0 to #989): 937.129 MB/sec 7190: Bandwidth average: 937.095 MB/sec 7190: Service Demand peak (#0 to #989): 2709 cycles/KB 7190: Service Demand Avg : 2709 cycles/KB -- DDR s3n2:~ # ib_rdma_bw -d mthca1 gpfs3n1 7191: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | iters=1000 | duplex=0 | cma=0 | 7191: Local address: LID 0x10, QPN 0x0405, PSN 0x5e19e RKey 0x002600 VAddr 0x002b76eab20000 7191: Remote address: LID 0x16, QPN 0x0405, PSN 0xdd976e, RKey 0x80002900 VAddr 0x002ba8ed10e000 7191: Bandwidth peak (#0 to #990): 1139.32 MB/sec 7191: Bandwidth average: 1139.31 MB/sec 7191: Service Demand peak (#0 to #990): 2228 cycles/KB 7191: Service Demand Avg : 2228 cycles/KB So only 200MB/s increase between SDR and DDR With comparable hardware(x3655, dual 
dualcore opteron, 8GB RAM), I get a little bit better RDMA performance(1395MB/s so close to the PCI-e x8 limit), but even worse IPoIB and SDP performance with kernels 2.6.22 and 2.6.23.9 and Ofed 1.3b IPoIB test(iperf), IPoIB in connected mode, MTU 65520: #ib2 is SDR, ib1 is DDR #SDR: s3n2:~ # iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.2 port 50598 connected with 192.168.1.1 port 5001 [ 3] 0.0-10.0 sec 6.28 GBytes 5.40 Gbits/sec #DDR: s3n2:~ # iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.2 port 32935 connected with 192.168.1.1 port 5001 [ 3] 0.0-10.0 sec 6.91 GBytes 5.93 Gbits/sec Now the increase is only 0.5Gbit And finally a test with SDP: DDR: s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 MByte (default) ------------------------------------------------------------ [ 4] local 192.168.1.2 port 58186 connected with 192.168.1.1 port 5001 [ 4] 0.0-10.0 sec 7.72 GBytes 6.63 Gbits/sec #SDR: s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 MByte (default) ------------------------------------------------------------ [ 4] local 192.168.1.2 port 58187 connected with 192.168.1.1 port 5001 [ 4] 0.0-10.0 sec 7.70 GBytes 6.61 Gbits/sec With SDP there is even no difference anymore between the 2 boards. 
Even when using multiple connections(using 3 servers(s3s2,s3s3,s3s4), x3655, 2.6.22, connecting all to one(s3s1) over DDR): s3s2:~ # iperf -c cic-s3s1 -p 5001 -t 30 ------------------------------------------------------------ Client connecting to cic-s3s1, TCP port 5001 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.15 port 33576 connected with 192.168.1.14 port 5001 [ 3] 0.0-30.0 sec 5.94 GBytes 1.70 Gbits/sec s3s3:~ # iperf -c cic-s3s1 -p 5002 -t 30 ------------------------------------------------------------ Client connecting to cic-s3s1, TCP port 5002 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.16 port 53558 connected with 192.168.1.14 port 5002 [ 3] 0.0-30.0 sec 5.74 GBytes 1.64 Gbits/sec s3s4:~ # iperf -c cic-s3s1 -p 5003 -t 30 ------------------------------------------------------------ Client connecting to cic-s3s1, TCP port 5003 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.17 port 37169 connected with 192.168.1.14 port 5003 [ 3] 0.0-30.0 sec 5.79 GBytes 1.66 Gbits/sec This gives a total of 1.7+1.64+1.66Gbits/sec=5Gbits/sec Is this normal behavior(SDP and IPoIB not benefiting from DDR)? Regards, Stijn From jackm at dev.mellanox.co.il Wed Nov 28 06:42:00 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 28 Nov 2007 16:42:00 +0200 Subject: [ofa-general] [PATCH ofed-1.3] IB/IPoIB: Restore support for interface statistics In-Reply-To: <474D16CC.9010807@voltaire.com> References: <474D16CC.9010807@voltaire.com> Message-ID: <200711281642.00863.jackm@dev.mellanox.co.il> On Wednesday 28 November 2007 09:20, Moni Shoua wrote: > While moving to kernel 2.6.24 in OFED, the function for getting interface > statistics got lost. 
This is a backport patch to re-enable net device > statistics for kernels that do not have the struct net_device_stats > in struct netdevice. > > This patch fixes bug 790. > Thanks Moni, applied. I actually applied the patch so that it created the various backport files, then committed all the backport files together in a single commit, with your authorship and signed-off-by. (I probably should have added myself as well, below your signed-off -- since I changed the commit format -- but I forgot to do this; sorry about that). - Jack From Shainer at Mellanox.com Wed Nov 28 06:45:10 2007 From: Shainer at Mellanox.com (Gilad Shainer) Date: Wed, 28 Nov 2007 06:45:10 -0800 Subject: [ofa-general] DDR vs SDR performance In-Reply-To: <474D7E86.9050702@intec.ugent.be> Message-ID: <9FA59C95FFCBB34EA5E42C1A8573784FD47FF2@mtiexch01.mti.com> Is the chipset in your servers HT2000? Gilad. -----Original Message----- From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Stijn De Smet Sent: Wednesday, November 28, 2007 6:43 AM To: general at lists.openfabrics.org Subject: [ofa-general] DDR vs SDR performance Hello, I have a problem with the DDR performance: Configuration: 2 servers (IBM x3755, equiped with 4 dualcore opteron and 16GB RAM) 3 HCA's installed (2 Cisco DDR(Cheetah) and 1 Cisco dual SDR(LionMini), all PCI-e x8), all DDR HCA's at newest Cisco Firmware v1.2.917 build 3.2.0.149, with label 'HCA.Cheetah-DDR.20' The DDR's are connected with a cable, and s3n1 is running a SM. The SDR boards are connected over a Cisco SFS-7000D, but the DDR performance is +- the same over this SFS-7000D Both servers are running SLES10-SP1 with Ofed 1.2.5. 
s3n1:~ # ibstatus Infiniband device 'mthca0' port 1 status: < -- DDR board #1, not connected default gid: fe80:0000:0000:0000:0005:ad00:000b:cb39 base lid: 0x0 sm lid: 0x0 state: 1: DOWN phys state: 2: Polling rate: 10 Gb/sec (4X) Infiniband device 'mthca1' port 1 status: <--- DDR board #2, connected with cable default gid: fe80:0000:0000:0000:0005:ad00:000b:cb31 base lid: 0x16 sm lid: 0x16 state: 4: ACTIVE phys state: 5: LinkUp rate: 20 Gb/sec (4X DDR) Infiniband device 'mthca2' port 1 status: <--- SDR board, only port 1 connected to the SFS-7000D default gid: fe80:0000:0000:0000:0005:ad00:0008:a8d9 base lid: 0x3 sm lid: 0x2 state: 4: ACTIVE phys state: 5: LinkUp rate: 10 Gb/sec (4X) Infiniband device 'mthca2' port 2 status: default gid: fe80:0000:0000:0000:0005:ad00:0008:a8da base lid: 0x0 sm lid: 0x0 state: 1: DOWN phys state: 2: Polling rate: 10 Gb/sec (4X) RDMA test of : -- SDR: s3n2:~ # ib_rdma_bw -d mthca2 gpfs3n1 7190: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | iters=1000 | duplex=0 | cma=0 | 7190: Local address: LID 0x05, QPN 0x0408, PSN 0xf10f03 RKey 0x003b00 VAddr 0x002ba7b9943000 7190: Remote address: LID 0x03, QPN 0x040a, PSN 0xa9cf5c, RKey 0x003e00 VAddr 0x002adb2f3bb000 7190: Bandwidth peak (#0 to #989): 937.129 MB/sec 7190: Bandwidth average: 937.095 MB/sec 7190: Service Demand peak (#0 to #989): 2709 cycles/KB 7190: Service Demand Avg : 2709 cycles/KB -- DDR s3n2:~ # ib_rdma_bw -d mthca1 gpfs3n1 7191: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | iters=1000 | duplex=0 | cma=0 | 7191: Local address: LID 0x10, QPN 0x0405, PSN 0x5e19e RKey 0x002600 VAddr 0x002b76eab20000 7191: Remote address: LID 0x16, QPN 0x0405, PSN 0xdd976e, RKey 0x80002900 VAddr 0x002ba8ed10e000 7191: Bandwidth peak (#0 to #990): 1139.32 MB/sec 7191: Bandwidth average: 1139.31 MB/sec 7191: Service Demand peak (#0 to #990): 2228 cycles/KB 7191: Service Demand Avg : 2228 cycles/KB So only 200MB/s increase between SDR and DDR With comparable hardware(x3655, dual 
dualcore opteron, 8GB RAM), I get a little bit better RDMA performance(1395MB/s so close to the PCI-e x8 limit), but even worse IPoIB and SDP performance with kernels 2.6.22 and 2.6.23.9 and Ofed 1.3b IPoIB test(iperf), IPoIB in connected mode, MTU 65520: #ib2 is SDR, ib1 is DDR #SDR: s3n2:~ # iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.2 port 50598 connected with 192.168.1.1 port 5001 [ 3] 0.0-10.0 sec 6.28 GBytes 5.40 Gbits/sec #DDR: s3n2:~ # iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.2 port 32935 connected with 192.168.1.1 port 5001 [ 3] 0.0-10.0 sec 6.91 GBytes 5.93 Gbits/sec Now the increase is only 0.5Gbit And finally a test with SDP: DDR: s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 MByte (default) ------------------------------------------------------------ [ 4] local 192.168.1.2 port 58186 connected with 192.168.1.1 port 5001 [ 4] 0.0-10.0 sec 7.72 GBytes 6.63 Gbits/sec #SDR: s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 ------------------------------------------------------------ Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 MByte (default) ------------------------------------------------------------ [ 4] local 192.168.1.2 port 58187 connected with 192.168.1.1 port 5001 [ 4] 0.0-10.0 sec 7.70 GBytes 6.61 Gbits/sec With SDP there is even no difference anymore between the 2 boards. 
Even when using multiple connections(using 3 servers(s3s2,s3s3,s3s4), x3655, 2.6.22, connecting all to one(s3s1) over DDR): s3s2:~ # iperf -c cic-s3s1 -p 5001 -t 30 ------------------------------------------------------------ Client connecting to cic-s3s1, TCP port 5001 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.15 port 33576 connected with 192.168.1.14 port 5001 [ 3] 0.0-30.0 sec 5.94 GBytes 1.70 Gbits/sec s3s3:~ # iperf -c cic-s3s1 -p 5002 -t 30 ------------------------------------------------------------ Client connecting to cic-s3s1, TCP port 5002 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.16 port 53558 connected with 192.168.1.14 port 5002 [ 3] 0.0-30.0 sec 5.74 GBytes 1.64 Gbits/sec s3s4:~ # iperf -c cic-s3s1 -p 5003 -t 30 ------------------------------------------------------------ Client connecting to cic-s3s1, TCP port 5003 TCP window size: 1.00 MByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.17 port 37169 connected with 192.168.1.14 port 5003 [ 3] 0.0-30.0 sec 5.79 GBytes 1.66 Gbits/sec This gives a total of 1.7+1.64+1.66Gbits/sec=5Gbits/sec Is this normal behavior(SDP and IPoIB not benefiting from DDR)? 
Regards, Stijn _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From stijn.desmet at intec.ugent.be Wed Nov 28 07:01:37 2007 From: stijn.desmet at intec.ugent.be (Stijn De Smet) Date: Wed, 28 Nov 2007 16:01:37 +0100 Subject: [ofa-general] DDR vs SDR performance In-Reply-To: <9FA59C95FFCBB34EA5E42C1A8573784FD47FF2@mtiexch01.mti.com> References: <9FA59C95FFCBB34EA5E42C1A8573784FD47FF2@mtiexch01.mti.com> Message-ID: <474D82D1.5080504@intec.ugent.be> One ServerWorks HT2100 A PCI Express Bridge, one HT2100 B PCI Express Bridge, and one ServerWorks HT1000 South Bridge Regards, Stijn Gilad Shainer wrote: > Is the chipset in your servers HT2000? > > Gilad. > > -----Original Message----- > From: general-bounces at lists.openfabrics.org > [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Stijn De > Smet > Sent: Wednesday, November 28, 2007 6:43 AM > To: general at lists.openfabrics.org > Subject: [ofa-general] DDR vs SDR performance > > Hello, > > I have a problem with the DDR performance: > > Configuration: > 2 servers (IBM x3755, equiped with 4 dualcore opteron and 16GB RAM) > 3 HCA's installed (2 Cisco DDR(Cheetah) and 1 Cisco dual SDR(LionMini), > all PCI-e x8), all DDR HCA's at newest Cisco Firmware v1.2.917 build > 3.2.0.149, with label 'HCA.Cheetah-DDR.20' > > The DDR's are connected with a cable, and s3n1 is running a SM. The SDR > boards are connected over a Cisco SFS-7000D, but the DDR performance is > +- the same over this SFS-7000D > > Both servers are running SLES10-SP1 with Ofed 1.2.5. 
> > > s3n1:~ # ibstatus > Infiniband device 'mthca0' port 1 status: < -- DDR board #1, not > connected > default gid: fe80:0000:0000:0000:0005:ad00:000b:cb39 > base lid: 0x0 > sm lid: 0x0 > state: 1: DOWN > phys state: 2: Polling > rate: 10 Gb/sec (4X) > > Infiniband device 'mthca1' port 1 status: <--- DDR board #2, connected > with cable > default gid: fe80:0000:0000:0000:0005:ad00:000b:cb31 > base lid: 0x16 > sm lid: 0x16 > state: 4: ACTIVE > phys state: 5: LinkUp > rate: 20 Gb/sec (4X DDR) > > Infiniband device 'mthca2' port 1 status: <--- SDR board, only port 1 > connected to the SFS-7000D > default gid: fe80:0000:0000:0000:0005:ad00:0008:a8d9 > base lid: 0x3 > sm lid: 0x2 > state: 4: ACTIVE > phys state: 5: LinkUp > rate: 10 Gb/sec (4X) > > Infiniband device 'mthca2' port 2 status: > default gid: fe80:0000:0000:0000:0005:ad00:0008:a8da > base lid: 0x0 > sm lid: 0x0 > state: 1: DOWN > phys state: 2: Polling > rate: 10 Gb/sec (4X) > > > RDMA test of : > -- SDR: > s3n2:~ # ib_rdma_bw -d mthca2 gpfs3n1 > 7190: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | iters=1000 > | duplex=0 | cma=0 | > 7190: Local address: LID 0x05, QPN 0x0408, PSN 0xf10f03 RKey 0x003b00 > VAddr 0x002ba7b9943000 > 7190: Remote address: LID 0x03, QPN 0x040a, PSN 0xa9cf5c, RKey 0x003e00 > VAddr 0x002adb2f3bb000 > > > 7190: Bandwidth peak (#0 to #989): 937.129 MB/sec > 7190: Bandwidth average: 937.095 MB/sec > 7190: Service Demand peak (#0 to #989): 2709 cycles/KB > 7190: Service Demand Avg : 2709 cycles/KB > > -- DDR > s3n2:~ # ib_rdma_bw -d mthca1 gpfs3n1 > 7191: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | iters=1000 > | duplex=0 | cma=0 | > 7191: Local address: LID 0x10, QPN 0x0405, PSN 0x5e19e RKey 0x002600 > VAddr 0x002b76eab20000 > 7191: Remote address: LID 0x16, QPN 0x0405, PSN 0xdd976e, RKey > 0x80002900 VAddr 0x002ba8ed10e000 > > > 7191: Bandwidth peak (#0 to #990): 1139.32 MB/sec > 7191: Bandwidth average: 1139.31 MB/sec > 7191: Service Demand peak (#0 to #990): 
2228 cycles/KB > 7191: Service Demand Avg : 2228 cycles/KB > > So only 200MB/s increase between SDR and DDR With comparable > hardware(x3655, dual dualcore opteron, 8GB RAM), I get a little bit > better RDMA performance(1395MB/s so close to the PCI-e x8 limit), but > even worse IPoIB and SDP performance with kernels 2.6.22 and > 2.6.23.9 and Ofed 1.3b > > > > IPoIB test(iperf), IPoIB in connected mode, MTU 65520: > #ib2 is SDR, ib1 is DDR > #SDR: > s3n2:~ # iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.2 port 50598 connected with 192.168.1.1 port 5001 > [ 3] 0.0-10.0 sec 6.28 GBytes 5.40 Gbits/sec > > #DDR: > s3n2:~ # iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.2 port 32935 connected with 192.168.1.1 port 5001 > [ 3] 0.0-10.0 sec 6.91 GBytes 5.93 Gbits/sec > > > Now the increase is only 0.5Gbit > > And finally a test with SDP: > > DDR: > s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 MByte > (default) > ------------------------------------------------------------ > [ 4] local 192.168.1.2 port 58186 connected with 192.168.1.1 port 5001 > [ 4] 0.0-10.0 sec 7.72 GBytes 6.63 Gbits/sec > > #SDR: > s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 MByte > (default) > ------------------------------------------------------------ > [ 4] local 192.168.1.2 port 58187 
connected with 192.168.1.1 port 5001 > [ 4] 0.0-10.0 sec 7.70 GBytes 6.61 Gbits/sec > > With SDP there is even no difference anymore between the 2 boards. > > > Even when using multiple connections(using 3 servers(s3s2,s3s3,s3s4), > x3655, 2.6.22, connecting all to one(s3s1) over DDR): > s3s2:~ # iperf -c cic-s3s1 -p 5001 -t 30 > ------------------------------------------------------------ > Client connecting to cic-s3s1, TCP port 5001 TCP window size: 1.00 MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.15 port 33576 connected with 192.168.1.14 port > 5001 [ 3] 0.0-30.0 sec 5.94 GBytes 1.70 Gbits/sec s3s3:~ # iperf -c > cic-s3s1 -p 5002 -t 30 > ------------------------------------------------------------ > Client connecting to cic-s3s1, TCP port 5002 TCP window size: 1.00 MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.16 port 53558 connected with 192.168.1.14 port > 5002 [ 3] 0.0-30.0 sec 5.74 GBytes 1.64 Gbits/sec s3s4:~ # iperf -c > cic-s3s1 -p 5003 -t 30 > ------------------------------------------------------------ > Client connecting to cic-s3s1, TCP port 5003 TCP window size: 1.00 MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.17 port 37169 connected with 192.168.1.14 port > 5003 [ 3] 0.0-30.0 sec 5.79 GBytes 1.66 Gbits/sec > > > This gives a total of 1.7+1.64+1.66Gbits/sec=5Gbits/sec > > Is this normal behavior(SDP and IPoIB not benefiting from DDR)? 
> > > Regards, > Stijn > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Wed Nov 28 07:17:45 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Wed, 28 Nov 2007 15:17:45 +0000 Subject: [ofa-general] Re: i got kernel oops in ib_umad when executing ULPs tests In-Reply-To: <474BE237.8050602@dev.mellanox.co.il> References: <474BE237.8050602@dev.mellanox.co.il> Message-ID: <20071128151745.GB708@sashak.voltaire.com> Hi Dotan, On 11:24 Tue 27 Nov , Dotan Barak wrote: > Hi. > > When executing SDP tests (stress_connect) i got a kernel oops in my machine > in ib_umad: Is it reproducible somehow? > > Here are the machine props: > ************************************************************* > Host Name : sw112/3 > Host Architecture : x86_64 > Linux Distribution: SUSE Linux Enterprise Server 10 (x86_64) VERSION = 10 > Kernel Version : 2.6.16.21-0.8-smp > GCC Version : gcc (GCC) 4.1.0 (SUSE Linux) > Memory size : 4049452 kB > Number of CPUs : 4 > cpu MHz : 3192.308 > MST Version : 4.4.3 > Driver Version : ofa_1_3_dev-20071126-0855 > HCA ID(s) : mlx4_0 > HCA model(s) : 25418 > Board(s) : MT_04A0110002 > ************************************************************* > > Here is the dump of the /var/log/messages: > Nov 27 09:26:32 sw112 OpenSM[24713]: Exiting SM > Nov 27 09:26:32 sw112 kernel: general protection fault: 0000 [1] SMP > Nov 27 09:26:32 sw112 kernel: last sysfs file: /class/net/ib0/address > Nov 27 09:26:32 sw112 kernel: CPU 2 > Nov 27 09:26:32 sw112 kernel: Modules linked in: mst_pciconf mst_pci > rdma_ucm rds ib_sdp rdma_cm iw_cm ib_addr ib_ipoib ib_c > m ib_sa ib_uverbs ib_umad mlx4_ib mlx4_core ib_mthca ib_mad ib_core memtrack > autofs4 ipv6 nfs lockd nfs_acl sunrpc af_packet > button battery ac apparmor aamatch_pcre loop 
dm_mod ide_cd uhci_hcd ehci_hcd > cdrom shpchp pci_hotplug hw_random i8xx_tco us > bcore e1000 ext3 jbd edd fan thermal processor sg mptspi mptscsih mptbase > scsi_transport_spi piix sd_mod scsi_mod ide_disk i > de_core > Nov 27 09:26:32 sw112 kernel: Pid: 24713, comm: opensm Tainted: PF U > 2.6.16.21-0.8-smp #1 > Nov 27 09:26:32 sw112 kernel: RIP: 0010:[] > {:ib_umad:dequeue_send+26} > Nov 27 09:26:32 sw112 kernel: RSP: 0018:ffff8100c0d9fde8 EFLAGS: 00010046 > Nov 27 09:26:32 sw112 kernel: RAX: ffff8100c1a95658 RBX: 3f40a6f32b5a2004 > RCX: 3f40a6f32b5a2014 > Nov 27 09:26:32 sw112 kernel: RDX: ffff8100c0d9fe58 RSI: 3f40a6f32b5a2004 > RDI: ffff81007401ac3c > Nov 27 09:26:32 sw112 kernel: RBP: 3f40a6f32b5a2004 R08: 0000000000000206 > R09: 00000000000007d7 > Nov 27 09:26:32 sw112 kernel: R10: 0000000000000000 R11: 0000000000000246 > R12: ffff81007401ac00 > Nov 27 09:26:32 sw112 kernel: R13: ffff81007401a210 R14: 0000000000000005 > R15: 0000000000000000 > Nov 27 09:26:32 sw112 kernel: FS: 00002b13822edef0(0000) > GS:ffff81012bd6b340(0000) knlGS:0000000000000000 > Nov 27 09:26:32 sw112 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: > 000000008005003b > Nov 27 09:26:32 sw112 kernel: CR2: 00000000005d99c0 CR3: 0000000037079000 > CR4: 00000000000006e0 > Nov 27 09:26:32 sw112 kernel: Process opensm (pid: 24713, threadinfo > ffff8100c0d9e000, task ffff8100cd8047d0) > Nov 27 09:26:32 sw112 kernel: Stack: ffff81012d706b10 ffff8100c0d9fe68 > ffff81007401ac00 ffffffff8837d4b1 > Nov 27 09:26:32 sw112 kernel: 0000000000000296 ffff8100c0d9fe40 > ffff81007401a210 ffff81007401a200 > Nov 27 09:26:32 sw112 kernel: 0000000000000005 ffffffff8827261e > Nov 27 09:26:32 sw112 kernel: Call Trace: > {:ib_umad:send_handler+38} > Nov 27 09:26:32 sw112 kernel: > {:ib_mad:ib_unregister_mad_agent+359} > Nov 27 09:26:32 sw112 kernel: > {:ib_umad:ib_umad_unreg_agent+121} > Nov 27 09:26:32 sw112 kernel: > {:ib_umad:ib_umad_ioctl+74} > {do_ioctl+33} > Nov 27 09:26:32 sw112 kernel: {vfs_ioctl+584} > 
{__up_write+33} > Nov 27 09:26:32 sw112 kernel: {sys_ioctl+98} > {system_call+126} > Nov 27 09:26:32 sw112 kernel: > Nov 27 09:26:32 sw112 kernel: Code: 48 8b 53 10 48 8b 41 08 48 89 42 08 48 > 89 10 48 c7 41 08 00 > Nov 27 09:26:32 sw112 kernel: RIP > {:ib_umad:dequeue_send+26} RSP > > > > Here is the dump of /var/log/opensm.log: > > Nov 27 09:26:44 546327 [D6AC7EF0] 0x03 -> OpenSM 3.1.7 > Nov 27 09:26:44 546407 [D6AC7EF0] 0x80 -> OpenSM 3.1.7 > Nov 27 09:26:44 547422 [D6AC7EF0] 0x02 -> osm_vendor_bind: Binding to port > 0x4025 ^^^^^^ Is this a valid GUID? > Nov 27 09:26:44 673957 [D6AC7EF0] 0x01 -> osm_vendor_bind: ERR 5426: Unable > to register class 129 version 1 > Nov 27 09:26:44 674032 [D6AC7EF0] 0x01 -> osm_sm_mad_ctrl_bind: ERR 3118: > Vendor specific bind failed > Nov 27 09:26:44 674057 [D6AC7EF0] 0x01 -> osm_sm_bind: ERR 2E10: SM MAD > Controller bind failed (IB_ERROR) > Nov 27 09:26:44 674089 [D6AC7EF0] 0x01 -> osm_sa_mad_ctrl_unbind: ERR 1A11: > No previous bind > Nov 27 09:26:44 675165 [D6AC7EF0] 0x80 -> Exiting SM > > > can you check this issue? Could you send OpenSM log file too? Sasha From glebn at voltaire.com Wed Nov 28 07:15:07 2007 From: glebn at voltaire.com (Gleb Natapov) Date: Wed, 28 Nov 2007 17:15:07 +0200 Subject: [ofa-general] [PATCH] return ENOSYS instead of -ENOSYS Message-ID: <20071128151507.GV25881@minantech.com> Return ENOSYS instead of -ENOSYS. We are not in the kernel. diff --git a/src/verbs.c b/src/verbs.c index 4e7beff..7fa1dbc 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -227,7 +227,7 @@ err: int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe) { /* XXX resize CQ not implemented */ - return -ENOSYS; + return ENOSYS; } int mlx4_destroy_cq(struct ibv_cq *cq) -- Gleb. 
From Shainer at Mellanox.com Wed Nov 28 07:17:45 2007 From: Shainer at Mellanox.com (Gilad Shainer) Date: Wed, 28 Nov 2007 07:17:45 -0800 Subject: [ofa-general] DDR vs SDR performance In-Reply-To: <474D82D1.5080504@intec.ugent.be> Message-ID: <9FA59C95FFCBB34EA5E42C1A8573784FD47FF5@mtiexch01.mti.com> Here are some notes. You can contact me directly for more info. 1. You do not compare the same HW. The single port IB HCAs provide different performance than the dual port devices. If you want to see the difference between SDR and DDR, you need to use the same IB configuration as well. 2. Saying that, with the single port DDR you should get around 1400MB/s with the RDMA tests but: - The benchmark you are using is not supported any more (well, for a long time now). You should use the IB send, IB write etc tests - On Opteron, the HTxx00 chipset configuration is very important (not just for IB performance) - There is a difference in performance depending on the location of the memory. If you run the tests you will see numbers in the high 1300 and low 1100 (with your current chipset config) Gilad. -----Original Message----- From: general-bounces at lists.openfabrics.org [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Stijn De Smet Sent: Wednesday, November 28, 2007 7:02 AM To: Gilad Shainer Cc: general at lists.openfabrics.org Subject: Re: [ofa-general] DDR vs SDR performance One ServerWorks HT2100 A PCI Express Bridge, one HT2100 B PCI Express Bridge, and one ServerWorks HT1000 South Bridge Regards, Stijn Gilad Shainer wrote: > Is the chipset in your servers HT2000? > > Gilad.
> > -----Original Message----- > From: general-bounces at lists.openfabrics.org > [mailto:general-bounces at lists.openfabrics.org] On Behalf Of Stijn De > Smet > Sent: Wednesday, November 28, 2007 6:43 AM > To: general at lists.openfabrics.org > Subject: [ofa-general] DDR vs SDR performance > > Hello, > > I have a problem with the DDR performance: > > Configuration: > 2 servers (IBM x3755, equiped with 4 dualcore opteron and 16GB RAM) > 3 HCA's installed (2 Cisco DDR(Cheetah) and 1 Cisco dual > SDR(LionMini), all PCI-e x8), all DDR HCA's at newest Cisco Firmware > v1.2.917 build 3.2.0.149, with label 'HCA.Cheetah-DDR.20' > > The DDR's are connected with a cable, and s3n1 is running a SM. The > SDR boards are connected over a Cisco SFS-7000D, but the DDR > performance is > +- the same over this SFS-7000D > > Both servers are running SLES10-SP1 with Ofed 1.2.5. > > > s3n1:~ # ibstatus > Infiniband device 'mthca0' port 1 status: < -- DDR board #1, not > connected > default gid: fe80:0000:0000:0000:0005:ad00:000b:cb39 > base lid: 0x0 > sm lid: 0x0 > state: 1: DOWN > phys state: 2: Polling > rate: 10 Gb/sec (4X) > > Infiniband device 'mthca1' port 1 status: <--- DDR board #2, > connected with cable > default gid: fe80:0000:0000:0000:0005:ad00:000b:cb31 > base lid: 0x16 > sm lid: 0x16 > state: 4: ACTIVE > phys state: 5: LinkUp > rate: 20 Gb/sec (4X DDR) > > Infiniband device 'mthca2' port 1 status: <--- SDR board, only port 1 > connected to the SFS-7000D > default gid: fe80:0000:0000:0000:0005:ad00:0008:a8d9 > base lid: 0x3 > sm lid: 0x2 > state: 4: ACTIVE > phys state: 5: LinkUp > rate: 10 Gb/sec (4X) > > Infiniband device 'mthca2' port 2 status: > default gid: fe80:0000:0000:0000:0005:ad00:0008:a8da > base lid: 0x0 > sm lid: 0x0 > state: 1: DOWN > phys state: 2: Polling > rate: 10 Gb/sec (4X) > > > RDMA test of : > -- SDR: > s3n2:~ # ib_rdma_bw -d mthca2 gpfs3n1 > 7190: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | > iters=1000 > | duplex=0 | cma=0 | > 7190: 
Local address: LID 0x05, QPN 0x0408, PSN 0xf10f03 RKey 0x003b00 > VAddr 0x002ba7b9943000 > 7190: Remote address: LID 0x03, QPN 0x040a, PSN 0xa9cf5c, RKey > 0x003e00 VAddr 0x002adb2f3bb000 > > > 7190: Bandwidth peak (#0 to #989): 937.129 MB/sec > 7190: Bandwidth average: 937.095 MB/sec > 7190: Service Demand peak (#0 to #989): 2709 cycles/KB > 7190: Service Demand Avg : 2709 cycles/KB > > -- DDR > s3n2:~ # ib_rdma_bw -d mthca1 gpfs3n1 > 7191: | port=18515 | ib_port=1 | size=65536 | tx_depth=100 | > iters=1000 > | duplex=0 | cma=0 | > 7191: Local address: LID 0x10, QPN 0x0405, PSN 0x5e19e RKey 0x002600 > VAddr 0x002b76eab20000 > 7191: Remote address: LID 0x16, QPN 0x0405, PSN 0xdd976e, RKey > 0x80002900 VAddr 0x002ba8ed10e000 > > > 7191: Bandwidth peak (#0 to #990): 1139.32 MB/sec > 7191: Bandwidth average: 1139.31 MB/sec > 7191: Service Demand peak (#0 to #990): 2228 cycles/KB > 7191: Service Demand Avg : 2228 cycles/KB > > So only 200MB/s increase between SDR and DDR With comparable > hardware(x3655, dual dualcore opteron, 8GB RAM), I get a little bit > better RDMA performance(1395MB/s so close to the PCI-e x8 limit), but > even worse IPoIB and SDP performance with kernels 2.6.22 and > 2.6.23.9 and Ofed 1.3b > > > > IPoIB test(iperf), IPoIB in connected mode, MTU 65520: > #ib2 is SDR, ib1 is DDR > #SDR: > s3n2:~ # iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 > MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.2 port 50598 connected with 192.168.1.1 port > 5001 [ 3] 0.0-10.0 sec 6.28 GBytes 5.40 Gbits/sec > > #DDR: > s3n2:~ # iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 1.00 > MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.2 port 32935 connected with 
192.168.1.1 port > 5001 [ 3] 0.0-10.0 sec 6.91 GBytes 5.93 Gbits/sec > > > Now the increase is only 0.5Gbit > > And finally a test with SDP: > > DDR: > s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 > MByte > (default) > ------------------------------------------------------------ > [ 4] local 192.168.1.2 port 58186 connected with 192.168.1.1 port > 5001 [ 4] 0.0-10.0 sec 7.72 GBytes 6.63 Gbits/sec > > #SDR: > s3n2:~ # LD_PRELOAD=libsdp.so SIMPLE_LIBSDP="ok" iperf -c cic-s3n1 > ------------------------------------------------------------ > Client connecting to cic-s3n1, TCP port 5001 TCP window size: 3.91 > MByte > (default) > ------------------------------------------------------------ > [ 4] local 192.168.1.2 port 58187 connected with 192.168.1.1 port > 5001 [ 4] 0.0-10.0 sec 7.70 GBytes 6.61 Gbits/sec > > With SDP there is even no difference anymore between the 2 boards. 
> > > Even when using multiple connections(using 3 servers(s3s2,s3s3,s3s4), > x3655, 2.6.22, connecting all to one(s3s1) over DDR): > s3s2:~ # iperf -c cic-s3s1 -p 5001 -t 30 > ------------------------------------------------------------ > Client connecting to cic-s3s1, TCP port 5001 TCP window size: 1.00 > MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.15 port 33576 connected with 192.168.1.14 port > 5001 [ 3] 0.0-30.0 sec 5.94 GBytes 1.70 Gbits/sec s3s3:~ # iperf > -c > cic-s3s1 -p 5002 -t 30 > ------------------------------------------------------------ > Client connecting to cic-s3s1, TCP port 5002 TCP window size: 1.00 > MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.16 port 53558 connected with 192.168.1.14 port > 5002 [ 3] 0.0-30.0 sec 5.74 GBytes 1.64 Gbits/sec s3s4:~ # iperf > -c > cic-s3s1 -p 5003 -t 30 > ------------------------------------------------------------ > Client connecting to cic-s3s1, TCP port 5003 TCP window size: 1.00 > MByte > (default) > ------------------------------------------------------------ > [ 3] local 192.168.1.17 port 37169 connected with 192.168.1.14 port > 5003 [ 3] 0.0-30.0 sec 5.79 GBytes 1.66 Gbits/sec > > > This gives a total of 1.7+1.64+1.66Gbits/sec=5Gbits/sec > > Is this normal behavior(SDP and IPoIB not benefiting from DDR)? 
> > > Regards, > Stijn > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > _______________________________________________ general mailing list general at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From hrosenstock at xsigo.com Wed Nov 28 07:33:01 2007 From: hrosenstock at xsigo.com (Hal Rosenstock) Date: Wed, 28 Nov 2007 07:33:01 -0800 Subject: [ofa-general] [ANNOUNCE] ibsim-0.4 tarballs release In-Reply-To: <829ded920711272320g64f5992emf2c1e9d16283d22e@mail.gmail.com> References: <829ded920711252155t5e0f9682se75a920ecbfa1677@mail.gmail.com> <20071126190312.GC14894@sashak.voltaire.com> <829ded920711262018o6f216d8ei69e103813f90ccef@mail.gmail.com> <20071127155130.GC26160@sashak.voltaire.com> <1196178726.26651.299.camel@hrosenstock-ws.xsigo.com> <829ded920711272320g64f5992emf2c1e9d16283d22e@mail.gmail.com> Message-ID: <1196263981.26651.465.camel@hrosenstock-ws.xsigo.com> On Wed, 2007-11-28 at 12:50 +0530, Keshetti Mahesh wrote: > > ibutils maintainer is Oren Kladnitsky orenk at dev.mellanox.co.il > > Not sure if he monitors this list. > > Sorry, I actual wanted to know who are the developers of ibadm group > of utilities. ibadm or ibdm ? Your original question was about ibdm. ibdm is under the ibutils tree. I don't think Mellanox has open sourced ibadm but I might be wrong. Maybe it's just not part of OpenIB/OpenFabrics code. > > > LASH resolves credit loops by using different VLs, I don't think ibdmchk > > > takes this into account, but don't know for sure. > > Yes, I have verified in ibdmchk that it considers only one VL while > checking for > credit loops. > > > I also think ibdmchk needs some support to handle LASH. 
I don't think it > > is currently supported by it (although that is not documented AFAIK). > > > > Is anyone currently working on this part (adding support to ibdmchk to > handle LASH) > in OFED community. I seriously doubt it. -- Hal > -Mahesh From eli at mellanox.co.il Wed Nov 28 07:44:52 2007 From: eli at mellanox.co.il (Eli Cohen) Date: Wed, 28 Nov 2007 17:44:52 +0200 Subject: [ofa-general] [PATCH] ipoib: Bug fix IPOIB CM dereferencing invalid pointer Message-ID: <1196264692.10306.1.camel@mtls03> Bug fix IPOIB CM dereferencing invalid pointer When ipoib_neigh_free gets called it needs to set to NULL its ->cm member so that a completion with error reaching ipoib_cm_handle_tx_wc will not access an invalid pointer. Signed-off-by: Eli Cohen --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a03a65e..95c7714 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -869,6 +869,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) } if (ipoib_cm_get(neigh)) ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); + + neigh->cm = NULL; kfree(neigh); } -- 1.5.3.6 From eli at mellanox.co.il Wed Nov 28 08:05:57 2007 From: eli at mellanox.co.il (Eli Cohen) Date: Wed, 28 Nov 2007 18:05:57 +0200 Subject: [ofa-general] [PATCH] Bug fix IPOIB CM dereferencing invalid pointer - resend Message-ID: <1196265957.10306.4.camel@mtls03> Bug fix IPOIB CM dereferencing invalid pointer When ipoib_neigh_free gets called it needs to set to NULL its ->cm->neigh member So that a completion with error reaching ipoib_cm_handle_tx_wc will not access an invalid pointer. 
Signed-off-by: Eli Cohen --- This is what I really meant to send drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a03a65e..0c66723 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -869,6 +869,10 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) } if (ipoib_cm_get(neigh)) ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); + + if (neigh->cm) + neigh->cm->neigh = NULL; + kfree(neigh); } -- 1.5.3.6 From jackm at dev.mellanox.co.il Wed Nov 28 08:16:20 2007 From: jackm at dev.mellanox.co.il (Jack Morgenstein) Date: Wed, 28 Nov 2007 18:16:20 +0200 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <20071127171725.GE4365@ics.muni.cz> References: <20071122140554.GB13609@ics.muni.cz> <474C4B24.4080809@mellanox.co.il> <20071127171725.GE4365@ics.muni.cz> Message-ID: <200711281816.20748.jackm@dev.mellanox.co.il> On Tuesday 27 November 2007 19:17, Lukas Hejtmanek wrote: > On Tue, Nov 27, 2007 at 06:51:48PM +0200, Tziporet Koren wrote: > >> just found, that OFED 1.3a with 2.6.23 kernel runs at 2/3 speed compared to > >> 2.6.23 kernel with built in driver. Any reason for this? > >> > > Which benchmark? > > ib_rdma_bw > ib_send_bw > ibv_uc_pingpong > > > Which HCA? > > Mellanox InfiniBand HCA, HCA.Cheetah-DDR.20. > > > Is it the same with ofed beta release? > > Did you mean 1.3b? I have not tried it. > Which userspace libraries did you use with the built-in driver of the 2.6.23 kernel? 
- Jack From eli at dev.mellanox.co.il Wed Nov 28 08:32:14 2007 From: eli at dev.mellanox.co.il (Eli Cohen) Date: Wed, 28 Nov 2007 18:32:14 +0200 Subject: [ofa-general] [PATCH] Bug fix IPOIB CM dereferencing invalid pointer - resend In-Reply-To: <1196265957.10306.4.camel@mtls03> References: <1196265957.10306.4.camel@mtls03> Message-ID: <1196267534.10306.16.camel@mtls03> Actually I see that tx->neigh is already set to NULL in ipoib_cm_destroy_tx so this fixes nothing. Although when I did this my system stopped crashing. I guess I have to dig farther. By the way this happens when I run netperf UDP and the connection is closed during the test runs. On Wed, 2007-11-28 at 18:05 +0200, Eli Cohen wrote: > Bug fix IPOIB CM dereferencing invalid pointer > > When ipoib_neigh_free gets called it needs to set to NULL > its ->cm->neigh member So that a completion with error reaching > ipoib_cm_handle_tx_wc will not access an invalid pointer. > > Signed-off-by: Eli Cohen > --- > > This is what I really meant to send > > > drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++++ > 1 files changed, 4 insertions(+), 0 deletions(-) > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c > index a03a65e..0c66723 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > @@ -869,6 +869,10 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh) > } > if (ipoib_cm_get(neigh)) > ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); > + > + if (neigh->cm) > + neigh->cm->neigh = NULL; > + > kfree(neigh); > } > From Caitlin.Bestler at neterion.com Wed Nov 28 08:43:38 2007 From: Caitlin.Bestler at neterion.com (Caitlin Bestler) Date: Wed, 28 Nov 2007 11:43:38 -0500 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474CBAB0.8020208@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> 
<5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com> <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> <474CBAB0.8020208@opengridcomputing.com> Message-ID: <78C9135A3D2ECE4B8162EBDCE82CAD77029FB290@nekter> > -----Original Message----- > From: Steve Wise [mailto:swise at opengridcomputing.com] > Sent: Tuesday, November 27, 2007 4:48 PM > To: Caitlin Bestler > Cc: Kanevsky, Arkady; Glenn Grundstrom; Leonid Grossman; openib- > general at openib.org > Subject: Re: [ofa-general] Re: iWARP peer-to-peer CM proposal > > Caitlin Bestler wrote: > > On Nov 27, 2007 3:58 PM, Steve Wise > wrote: > > > >> For the short term, I claim we just implement this as part of linux > >> iwarp connection setup (mandating a 0B read be sent from the active > >> side). Your proposal to add meta-data to the private data requires > a > >> standards change anyway and is, IMO, the 2nd phase of this whole > >> enchilada... > >> > >> Steve. > >> > > > > I don't see how you can have any solution here that does not require > meta-data. > > For non-peer-to-peer connections neither a zero length RDMA Read or > Write > > should be sent. An extraneous RDMA Read is particularly onerous for a > short > > lived connection that fits the classic active/passive model. So > *something* > > is telling the CMA layer that this connection may need an MPA unjam > action. > > If that isn't meta-data, what is it? > > I assumed the 0B read would _always_ be sent as part of establishing an > iWARP connection using linux and the rdma-cm. > That is an extra round-trip per connection setup, which is a significant penalty for a short lived connection. It is trivial for HPC/peer-to-peer applications, but would be a killer for something like HTTP over RDMA. 
Doing something like this for *every* connection makes it effectively a change to the MPA protocol. OFA is not the forum for such discussions, the IETF is. OFA drafting an understanding of how peer-to-peer applications use the existing protocol, on the other hand, is quite reasonable. But it has to be something done by peer-to-peer middleware or by the verbs layer in response to a flag from the peer-to-peer middleware. Otherwise it is not augmenting a protocol, it is changing it. From swise at opengridcomputing.com Wed Nov 28 09:12:37 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 28 Nov 2007 11:12:37 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <78C9135A3D2ECE4B8162EBDCE82CAD77029FB290@nekter> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com> <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> <474CBAB0.8020208@opengridcomputing.com> <78C9135A3D2ECE4B8162EBDCE82CAD77029FB290@nekter> Message-ID: <474DA185.30702@opengridcomputing.com> Caitlin Bestler wrote: > >> -----Original Message----- >> From: Steve Wise [mailto:swise at opengridcomputing.com] >> Sent: Tuesday, November 27, 2007 4:48 PM >> To: Caitlin Bestler >> Cc: Kanevsky, Arkady; Glenn Grundstrom; Leonid Grossman; openib- >> general at openib.org >> Subject: Re: [ofa-general] Re: iWARP peer-to-peer CM proposal >> >> Caitlin Bestler wrote: >>> On Nov 27, 2007 3:58 PM, Steve Wise >> wrote: >>>> For the short term, I claim we just implement this as part of linux >>>> iwarp connection setup (mandating a 0B read be sent from the active >>>> side). 
Your proposal to add meta-data to the private data requires >> a >>>> standards change anyway and is, IMO, the 2nd phase of this whole >>>> enchilada... >>>> >>>> Steve. >>>> >>> I don't see how you can have any solution here that does not require >> meta-data. >>> For non-peer-to-peer connections neither a zero length RDMA Read or >> Write >>> should be sent. An extraneous RDMA Read is particularly onerous for a >> short >>> lived connection that fits the classic active/passive model. So >> *something* >>> is telling the CMA layer that this connection may need an MPA unjam >> action. >>> If that isn't meta-data, what is it? >> I assumed the 0B read would _always_ be sent as part of establishing an >> iWARP connection using linux and the rdma-cm. >> > > That is an extra round-trip per connection setup, which is a significant > penalty for a short lived connection. It is trivial for HPC/peer-to-peer > applications, but would be a killer for something like HTTP over RDMA. > > Doing something like this for *every* connection makes it effectively > a change to the MPA protocol. OFA is not the forum for such discussions, > the IETF is. > > OFA drafting an understanding of how peer-to-peer applications use the > existing protocol, on the other hand, is quite reasonable. But it has > to be something done by peer-to-peer middleware or by the verbs layer > in response to a flag from the peer-to-peer middleware. Otherwise it > is not augmenting a protocol, it is changing it. > posting a 0B read after the mpa setup isn't changing the MPA protocol. Its adding a protocol on top of the MPA setup in order to meet the requirements of the MPA protocol. Whether you add a private-data request for this or _assume_ the 0B read will happen doesn't change this. 
From tom at opengridcomputing.com Wed Nov 28 09:22:36 2007 From: tom at opengridcomputing.com (Tom Tucker) Date: Wed, 28 Nov 2007 11:22:36 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <78C9135A3D2ECE4B8162EBDCE82CAD77029FB290@nekter> References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com> <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> <474CBAB0.8020208@opengridcomputing.com> <78C9135A3D2ECE4B8162EBDCE82CAD77029FB290@nekter> Message-ID: <1196270556.24469.64.camel@trinity.ogc.int> On Wed, 2007-11-28 at 11:43 -0500, Caitlin Bestler wrote: > > > -----Original Message----- > > From: Steve Wise [mailto:swise at opengridcomputing.com] > > Sent: Tuesday, November 27, 2007 4:48 PM > > To: Caitlin Bestler > > Cc: Kanevsky, Arkady; Glenn Grundstrom; Leonid Grossman; openib- > > general at openib.org > > Subject: Re: [ofa-general] Re: iWARP peer-to-peer CM proposal > > > > Caitlin Bestler wrote: > > > On Nov 27, 2007 3:58 PM, Steve Wise > > wrote: > > > > > >> For the short term, I claim we just implement this as part of linux > > >> iwarp connection setup (mandating a 0B read be sent from the active > > >> side). Your proposal to add meta-data to the private data requires > > a > > >> standards change anyway and is, IMO, the 2nd phase of this whole > > >> enchilada... > > >> > > >> Steve. > > >> > > > > > > I don't see how you can have any solution here that does not require > > meta-data. > > > For non-peer-to-peer connections neither a zero length RDMA Read or > > Write > > > should be sent. An extraneous RDMA Read is particularly onerous for a > > short > > > lived connection that fits the classic active/passive model. 
So > > *something* > > > is telling the CMA layer that this connection may need an MPA unjam > > action. > > > If that isn't meta-data, what is it? > > > > I assumed the 0B read would _always_ be sent as part of establishing an > > iWARP connection using linux and the rdma-cm. > > > > That is an extra round-trip per connection setup, which is a significant > penalty for a short lived connection. It is trivial for HPC/peer-to-peer > applications, but would be a killer for something like HTTP over RDMA. > I find it hard to get excited about optimizing short lived connections for RDMA. I simply don't think it's an interesting use case. And btw, HTTP long ago got rid of short lived connections because it's painful even on TCP. > Doing something like this for *every* connection makes it effectively > a change to the MPA protocol. Uh. No, it doesn't. Normalizing the behavior of applications during connection setup doesn't change the underlying protocol. It adds another one on top. > OFA is not the forum for such discussions, > the IETF is. My living room, the dinner table, the local bar and this mailing list are perfectly acceptable forums for discussing a protocol. The IETF is the forum for standardizing one. Right now, I don't think we're ready to standardize, because we're still exploring the options; the first of which is NOT changing MPA. This group has the unique benefit of actually USING and IMPLEMENTING the protocol and therefore has some beneficial insights that may and should be shared. All that said revving the MPA protocol is way down the road. > > OFA drafting an understanding of how peer-to-peer applications use the > existing protocol, on the other hand, is quite reasonable. That's step 1 and the 0B READ is one way to do it. > But it has > to be something done by peer-to-peer middleware or by the verbs layer > in response to a flag from the peer-to-peer middleware. Otherwise it > is not augmenting a protocol, it is changing it. 
> The flag may be useful, however, I don't see the connection between the flag and complying with the MPA protocol. > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From xhejtman at ics.muni.cz Wed Nov 28 10:32:51 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Wed, 28 Nov 2007 19:32:51 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <200711281816.20748.jackm@dev.mellanox.co.il> References: <20071122140554.GB13609@ics.muni.cz> <474C4B24.4080809@mellanox.co.il> <20071127171725.GE4365@ics.muni.cz> <200711281816.20748.jackm@dev.mellanox.co.il> Message-ID: <20071128183251.GB4422@ics.muni.cz> On Wed, Nov 28, 2007 at 06:16:20PM +0200, Jack Morgenstein wrote: > Which userspace libraries did you use with the built-in driver > of the 2.6.23 kernel? for all the tests, I used OFED 1.1 user space tools and libraries. -- Lukáš Hejtmánek From swise at opengridcomputing.com Wed Nov 28 10:51:37 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 28 Nov 2007 12:51:37 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: References: <474473DD.3050507@opengridcomputing.com><4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> Message-ID: <474DB8B9.5000807@opengridcomputing.com> Kanevsky, Arkady wrote: > ULP can post recvs before connection is established but not to send > queue > prior to connection establishment. > I hate quoting specs (and the RDMAC verbs spec isn't really any standard), but, page 25 of draft-hilland-iwarp-verbs-v1.0 indicates its ok to post SQ WRs when in idle: ---- The QP MUST be in the Idle state following QP creation or when moved to this state with Modify QP. 
In this state, Send or Receive WRs MAY be posted but they MUST NOT be processed and CQEs MUST NOT be generated. ---- From swise at opengridcomputing.com Wed Nov 28 10:55:50 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 28 Nov 2007 12:55:50 -0600 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <469958e00711271511x9c04822q47d8196cbb6d9e87@mail.gmail.com> <474CA49B.3080806@opengridcomputing.com> <469958e00711271541i309cb170g8660345ad523a253@mail.gmail.com> <474CAF29.1020106@opengridcomputing.com> <469958e00711271620u259dc979v55a0c4f54acf95a7@mail.gmail.com> <474CBAB0.8020208@opengridcomputing.com> Message-ID: <474DB9B6.2090900@opengridcomputing.com> Kanevsky, Arkady wrote: > Agree with initiator/client sending signalled 0B RDMA Read. > This will handle client side. > > Still not 100% clear on passive/server side. > Two issues which bothers me. > 1. Is "bogus" S-tag allowed for incomming RDMA ops? The stag/to must not be validated if the incoming read is 0B length. http://www.ietf.org/rfc/rfc5040.txt: > * If the Data Source receives an RDMA Read Request Header with the > RDMA Read Message Size set to zero, the Data Source RDMAP: > > * MUST NOT validate the Data Source STag and Data Source Tagged > Offset contained in the RDMA Read Request Header, and > > * MUST respond with a zero-length RDMA Read Response Message. 
> From sean.hefty at intel.com Wed Nov 28 12:08:43 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 28 Nov 2007 12:08:43 -0800 Subject: [ofa-general] RE: [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <15ddcffd0711280307u7a89c6c2q2854b071f74d9123@mail.gmail.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com> <4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com> <47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> <15ddcffd0711270435t12a18dc3waac2596b3884ac72@mail.gmail.com> <000001c8311a$176cdbe0$63248686@amr.corp.intel.com> <15ddcffd0711280307u7a89c6c2q2854b071f74d9123@mail.gmail.com> Message-ID: <000001c831fa$79e03160$ff0da8c0@amr.corp.intel.com> >Some users have approached me and said that its unclear from the man >pages for some values of the connection param structure what are their >legal values. Reviewing this a little, I think we should add the >maximum values for the retry_count and rnr_retry_count under the >infiniband specific section of the rdma_connect and rdma_accept pages. I can do this. >Also, what about pushing all these documentation changes as a release >to OFED 1.3? I'm holding off on a release until I'm fairly sure that all of the documentation changes are in. I don't foresee a problem getting documentation only changes into OFED 1.3 though. - Sean From Arkady.Kanevsky at netapp.com Wed Nov 28 14:08:13 2007 From: Arkady.Kanevsky at netapp.com (Kanevsky, Arkady) Date: Wed, 28 Nov 2007 17:08:13 -0500 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: <474DB8B9.5000807@opengridcomputing.com> References: <474473DD.3050507@opengridcomputing.com><4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <474DB8B9.5000807@opengridcomputing.com> Message-ID: Another small discreptancy between IB and iWARP. 
Since RDMA_CM is used for ULP which are transport independent they will follow the stricter rule. That is IB. For IB any posting to SQ prior to QP being in RTS state shall be flushed. This semantic is actually very useful for ULPs which use insignalled completions. Because, once you see the completion for the request you posted after connection failure you are sure that all previously posted request on the same SQ are completed and had you had seen them all. So while, you are correct on the spec since we are working in IW_CM we can assume IB semantic on posting. Thanks, Arkady Kanevsky email: arkady at netapp.com Network Appliance Inc. phone: 781-768-5395 1601 Trapelo Rd. - Suite 16. Fax: 781-895-1195 Waltham, MA 02451 central phone: 781-768-5300 > -----Original Message----- > From: Steve Wise [mailto:swise at opengridcomputing.com] > Sent: Wednesday, November 28, 2007 1:52 PM > To: Kanevsky, Arkady > Cc: Glenn Grundstrom; Leonid Grossman; openib-general at openib.org > Subject: Re: [ofa-general] Re: iWARP peer-to-peer CM proposal > > Kanevsky, Arkady wrote: > > ULP can post recvs before connection is established but not to send > > queue prior to connection establishment. > > > > I hate quoting specs (and the RDMAC verbs spec isn't really any > standard), but, page 25 of draft-hilland-iwarp-verbs-v1.0 > indicates its > ok to post SQ WRs when in idle: > > ---- > The QP MUST be in the Idle state following QP creation or > when moved to > this state with Modify QP. In this state, Send or Receive WRs MAY be > posted but they MUST NOT be processed and CQEs MUST NOT be generated. 
> ---- > From or.gerlitz at gmail.com Wed Nov 28 14:14:04 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Thu, 29 Nov 2007 00:14:04 +0200 Subject: [ofa-general] Re: [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <000001c831fa$79e03160$ff0da8c0@amr.corp.intel.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com> <472755C4.10600@ichips.intel.com> <47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> <15ddcffd0711270435t12a18dc3waac2596b3884ac72@mail.gmail.com> <000001c8311a$176cdbe0$63248686@amr.corp.intel.com> <15ddcffd0711280307u7a89c6c2q2854b071f74d9123@mail.gmail.com> <000001c831fa$79e03160$ff0da8c0@amr.corp.intel.com> Message-ID: <15ddcffd0711281414t6263572dr20926ec77faee401@mail.gmail.com> On 11/28/07, Sean Hefty wrote: > >Reviewing this a little, I think we should add the > >maximum values for the retry_count and rnr_retry_count under the > >infiniband specific section of the rdma_connect and rdma_accept pages. > > I can do this. thanks. > I'm holding off on a release until I'm fairly sure that all of the documentation > changes are in. I don't foresee a problem getting documentation only changes > into OFED 1.3 though. indeed, cool. Or. From or.gerlitz at gmail.com Wed Nov 28 14:25:03 2007 From: or.gerlitz at gmail.com (Or Gerlitz) Date: Thu, 29 Nov 2007 00:25:03 +0200 Subject: [ofa-general] Re: iWARP peer-to-peer CM proposal In-Reply-To: References: <474473DD.3050507@opengridcomputing.com> <4746F349.3040606@opengridcomputing.com> <5E701717F2B2ED4EA60F87C8AA57B7CC07A57202@venom2> <474DB8B9.5000807@opengridcomputing.com> Message-ID: <15ddcffd0711281425n66731170va29e588063d5e992@mail.gmail.com> On 11/29/07, Kanevsky, Arkady wrote: > So while, you are correct on the spec since we are working > in IW_CM we can assume IB semantic on posting. 
please spend a minute on http://www.zip.com.au/~akpm/linux/patches/stuff/top-posting.txt Or. From Jeffrey.C.Becker at nasa.gov Wed Nov 28 15:32:32 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Wed, 28 Nov 2007 15:32:32 -0800 Subject: [ofa-general] OFA server patching Message-ID: <474DFA90.6070907@nasa.gov> Hi all. In the interest of keeping our server up to date, I applied the latest Ubuntu patches. Several upgrades were made, including git. If you have any problems, let me know. Thanks. -jeff From rvm at obsidianresearch.com Wed Nov 28 16:00:39 2007 From: rvm at obsidianresearch.com (Rolf Manderscheid) Date: Wed, 28 Nov 2007 17:00:39 -0700 Subject: [ofa-general] [PATCH] opensm: allow multiple scopes in a partition Message-ID: <20071129000039.GG30090@obsidianresearch.com> Hi Sasha, This patch allows multiple scopes to be configured for a partition. This allows ipoib interfaces with different scopes to coexist in a partition. The partition configuration file can now have multiple scope=N flags and they all take effect (instead of just the last one). Signed-off-by: Rolf Manderscheid -- diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 index efd6ff0..c51f386 100644 --- a/opensm/man/opensm.8 +++ b/opensm/man/opensm.8 @@ -366,7 +366,8 @@ Currently recognized flags are: sl= - specifies SL for this IPoIB MC group (default is 0) scope= - specifies scope for this IPoIB MC group - (default is 2 (link local)) + (default is 2 (link local)). Multiple scope settings + are permitted for a partition. Note that values for rate, mtu, and scope should be specified as defined in the IBTA specification (for example, mtu=4 for 2048). 
diff --git a/opensm/opensm/osm_prtn_config.c b/opensm/opensm/osm_prtn_config.c index 1253031..646bf2a 100644 --- a/opensm/opensm/osm_prtn_config.c +++ b/opensm/opensm/osm_prtn_config.c @@ -68,7 +68,7 @@ struct part_conf { osm_log_t *p_log; osm_subn_t *p_subn; osm_prtn_t *p_prtn; - unsigned is_ipoib, mtu, rate, sl, scope; + unsigned is_ipoib, mtu, rate, sl, scope_mask; boolean_t full; }; @@ -89,6 +89,7 @@ static int partition_create(unsigned lineno, struct part_conf *conf, char *name, char *id, char *flag, char *flag_val) { uint16_t pkey; + unsigned int scope; if (!id && name && isdigit(*name)) { id = name; @@ -119,12 +120,26 @@ static int partition_create(unsigned lineno, struct part_conf *conf, } conf->p_prtn->sl = (uint8_t) conf->sl; - if (conf->is_ipoib) + if (! conf->is_ipoib) + return 0; + + if (! conf->scope_mask) { osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, (uint8_t) conf->rate, (uint8_t) conf->mtu, - (uint8_t) conf->scope); + 0); + return 0; + } + + for (scope = 0; scope < 16; scope++) { + if (((1<<scope) & conf->scope_mask) == 0) + continue; + osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, + (uint8_t) conf->rate, + (uint8_t) conf->mtu, + (uint8_t) scope); + } return 0; } @@ -147,11 +162,13 @@ static int partition_add_flag(unsigned lineno, struct part_conf *conf, "flag \'rate\' requires valid value" " - skipped\n", lineno); } else if (!strncmp(flag, "scope", len)) { - if (!val || (conf->scope = strtoul(val, NULL, 0)) == 0) + unsigned int scope; + if (!val || (scope = strtoul(val, NULL, 0)) == 0 || scope > 0xF) osm_log(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "flag \'scope\' requires valid value" " - skipped\n", lineno); + conf->scope_mask |= (1<<scope); [...] References: <474DFA90.6070907@nasa.gov> Message-ID: OFA bugzilla seems down, I get: Software error: Can't connect to the database. Error: Access denied for user 'ofabug_user'@'localhost' (using password: YES) Is your database installed and up and running?
Do you have the correct username and password selected in localconfig? For help, please send mail to the webmaster (webmaster at openfabrics.org), giving this error message and the time and date of the error. Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems > -----Original Message----- > From: general-bounces at lists.openfabrics.org > [mailto:general-bounces at lists.openfabrics.org] On Behalf Of > Jeff Becker > Sent: Wednesday, November 28, 2007 3:33 PM > To: general at lists.openfabrics.org > Subject: [ofa-general] OFA server patching > > Hi all. In the interest of keeping our server up to date, I > applied the > latest Ubuntu patches. Several upgrades were made, including > git. If you > have any problems, let me know. Thanks. > > -jeff > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general > From sashak at voltaire.com Wed Nov 28 16:35:52 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 29 Nov 2007 00:35:52 +0000 Subject: [ofa-general] OFA server patching In-Reply-To: <474DFA90.6070907@nasa.gov> References: <474DFA90.6070907@nasa.gov> Message-ID: <20071129003552.GA375@sashak.voltaire.com> On 15:32 Wed 28 Nov , Jeff Becker wrote: > Hi all. In the interest of keeping our server up to date, I applied the > latest Ubuntu patches. Several upgrades were made, including git. git on the server was manually compiled and installed (from ~sashak/files/git-1.5.2). As far as I can see the same git installation still be there. 
Sasha From sean.hefty at intel.com Wed Nov 28 16:25:35 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 28 Nov 2007 16:25:35 -0800 Subject: [ofa-general] [PATCH] [RFC] rdma/ucm: add support for rdma_migrate_id() Message-ID: <000101c8321e$5c6b4240$ff0da8c0@amr.corp.intel.com> This is based on user feedback from Doug Ledford at RedHat: Events that occur on an rdma_cm_id are reported to userspace through an event channel. Connection request events are reported on the event channel associated with the listen. When the connection is accepted, a new rdma_cm_id is created and automatically uses the listen event channel. This is suboptimal where the user only wants listen events on that channel. Additionally, it may be desirable to have events related to connection establishment use a different event channel than those related to already established connections. Allow the user to migrate an rdma_cm_id between event channels. All pending events associated with the rdma_cm_id are moved to the new event channel. Signed-off-by: Sean Hefty --- I will follow this post with a patch to the librdmacm to make use of this. I wanted to get feedback on the approach, in particular about the locking and use of fget(). drivers/infiniband/core/ucma.c | 92 ++++++++++++++++++++++++++++++++++++++++ include/rdma/rdma_user_cm.h | 13 +++++- 2 files changed, 104 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 90d675a..15937eb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -991,6 +992,96 @@ out: return ret; } +static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + /* Acquire mutex's based on pointer comparison to prevent deadlock. 
*/ + if (file1 < file2) { + mutex_lock(&file1->mut); + mutex_lock(&file2->mut); + } else { + mutex_lock(&file2->mut); + mutex_lock(&file1->mut); + } +} + +static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) +{ + if (file1 < file2) { + mutex_unlock(&file2->mut); + mutex_unlock(&file1->mut); + } else { + mutex_unlock(&file1->mut); + mutex_unlock(&file2->mut); + } +} + +static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) +{ + struct ucma_event *uevent, *tmp; + + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &file->event_list); +} + +static ssize_t ucma_migrate_id(struct ucma_file *new_file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_migrate_id cmd; + struct rdma_ucm_migrate_resp resp; + struct ucma_context *ctx; + struct file *filp; + struct ucma_file *cur_file; + int ret = 0; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + /* Get current fd to protect against it being closed */ + filp = fget(cmd.fd); + if (!filp) + return -ENOENT; + + /* Validate current fd and prevent destruction of id. */ + ctx = ucma_get_ctx(filp->private_data, cmd.id); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto file_put; + } + + cur_file = ctx->file; + if (cur_file == new_file) { + resp.events_reported = ctx->events_reported; + goto response; + } + + /* + * Migrate events between fd's, maintaining order, and avoiding new + * events being added before existing events. 
+ */ + ucma_lock_files(cur_file, new_file); + mutex_lock(&mut); + + list_move_tail(&ctx->list, &new_file->ctx_list); + ucma_move_events(ctx, new_file); + ctx->file = new_file; + resp.events_reported = ctx->events_reported; + + mutex_unlock(&mut); + ucma_unlock_files(cur_file, new_file); + +response: + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + + ucma_put_ctx(ctx); +file_put: + fput(filp); + return ret; +} + static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { @@ -1012,6 +1103,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, + [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id }; static ssize_t ucma_write(struct file *filp, const char __user *buf, diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index 9749c1b..c557054 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -60,7 +60,8 @@ enum { RDMA_USER_CM_CMD_SET_OPTION, RDMA_USER_CM_CMD_NOTIFY, RDMA_USER_CM_CMD_JOIN_MCAST, - RDMA_USER_CM_CMD_LEAVE_MCAST + RDMA_USER_CM_CMD_LEAVE_MCAST, + RDMA_USER_CM_CMD_MIGRATE_ID }; /* @@ -230,4 +231,14 @@ struct rdma_ucm_set_option { __u32 optlen; }; +struct rdma_ucm_migrate_id { + __u64 response; + __u32 id; + __u32 fd; +}; + +struct rdma_ucm_migrate_resp { + __u32 events_reported; +}; + #endif /* RDMA_USER_CM_H */ From sean.hefty at intel.com Wed Nov 28 16:32:31 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Wed, 28 Nov 2007 16:32:31 -0800 Subject: [ofa-general] [PATCH] [RFC] librdmacm: add rdma_migrate_id In-Reply-To: <000101c8321e$5c6b4240$ff0da8c0@amr.corp.intel.com> References: <000101c8321e$5c6b4240$ff0da8c0@amr.corp.intel.com> Message-ID: <000201c8321f$543036c0$ff0da8c0@amr.corp.intel.com> This is based on user feedback from Doug Ledford at 
RedHat: Events that occur on an rdma_cm_id are reported to userspace through an event channel. Connection request events are reported on the event channel associated with the listen. When the connection is accepted, a new rdma_cm_id is created and automatically uses the listen event channel. This is suboptimal where the user only wants listen events on that channel. Additionally, it may be desirable to have events related to connection establishment use a different event channel than those related to already established connections. Allow the user to migrate an rdma_cm_id between event channels. Signed-off-by: Sean Hefty --- I started to provide support for calling rdma_migrate_id() while the user is polling for events or making other calls on the migrating id, but while the complexity seemed doable, it just didn't seem justified based on the expected usage model. I believe that the kernel interface allows this support to be added later, if it is needed. For now, the documentation simply states that the user can only migrate an id if they are not processing events on the current event channel and not invoking another call on that id simultaneously. 
Makefile.am | 1 + examples/cmatose.c | 59 +++++++++++++++++++++++++++++++++++++++---- include/rdma/rdma_cma.h | 7 +++++ include/rdma/rdma_cma_abi.h | 13 +++++++++ man/rdma_migrate_id.3 | 27 ++++++++++++++++++++ man/ucmatose.1 | 4 +++ src/cma.c | 35 ++++++++++++++++++++++++++ src/librdmacm.map | 1 + 8 files changed, 140 insertions(+), 7 deletions(-) diff --git a/Makefile.am b/Makefile.am index 77782da..290cbc3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -54,6 +54,7 @@ man_MANS = \ man/rdma_join_multicast.3 \ man/rdma_leave_multicast.3 \ man/rdma_listen.3 \ + man/rdma_migrate_id.3 \ man/rdma_notify.3 \ man/rdma_reject.3 \ man/rdma_resolve_addr.3 \ diff --git a/examples/cmatose.c b/examples/cmatose.c index dcb6074..2f6e5f6 100644 --- a/examples/cmatose.c +++ b/examples/cmatose.c @@ -82,6 +82,7 @@ static int message_size = 100; static int message_count = 10; static uint8_t set_tos = 0; static uint8_t tos; +static uint8_t migrate = 0; static char *dst_addr; static char *src_addr; @@ -465,6 +466,35 @@ static int disconnect_events(void) return ret; } +static int migrate_channel(struct rdma_cm_id *listen_id) +{ + struct rdma_event_channel *channel; + int i, ret; + + printf("migrating to new event channel\n"); + + channel = rdma_create_event_channel(); + if (!channel) { + printf("cmatose: failed to create event channel\n"); + return -1; + } + + ret = 0; + if (listen_id) + ret = rdma_migrate_id(listen_id, channel); + + for (i = 0; i < connections && !ret; i++) + ret = rdma_migrate_id(test.nodes[i].cma_id, channel); + + if (!ret) { + rdma_destroy_event_channel(test.channel); + test.channel = channel; + } else + printf("cmatose: failure migrating to channel: %d\n", ret); + + return ret; +} + static int get_addr(char *dst, struct sockaddr_in *addr) { struct addrinfo *res; @@ -543,6 +573,13 @@ static int run_server(void) printf("data transfers complete\n"); } + + if (migrate) { + ret = migrate_channel(listen_id); + if (ret) + goto out; + } + printf("cmatose: disconnecting\n"); 
for (i = 0; i < connections; i++) { if (!test.nodes[i].connected) @@ -592,30 +629,36 @@ static int run_client(void) ret = connect_events(); if (ret) - goto out; + goto disc; if (message_count) { printf("receiving data transfers\n"); ret = poll_cqs(); if (ret) - goto out; + goto disc; printf("sending replies\n"); for (i = 0; i < connections; i++) { ret = post_sends(&test.nodes[i]); if (ret) - goto out; + goto disc; } printf("data transfers complete\n"); } ret = 0; -out: + + if (migrate) { + ret = migrate_channel(NULL); + if (ret) + goto out; + } +disc: ret2 = disconnect_events(); if (ret2) ret = ret2; - +out: return ret; } @@ -623,7 +666,7 @@ int main(int argc, char **argv) { int op, ret; - while ((op = getopt(argc, argv, "s:b:c:C:S:t:")) != -1) { + while ((op = getopt(argc, argv, "s:b:c:C:S:t:m")) != -1) { switch (op) { case 's': dst_addr = optarg; @@ -644,6 +687,9 @@ int main(int argc, char **argv) set_tos = 1; tos = (uint8_t) atoi(optarg); break; + case 'm': + migrate = 1; + break; default: printf("usage: %s\n", argv[0]); printf("\t[-s server_address]\n"); @@ -652,6 +698,7 @@ int main(int argc, char **argv) printf("\t[-C message_count]\n"); printf("\t[-S message_size]\n"); printf("\t[-t type_of_service]\n"); + printf("\t[-m(igrate)]\n"); exit(1); } } diff --git a/include/rdma/rdma_cma.h b/include/rdma/rdma_cma.h index 64b3008..76df90f 100644 --- a/include/rdma/rdma_cma.h +++ b/include/rdma/rdma_cma.h @@ -560,6 +560,13 @@ enum { int rdma_set_option(struct rdma_cm_id *id, int level, int optname, void *optval, size_t optlen); +/** + * rdma_migrate_id - Move an rdma_cm_id to a new event channel. + * @id: Communication identifier to migrate. + * @channel: New event channel for rdma_cm_id events. 
 + */ +int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel); + #ifdef __cplusplus } #endif diff --git a/include/rdma/rdma_cma_abi.h b/include/rdma/rdma_cma_abi.h index ba0e7b9..1a3a9c2 100644 --- a/include/rdma/rdma_cma_abi.h +++ b/include/rdma/rdma_cma_abi.h @@ -63,7 +63,8 @@ enum { UCMA_CMD_SET_OPTION, UCMA_CMD_NOTIFY, UCMA_CMD_JOIN_MCAST, - UCMA_CMD_LEAVE_MCAST + UCMA_CMD_LEAVE_MCAST, + UCMA_CMD_MIGRATE_ID }; struct ucma_abi_cmd_hdr { @@ -221,4 +222,14 @@ struct ucma_abi_set_option { __u32 optlen; }; +struct ucma_abi_migrate_id { + __u64 response; + __u32 id; + __u32 fd; +}; + +struct ucma_abi_migrate_resp { + __u32 events_reported; +}; + #endif /* RDMA_CMA_ABI_H */ diff --git a/man/rdma_migrate_id.3 b/man/rdma_migrate_id.3 new file mode 100644 index 0000000..006fb61 --- /dev/null +++ b/man/rdma_migrate_id.3 @@ -0,0 +1,27 @@ +.TH "RDMA_MIGRATE_ID" 3 "2007-11-13" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.SH NAME +rdma_migrate_id \- Move a communication identifier to a different event channel. +.SH SYNOPSIS +.B "#include <rdma/rdma_cma.h>" +.P +.B "int" rdma_migrate_id +.BI "(struct rdma_cm_id *" id "," +.BI "struct rdma_event_channel *" channel ");" +.SH ARGUMENTS +.IP "id" 12 +An existing communication identifier to migrate. +.IP "channel" 12 +The communication channel that events associated with the +allocated rdma_cm_id will be reported on. +.SH "DESCRIPTION" +Migrates a communication identifier to a different event channel. +.SH "NOTES" +This routine migrates a communication identifier to the specified event +channel and moves any pending events associated with the rdma_cm_id +to the new channel. Users should not poll for events on the +rdma_cm_id's current event channel or invoke other routines on the +rdma_cm_id while migrating between channels. This call will block while +there are any unacknowledged events on the current event channel. 
+.SH "SEE ALSO" +rdma_cm(7), rdma_create_event_channel(3), rdma_create_id(3), +rdma_get_cm_event(3) \ No newline at end of file diff --git a/man/ucmatose.1 b/man/ucmatose.1 index 121ea63..73477ea 100644 --- a/man/ucmatose.1 +++ b/man/ucmatose.1 @@ -35,6 +35,10 @@ The size of each message transferred, in bytes. (default 100) \-t tos Indicates the type of service used for the communication. Type of service is implementation dependent based on subnet configuration. +.TP +\-m +Tests event channel migration. Migrates all communication identifiers to +a different event channel for disconnect events. .SH "NOTES" Basic usage is to start ucmatose on a server system, then run ucmatose -s server_name on a client system. diff --git a/src/cma.c b/src/cma.c index 00ea394..20cfc19 100644 --- a/src/cma.c +++ b/src/cma.c @@ -1471,3 +1471,38 @@ int rdma_set_option(struct rdma_cm_id *id, int level, int optname, return 0; } + +int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel) +{ + struct ucma_abi_migrate_resp *resp; + struct ucma_abi_migrate_id *cmd; + struct cma_id_private *id_priv; + void *msg; + int ret, size; + + id_priv = container_of(id, struct cma_id_private, id); + CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_MIGRATE_ID, size); + cmd->id = id_priv->handle; + cmd->fd = id->channel->fd; + + ret = write(channel->fd, msg, size); + if (ret != size) + return (ret > 0) ? -ENODATA : ret; + + VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp); + + /* + * Eventually if we want to support migrating channels while events are + * being processed on the current channel, we need to block here while + * there are any outstanding events on the current channel for this id + * to prevent the user from processing events for this id on the old + * channel after this call returns. 
+ */ + pthread_mutex_lock(&id_priv->mut); + id->channel = channel; + while (id_priv->events_completed < resp->events_reported) + pthread_cond_wait(&id_priv->cond, &id_priv->mut); + pthread_mutex_unlock(&id_priv->mut); + + return 0; +} diff --git a/src/librdmacm.map b/src/librdmacm.map index 84cec7f..cb94efe 100644 --- a/src/librdmacm.map +++ b/src/librdmacm.map @@ -27,5 +27,6 @@ RDMACM_1.0 { rdma_set_option; rdma_get_local_addr; rdma_get_peer_addr; + rdma_migrate_id; local: *; }; From Jeffrey.C.Becker at nasa.gov Wed Nov 28 17:03:26 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Wed, 28 Nov 2007 17:03:26 -0800 Subject: [ofa-general] OFA server patching In-Reply-To: References: <474DFA90.6070907@nasa.gov> Message-ID: <474E0FDE.6030608@nasa.gov> Working on it... Thanks. -jeff Scott Weitzenkamp (sweitzen) wrote: > OFA bugzilla seems down, I get: > > Software error: > Can't connect to the database. > Error: Access denied for user 'ofabug_user'@'localhost' (using password: > YES) > Is your database installed and up and running? > Do you have the correct username and password selected in localconfig? > > > For help, please send mail to the webmaster (webmaster at openfabrics.org), > giving this error message and the time and date of the error. > > Scott Weitzenkamp > SQA and Release Manager > Server Virtualization Business Unit > Cisco Systems > > > > > >> -----Original Message----- >> From: general-bounces at lists.openfabrics.org >> [mailto:general-bounces at lists.openfabrics.org] On Behalf Of >> Jeff Becker >> Sent: Wednesday, November 28, 2007 3:33 PM >> To: general at lists.openfabrics.org >> Subject: [ofa-general] OFA server patching >> >> Hi all. In the interest of keeping our server up to date, I >> applied the >> latest Ubuntu patches. Several upgrades were made, including >> git. If you >> have any problems, let me know. Thanks. 
>> >> -jeff >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general >> >> From info at agrards.net Wed Nov 28 18:09:41 2007 From: info at agrards.net (=?windows-1255?B?4OTl4eQg+fL55fI=?=) Date: Wed, 28 Nov 2007 20:09:41 -0600 Subject: [ofa-general] =?windows-1255?b?5+Xm5CDy+unj5fosIPfl+OD6IOH39OQg?= =?windows-1255?b?5e769/n4+g==?= Message-ID: <20071129020937.75AD0E601A8@openfabrics.org> An HTML attachment was scrubbed... URL: From rdreier at cisco.com Wed Nov 28 19:41:22 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 28 Nov 2007 19:41:22 -0800 Subject: [ofa-general] Re: [PATCH 2.6.25 2/2] RDMA/cxgb3: Support 5.0 firmware. In-Reply-To: <20071126172846.22792.8854.stgit@dell3.ogc.int> (Steve Wise's message of "Mon, 26 Nov 2007 11:28:46 -0600") References: <20071126172457.22792.62583.stgit@dell3.ogc.int> <20071126172846.22792.8854.stgit@dell3.ogc.int> Message-ID: OK, applied 1 and 2... > Note: this change requires 5.0 firmware. I assume the change to the cxgb3 FW versions is pending in a net driver change for 2.6.25? 
From rdreier at cisco.com Wed Nov 28 19:42:43 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 28 Nov 2007 19:42:43 -0800 Subject: [ofa-general] Re: [PATCH] IB/ehca: Fix static rate if path faster than link In-Reply-To: <200711281446.29085.fenkes@de.ibm.com> (Joachim Fenkes's message of "Wed, 28 Nov 2007 15:46:28 +0200") References: <200711281446.29085.fenkes@de.ibm.com> Message-ID: thanks, applied From rdreier at cisco.com Wed Nov 28 19:44:03 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 28 Nov 2007 19:44:03 -0800 Subject: [ofa-general] [PATCH] return ENOSYS instead of -ENOSYS In-Reply-To: <20071128151507.GV25881@minantech.com> (Gleb Natapov's message of "Wed, 28 Nov 2007 17:15:07 +0200") References: <20071128151507.GV25881@minantech.com> Message-ID: thanks, applied From rdreier at cisco.com Wed Nov 28 19:46:46 2007 From: rdreier at cisco.com (Roland Dreier) Date: Wed, 28 Nov 2007 19:46:46 -0800 Subject: [ofa-general] Re: [PATCH] libmlx4: max_recv_wr must be non-zero for non-SRQ QPs In-Reply-To: <200711281244.20552.jackm@dev.mellanox.co.il> (Jack Morgenstein's message of "Wed, 28 Nov 2007 12:44:20 +0200") References: <200711281244.20552.jackm@dev.mellanox.co.il> Message-ID: thanks, applied From swise at opengridcomputing.com Wed Nov 28 20:21:53 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Wed, 28 Nov 2007 22:21:53 -0600 Subject: [ofa-general] Re: [PATCH 2.6.25 2/2] RDMA/cxgb3: Support 5.0 firmware. In-Reply-To: References: <20071126172457.22792.62583.stgit@dell3.ogc.int> <20071126172846.22792.8854.stgit@dell3.ogc.int> Message-ID: <474E3E61.60201@opengridcomputing.com> Yes. Roland Dreier wrote: > OK, applied 1 and 2... > > > Note: this change requires 5.0 firmware. > > I assume the change to the cxgb3 FW versions is pending in a net > driver change for 2.6.25? 
> - > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo at vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > From kliteyn at mellanox.co.il Wed Nov 28 21:07:48 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 29 Nov 2007 07:07:48 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-29:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-28 OpenSM git rev = Mon_Nov_26_08:12:10_2007 [b989216e1ae91e0049ec3d4980cb8e2bdad8ed49] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=480 Pass=480 Fail=0 Pass: 36 Stability IS1-16.topo 36 Pkey IS1-16.topo 36 OsmTest IS1-16.topo 36 OsmStress IS1-16.topo 36 Multicast IS1-16.topo 36 LidMgr IS1-16.topo 12 Stability IS3-loop.topo 12 Stability IS3-128.topo 12 Pkey IS3-128.topo 12 OsmTest IS3-loop.topo 12 OsmTest IS3-128.topo 12 OsmStress IS3-128.topo 12 Multicast IS3-loop.topo 12 Multicast IS3-128.topo 12 LidMgr IS3-128.topo 12 FatTree merge-roots-4-ary-2-tree.topo 12 FatTree merge-root-4-ary-3-tree.topo 12 FatTree gnu-stallion-64.topo 12 FatTree blend-4-ary-2-tree.topo 12 FatTree RhinoDDR.topo 12 FatTree FullGnu.topo 12 FatTree 4-ary-2-tree.topo 12 FatTree 2-ary-4-tree.topo 12 FatTree 12-node-spaced.topo 12 FTreeFail 4-ary-2-tree-missing-sw-link.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 12 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo Failures: From ogerlitz at voltaire.com Wed Nov 28 23:23:26 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 29 Nov 2007 09:23:26 +0200 Subject: [ofa-general] [PATCH] mlx4_core: increase max number of qp's and of srq's to 128K In-Reply-To: <200711281008.03739.jackm@dev.mellanox.co.il> References: <200711201744.15766.jackm@dev.mellanox.co.il> <4743E7D3.8090201@voltaire.com> 
<200711281008.03739.jackm@dev.mellanox.co.il> Message-ID: <474E68EE.9000304@voltaire.com> Jack Morgenstein wrote: > On Wednesday 21 November 2007 10:09, Or Gerlitz wrote: >> Why you want to increase the maxima for SRQs as well? a 1:1 ratio >> between QPs to SRQs means a broken application design, isn't it? > Not really, for the new XRC qp type. In this case, we will have one XRC connection > per multi-process application per host, with a larger number of XRC_SRQs (one per process per host). > However, the XRC SRQs act more like RD qps, so we really don't need to increase the default max SRQs. Jack, I can't really follow you, is the XRC API documented anywhere? Or. From ogerlitz at voltaire.com Wed Nov 28 23:46:13 2007 From: ogerlitz at voltaire.com (Or Gerlitz) Date: Thu, 29 Nov 2007 09:46:13 +0200 Subject: [ofa-general] [PATCH] manpage for ibv_get_cq_event In-Reply-To: <474C46F8.4090303@systemfabricworks.com> References: <474C46F8.4090303@systemfabricworks.com> Message-ID: <474E6E45.9060509@voltaire.com> frank zago wrote: > The code sample in ibv_get_cq_event is missing the case where no wc is > available. Patch attached. Hi Dotan, I have brought the need for a general man page for libibverbs, similar in concept to rdma_cm(7), that provides some general information on the library and points to the other man pages, will you able to do that for OFED 1.3? Or. 
> ------------------------------------------------------------------------ > > --- ibv_get_cq_event.3.org 2007-11-27 10:24:54.000000000 -0600 > +++ ibv_get_cq_event.3 2007-11-27 10:26:52.000000000 -0600 > @@ -115,6 +115,9 @@ > return 1; > } > .PP > + if (ne == 0) > + return 1; > +.PP > if (wc.status != IBV_WC_SUCCESS) { > fprintf(stderr, "Completion with status 0x%x was found\en", wc.status); > return 1; > > > ------------------------------------------------------------------------ > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From dotanb at dev.mellanox.co.il Thu Nov 29 02:42:38 2007 From: dotanb at dev.mellanox.co.il (Dotan Barak) Date: Thu, 29 Nov 2007 12:42:38 +0200 Subject: [ofa-general] [PATCH] manpage for ibv_get_cq_event In-Reply-To: <474E6E45.9060509@voltaire.com> References: <474C46F8.4090303@systemfabricworks.com> <474E6E45.9060509@voltaire.com> Message-ID: <474E979E.6010206@dev.mellanox.co.il> Hi.. > frank zago wrote: >> The code sample in ibv_get_cq_event is missing the case where no wc >> is available. Patch attached. > > Hi Dotan, > > I have brought the need for a general man page for libibverbs, similar > in concept to rdma_cm(7), that provides some general information on > the library and points to the other man pages, will you able to do > that for OFED 1.3? It is in my todo list for the OFED 1.3 release. Dotan From jsquyres at cisco.com Thu Nov 29 04:45:28 2007 From: jsquyres at cisco.com (Jeff Squyres) Date: Thu, 29 Nov 2007 07:45:28 -0500 Subject: [ofa-general] Bugzilla down? Message-ID: I am getting the following error when attempting to connect to bugzilla: Software error: Can't connect to the database. Error: Access denied for user 'ofabug_user'@'localhost' (using password: YES) Is your database installed and up and running? 
Do you have the correct username and password selected in localconfig? For help, please send mail to the webmaster (webmaster at openfabrics.org), giving this error message and the time and date of the error. Can someone investigate / fix? Thanks. -- Jeff Squyres Cisco Systems From sashak at voltaire.com Thu Nov 29 06:02:39 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 29 Nov 2007 14:02:39 +0000 Subject: [ofa-general] Re: [PATCH] opensm: allow multiple scopes in a partition In-Reply-To: <20071129000039.GG30090@obsidianresearch.com> References: <20071129000039.GG30090@obsidianresearch.com> Message-ID: <20071129140239.GC375@sashak.voltaire.com> Hi Rolf, On 17:00 Wed 28 Nov , Rolf Manderscheid wrote: > Hi Sasha, > > This patch allows multiple scopes to be configured for a partition. > This allows ipoib interfaces with different scopes to coexist in a > partition. The partition configuration file can now have multiple > scope=N flags and they all take effect (instead of just the last one). > > Signed-off-by: Rolf Manderscheid The idea looks good for me. Some comments about the patch are below. > > -- > > diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 > index efd6ff0..c51f386 100644 > --- a/opensm/man/opensm.8 > +++ b/opensm/man/opensm.8 > @@ -366,7 +366,8 @@ Currently recognized flags are: > sl= - specifies SL for this IPoIB MC group > (default is 0) > scope= - specifies scope for this IPoIB MC group > - (default is 2 (link local)) > + (default is 2 (link local)). Multiple scope settings > + are permitted for a partition. > > Note that values for rate, mtu, and scope should be specified as > defined in the IBTA specification (for example, mtu=4 for 2048). 
 > diff --git a/opensm/opensm/osm_prtn_config.c b/opensm/opensm/osm_prtn_config.c > index 1253031..646bf2a 100644 > --- a/opensm/opensm/osm_prtn_config.c > +++ b/opensm/opensm/osm_prtn_config.c > @@ -68,7 +68,7 @@ struct part_conf { > osm_log_t *p_log; > osm_subn_t *p_subn; > osm_prtn_t *p_prtn; > - unsigned is_ipoib, mtu, rate, sl, scope; > + unsigned is_ipoib, mtu, rate, sl, scope_mask; > boolean_t full; > }; > > @@ -89,6 +89,7 @@ static int partition_create(unsigned lineno, struct part_conf *conf, > char *name, char *id, char *flag, char *flag_val) > { > uint16_t pkey; > + unsigned int scope; > > if (!id && name && isdigit(*name)) { > id = name; > @@ -119,12 +120,26 @@ static int partition_create(unsigned lineno, struct part_conf *conf, > } > conf->p_prtn->sl = (uint8_t) conf->sl; > > - if (conf->is_ipoib) > + if (! conf->is_ipoib) No need a blank after '!'. > + return 0; > + > + if (! conf->scope_mask) { Ditto. > osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, > (uint8_t) conf->rate, > (uint8_t) conf->mtu, > - (uint8_t) conf->scope); > + 0); > + return 0; > + } > + > + for (scope = 0; scope < 16; scope++) { > + if (((1<<scope) & conf->scope_mask) == 0) > + continue; > > + osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, > + (uint8_t) conf->rate, > + (uint8_t) conf->mtu, > + (uint8_t) scope); > + } > return 0; > } > > @@ -147,11 +162,13 @@ static int partition_add_flag(unsigned lineno, struct part_conf *conf, > "flag \'rate\' requires valid value" > " - skipped\n", lineno); > } else if (!strncmp(flag, "scope", len)) { > - if (!val || (conf->scope = strtoul(val, NULL, 0)) == 0) > + unsigned int scope; > + if (!val || (scope = strtoul(val, NULL, 0)) == 0 || scope > 0xF) > osm_log(conf->p_log, OSM_LOG_VERBOSE, > "PARSE WARN: line %d: " > "flag \'scope\' requires valid value" > " - skipped\n", lineno); > + conf->scope_mask |= (1<<scope); [...] conf->scope_mask will get a wrong value. And in case of other errors too... 
Sasha > } else if (!strncmp(flag, "sl", len)) { > unsigned sl; > char *end; From xhejtman at ics.muni.cz Thu Nov 29 06:02:27 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Thu, 29 Nov 2007 15:02:27 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> Message-ID: <20071129140227.GF4422@ics.muni.cz> Hello, just a note, I found that similar problem with dma_map_single/sync_single is in the ib_ipoib layer. -- Lukáš Hejtmánek From swise at opengridcomputing.com Thu Nov 29 07:07:14 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 29 Nov 2007 09:07:14 -0600 Subject: [ofa-general] OFA server patching In-Reply-To: <474DFA90.6070907@nasa.gov> References: <474DFA90.6070907@nasa.gov> Message-ID: <474ED5A2.7090601@opengridcomputing.com> Jeff Becker wrote: > Hi all. In the interest of keeping our server up to date, I applied the > latest Ubuntu patches. Several upgrades were made, including git. If you > have any problems, let me know. Thanks. > > -jeff > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general Git seems broken for me. I can no longer use the build_ofa_kernel.sh script. 
I get this sort of error: > fatal: corrupted pack file /home/vlad/scm/ofed_1_2/.git/objects/pack/pack-914d44 > 0d906ffa47a30611df81c0597e896040fa.pack > > Failed executing git From tom at opengridcomputing.com Thu Nov 29 07:14:52 2007 From: tom at opengridcomputing.com (Tom Tucker) Date: Thu, 29 Nov 2007 09:14:52 -0600 Subject: [ofa-general] OFA server patching In-Reply-To: <474ED5A2.7090601@opengridcomputing.com> References: <474DFA90.6070907@nasa.gov> <474ED5A2.7090601@opengridcomputing.com> Message-ID: <1196349292.16169.3.camel@trinity.ogc.int> On Thu, 2007-11-29 at 09:07 -0600, Steve Wise wrote: > Jeff Becker wrote: > > Hi all. In the interest of keeping our server up to date, I applied the > > latest Ubuntu patches. Several upgrades were made, including git. If you > > have any problems, let me know. Thanks. > > > > -jeff > > _______________________________________________ > > general mailing list > > general at lists.openfabrics.org > > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > > > Git seems broken for me. I can no longer use the build_ofa_kernel.sh > script. I get this sort of error: > > > > fatal: corrupted pack file /home/vlad/scm/ofed_1_2/.git/objects/pack/pack-914d44 > > 0d906ffa47a30611df81c0597e896040fa.pack I think the version of git you're using is old and doesn't recognize some of the object types in the repository. I saw this same thing when I tried to use a git tree that had remotes created with a newer version of git. 
> > > > Failed executing git > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general From swise at opengridcomputing.com Thu Nov 29 07:16:34 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 29 Nov 2007 09:16:34 -0600 Subject: [ofa-general] OFA server patching In-Reply-To: <474ED5A2.7090601@opengridcomputing.com> References: <474DFA90.6070907@nasa.gov> <474ED5A2.7090601@opengridcomputing.com> Message-ID: <474ED7D2.6020407@opengridcomputing.com> Steve Wise wrote: > Jeff Becker wrote: >> Hi all. In the interest of keeping our server up to date, I applied the >> latest Ubuntu patches. Several upgrades were made, including git. If you >> have any problems, let me know. Thanks. >> >> -jeff >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general > > > Git seems broken for me. I can no longer use the build_ofa_kernel.sh > script. I get this sort of error: > > >> fatal: corrupted pack file >> /home/vlad/scm/ofed_1_2/.git/objects/pack/pack-914d44 >> 0d906ffa47a30611df81c0597e896040fa.pack >> >> Failed executing git > _______________________________________________ > I removed the '--reference' from the build_ofa_kernel.sh script and its building now. Jeff, I think the git update has introduced a problem with old git trees created with the old git command and new git trees created with the new git cmd... Steve. 
From ardavis at ichips.intel.com Thu Nov 29 07:58:46 2007 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 29 Nov 2007 07:58:46 -0800 Subject: [ofa-general] OFA server patching In-Reply-To: <474ED5A2.7090601@opengridcomputing.com> References: <474DFA90.6070907@nasa.gov> <474ED5A2.7090601@opengridcomputing.com> Message-ID: <474EE1B6.4080708@ichips.intel.com> Steve Wise wrote: > Jeff Becker wrote: >> Hi all. In the interest of keeping our server up to date, I applied the >> latest Ubuntu patches. Several upgrades were made, including git. If you >> have any problems, let me know. Thanks. >> > Git seems broken for me. I can no longer use the build_ofa_kernel.sh > script. I get this sort of error: > > >> fatal: corrupted pack file >> /home/vlad/scm/ofed_1_2/.git/objects/pack/pack-914d44 >> 0d906ffa47a30611df81c0597e896040fa.pack >> I see the same problem when I try to clone from /home/ardavis/scm/dapl.git fatal: corrupted pack file .objects/pack/pack-112c41022deae9531c20e0d06a15ce4a6d145802.pack From dledford at redhat.com Thu Nov 29 08:33:50 2007 From: dledford at redhat.com (Doug Ledford) Date: Thu, 29 Nov 2007 11:33:50 -0500 Subject: [ofa-general] Re: [PATCH] [RFC] librdmacm: add rdma_migrate_id In-Reply-To: <000201c8321f$543036c0$ff0da8c0@amr.corp.intel.com> References: <000101c8321e$5c6b4240$ff0da8c0@amr.corp.intel.com> <000201c8321f$543036c0$ff0da8c0@amr.corp.intel.com> Message-ID: <1196354031.28600.0.camel@firewall.xsintricity.com> On Wed, 2007-11-28 at 16:32 -0800, Sean Hefty wrote: > For now, the > documentation simply states that the user can only migrate an id if they > are not processing events on the current event channel and not invoking > another call on that id simultaneously. Sounds reasonable to me. -- Doug Ledford GPG KeyID: CFBFF194 http://people.redhat.com/dledford Infiniband specific RPMs available at http://people.redhat.com/dledford/Infiniband -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 189 bytes Desc: This is a digitally signed message part URL: From rdreier at cisco.com Thu Nov 29 08:41:10 2007 From: rdreier at cisco.com (Roland Dreier) Date: Thu, 29 Nov 2007 08:41:10 -0800 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <20071129140227.GF4422@ics.muni.cz> (Lukas Hejtmanek's message of "Thu, 29 Nov 2007 15:02:27 +0100") References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071129140227.GF4422@ics.muni.cz> Message-ID: > just a note, I found that similar problem with dma_map_single/sync_single is > in the ib_ipoib layer. I don't see any calls to any kind of dma_sync function in IPoIB?? - R. From Ashish.Batwara at lsi.com Thu Nov 29 08:59:44 2007 From: Ashish.Batwara at lsi.com (Batwara, Ashish) Date: Thu, 29 Nov 2007 09:59:44 -0700 Subject: [ofa-general] IO Size more than 48K In-Reply-To: Message-ID: <01B9E81EECACE94DBBD0A556E768FB8A01E3C54C@NAMAIL2.ad.lsil.com> Hi, We are using OFED-1.2, and using xdd and some other tools, and trying to send 1/2MB IOs, but what we are seeing in analyzer traces, that memory descriptor in SRP command shows max. 48K which means 1MB I/Os has broken into smaller SRP request from initiator. How can I have this I/O directly going to target? What parameter I need to change? 
Thanks Ashish From vuhuong at mellanox.com Thu Nov 29 09:08:15 2007 From: vuhuong at mellanox.com (Vu Pham) Date: Thu, 29 Nov 2007 09:08:15 -0800 Subject: [ofa-general] IO Size more than 48K In-Reply-To: <01B9E81EECACE94DBBD0A556E768FB8A01E3C54C@NAMAIL2.ad.lsil.com> References: <01B9E81EECACE94DBBD0A556E768FB8A01E3C54C@NAMAIL2.ad.lsil.com> Message-ID: <474EF1FF.5030900@mellanox.com> > Hi, > We are using OFED-1.2, and using xdd and some other tools, and trying to > send 1/2MB IOs, but what we are seeing in analyzer traces, that memory > descriptor in SRP command shows max. 48K which means 1MB I/Os has broken > into smaller SRP request from initiator. > How can I have this I/O directly going to target? What parameter I need > to change? > > module param srp_sg_tablesize (default is 12 ie. 12 x 4K = 48K) and/or max_sect=yyy in echo id_ext=xxx,...,max_sect=1024,service_id= > /sys/class/infiniband_srp/... -vu > Thanks > Ashish > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From caitlin.bestler at neterion.com Thu Nov 29 09:17:04 2007 From: caitlin.bestler at neterion.com (Caitlin Bestler) Date: Thu, 29 Nov 2007 09:17:04 -0800 Subject: [ofa-general] iWARP issues In-Reply-To: References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> Message-ID: <469958e00711290917w2b3acf65ua1b137f8b8e1f043@mail.gmail.com> On Nov 2, 2007 4:02 AM, Talpey, Thomas wrote: > At 05:57 PM 11/1/2007, Sean Hefty wrote: > >Does anyone know the details regarding the TCP connection retry algorithm in > >Linux? (time between retries, number of retries, etc.) > > Sure. The time between retries is variable, it starts out as a few seconds > (think, three), and backs off exponentially for a variable number of tries > (/proc/sys/net/ipv4/*retries*). 
It comes out to a pretty large number, > a minute or two typically. > > You really don't want to depend on any of this however. TCP will use all > sorts of information from other connections, routing table entries and > congestion algorithms, etc to be as adaptive as it feels it needs to be. > Constants are NEVER a good idea in networking. > > Why wouldn't you just leave the timeout to TCP, and make CM's infinite? > I agree that TCP should handle the timeout on TCP connections. Even if there are two TCP stacks I see no reason for two TCP connection timeout policies. But the CM may want to timeout the MPA Request/Response exchange at least slightly more promptly than TCP would timeout an established connection. This probably is not urgent until iWARP is sufficiently deployed to attract DoS attacks, but it would not be a good idea to lock in an absolute "all timeouts are handled at the transport layer" policy for the iWARP CM. The gap between connection setup and connection visibility will eventually make an inviting target. From chas at cmf.nrl.navy.mil Thu Nov 29 09:17:34 2007 From: chas at cmf.nrl.navy.mil (chas williams - CONTRACTOR) Date: Thu, 29 Nov 2007 12:17:34 -0500 Subject: [ofa-general] IO Size more than 48K In-Reply-To: <01B9E81EECACE94DBBD0A556E768FB8A01E3C54C@NAMAIL2.ad.lsil.com> Message-ID: <200711291717.lATHHYJp012566@cmf.nrl.navy.mil> In message <01B9E81EECACE94DBBD0A556E768FB8A01E3C54C at NAMAIL2.ad.lsil.com>,"Batw ara, Ashish" writes: >We are using OFED-1.2, and using xdd and some other tools, and trying to >send 1/2MB IOs, but what we are seeing in analyzer traces, that memory >descriptor in SRP command shows max. 48K which means 1MB I/Os has broken >into smaller SRP request from initiator. >How can I have this I/O directly going to target? What parameter I need >to change? when you login to the storage you need to set max_sect to something other than the default of 512 (blocks). try 1024 or 2048. 
then use sg_dd's direct i/o mode (dio=1) or dd with iflag=direct/oflag=direct). you should be able to bigger rdma segments that way. From winningcocacolaaward at orangemail.es Thu Nov 29 09:22:39 2007 From: winningcocacolaaward at orangemail.es (=?iso-8859-1?q?jennifer=20williams?=) Date: Thu, 29 Nov 2007 18:22:39 +0100 (CET) Subject: [ofa-general] YOU HAVE WON COCA-COLA AWARD Message-ID: <20071129172239.99FD4A13D21@smtp.latinmail.com> An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... URL: -------------- next part -------------- An HTML attachment was scrubbed... 
URL: From swise at opengridcomputing.com Thu Nov 29 09:38:25 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 29 Nov 2007 11:38:25 -0600 Subject: [ofa-general] OFA server patching In-Reply-To: <474EE1B6.4080708@ichips.intel.com> References: <474DFA90.6070907@nasa.gov> <474ED5A2.7090601@opengridcomputing.com> <474EE1B6.4080708@ichips.intel.com> Message-ID: <474EF911.5080304@opengridcomputing.com> Arlin Davis wrote: > Steve Wise wrote: >> Jeff Becker wrote: >>> Hi all. In the interest of keeping our server up to date, I applied the >>> latest Ubuntu patches. Several upgrades were made, including git. If you >>> have any problems, let me know. Thanks. >>> > > Git seems broken for me. I can no longer use the build_ofa_kernel.sh >> script. I get this sort of error: >> >> >>> fatal: corrupted pack file >>> /home/vlad/scm/ofed_1_2/.git/objects/pack/pack-914d44 >>> 0d906ffa47a30611df81c0597e896040fa.pack >>> > > I see the same problem when I try to clone from /home/ardavis/scm/dapl.git > > fatal: corrupted pack file > .objects/pack/pack-112c41022deae9531c20e0d06a15ce4a6d145802.pack Can we please back out the git changes? Its all balled up! Steve. From harms at alcf.anl.gov Thu Nov 29 09:39:51 2007 From: harms at alcf.anl.gov (Kevin Harms) Date: Thu, 29 Nov 2007 11:39:51 -0600 Subject: [ofa-general] IO Size more than 48K In-Reply-To: <474EF1FF.5030900@mellanox.com> References: <01B9E81EECACE94DBBD0A556E768FB8A01E3C54C@NAMAIL2.ad.lsil.com> <474EF1FF.5030900@mellanox.com> Message-ID: <3A453CF1-5FFC-44BF-8F72-7E3EF5AA6E41@alcf.anl.gov> you may also have to go to /sys/block/sdX/queue and echo 1024 > max_sectors_kb if you use the srp_daemon you can also add: a max_sect=2048 to /etc/srp_daemon.conf kevin On Nov 29, 2007, at 11:08 AM, Vu Pham wrote: > >> Hi, >> We are using OFED-1.2, and using xdd and some other tools, and >> trying to >> send 1/2MB IOs, but what we are seeing in analyzer traces, that >> memory >> descriptor in SRP command shows max. 
48K which means 1MB I/Os has >> broken >> into smaller SRP request from initiator. >> How can I have this I/O directly going to target? What parameter I >> need >> to change? >> >> > > module param srp_sg_tablesize (default is 12 ie. 12 x 4K = 48K) > and/or > max_sect=yyy in echo id_ext=xxx,...,max_sect=1024,service_id= > /sys/ > class/infiniband_srp/... > > -vu > >> Thanks >> Ashish >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general >> > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From bs at q-leap.de Thu Nov 29 09:45:21 2007 From: bs at q-leap.de (Bernd Schubert) Date: Thu, 29 Nov 2007 18:45:21 +0100 Subject: [ofa-general] MT25418 In-Reply-To: References: <200711082141.53113.bs@q-leap.de> <200711092141.51243.bs@q-leap.de> Message-ID: <200711291845.21883.bs@q-leap.de> Hello Roland, On Wednesday 28 November 2007 00:28:08 Roland Dreier wrote: > > Yes exactly and reproducable on all 6 nodes with connectX presently here > > in our test lab. > > Just by accident I first always had connected port 2. Shortly before I > > already thought it doesn't work at all, I tried the other port... > > Sorry for the slow reply. I just swapped cables on one of my test thanks for your help and thanks a lot for testing it yourself. No problem it took some time, we are presently also fighting a countless number of hardware and software bugs. This dual port problem actually has a low priority. > systems, and IPoIB works fine for me on port 2 of my ConnectX HCA. > This is with the kernel around 2.6.24-rc1 that happens to be running > there. 
So I'm not sure what's different about your system. > > I would suggest raising this with your HCA vendor since perhaps you > have a bad batch of HCAs or old firmware or something. So we know its either a problem with 2.6.22 or a hardware problem. I will test 2.6.24-rc1 during the next days and keep you posted. Thanks again, Bernd -- Bernd Schubert Q-Leap Networks GmbH From swise at opengridcomputing.com Thu Nov 29 09:46:19 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Thu, 29 Nov 2007 11:46:19 -0600 Subject: [ofa-general] iWARP issues In-Reply-To: <469958e00711290917w2b3acf65ua1b137f8b8e1f043@mail.gmail.com> References: <000701c81cd2$3d4178f0$9c98070a@amr.corp.intel.com> <469958e00711290917w2b3acf65ua1b137f8b8e1f043@mail.gmail.com> Message-ID: <474EFAEB.3080901@opengridcomputing.com> Caitlin Bestler wrote: > On Nov 2, 2007 4:02 AM, Talpey, Thomas wrote: >> At 05:57 PM 11/1/2007, Sean Hefty wrote: >>> Does anyone know the details regarding the TCP connection retry algorithm in >>> Linux? (time between retries, number of retries, etc.) >> Sure. The time between retries is variable, it starts out as a few seconds >> (think, three), and backs off exponentially for a variable number of tries >> (/proc/sys/net/ipv4/*retries*). It comes out to a pretty large number, >> a minute or two typically. >> >> You really don't want to depend on any of this however. TCP will use all >> sorts of information from other connections, routing table entries and >> congestion algorithms, etc to be as adaptive as it feels it needs to be. >> Constants are NEVER a good idea in networking. >> >> Why wouldn't you just leave the timeout to TCP, and make CM's infinite? >> > > I agree that TCP should handle the timeout on TCP connections. Even if > there are two TCP stacks I see no reason for two TCP connection timeout > policies. 
> > But the CM may want to timeout the MPA Request/Response exchange > at least slightly more promptly than TCP would timeout an established > connection. > > This probably is not urgent until iWARP is sufficiently deployed to attract > DoS attacks, but it would not be a good idea to lock in an absolute "all > timeouts are handled at the transport layer" policy for the iWARP CM. > The gap between connection setup and connection visibility will eventually > make an inviting target. I think the provider drivers handle the MPA timeout now. But the value used should probably be some sort of iwarp-specific connection setup attribute... From Jeffrey.C.Becker at nasa.gov Thu Nov 29 09:53:34 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Thu, 29 Nov 2007 09:53:34 -0800 Subject: [ofa-general] Bugzilla down? In-Reply-To: References: Message-ID: <474EFC9E.4050203@nasa.gov> Hi Jeff. I upgraded our server to the latest patches yesterday, and unfortunately, bugzilla broke as a result. I'm investigating it, but any suggestions you have would be helpful. Do you know who set up Bugzilla originally? Thanks. -jeff Jeff Squyres wrote: > I am getting the following error when attempting to connect to bugzilla: > > Software error: > Can't connect to the database. Error: Access denied for user > 'ofabug_user'@'localhost' (using password: YES) Is your database > installed and up and running? Do you have the correct username and > password selected in localconfig? > For help, please send mail to the webmaster > (webmaster at openfabrics.org), giving this error message and the time > and date of the error. > > > Can someone investigate / fix? > > Thanks. 
> > --Jeff Squyres > Cisco Systems > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From rvm at obsidianresearch.com Thu Nov 29 09:56:52 2007 From: rvm at obsidianresearch.com (Rolf Manderscheid) Date: Thu, 29 Nov 2007 10:56:52 -0700 Subject: [ofa-general] Re: [PATCH] opensm: allow multiple scopes in a partition In-Reply-To: <20071129140239.GC375@sashak.voltaire.com> References: <20071129000039.GG30090@obsidianresearch.com> <20071129140239.GC375@sashak.voltaire.com> Message-ID: <474EFD64.3010301@obsidianresearch.com> >> >> @@ -147,11 +162,13 @@ static int partition_add_flag(unsigned lineno, struct part_conf *conf, >> "flag \'rate\' requires valid value" >> " - skipped\n", lineno); >> } else if (!strncmp(flag, "scope", len)) { >> - if (!val || (conf->scope = strtoul(val, NULL, 0)) == 0) >> + unsigned int scope; >> + if (!val || (scope = strtoul(val, NULL, 0)) == 0 || scope > 0xF) >> osm_log(conf->p_log, OSM_LOG_VERBOSE, >> "PARSE WARN: line %d: " >> "flag \'scope\' requires valid value" >> " - skipped\n", lineno); >> + conf->scope_mask |= (1<> > > In case when val is NULL scope will be non-initialized and > conf->scope_mask will get a wrong value. And in case of other errors > too... > Yes, there's obviously a missing else. I'll repost right away. Rolf From rvm at obsidianresearch.com Thu Nov 29 09:58:46 2007 From: rvm at obsidianresearch.com (Rolf Manderscheid) Date: Thu, 29 Nov 2007 10:58:46 -0700 Subject: [ofa-general] [PATCHv2] opensm: allow multiple scopes in a partition Message-ID: <20071129175846.GH30090@obsidianresearch.com> Hi Sasha, This patch allows multiple scopes to be configured for a partition. This allows ipoib interfaces with different scopes to coexist in a partition. 
The partition configuration file can now have multiple scope=N flags and they all take effect (instead of just the last one). Signed-off-by: Rolf Manderscheid -- diff --git a/opensm/man/opensm.8 b/opensm/man/opensm.8 index efd6ff0..c51f386 100644 --- a/opensm/man/opensm.8 +++ b/opensm/man/opensm.8 @@ -366,7 +366,8 @@ Currently recognized flags are: sl= - specifies SL for this IPoIB MC group (default is 0) scope= - specifies scope for this IPoIB MC group - (default is 2 (link local)) + (default is 2 (link local)). Multiple scope settings + are permitted for a partition. Note that values for rate, mtu, and scope should be specified as defined in the IBTA specification (for example, mtu=4 for 2048). diff --git a/opensm/opensm/osm_prtn_config.c b/opensm/opensm/osm_prtn_config.c index 1253031..811b9eb 100644 --- a/opensm/opensm/osm_prtn_config.c +++ b/opensm/opensm/osm_prtn_config.c @@ -68,7 +68,7 @@ struct part_conf { osm_log_t *p_log; osm_subn_t *p_subn; osm_prtn_t *p_prtn; - unsigned is_ipoib, mtu, rate, sl, scope; + unsigned is_ipoib, mtu, rate, sl, scope_mask; boolean_t full; }; @@ -89,6 +89,7 @@ static int partition_create(unsigned lineno, struct part_conf *conf, char *name, char *id, char *flag, char *flag_val) { uint16_t pkey; + unsigned int scope; if (!id && name && isdigit(*name)) { id = name; @@ -119,12 +120,26 @@ static int partition_create(unsigned lineno, struct part_conf *conf, } conf->p_prtn->sl = (uint8_t) conf->sl; - if (conf->is_ipoib) + if (!conf->is_ipoib) + return 0; + + if (!conf->scope_mask) { osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, (uint8_t) conf->rate, (uint8_t) conf->mtu, - (uint8_t) conf->scope); + 0); + return 0; + } + + for (scope = 0; scope < 16; scope++) { + if (((1<scope_mask) == 0) + continue; + osm_prtn_add_mcgroup(conf->p_log, conf->p_subn, conf->p_prtn, + (uint8_t) conf->rate, + (uint8_t) conf->mtu, + (uint8_t) scope); + } return 0; } @@ -147,11 +162,14 @@ static int partition_add_flag(unsigned lineno, struct 
part_conf *conf, "flag \'rate\' requires valid value" " - skipped\n", lineno); } else if (!strncmp(flag, "scope", len)) { - if (!val || (conf->scope = strtoul(val, NULL, 0)) == 0) + unsigned int scope; + if (!val || (scope = strtoul(val, NULL, 0)) == 0 || scope > 0xF) osm_log(conf->p_log, OSM_LOG_VERBOSE, "PARSE WARN: line %d: " "flag \'scope\' requires valid value" " - skipped\n", lineno); + else + conf->scope_mask |= (1< References: <474EFC9E.4050203@nasa.gov> Message-ID: I unfortunately had nothing to do with the Bugzilla setup. Sorry! :-( -----Original Message----- From: Jeff Becker [mailto:Jeffrey.C.Becker at nasa.gov] Sent: Thursday, November 29, 2007 12:54 PM To: Jeff Squyres (jsquyres) Cc: OpenFabrics General Subject: Re: [ofa-general] Bugzilla down? Hi Jeff. I upgraded our server to the latest patches yesterday, and unfortunately, bugzilla broke as a result. I'm investigating it, but any suggestions you have would be helpful. Do you know who set up Bugzilla originally? Thanks. -jeff Jeff Squyres wrote: > I am getting the following error when attempting to connect to bugzilla: > > Software error: > Can't connect to the database. Error: Access denied for user > 'ofabug_user'@'localhost' (using password: YES) Is your database > installed and up and running? Do you have the correct username and > password selected in localconfig? > For help, please send mail to the webmaster > (webmaster at openfabrics.org), giving this error message and the time > and date of the error. > > > Can someone investigate / fix? > > Thanks. 
> > --Jeff Squyres > Cisco Systems > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From sean.hefty at intel.com Thu Nov 29 10:34:41 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 29 Nov 2007 10:34:41 -0800 Subject: [ofa-general] RE: [PATCH] librdmacm/man: fix-up man pages In-Reply-To: <15ddcffd0711280307u7a89c6c2q2854b071f74d9123@mail.gmail.com> References: <000101c81a64$3582de80$9c98070a@amr.corp.intel.com> <4726EEAC.3070105@voltaire.com> <472755C4.10600@ichips.intel.com> <47285F53.4060402@voltaire.com> <4728BF4A.1060301@ichips.intel.com> <15ddcffd0710311320v6b91b3cm3be0f7882e30ad2b@mail.gmail.com> <000001c81cb5$4ce12160$9c98070a@amr.corp.intel.com> <15ddcffd0711270435t12a18dc3waac2596b3884ac72@mail.gmail.com> <000001c8311a$176cdbe0$63248686@amr.corp.intel.com> <15ddcffd0711280307u7a89c6c2q2854b071f74d9123@mail.gmail.com> Message-ID: <000801c832b6$81feb850$f5d8180a@amr.corp.intel.com> >Some users have approached me and said that its unclear from the man >pages for some values of the connection param structure what are their >legal values. Reviewing this a little, I think we should add the >maximum values for the retry_count and rnr_retry_count under the >infiniband specific section of the rdma_connect and rdma_accept pages. These have been updated and pushed upstream. Please let me know if you're aware of any other documentation changes. 
- Sean From Ashish.Batwara at lsi.com Thu Nov 29 10:38:51 2007 From: Ashish.Batwara at lsi.com (Batwara, Ashish) Date: Thu, 29 Nov 2007 11:38:51 -0700 Subject: [ofa-general] IO Size more than 48K In-Reply-To: <474EF1FF.5030900@mellanox.com> Message-ID: <01B9E81EECACE94DBBD0A556E768FB8A01E3C5BC@NAMAIL2.ad.lsil.com> echo id_ext=200600A0B81138C9,max_sect=2048,ioc_guid=00a0b81112da0003,dgid=fe8 000000000000000a0b81112da0001,pkey=ffff,service_id=200600a0b81138c9> /sys/class/infiniband_srp/srp-mthca0-1/add_target We have used above, but still no luck. Thanks Ashish -----Original Message----- From: Vu Pham [mailto:vuhuong at mellanox.com] Sent: Thursday, November 29, 2007 11:08 AM To: Batwara, Ashish Cc: openib-general at openib.org Subject: Re: [ofa-general] IO Size more than 48K > Hi, > We are using OFED-1.2, and using xdd and some other tools, and trying to > send 1/2MB IOs, but what we are seeing in analyzer traces, that memory > descriptor in SRP command shows max. 48K which means 1MB I/Os has broken > into smaller SRP request from initiator. > How can I have this I/O directly going to target? What parameter I need > to change? > > module param srp_sg_tablesize (default is 12 ie. 12 x 4K = 48K) and/or max_sect=yyy in echo id_ext=xxx,...,max_sect=1024,service_id= > /sys/class/infiniband_srp/... -vu > Thanks > Ashish > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From ssufficool at rov.sbcounty.gov Thu Nov 29 10:58:54 2007 From: ssufficool at rov.sbcounty.gov (Sufficool, Stanley) Date: Thu, 29 Nov 2007 10:58:54 -0800 Subject: [ofa-general] SRP Target Port Message-ID: I have a 2 port Lion cub DDR. How do I get the SRP Target to listen on a specific port? What I am trying to do is separate traffic from initiators or at best load balance across the ports. 
You can trunk IPoIB, can you trunk or round robin SRP on the target? -------------- next part -------------- An HTML attachment was scrubbed... URL: From vuhuong at mellanox.com Thu Nov 29 15:12:48 2007 From: vuhuong at mellanox.com (Vu Pham) Date: Thu, 29 Nov 2007 15:12:48 -0800 Subject: [ofa-general] SRP Target Port In-Reply-To: References: Message-ID: <474F4770.8030100@mellanox.com> > I am already doing this using Windows SRP and windows soft raid 1. Why RAID-1? I don't get the picture > What > I am looking for is how to isolate an exported partitions to a specific > port on the target. Currently SRPT picks the first port on the HCA and > I'm not clear on how to set SRPT to listen on port 2. > > No. You got it wrong. Current SRPT advertises the same target info (ioc_guid, id_ext, serviceID) and listens on all HCAs/ports. It depends on where the connection request is coming from (which HCA and which port), i.e. cm_id->device for the HCA and conn_req_param->port for the port on that HCA; SRPT will then use that port to create the qp... For linux srp initiator you can send the login request to specific target port with dgid=xxxxxxxx in login string ie. echo id_ext=.... dgid= and echo id_ext=... dgid= then you will have two paths to same set of luns of SRPT Finally you can use dm_multipath/multipath to set the policy for these two paths (round-robin, fail-over...) or if you don't use multipath you can manually spread the I/O to the first half of the luns of the first set (thru path 1) and the second half of the luns of the second set (thru path 2) for performance testing on two target ports for example I don't know whether the Win srp initiator can log in twice using the same target info to different target ports or not. > Do I need two SRPT instances or can a single instance be set to listen > on both ports?
> > -----Original Message----- > From: Vu Pham [mailto:vuhuong at mellanox.com] > Sent: Thursday, November 29, 2007 11:07 AM > To: Sufficool, Stanley > Cc: OpenFabrics General > Subject: Re: [ofa-general] SRP Target Port > > > You can use dm-multipath and multipath on initiator side to trunk or > fail-over > > >> I have a 2 port Lion cub DDR. How do I get the SRP Target to listen on >> a specific port? What I am trying to do is separate traffic from >> initiators or at best load balance across the ports. >> >> You can trunk IPoIB, can you trunk or round robin SRP on the target? >> ---------------------------------------------------------------------- >> -- >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general >> > > From Jeffrey.C.Becker at nasa.gov Thu Nov 29 15:26:40 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Thu, 29 Nov 2007 15:26:40 -0800 Subject: [ofa-general] test Message-ID: <474F4AB0.9020500@nasa.gov> From Jeffrey.C.Becker at nasa.gov Thu Nov 29 14:51:45 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Thu, 29 Nov 2007 14:51:45 -0800 Subject: [ofa-general] test Message-ID: <474F4281.5070601@nasa.gov> From Jeffrey.C.Becker at nasa.gov Thu Nov 29 15:08:02 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Thu, 29 Nov 2007 15:08:02 -0800 Subject: [ofa-general] test Message-ID: <474F4652.9090409@nasa.gov> From sean.hefty at intel.com Thu Nov 29 12:55:53 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Thu, 29 Nov 2007 12:55:53 -0800 Subject: [ofa-general] RE: disconnect issues/questions In-Reply-To: <000001c828e9$f0ad4f40$2ccc180a@amr.corp.intel.com> References: <15ddcffd0711142341g7b83d917t2fcc4b9a64e54f55@mail.gmail.com><15ddcffd0711142358m55192a25qaa2e419045f6d0ea@mail.gmail.com> 
<000001c828e9$f0ad4f40$2ccc180a@amr.corp.intel.com> Message-ID: <000001c832ca$3bedec50$60d8180a@amr.corp.intel.com> >>B) would an RDMA_CM_EVENT_DISCONNECTED event --always-- be generated >>also for the side that called rdma_disconnect()? in both cases (yes >>and no), we need to document this. Always is too strong, but this is typically the case for IB. (A device removal event would prevent this from occurring.) I do not know if this is the case for iWarp yet. - Sean From tom at opengridcomputing.com Thu Nov 29 15:38:33 2007 From: tom at opengridcomputing.com (Tom Tucker) Date: Thu, 29 Nov 2007 17:38:33 -0600 Subject: [ofa-general] NFS-RDMA for OFED 1.3 In-Reply-To: <474F4652.9090409@nasa.gov> References: <474F4652.9090409@nasa.gov> Message-ID: <1196379513.16169.39.camel@trinity.ogc.int> Jeff: There's an updated version of the server transport switch and rdma transport provider available here: git://linux-nfs.org/~tomtucker/nfs-rdma-dev-2.6.git Tom From sashak at voltaire.com Thu Nov 29 15:48:29 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Thu, 29 Nov 2007 23:48:29 +0000 Subject: [ofa-general] Re: [PATCHv2] opensm: allow multiple scopes in a partition In-Reply-To: <20071129175846.GH30090@obsidianresearch.com> References: <20071129175846.GH30090@obsidianresearch.com> Message-ID: <20071129234829.GI375@sashak.voltaire.com> On 10:58 Thu 29 Nov , Rolf Manderscheid wrote: > Hi Sasha, > > This patch allows multiple scopes to be configured for a partition. > This allows ipoib interfaces with different scopes to coexist in a > partition. The partition configuration file can now have multiple > scope=N flags and they all take effect (instead of just the last one). > > Signed-off-by: Rolf Manderscheid Applied. Thanks. Sasha From sweitzen at cisco.com Thu Nov 29 14:59:19 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Thu, 29 Nov 2007 14:59:19 -0800 Subject: [ofa-general] how to use Intel MPI with dapl2?
In-Reply-To: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> References: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> Message-ID: I am using Intel MPI 3.1 build 26 with OFED 1.3 beta2 on RHEL4 x86_64. Intel MPI works as before with dapl1, but I see very slow performance with dapl2. Are there extra command-line params I need to use dapl2? For example: $ /data/software/qa/MPI/intel_mpi/intelmpi-3.1-`uname -m`/bin/mpiexec -genv I_MPI_DEBUG 3 -n 2 osu_latency.x [1] MPI startup(): DAPL provider on rank 1:svbu-qa1850-2 [0] MPI startup(): socket data transfer mode [0] MPI Startup(): process is pinned to CPU00 on node svbu-qa1850-1 [1] MPI startup(): socket data transfer mode [1] MPI Startup(): process is pinned to CPU00 on node svbu-qa1850-2 [0] Rank Pid Node name [0] 0 9605 svbu-qa1850-1 [0] 1 8547 svbu-qa1850-2 # OSU MPI Latency Test (Version 2.1) # Size Latency (us) 0 124.96 1 124.98 2 99.62 4 63.04 8 62.98 Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems -------------- next part -------------- An HTML attachment was scrubbed... URL: From Jeffrey.C.Becker at nasa.gov Thu Nov 29 12:41:35 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Thu, 29 Nov 2007 12:41:35 -0800 Subject: [ofa-general] test Message-ID: <474F23FF.9050503@nasa.gov> plz ignore From Jeffrey.C.Becker at nasa.gov Thu Nov 29 14:37:05 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Thu, 29 Nov 2007 14:37:05 -0800 Subject: [ofa-general] test Message-ID: <474F3F11.7030202@nasa.gov> plz ignore. -jeff From ssufficool at rov.sbcounty.gov Thu Nov 29 13:24:01 2007 From: ssufficool at rov.sbcounty.gov (Sufficool, Stanley) Date: Thu, 29 Nov 2007 13:24:01 -0800 Subject: [ofa-general] SRP Target Port In-Reply-To: <474F0DD2.5060606@mellanox.com> Message-ID: I am already doing this using Windows SRP and windows soft raid 1. What I am looking for is how to isolate an exported partitions to a specific port on the target. 
Currently SRPT picks the first port on the HCA and I'm not clear on how to set SRPT to listen on port 2. Do I need two SRPT instances or can a single instance be set to listen on both ports? -----Original Message----- From: Vu Pham [mailto:vuhuong at mellanox.com] Sent: Thursday, November 29, 2007 11:07 AM To: Sufficool, Stanley Cc: OpenFabrics General Subject: Re: [ofa-general] SRP Target Port You can use dm-multipath and multipath on initiator side to trunk or fail-over > I have a 2 port Lion cub DDR. How do I get the SRP Target to listen on > a specific port? What I am trying to do is separate traffic from > initiators or at best load balance across the ports. > > You can trunk IPoIB, can you trunk or round robin SRP on the target? > ---------------------------------------------------------------------- > -- > > _______________________________________________ > general mailing list > general at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > > To unsubscribe, please visit > http://openib.org/mailman/listinfo/openib-general From Ted.Kim at Sun.COM Thu Nov 29 13:50:11 2007 From: Ted.Kim at Sun.COM (Ted H. Kim) Date: Thu, 29 Nov 2007 13:50:11 -0800 Subject: [ofa-general] website problems Message-ID: <474F3413.3090003@sun.com> Folks, What's with the main OFA website? http://openfabrics.org gives me a raw directory listing as follows. What happened to the index.html? 
-ted Index of / Name Last modified Size Description [DIR] Parent Directory 05-Feb-2007 13:52 - [DIR] OLD-bugzilla222/ 21-Dec-2006 12:18 - [DIR] bugzilla-2.22.1/ 05-Jan-2007 13:55 - [DIR] bugzilla/ 05-Jan-2007 13:55 - [DIR] drupal-4.7.3/ 09-Oct-2006 15:17 - [DIR] drupal/ 09-Oct-2006 15:17 - [DIR] gitweb/ 28-May-2007 10:37 - [DIR] old-sites/ 09-Oct-2006 16:46 - [DIR] openfabrics.org/ 28-Nov-2007 10:48 - [DIR] openfabrics.org_ORIG/ 07-Mar-2007 08:13 - [DIR] openib.org/ 28-Nov-2007 10:48 - [DIR] svn.openfabrics.org/ 27-Dec-2006 05:05 - [DIR] tiki/ 04-Jan-2007 14:31 - [DIR] tikiwiki-1.9.7/ 04-Jan-2007 14:31 - [DIR] wiki/ 04-Jan-2007 14:31 - Apache/1.3.34 Server at www.openfabrics.org Port 80 -- Ted H. Kim Sun Microsystems, Inc. ted.kim at sun.com 222 North Sepulveda Blvd., 10th Floor (310) 341-1116 El Segundo, CA 90245 (310) 341-1120 FAX From Jeffrey.C.Becker at nasa.gov Thu Nov 29 16:19:43 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Thu, 29 Nov 2007 16:19:43 -0800 Subject: [ofa-general] website problems In-Reply-To: <474F3413.3090003@sun.com> References: <474F3413.3090003@sun.com> Message-ID: <474F571F.1020706@nasa.gov> Sorry. I botched it with my upgrade. It should be working now. I'll be more careful next time. -jeff Ted H. Kim wrote: > Folks, > > What's with the main OFA website? > > http://openfabrics.org > gives me a raw directory listing as follows. > What happened to the index.html? 
> > -ted > > > > Index of / > > Name Last modified Size Description > > [DIR] Parent Directory 05-Feb-2007 13:52 - > [DIR] OLD-bugzilla222/ 21-Dec-2006 12:18 - > [DIR] bugzilla-2.22.1/ 05-Jan-2007 13:55 - > [DIR] bugzilla/ 05-Jan-2007 13:55 - > [DIR] drupal-4.7.3/ 09-Oct-2006 15:17 - > [DIR] drupal/ 09-Oct-2006 15:17 - > [DIR] gitweb/ 28-May-2007 10:37 - > [DIR] old-sites/ 09-Oct-2006 16:46 - > [DIR] openfabrics.org/ 28-Nov-2007 10:48 - > [DIR] openfabrics.org_ORIG/ 07-Mar-2007 08:13 - > [DIR] openib.org/ 28-Nov-2007 10:48 - > [DIR] svn.openfabrics.org/ 27-Dec-2006 05:05 - > [DIR] tiki/ 04-Jan-2007 14:31 - > [DIR] tikiwiki-1.9.7/ 04-Jan-2007 14:31 - > [DIR] wiki/ 04-Jan-2007 14:31 - > > Apache/1.3.34 Server at www.openfabrics.org Port 80 > > > > From rreynolds at opengridcomputing.com Thu Nov 29 16:30:05 2007 From: rreynolds at opengridcomputing.com (Robert Reynolds) Date: Thu, 29 Nov 2007 18:30:05 -0600 Subject: [ofa-general] Two element types on cancel_list in cancel_mads()? Message-ID: <474F598D.70701@opengridcomputing.com> Sean, In cancel_mads, elements from two different lists are added to the cancel_list: wait_list and local_list. Subsequent processing of the cancel_list treats all elements as struct ib_mad_send_wr_private, and uses the send_buf field of that structure. But it appears to me that the items from local_list are actually of type struct ib_mad_local_private, and hence the reference to send_buf for these elements is incorrect. Can you help me understand how this works? Thanks. Robert From ssufficool at rov.sbcounty.gov Thu Nov 29 16:37:01 2007 From: ssufficool at rov.sbcounty.gov (Sufficool, Stanley) Date: Thu, 29 Nov 2007 16:37:01 -0800 Subject: [ofa-general] SRP Target Port In-Reply-To: <474F4770.8030100@mellanox.com> Message-ID: Sorry, I assumed it was only listening on the first port due to Windows initiators only connecting to the first port. The RAID-1 at the initiator (Windows Dynamic Disk) is used for mirroring across 2 SAN controllers. 
Overkill, but this is what was asked for. I will see about using multipath on Win-SRP, however I think I'm stuck with manually spreading the load per port. -----Original Message----- From: Vu Pham [mailto:vuhuong at mellanox.com] Sent: Thursday, November 29, 2007 3:13 PM To: Sufficool, Stanley Cc: OpenFabrics General Subject: Re: [ofa-general] SRP Target Port > I am already doing this using Windows SRP and windows soft raid 1. Why RAID-1? I don't get the picture > What > I am looking for is how to isolate an exported partitions to a > specific port on the target. Currently SRPT picks the first port on > the HCA and I'm not clear on how to set SRPT to listen on port 2. > > No. You got it wrong. Current SRPT advertises the same target info (ioc_guid, id_ext, serviceID) and listens on all HCAs/ports. It depends on where the connection request coming which HCA and which port ie. cm_id->devcie for HCA and conn_req_param->port for which port on the HCA then SRPT will use that port to create qp... For linux srp initiator you can send the login request to specific target port with dgid=xxxxxxxx in login string ie. echo id_ext=.... dgid= and echo id_ext=... dgid= then you will have two paths to same set of luns of SRPT Finally you can use dm_multipath/multipath to set the policy for these two paths (round-robin, fail-over...) or if you don't use multipath you can manually spread the I/O to the first haft luns of the first set (thru path 1) and second half luns of the second set (thru path 2) for performance testing on two target ports for example I don't know Win srp initiator can do login twice using same target info to different target ports or not. > Do I need two SRPT instances or can a single instance be set to listen > on both ports? 
> > -----Original Message----- > From: Vu Pham [mailto:vuhuong at mellanox.com] > Sent: Thursday, November 29, 2007 11:07 AM > To: Sufficool, Stanley > Cc: OpenFabrics General > Subject: Re: [ofa-general] SRP Target Port > > > You can use dm-multipath and multipath on initiator side to trunk or > fail-over > > >> I have a 2 port Lion cub DDR. How do I get the SRP Target to listen >> on a specific port? What I am trying to do is separate traffic from >> initiators or at best load balance across the ports. >> >> You can trunk IPoIB, can you trunk or round robin SRP on the target? >> --------------------------------------------------------------------- >> - >> -- >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit >> http://openib.org/mailman/listinfo/openib-general >> > > From mshefty at ichips.intel.com Thu Nov 29 16:43:16 2007 From: mshefty at ichips.intel.com (Sean Hefty) Date: Thu, 29 Nov 2007 16:43:16 -0800 Subject: [ofa-general] Two element types on cancel_list in cancel_mads()? In-Reply-To: <474F598D.70701@opengridcomputing.com> References: <474F598D.70701@opengridcomputing.com> Message-ID: <474F5CA4.8030604@ichips.intel.com> > In cancel_mads, elements from two different lists are added to the > cancel_list: wait_list and local_list. Subsequent processing of the > cancel_list treats all elements as struct ib_mad_send_wr_private, and > uses the send_buf field of that structure. But it appears to me that > the items from local_list are actually of type struct > ib_mad_local_private, and hence the reference to send_buf for these > elements is incorrect. Can you help me understand how this works? I was looking at the local_list handling in cancel_mads() and the rest of mad code myself. Hal knows this part of the code better than I do, maybe he can look here and see if there's a definite problem. 
This looks like the cause of the bug Dotan just reported. - Sean From service at bbandtsecure.com Thu Nov 29 16:39:10 2007 From: service at bbandtsecure.com (service at bbandtsecure.com) Date: Thu, 29 Nov 2007 19:39:10 -0500 Subject: [ofa-general] BB&T - IMPORTANT NOTICE Message-ID: An HTML attachment was scrubbed... URL: From ardavis at ichips.intel.com Thu Nov 29 19:23:45 2007 From: ardavis at ichips.intel.com (Arlin Davis) Date: Thu, 29 Nov 2007 19:23:45 -0800 Subject: [ofa-general] how to use Intel MPI with dapl2? In-Reply-To: References: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> Message-ID: <474F8241.4040802@ichips.intel.com> Scott Weitzenkamp (sweitzen) wrote: > I am using Intel MPI 3.1 build 26 with OFED 1.3 beta2 on RHEL4 x86_64. > Intel MPI works as before with dapl1, but I see very slow performance > with dapl2. Are there extra command-line params I need to use dapl2? Intel MPI 3.1.26 does not support uDAPL v2. How did you configure your servers to run Intel MPI with v2 libraries? Did you put the ofa-v2-ib0 entry at the top of the /etc/dat.conf file or specify a v2 device via "-genv I_MPI_DEVICE rdssm:ofa-v2-ib0"? I didn't see a device option on your command line. Anyway, uDAPL v2 support is not there yet. > > $ /data/software/qa/MPI/intel_mpi/intelmpi-3.1-`uname -m`/bin/mpiexec > -genv I_MPI_DEBUG 3 -n 2 osu_latency.x > *[1] MPI startup(): DAPL provider from L string> on rank 1:svbu-qa1850-2 Looks like it failed over to sockets device because of errors with the RDMA device specified in dat.conf. Something looks weird with your dat.conf configuration because Intel MPI is showing >From d7ac904ff0204b4bf84025f4141eb087934c2e94 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Tue, 27 Nov 2007 19:03:59 -0800 Subject: [PATCH] opensm/include/opensm/osm_event_plugin.h: fix comment Signed-off-by: Ira K. 
Weiny --- opensm/include/opensm/osm_event_plugin.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/opensm/include/opensm/osm_event_plugin.h b/opensm/include/opensm/osm_event_plugin.h index 02ba35b..0b69d48 100644 --- a/opensm/include/opensm/osm_event_plugin.h +++ b/opensm/include/opensm/osm_event_plugin.h @@ -160,7 +160,7 @@ typedef struct { } __osm_epi_plugin_t; /** ========================================================================= - * The database structure should be considered opaque + * The plugin structure should be considered opaque */ typedef struct { void *handle; -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-opensm-include-opensm-osm_event_plugin.h-fix-commen.patch Type: application/octet-stream Size: 892 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 29 19:36:06 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 29 Nov 2007 19:36:06 -0800 Subject: [ofa-general] [PATCH] opensm/opensm/osm_event_plugin.c: clean up version check error message a bit Message-ID: <20071129193606.525ebab3.weiny2@llnl.gov> >From 5d7d913e43dacc6cd41a5cdb3092ccd3c1896d5f Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Tue, 27 Nov 2007 19:12:27 -0800 Subject: [PATCH] opensm/opensm/osm_event_plugin.c: clean up version check error message a bit Signed-off-by: Ira K. 
Weiny --- opensm/opensm/osm_event_plugin.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/opensm/opensm/osm_event_plugin.c b/opensm/opensm/osm_event_plugin.c index 6002a15..09d2121 100644 --- a/opensm/opensm/osm_event_plugin.c +++ b/opensm/opensm/osm_event_plugin.c @@ -97,7 +97,9 @@ osm_epi_plugin_t *osm_epi_construct(osm_log_t * p_log, char *plugin_name) /* Check the version to make sure this module will work with us */ if (rc->impl->interface_version != OSM_EVENT_PLUGIN_INTERFACE_VER) { osm_log(p_log, OSM_LOG_ERROR, + "Error opening %s: " "%s symbol is the wrong version %d != %d\n", + plugin_name, OSM_EVENT_PLUGIN_IMPL_NAME, rc->impl->interface_version, OSM_EVENT_PLUGIN_INTERFACE_VER); -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... Name: 0001-opensm-opensm-osm_event_plugin.c-clean-up-version-c.patch Type: application/octet-stream Size: 1045 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 29 19:36:34 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 29 Nov 2007 19:36:34 -0800 Subject: [ofa-general] [PATCH] opensm/opensm/osm_event_plugin.c: remove duplicate header include Message-ID: <20071129193634.7941995f.weiny2@llnl.gov> >From 5b0d395de6b958cf51b2b3cd42baaf6764550902 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 28 Nov 2007 12:45:04 -0800 Subject: [PATCH] opensm/opensm/osm_event_plugin.c: remove duplicate header include Signed-off-by: Ira K. Weiny --- opensm/opensm/osm_event_plugin.c | 2 -- 1 files changed, 0 insertions(+), 2 deletions(-) diff --git a/opensm/opensm/osm_event_plugin.c b/opensm/opensm/osm_event_plugin.c index 09d2121..5f062cf 100644 --- a/opensm/opensm/osm_event_plugin.c +++ b/opensm/opensm/osm_event_plugin.c @@ -31,8 +31,6 @@ * */ -#include - /****h* OpenSM Event plugin interface * DESCRIPTION * Database interface to record subnet events -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0001-opensm-opensm-osm_event_plugin.c-remove-duplicate-h.patch Type: application/octet-stream Size: 751 bytes Desc: not available URL: From weiny2 at llnl.gov Thu Nov 29 19:37:20 2007 From: weiny2 at llnl.gov (Ira Weiny) Date: Thu, 29 Nov 2007 19:37:20 -0800 Subject: [ofa-general] [PATCH] opensm/opensm/osm_perfmgr_db.c: fix clearing previous count when "out of band" Message-ID: <20071129193720.05dec436.weiny2@llnl.gov> >From f0e6e9c967b4ea1bcd3528bab2f2785bc17762a5 Mon Sep 17 00:00:00 2001 From: Ira K. Weiny Date: Wed, 28 Nov 2007 12:41:42 -0800 Subject: [PATCH] opensm/opensm/osm_perfmgr_db.c: fix clearing previous count when "out of band" clear occurs Signed-off-by: Ira K. Weiny --- opensm/opensm/osm_perfmgr_db.c | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/opensm/opensm/osm_perfmgr_db.c b/opensm/opensm/osm_perfmgr_db.c index fd2db11..b98434b 100644 --- a/opensm/opensm/osm_perfmgr_db.c +++ b/opensm/opensm/osm_perfmgr_db.c @@ -337,7 +337,7 @@ perfmgr_db_err_t perfmgr_db_clear_prev_err(perfmgr_db_t * db, uint64_t guid, uint8_t port) { _db_node_t *node = NULL; - perfmgr_db_data_cnt_reading_t *previous = NULL; + perfmgr_db_err_reading_t *previous = NULL; perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS; cl_plock_excl_acquire(&(db->lock)); @@ -345,10 +345,10 @@ perfmgr_db_clear_prev_err(perfmgr_db_t * db, uint64_t guid, uint8_t port) if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS) goto Exit; - previous = &(node->ports[port].dc_previous); + previous = &(node->ports[port].err_previous); memset(previous, 0, sizeof(*previous)); - node->ports[port].dc_previous.time = time(NULL); + node->ports[port].err_previous.time = time(NULL); Exit: cl_plock_release(&(db->lock)); -- 1.5.1 -------------- next part -------------- A non-text attachment was scrubbed... 
Name: 0001-opensm-opensm-osm_perfmgr_db.c-fix-clearing-previou.patch Type: application/octet-stream Size: 1392 bytes Desc: not available URL: From kliteyn at mellanox.co.il Thu Nov 29 21:11:51 2007 From: kliteyn at mellanox.co.il (kliteyn at mellanox.co.il) Date: 30 Nov 2007 07:11:51 +0200 Subject: [ofa-general] nightly osm_sim report 2007-11-30:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-29 OpenSM git rev = Mon_Nov_26_08:12:10_2007 [b989216e1ae91e0049ec3d4980cb8e2bdad8ed49] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=480 Pass=479 Fail=1 Pass: 36 Stability IS1-16.topo 36 Pkey IS1-16.topo 36 OsmTest IS1-16.topo 36 OsmStress IS1-16.topo 36 Multicast IS1-16.topo 36 LidMgr IS1-16.topo 12 Stability IS3-loop.topo 12 Stability IS3-128.topo 12 Pkey IS3-128.topo 12 OsmTest IS3-loop.topo 12 OsmTest IS3-128.topo 12 OsmStress IS3-128.topo 12 Multicast IS3-loop.topo 12 Multicast IS3-128.topo 12 FatTree merge-roots-4-ary-2-tree.topo 12 FatTree merge-root-4-ary-3-tree.topo 12 FatTree gnu-stallion-64.topo 12 FatTree blend-4-ary-2-tree.topo 12 FatTree RhinoDDR.topo 12 FatTree FullGnu.topo 12 FatTree 4-ary-2-tree.topo 12 FatTree 2-ary-4-tree.topo 12 FatTree 12-node-spaced.topo 12 FTreeFail 4-ary-2-tree-missing-sw-link.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 12 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 11 LidMgr IS3-128.topo Failures: 1 LidMgr IS3-128.topo From telmamalaquias at yahoo.com.br Thu Nov 29 21:37:09 2007 From: telmamalaquias at yahoo.com.br (Theresa Joseph) Date: Fri, 30 Nov 2007 12:37:09 +0700 Subject: [ofa-general] Man Lebt nur einmal - probiers aus ! 
Message-ID: <01c8334d$b9313f90$53f199ca@telmamalaquias> Versuchen Sie unser Produkt und Sie werden fuhlen was unsere Kunden bestatigen Original Qualitat - 100% wirksam Viiiaaaggra 10 pills x 100 mg + Ciiiaaaaaallis 10 pills x 20 mg 48,06 Euro Viiiaaaggra 30 x 50mg 42,12 Euro - 1,41 Euro pro Stuck - Sie sparen: 20,98 Euro Viiiaaaggra 90 x 50mg 112,50 Euro - 1.25 Euro pro Stuck - Sie sparen: 82,84 Euro Viiiaaaggra 120 x 50mg 135,96 Euro - 1.13 Euro pro Stuck - Sie sparen: 117,48 Euro Viiiaaaggra 180 x 50mg 192,58 Euro - 1.07 Euro pro Stuck - Sie sparen: 187,41 Euro Ciiiaaaaaallis 30 x 20mg 73,19 Euro - 2,44 Euro pro Stuck - Sie sparen: 9,09 Euro Ciiiaaaaaallis 90 x 20mg 169,27 Euro - 1,88 Euro pro Stuck - Sie sparen: 77,62 Euro Ciiiaaaaaallis 120 x 20mg 213,15 Euro - 1,78 Euro pro Stuck - Sie sparen: 116,08 Euro Ciiiaaaaaallis 180 x 20mg 311,62 Euro - 1,73 Euro pro Stuck - Sie sparen: 181,12 Euro {$konditions_de} {$konditions_de} {$konditions_de} {$konditions_de} {$konditions_de} {$konditions_de} {$konditions_de} Mit unseren Produkten vergessen Sie Ihre Enttauschungen, anhaltende Versagensangste und wiederholte peinliche Situationen Nur fur kurze Zeit - vier Pillen umsonst erhalten (bitte warten Sie einen Moment bis die Seite vollstandig geladen wird) -------------- next part -------------- An HTML attachment was scrubbed... URL: From sweitzen at cisco.com Thu Nov 29 22:20:56 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Thu, 29 Nov 2007 22:20:56 -0800 Subject: [ofa-general] how to use Intel MPI with dapl2? In-Reply-To: <474F8241.4040802@ichips.intel.com> References: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> <474F8241.4040802@ichips.intel.com> Message-ID: > How did you configure your servers to run Intel MPI with v2 > libraries? I only installed the DAPL 2.0 libs. 
Scott From vlad at lists.openfabrics.org Fri Nov 30 02:54:17 2007 From: vlad at lists.openfabrics.org (Vladimir Sokolovsky Mellanox) Date: Fri, 30 Nov 2007 02:54:17 -0800 (PST) Subject: [ofa-general] ofa_1_3_kernel 20071130-0200 daily build status Message-ID: <20071130105417.5748BE6002C@openfabrics.org> This email was generated automatically, please do not reply git_url: git://git.openfabrics.org/ofed_1_3/linux-2.6.git git_branch: ofed_kernel Common build parameters: --with-ipoib-mod --with-sdp-mod --with-srp-mod --with-user_mad-mod --with-user_access-mod --with-mthca-mod --with-mlx4-mod --with-core-mod --with-addr_trans-mod --with-rds-mod --with-cxgb3-mod --with-nes-mod Passed: Passed on i686 with linux-2.6.22 Passed on i686 with linux-2.6.21.1 Passed on i686 with linux-2.6.18 Passed on i686 with linux-2.6.17 Passed on i686 with linux-2.6.16 Passed on i686 with linux-2.6.19 Passed on i686 with linux-2.6.13 Passed on i686 with linux-2.6.14 Passed on i686 with linux-2.6.15 Passed on i686 with linux-2.6.12 Passed on ppc64 with linux-2.6.15 Passed on x86_64 with linux-2.6.22 Passed on x86_64 with linux-2.6.19 Passed on x86_64 with linux-2.6.18 Passed on x86_64 with linux-2.6.12 Passed on ppc64 with linux-2.6.12 Passed on ppc64 with linux-2.6.16 Passed on powerpc with linux-2.6.12 Passed on x86_64 with linux-2.6.16 Passed on powerpc with linux-2.6.13 Passed on ppc64 with linux-2.6.14 Passed on powerpc with linux-2.6.15 Passed on x86_64 with linux-2.6.20 Passed on x86_64 with linux-2.6.17 Passed on x86_64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.19 Passed on powerpc with linux-2.6.14 Passed on x86_64 with linux-2.6.14 Passed on ppc64 with linux-2.6.19 Passed on x86_64 with linux-2.6.15 Passed on ppc64 with linux-2.6.17 Passed on ppc64 with linux-2.6.18 Passed on ia64 with linux-2.6.12 Passed on x86_64 with linux-2.6.9-55.ELsmp Passed on x86_64 with linux-2.6.13 Passed on ia64 with linux-2.6.18 Passed on ia64 with linux-2.6.17 Passed on ia64 with linux-2.6.22 
Passed on ia64 with linux-2.6.14 Passed on ia64 with linux-2.6.15 Passed on ia64 with linux-2.6.23 Passed on x86_64 with linux-2.6.16.21-0.8-smp Passed on x86_64 with linux-2.6.16.43-0.3-smp Passed on ia64 with linux-2.6.16 Passed on ia64 with linux-2.6.21.1 Passed on ia64 with linux-2.6.13 Passed on x86_64 with linux-2.6.9-42.ELsmp Passed on ppc64 with linux-2.6.13 Passed on x86_64 with linux-2.6.18-8.el5 Passed on ia64 with linux-2.6.16.21-0.8-default Passed on x86_64 with linux-2.6.18-53.el5 Passed on x86_64 with linux-2.6.18-1.2798.fc6 Passed on ppc64 with linux-2.6.18-8.el5 Failed: Build failed on i686 with 2.6.15-23-server Log: -I/usr/local/include/scst \ -I/home/vlad/tmp/ofa_1_3_kernel-20071130-0200_check/drivers/infiniband/ulp/srpt \ -I/home/vlad/tmp/ofa_1_3_kernel-20071130-0200_check/drivers/net/cxgb3 \ -Iinclude \ $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ ' \ modules make: *** /lib/modules/2.6.15-23-server/build: No such file or directory. Stop. make: *** [kernel] Error 2 ---------------------------------------------------------------------------------- From xhejtman at ics.muni.cz Fri Nov 30 04:19:28 2007 From: xhejtman at ics.muni.cz (Lukas Hejtmanek) Date: Fri, 30 Nov 2007 13:19:28 +0100 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071129140227.GF4422@ics.muni.cz> Message-ID: <20071130121928.GB4259@ics.muni.cz> On Thu, Nov 29, 2007 at 08:41:10AM -0800, Roland Dreier wrote: > > just a note, I found that similar problem with dma_map_single/sync_single > > is in the ib_ipoib layer. > > I don't see any calls to any kind of dma_sync function in IPoIB?? You can see stack trace here. Fatal DMA error! 
Please use 'swiotlb=force' ----------- cut here --------- please bite here --------- Kernel BUG at arch/x86_64/kernel/../../i386/kernel/pci-dma-xen.c:333 invalid opcode: 0000 1 SMP CPU 0 Modules linked in: ib_ipoib ib_cm ipv6 nfs lockd nfs_acl sunrpc ib_sa ib_mthca ib_mad ib_core e1000 dm_mod parport_pc lp parport xfs ata_piix ahci piix mptsas mptscsih mptbase scsi_transport_sas raid0 sata_nv libata amd74xx sd_mod scsi_mod ide_disk ide_core Pid: 2183, comm: modprobe Not tainted 2.6.18-xen31-smp #19 RIP: e030: dma_map_single+0x13f/0x18f RSP: e02b:ffff880000dd7c68 EFLAGS: 00010296 RAX: 000000000000002f RBX: ffff88009032401c RCX: ffffffff80426ec8 RDX: ffffffffff57c000 RSI: 0000000000000001 RDI: ffffffff80426ec0 RBP: 000000005ee7101c R08: ffffffff80426ec8 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000ff4 R13: ffff8800005ccc50 R14: ffff88008f498008 R15: ffff880099bad8f0 FS: 00002aaaaadedb00(0000) GS:ffffffff804aa000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 Process modprobe (pid: 2183, threadinfo ffff880000dd6000, task ffff8800005da0c0) Stack: 0000000000000000 ffff88009c6cfbd0 ffff880090366500 0000000000000000 ffff880090366000 ffffffff883179cf 000000000000000f ffff880090366000 ffff880090366500 0000000000000000 ffff880090366000 ffff880000f8d000 Call Trace: :ib_ipoib:ipoib_cm_alloc_rx_skb+0x92/0x282 :ib_ipoib:ipoib_cm_dev_init+0x351/0x38f :ib_ipoib:ipoib_transport_dev_init+0xe0/0x257 __kmalloc_track_caller+0x12f/0x13f :ib_ipoib:ipoib_ib_dev_init+0x2d/0x6f :ib_ipoib:ipoib_dev_init+0x9c/0xda :ib_ipoib:ipoib_add_one+0x16d/0x3a1 :ib_core:ib_register_client+0x58/0x86 :ib_ipoib:ipoib_init_module+0xc0/0xe7 sys_init_module+0x16e1/0x180a system_call+0x86/0x8b system_call+0x0/0x8b -- Lukáš Hejtmánek From swise at opengridcomputing.com Fri Nov 30 06:32:41 2007 From: swise at opengridcomputing.com (Steve Wise) Date: Fri, 30 Nov 2007 08:32:41 -0600 Subject: [ofa-general] [GIT PULL ofed-1.2.5] - RDMA/cxgb3 - fixes and 5.0 firmware 
support Message-ID: <47501F09.4060800@opengridcomputing.com> Vlad, please pull cxgb3 fixes for ofed-1.2.5 from: git://git.openfabrics.org/~swise/ofed-1.2.5 stevo These are cxgb3 bug fixes and PPC64 additions that we need for ofed-1.2.5 (stay tuned for ofed-1.3 patches soon). The patches are all accepted upstream and were posted here: http://www.spinics.net/lists/netdev/msg47492.html and here: http://www.spinics.net/lists/netdev/msg48240.html Also, please pull version 1.1.0 of libcxgb3 from: git://git.openfabrics.org/~swise/libcxgb3 ofed_1_2_5 The library and drivers need to be included together as they are both needed to support the chelsio 5.0 firmware. Alsoalso: After you integrate these, can you crank a daily OFED-1.2.5.3 build including all this? Thanks, Steve. From sashak at voltaire.com Fri Nov 30 07:05:16 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:05:16 +0000 Subject: [ofa-general] Re: [PATCH] opensm/include/opensm/osm_event_plugin.h: fix comment In-Reply-To: <20071129193540.70e5798d.weiny2@llnl.gov> References: <20071129193540.70e5798d.weiny2@llnl.gov> Message-ID: <20071130150516.GK375@sashak.voltaire.com> On 19:35 Thu 29 Nov , Ira Weiny wrote: > From d7ac904ff0204b4bf84025f4141eb087934c2e94 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Tue, 27 Nov 2007 19:03:59 -0800 > Subject: [PATCH] opensm/include/opensm/osm_event_plugin.h: fix comment > > > Signed-off-by: Ira K. Weiny Applied. Thanks. 
Sasha From sashak at voltaire.com Fri Nov 30 07:06:22 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:06:22 +0000 Subject: [ofa-general] Re: [PATCH] opensm/opensm/osm_event_plugin.c: clean up version check error message a bit In-Reply-To: <20071129193606.525ebab3.weiny2@llnl.gov> References: <20071129193606.525ebab3.weiny2@llnl.gov> Message-ID: <20071130150622.GL375@sashak.voltaire.com> On 19:36 Thu 29 Nov , Ira Weiny wrote: > From 5d7d913e43dacc6cd41a5cdb3092ccd3c1896d5f Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Tue, 27 Nov 2007 19:12:27 -0800 > Subject: [PATCH] opensm/opensm/osm_event_plugin.c: clean up version check error message a bit > > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From sashak at voltaire.com Fri Nov 30 07:06:37 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:06:37 +0000 Subject: [ofa-general] Re: [PATCH] opensm/opensm/osm_event_plugin.c: remove duplicate header include In-Reply-To: <20071129193634.7941995f.weiny2@llnl.gov> References: <20071129193634.7941995f.weiny2@llnl.gov> Message-ID: <20071130150637.GM375@sashak.voltaire.com> On 19:36 Thu 29 Nov , Ira Weiny wrote: > From 5b0d395de6b958cf51b2b3cd42baaf6764550902 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 28 Nov 2007 12:45:04 -0800 > Subject: [PATCH] opensm/opensm/osm_event_plugin.c: remove duplicate header include > > > Signed-off-by: Ira K. Weiny Applied. Thanks. 
Sasha From sashak at voltaire.com Fri Nov 30 07:06:54 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:06:54 +0000 Subject: [ofa-general] Re: [PATCH] opensm/opensm/osm_perfmgr_db.c: fix clearing previous count when "out of band" In-Reply-To: <20071129193720.05dec436.weiny2@llnl.gov> References: <20071129193720.05dec436.weiny2@llnl.gov> Message-ID: <20071130150654.GN375@sashak.voltaire.com> On 19:37 Thu 29 Nov , Ira Weiny wrote: > From f0e6e9c967b4ea1bcd3528bab2f2785bc17762a5 Mon Sep 17 00:00:00 2001 > From: Ira K. Weiny > Date: Wed, 28 Nov 2007 12:41:42 -0800 > Subject: [PATCH] opensm/opensm/osm_perfmgr_db.c: fix clearing previous count when "out of band" > clear occurs > > Signed-off-by: Ira K. Weiny Applied. Thanks. Sasha From sashak at voltaire.com Fri Nov 30 07:42:19 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:42:19 +0000 Subject: [ofa-general] [PATCH] opensm: move vendor specific header files to include/vendor Message-ID: <20071130154219.GO375@sashak.voltaire.com> Move vendor specific header files osm_ts_useraccess.h and osm_umadt.h from core opensm to vendor include directory. 
Signed-off-by: Sasha Khapyorsky --- opensm/include/Makefile.am | 4 +- opensm/include/opensm/osm_ts_useraccess.h | 52 ---------- opensm/include/opensm/osm_umadt.h | 142 --------------------------- opensm/include/vendor/osm_ts_useraccess.h | 52 ++++++++++ opensm/include/vendor/osm_umadt.h | 142 +++++++++++++++++++++++++++ opensm/libvendor/osm_vendor_mlx_anafa.c | 3 +- opensm/libvendor/osm_vendor_mlx_hca_anafa.c | 2 +- opensm/libvendor/osm_vendor_mlx_ts.c | 3 +- opensm/libvendor/osm_vendor_mlx_ts_anafa.c | 3 +- opensm/libvendor/osm_vendor_ts.c | 3 +- opensm/libvendor/osm_vendor_umadt.c | 2 +- 11 files changed, 202 insertions(+), 206 deletions(-) delete mode 100644 opensm/include/opensm/osm_ts_useraccess.h delete mode 100644 opensm/include/opensm/osm_umadt.h create mode 100644 opensm/include/vendor/osm_ts_useraccess.h create mode 100644 opensm/include/vendor/osm_umadt.h diff --git a/opensm/include/Makefile.am b/opensm/include/Makefile.am index 540aa26..d9ed2c3 100644 --- a/opensm/include/Makefile.am +++ b/opensm/include/Makefile.am @@ -45,9 +45,7 @@ EXTRA_DIST = \ $(srcdir)/opensm/osm_port_profile.h \ $(srcdir)/opensm/osm_sminfo_rcv.h \ $(srcdir)/opensm/osm_multicast.h \ - $(srcdir)/opensm/osm_ts_useraccess.h \ $(srcdir)/opensm/osm_sa_class_port_info.h \ - $(srcdir)/opensm/osm_umadt.h \ $(srcdir)/opensm/osm_node_info_rcv.h \ $(srcdir)/opensm/osm_base.h \ $(srcdir)/opensm/osm_sa_sminfo_record.h \ @@ -155,6 +153,8 @@ EXTRA_DIST = \ $(srcdir)/vendor/osm_vendor_al.h \ $(srcdir)/vendor/osm_vendor_mtl.h \ $(srcdir)/vendor/osm_vendor_sa_api.h \ + $(srcdir)/vendor/osm_ts_useraccess.h \ + $(srcdir)/vendor/osm_umadt.h \ $(srcdir)/vendor/osm_mtl_bind.h pkgincludedir = $(includedir)/infiniband diff --git a/opensm/include/opensm/osm_ts_useraccess.h b/opensm/include/opensm/osm_ts_useraccess.h deleted file mode 100644 index d68c924..0000000 --- a/opensm/include/opensm/osm_ts_useraccess.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. 
All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - */ - -#include "ts_ib_useraccess.h" - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS -typedef struct ib_user_mad_filter osm_ts_user_mad_filter; -typedef struct ib_set_port_info_ioctl osm_ts_set_port_info_ioctl; -typedef struct ib_get_port_info_ioctl osm_ts_get_port_info_ioctl; -typedef struct ib_gid_entry_ioctl osm_ts_gid_entry_ioctl; - -END_C_DECLS diff --git a/opensm/include/opensm/osm_umadt.h b/opensm/include/opensm/osm_umadt.h deleted file mode 100644 index 809f478..0000000 --- a/opensm/include/opensm/osm_umadt.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. - * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. - * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -/* - * Abstract: - * Declaration of osm_mad_wrapper_t. - * This object represents the context wrapper for OpenSM MAD processing. - * This object is part of the OpenSM family of objects. - * - * Environment: - * Linux User Mode - * - * $Revision: 1.4 $ - */ - -#ifndef _OSM_UMADT_h_ -#define _OSM_UMADT_h_ - -#include "iba/ib_types.h" -#include -#include -#include "umadt.h" -#include "ibt.h" - -#ifdef __cplusplus -# define BEGIN_C_DECLS extern "C" { -# define END_C_DECLS } -#else /* !__cplusplus */ -# define BEGIN_C_DECLS -# define END_C_DECLS -#endif /* __cplusplus */ - -BEGIN_C_DECLS - -typedef struct _umadt_obj_t { - void *umadt_handle; - UMADT_INTERFACE uMadtInterface; - IBT_INTERFACE IbtInterface; - boolean init_done; - cl_spinlock_t register_lock; - cl_qlist_t register_list; - osm_log_t *p_log; - uint32_t timeout; - -} umadt_obj_t; -/*********/ - -/****s* OpenSM: Umadt MAD Wrapper/osm_bind_info -* NAME -* osm_bind_info -* -* DESCRIPTION -* Context needed for processing individual MADs -* -* SYNOPSIS -*/ - -typedef struct _mad_bind_info_t { - cl_list_item_t list_item; - umadt_obj_t *p_umadt_obj; - osm_mad_pool_t *p_mad_pool; - osm_vend_mad_recv_callback_t mad_recv_callback; - void *client_context; - cl_thread_t recv_processor_thread; - cl_spinlock_t trans_ctxt_lock; - cl_qlist_t trans_ctxt_list; - cl_timer_t timeout_timer; - cl_spinlock_t timeout_list_lock; - cl_qlist_t timeout_list; - RegisterClassStruct umadt_reg_class; - MADT_HANDLE umadt_handle; /* Umadt type */ - -} mad_bind_info_t; - -typedef struct _trans_context_t { - cl_list_item_t list_item; - uint64_t trans_id; - uint64_t sent_time; /* micro secs */ - void *context; -} trans_context_t; - -/* -* FIELDS -* list_item -* 
List linkage for pools and lists. MUST BE FIRST MEMBER! -* -* p_mad_pool -* Pointer to the MAD pool to be used by mads with this bind handle. -* -* mad_recv_callback -* Callback function called by the mad receive processor. -* -* client_context -* context to be passed to the receive callback. -* -* recv_processor_thread -* Thread structure for the receive processor thread. -* -* umadt_reg_class -* Umadt register class struct used to register with Umadt. -* -* umadt_handle -* Umadt returns this handle from a registration call. The transport layer -* uses this handle to talk to Umadt. -* -* SEE ALSO -*********/ - -END_C_DECLS -#endif /*_OSM_UMADT_h_ */ diff --git a/opensm/include/vendor/osm_ts_useraccess.h b/opensm/include/vendor/osm_ts_useraccess.h new file mode 100644 index 0000000..d68c924 --- /dev/null +++ b/opensm/include/vendor/osm_ts_useraccess.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include "ts_ib_useraccess.h" + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS +typedef struct ib_user_mad_filter osm_ts_user_mad_filter; +typedef struct ib_set_port_info_ioctl osm_ts_set_port_info_ioctl; +typedef struct ib_get_port_info_ioctl osm_ts_get_port_info_ioctl; +typedef struct ib_gid_entry_ioctl osm_ts_gid_entry_ioctl; + +END_C_DECLS diff --git a/opensm/include/vendor/osm_umadt.h b/opensm/include/vendor/osm_umadt.h new file mode 100644 index 0000000..809f478 --- /dev/null +++ b/opensm/include/vendor/osm_umadt.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* + * Abstract: + * Declaration of osm_mad_wrapper_t. + * This object represents the context wrapper for OpenSM MAD processing. + * This object is part of the OpenSM family of objects. 
+ * + * Environment: + * Linux User Mode + * + * $Revision: 1.4 $ + */ + +#ifndef _OSM_UMADT_h_ +#define _OSM_UMADT_h_ + +#include "iba/ib_types.h" +#include +#include +#include "umadt.h" +#include "ibt.h" + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +typedef struct _umadt_obj_t { + void *umadt_handle; + UMADT_INTERFACE uMadtInterface; + IBT_INTERFACE IbtInterface; + boolean init_done; + cl_spinlock_t register_lock; + cl_qlist_t register_list; + osm_log_t *p_log; + uint32_t timeout; + +} umadt_obj_t; +/*********/ + +/****s* OpenSM: Umadt MAD Wrapper/osm_bind_info +* NAME +* osm_bind_info +* +* DESCRIPTION +* Context needed for processing individual MADs +* +* SYNOPSIS +*/ + +typedef struct _mad_bind_info_t { + cl_list_item_t list_item; + umadt_obj_t *p_umadt_obj; + osm_mad_pool_t *p_mad_pool; + osm_vend_mad_recv_callback_t mad_recv_callback; + void *client_context; + cl_thread_t recv_processor_thread; + cl_spinlock_t trans_ctxt_lock; + cl_qlist_t trans_ctxt_list; + cl_timer_t timeout_timer; + cl_spinlock_t timeout_list_lock; + cl_qlist_t timeout_list; + RegisterClassStruct umadt_reg_class; + MADT_HANDLE umadt_handle; /* Umadt type */ + +} mad_bind_info_t; + +typedef struct _trans_context_t { + cl_list_item_t list_item; + uint64_t trans_id; + uint64_t sent_time; /* micro secs */ + void *context; +} trans_context_t; + +/* +* FIELDS +* list_item +* List linkage for pools and lists. MUST BE FIRST MEMBER! +* +* p_mad_pool +* Pointer to the MAD pool to be used by mads with this bind handle. +* +* mad_recv_callback +* Callback function called by the mad receive processor. +* +* client_context +* context to be passed to the receive callback. +* +* recv_processor_thread +* Thread structure for the receive processor thread. +* +* umadt_reg_class +* Umadt register class struct used to register with Umadt. 
+* +* umadt_handle +* Umadt returns this handle from a registration call. The transport layer +* uses this handle to talk to Umadt. +* +* SEE ALSO +*********/ + +END_C_DECLS +#endif /*_OSM_UMADT_h_ */ diff --git a/opensm/libvendor/osm_vendor_mlx_anafa.c b/opensm/libvendor/osm_vendor_mlx_anafa.c index bc79af8..41a521a 100644 --- a/opensm/libvendor/osm_vendor_mlx_anafa.c +++ b/opensm/libvendor/osm_vendor_mlx_anafa.c @@ -51,8 +51,7 @@ #include #include #include - -#include +#include /** * FORWARD REFERENCES diff --git a/opensm/libvendor/osm_vendor_mlx_hca_anafa.c b/opensm/libvendor/osm_vendor_mlx_hca_anafa.c index ca3b4ea..81656ea 100644 --- a/opensm/libvendor/osm_vendor_mlx_hca_anafa.c +++ b/opensm/libvendor/osm_vendor_mlx_hca_anafa.c @@ -50,7 +50,7 @@ #include #include -#include +#include /******************************************************************************** * diff --git a/opensm/libvendor/osm_vendor_mlx_ts.c b/opensm/libvendor/osm_vendor_mlx_ts.c index 83ed1b1..26955d2 100644 --- a/opensm/libvendor/osm_vendor_mlx_ts.c +++ b/opensm/libvendor/osm_vendor_mlx_ts.c @@ -57,8 +57,7 @@ #include #include #include - -#include +#include typedef struct _osmv_TOPSPIN_transport_mgr_ { int device_fd; diff --git a/opensm/libvendor/osm_vendor_mlx_ts_anafa.c b/opensm/libvendor/osm_vendor_mlx_ts_anafa.c index f3569a7..6498d18 100644 --- a/opensm/libvendor/osm_vendor_mlx_ts_anafa.c +++ b/opensm/libvendor/osm_vendor_mlx_ts_anafa.c @@ -59,8 +59,7 @@ #include #include #include - -#include +#include static void __osmv_TOPSPIN_ANAFA_mad_addr_to_osm_addr(IN osm_vendor_t const *p_vend, diff --git a/opensm/libvendor/osm_vendor_ts.c b/opensm/libvendor/osm_vendor_ts.c index 04b4331..9351004 100644 --- a/opensm/libvendor/osm_vendor_ts.c +++ b/opensm/libvendor/osm_vendor_ts.c @@ -42,11 +42,10 @@ #include #include #include +#include #include #include -#include - /* Since a race can accure on requests. 
Meaning - a response is received before the send_callback is called - we will save both the madw_p and the fact diff --git a/opensm/libvendor/osm_vendor_umadt.c b/opensm/libvendor/osm_vendor_umadt.c index fd643fa..e761452 100644 --- a/opensm/libvendor/osm_vendor_umadt.c +++ b/opensm/libvendor/osm_vendor_umadt.c @@ -71,7 +71,7 @@ #include #include -#include +#include /* GEN1 includes */ #include "umadt_so.h" -- 1.5.3.4.206.g58ba4 From sashak at voltaire.com Fri Nov 30 07:43:12 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:43:12 +0000 Subject: [ofa-general] [PATCH] opensm: remove unused flag Message-ID: <20071130154312.GP375@sashak.voltaire.com> Remove unused (always FALSE) 'ignore_errors' flag from the context of port_info mad. Pointed out by Hal Rosenstock. Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_madw.h | 1 - opensm/opensm/osm_lid_mgr.c | 1 - opensm/opensm/osm_link_mgr.c | 12 +----------- opensm/opensm/osm_node_info_rcv.c | 2 -- opensm/opensm/osm_pkey_mgr.c | 1 - opensm/opensm/osm_port_info_rcv.c | 2 +- opensm/opensm/osm_state_mgr.c | 1 - opensm/opensm/osm_sw_info_rcv.c | 1 - 8 files changed, 2 insertions(+), 19 deletions(-) diff --git a/opensm/include/opensm/osm_madw.h b/opensm/include/opensm/osm_madw.h index bdaa7bc..d4bcbc1 100644 --- a/opensm/include/opensm/osm_madw.h +++ b/opensm/include/opensm/osm_madw.h @@ -178,7 +178,6 @@ typedef struct _osm_pi_context { boolean_t set_method; boolean_t light_sweep; boolean_t update_master_sm_base_lid; - boolean_t ignore_errors; boolean_t active_transition; } osm_pi_context_t; /*********/ diff --git a/opensm/opensm/osm_lid_mgr.c b/opensm/opensm/osm_lid_mgr.c index c7ab0e9..30e5713 100644 --- a/opensm/opensm/osm_lid_mgr.c +++ b/opensm/opensm/osm_lid_mgr.c @@ -1127,7 +1127,6 @@ __osm_lid_mgr_set_physp_pi(IN osm_lid_mgr_t * const p_mgr, context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); context.pi_context.set_method = TRUE;
context.pi_context.update_master_sm_base_lid = FALSE; - context.pi_context.ignore_errors = FALSE; context.pi_context.light_sweep = FALSE; context.pi_context.active_transition = FALSE; diff --git a/opensm/opensm/osm_link_mgr.c b/opensm/opensm/osm_link_mgr.c index 56c65e4..b96b741 100644 --- a/opensm/opensm/osm_link_mgr.c +++ b/opensm/opensm/osm_link_mgr.c @@ -346,17 +346,7 @@ __osm_link_mgr_set_physp_pi(IN osm_link_mgr_t * const p_mgr, send_set = TRUE; p_pi->vl_high_limit = p_physp->vl_high_limit; } - - /* also the context can flag the need to check for errors. */ - context.pi_context.ignore_errors = FALSE; - } else - /* - Since the only change we try to do is to modify the port - state we can ignore the errors that might be caused by a - race in setting the state and the actual state the port is - in. - */ - context.pi_context.ignore_errors = FALSE; + } if (port_state != IB_LINK_NO_CHANGE && port_state != ib_port_info_get_port_state(p_old_pi)) { diff --git a/opensm/opensm/osm_node_info_rcv.c b/opensm/opensm/osm_node_info_rcv.c index c889ce6..4571a0f 100644 --- a/opensm/opensm/osm_node_info_rcv.c +++ b/opensm/opensm/osm_node_info_rcv.c @@ -311,7 +311,6 @@ __osm_ni_rcv_process_new_node(IN const osm_ni_rcv_t * const p_rcv, context.pi_context.port_guid = p_ni->port_guid; context.pi_context.set_method = FALSE; context.pi_context.update_master_sm_base_lid = FALSE; - context.pi_context.ignore_errors = FALSE; context.pi_context.light_sweep = FALSE; context.pi_context.active_transition = FALSE; @@ -509,7 +508,6 @@ __osm_ni_rcv_process_existing_ca_or_router(IN const osm_ni_rcv_t * const p_rcv, context.pi_context.port_guid = p_ni->port_guid; context.pi_context.set_method = FALSE; context.pi_context.update_master_sm_base_lid = FALSE; - context.pi_context.ignore_errors = FALSE; context.pi_context.light_sweep = FALSE; status = osm_req_get(p_rcv->p_gen_req, diff --git a/opensm/opensm/osm_pkey_mgr.c b/opensm/opensm/osm_pkey_mgr.c index 05caaf5..eb6cf54 100644 --- 
a/opensm/opensm/osm_pkey_mgr.c +++ b/opensm/opensm/osm_pkey_mgr.c @@ -239,7 +239,6 @@ pkey_mgr_enforce_partition(IN osm_log_t * p_log, context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); context.pi_context.set_method = TRUE; context.pi_context.update_master_sm_base_lid = FALSE; - context.pi_context.ignore_errors = FALSE; context.pi_context.light_sweep = FALSE; context.pi_context.active_transition = FALSE; diff --git a/opensm/opensm/osm_port_info_rcv.c b/opensm/opensm/osm_port_info_rcv.c index 70ee7df..dd3642d 100644 --- a/opensm/opensm/osm_port_info_rcv.c +++ b/opensm/opensm/osm_port_info_rcv.c @@ -528,7 +528,7 @@ osm_pi_rcv_process_set(IN const osm_pi_rcv_t * const p_rcv, p_pi = (ib_port_info_t *) ib_smp_get_payload_ptr(p_smp); /* check for error */ - if (!p_context->ignore_errors && (cl_ntoh16(p_smp->status) & 0x7fff)) { + if (cl_ntoh16(p_smp->status) & 0x7fff) { /* If port already ACTIVE, don't treat status 7 as error */ if (p_context->active_transition && (cl_ntoh16(p_smp->status) & 0x7fff) == 0x1c) { diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c index c849741..1ff8eb7 100644 --- a/opensm/opensm/osm_state_mgr.c +++ b/opensm/opensm/osm_state_mgr.c @@ -565,7 +565,6 @@ __osm_state_mgr_get_remote_port_info(IN osm_state_mgr_t * const p_mgr, cl_hton64(osm_physp_get_port_num(p_physp)); mad_context.pi_context.set_method = FALSE; mad_context.pi_context.light_sweep = TRUE; - mad_context.pi_context.ignore_errors = FALSE; mad_context.pi_context.update_master_sm_base_lid = FALSE; mad_context.pi_context.active_transition = FALSE; diff --git a/opensm/opensm/osm_sw_info_rcv.c b/opensm/opensm/osm_sw_info_rcv.c index fbaa23a..55c43e6 100644 --- a/opensm/opensm/osm_sw_info_rcv.c +++ b/opensm/opensm/osm_sw_info_rcv.c @@ -103,7 +103,6 @@ __osm_si_rcv_get_port_info(IN const osm_si_rcv_t * const p_rcv, context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); context.pi_context.set_method = FALSE; 
context.pi_context.update_master_sm_base_lid = FALSE; - context.pi_context.ignore_errors = FALSE; context.pi_context.light_sweep = FALSE; context.pi_context.active_transition = FALSE; -- 1.5.3.4.206.g58ba4 From sashak at voltaire.com Fri Nov 30 07:44:01 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:44:01 +0000 Subject: [ofa-general] [PATCH] opensm: move IBA constants from osm_sa_mcmember_record.h to ib_types.h Message-ID: <20071130154401.GQ375@sashak.voltaire.com> Move IBA constants MC_SCOPE_* (and rename to IB_MC_SCOPE_*) from osm_sa_mcmember_record.h to iba/ib_types.h. Use defined in iba/ib_types.h MC membership constants (IB_MC_REC_STATE_*) instead of locally defined in osm_sa_mcmember_record.h. Signed-off-by: Sasha Khapyorsky --- opensm/include/iba/ib_types.h | 12 ++++++++++++ opensm/include/opensm/osm_sa_mcmember_record.h | 13 +------------ opensm/opensm/osm_prtn.c | 6 +++--- opensm/opensm/osm_sa_mcmember_record.c | 4 ++-- opensm/opensm/osm_sa_path_record.c | 2 +- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/opensm/include/iba/ib_types.h b/opensm/include/iba/ib_types.h index d904d9c..672184b 100644 --- a/opensm/include/iba/ib_types.h +++ b/opensm/include/iba/ib_types.h @@ -255,6 +255,18 @@ BEGIN_C_DECLS */ #define IB_HOPLIMIT_MAX 255 /*********/ +/****d* IBA Base: Constants/IB_MC_SCOPE_* +* NAME +* IB_MC_SCOPE_* +* +* DESCRIPTION +* Scope component definitions from IBA 1.2 (Table 3 p. 
146) +*/ +#define IB_MC_SCOPE_LINK_LOCAL 0x2 +#define IB_MC_SCOPE_SITE_LOCAL 0x5 +#define IB_MC_SCOPE_ORG_LOCAL 0x8 +#define IB_MC_SCOPE_GLOBAL 0xE +/*********/ /****d* IBA Base: Constants/IB_PKEY_MAX_BLOCKS * NAME * IB_PKEY_MAX_BLOCKS diff --git a/opensm/include/opensm/osm_sa_mcmember_record.h b/opensm/include/opensm/osm_sa_mcmember_record.h index a7102ca..dfe423c 100644 --- a/opensm/include/opensm/osm_sa_mcmember_record.h +++ b/opensm/include/opensm/osm_sa_mcmember_record.h @@ -379,12 +379,6 @@ osm_mcmr_rcv_find_or_create_new_mgrp(IN osm_mcmr_recv_t * const p_mcmr, #define OSM_DEFAULT_MGRP_RATE 0x03 /***********/ -/* Scope component definitions from IBA 1.2 (Table 3 p. 146) */ -#define MC_SCOPE_LINK_LOCAL 0x2 -#define MC_SCOPE_SITE_LOCAL 0x5 -#define MC_SCOPE_ORG_LOCAL 0x8 -#define MC_SCOPE_GLOBAL 0xE - /****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_SCOPE * Name * OSM_DEFAULT_MGRP_SCOPE @@ -394,13 +388,8 @@ osm_mcmr_rcv_find_or_create_new_mgrp(IN osm_mcmr_recv_t * const p_mcmr, * * SYNOPSIS */ -#define OSM_DEFAULT_MGRP_SCOPE MC_SCOPE_LINK_LOCAL +#define OSM_DEFAULT_MGRP_SCOPE IB_MC_SCOPE_LINK_LOCAL /***********/ -/* JoinState definitions from IBA 1.2 */ -#define MC_FULL_MEMBER 0x1 -#define MC_NON_MEMBER 0x2 -#define MC_SENDONLY_NON_MEMBER 0x4 - END_C_DECLS #endif /* _OSM_MCMR_H_ */ diff --git a/opensm/opensm/osm_prtn.c b/opensm/opensm/osm_prtn.c index ec42da1..f0168fc 100644 --- a/opensm/opensm/osm_prtn.c +++ b/opensm/opensm/osm_prtn.c @@ -203,7 +203,7 @@ ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, pkey = p->pkey | cl_hton16(0x8000); if (!scope) scope = OSM_DEFAULT_MGRP_SCOPE; - hop_limit = (scope == MC_SCOPE_LINK_LOCAL) ? 0 : IB_HOPLIMIT_MAX; + hop_limit = (scope == IB_MC_SCOPE_LINK_LOCAL) ? 
0 : IB_HOPLIMIT_MAX; memset(&mc_rec, 0, sizeof(mc_rec)); @@ -218,7 +218,7 @@ ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, mc_rec.pkt_life = OSM_DEFAULT_SUBNET_TIMEOUT; mc_rec.sl_flow_hop = ib_member_set_sl_flow_hop(p->sl, 0, hop_limit); /* Scope in MCMemberRecord (if present) needs to be consistent with MGID */ - mc_rec.scope_state = ib_member_set_scope_state(scope, MC_FULL_MEMBER); + mc_rec.scope_state = ib_member_set_scope_state(scope, IB_MC_REC_STATE_FULL_MEMBER); ib_mgid_set_scope(&mc_rec.mgid, scope); /* don't update rate, mtu */ @@ -240,7 +240,7 @@ ib_api_status_t osm_prtn_add_mcgroup(osm_log_t * p_log, mc_rec.mgid = osm_ts_ipoib_mgid; memcpy(&mc_rec.mgid.raw[4], &pkey, sizeof(pkey)); /* Scope in MCMemberRecord (if present) needs to be consistent with MGID */ - mc_rec.scope_state = ib_member_set_scope_state(scope, MC_FULL_MEMBER); + mc_rec.scope_state = ib_member_set_scope_state(scope, IB_MC_REC_STATE_FULL_MEMBER); ib_mgid_set_scope(&mc_rec.mgid, scope); status = diff --git a/opensm/opensm/osm_sa_mcmember_record.c b/opensm/opensm/osm_sa_mcmember_record.c index 4d4adfb..5d5fb8d 100644 --- a/opensm/opensm/osm_sa_mcmember_record.c +++ b/opensm/opensm/osm_sa_mcmember_record.c @@ -953,7 +953,7 @@ __validate_requested_mgid(IN osm_mcmr_recv_t * const p_rcv, the scope should not be link local */ if ((signature == 0xA01B) && ((p_mcm_rec->mgid.multicast.header[1] & 0x0F) == - MC_SCOPE_LINK_LOCAL)) { + IB_MC_SCOPE_LINK_LOCAL)) { osm_log(p_rcv->p_log, OSM_LOG_ERROR, "__validate_requested_mgid: ERR 1B24: " "MGID uses 0xA01B signature but with link-local scope\n"); @@ -1180,7 +1180,7 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_mcmr_recv_t * const p_rcv, scope_state, &scope, NULL); } else { /* to guarantee no collision with other subnets use local scope! 
*/ - scope = MC_SCOPE_LINK_LOCAL; + scope = IB_MC_SCOPE_LINK_LOCAL; } p_mgid = &(mcm_rec.mgid); diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c index 2597046..f46a3be 100644 --- a/opensm/opensm/osm_sa_path_record.c +++ b/opensm/opensm/osm_sa_path_record.c @@ -2151,7 +2151,7 @@ void osm_pr_rcv_process(IN void *context, IN void *data) /* HopLimit is not yet set in non link local MC groups */ /* If it were, this would not be needed */ - if (ib_mgid_get_scope(&p_mgrp->mcmember_rec.mgid) != MC_SCOPE_LINK_LOCAL) + if (ib_mgid_get_scope(&p_mgrp->mcmember_rec.mgid) != IB_MC_SCOPE_LINK_LOCAL) hop_limit = IB_HOPLIMIT_MAX; p_pr_item->path_rec.hop_flow_raw = -- 1.5.3.4.206.g58ba4 From sashak at voltaire.com Fri Nov 30 07:45:00 2007 From: sashak at voltaire.com (Sasha Khapyorsky) Date: Fri, 30 Nov 2007 15:45:00 +0000 Subject: [ofa-general] [PATCH] opensm: move OpenSM constants from osm_sa_mcmember-record.h to osm_base.h In-Reply-To: <20071130154401.GQ375@sashak.voltaire.com> References: <20071130154401.GQ375@sashak.voltaire.com> Message-ID: <20071130154500.GR375@sashak.voltaire.com> Move OpenSM constants OSM_DEFAULT_MGRP_* from osm_sa_mcmember-record.h to osm_base.h. 
Signed-off-by: Sasha Khapyorsky --- opensm/include/opensm/osm_base.h | 35 ++++++++++++++++++++++ opensm/include/opensm/osm_sa_mcmember_record.h | 38 ------------------------ 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/opensm/include/opensm/osm_base.h b/opensm/include/opensm/osm_base.h index 9d0318e..f1f3491 100644 --- a/opensm/include/opensm/osm_base.h +++ b/opensm/include/opensm/osm_base.h @@ -512,6 +512,41 @@ BEGIN_C_DECLS */ #define OSM_SM_DEFAULT_POLLING_RETRY_NUMBER 4 /**********/ +/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_MTU +* Name +* OSM_DEFAULT_MGRP_MTU +* +* DESCRIPTION +* Default MTU used for new MGRP creation (2048 bytes) +* Note it includes the MTUSelector which is set to "Greater Than" +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MGRP_MTU 0x04 +/***********/ +/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_RATE +* Name +* OSM_DEFAULT_MGRP_RATE +* +* DESCRIPTION +* Default RATE used for new MGRP creation (10Gb/sec) +* Note it includes the RateSelector which is set to "Greater Than" +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MGRP_RATE 0x03 +/***********/ +/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_SCOPE +* Name +* OSM_DEFAULT_MGRP_SCOPE +* +* DESCRIPTION +* Default SCOPE used for new MGRP creation (link local) +* +* SYNOPSIS +*/ +#define OSM_DEFAULT_MGRP_SCOPE IB_MC_SCOPE_LINK_LOCAL +/***********/ /****d* OpenSM: Base/OSM_DEFAULT_QOS_MAX_VLS * Name * OSM_DEFAULT_QOS_MAX_VLS diff --git a/opensm/include/opensm/osm_sa_mcmember_record.h b/opensm/include/opensm/osm_sa_mcmember_record.h index dfe423c..f13bc98 100644 --- a/opensm/include/opensm/osm_sa_mcmember_record.h +++ b/opensm/include/opensm/osm_sa_mcmember_record.h @@ -353,43 +353,5 @@ osm_mcmr_rcv_find_or_create_new_mgrp(IN osm_mcmr_recv_t * const p_mcmr, IB_MCR_COMPMASK_FLOW | \ IB_MCR_COMPMASK_SL) -/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_MTU -* Name -* OSM_DEFAULT_MGRP_MTU -* -* DESCRIPTION -* Default MTU used for new MGRP 
creation (2048 bytes) -* Note it includes the MTUSelector which is set to "Greater Than" -* -* SYNOPSIS -*/ -#define OSM_DEFAULT_MGRP_MTU 0x04 -/***********/ - -/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_RATE -* Name -* OSM_DEFAULT_MGRP_RATE -* -* DESCRIPTION -* Default RATE used for new MGRP creation (10Gb/sec) -* Note it includes the RateSelector which is set to "Greater Than" -* -* SYNOPSIS -*/ -#define OSM_DEFAULT_MGRP_RATE 0x03 -/***********/ - -/****d* OpenSM: MC Member Record Receiver/OSM_DEFAULT_MGRP_SCOPE -* Name -* OSM_DEFAULT_MGRP_SCOPE -* -* DESCRIPTION -* Default SCOPE used for new MGRP creation (link local) -* -* SYNOPSIS -*/ -#define OSM_DEFAULT_MGRP_SCOPE IB_MC_SCOPE_LINK_LOCAL -/***********/ - END_C_DECLS #endif /* _OSM_MCMR_H_ */ -- 1.5.3.4.206.g58ba4 From rdreier at cisco.com Fri Nov 30 07:44:04 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 30 Nov 2007 07:44:04 -0800 Subject: [ofa-general] MTHCA driver from OFED 1.3a package In-Reply-To: <20071130121928.GB4259@ics.muni.cz> (Lukas Hejtmanek's message of "Fri, 30 Nov 2007 13:19:28 +0100") References: <20071122140554.GB13609@ics.muni.cz> <20071124223117.GA4265@ics.muni.cz> <15ddcffd0711260322i6fd82fd6r40e4362184a5b9b7@mail.gmail.com> <20071126131637.GC4296@ics.muni.cz> <15ddcffd0711260537h633c2e6j9b374b2c9c06b439@mail.gmail.com> <20071129140227.GF4422@ics.muni.cz> <20071130121928.GB4259@ics.muni.cz> Message-ID: > Fatal DMA error! Please use 'swiotlb=force' > ----------- cut here --------- please bite here --------- > Kernel BUG at arch/x86_64/kernel/../../i386/kernel/pci-dma-xen.c:333 What is this bug being caused by? That is, what is line 333 of pci-dma-xen.c in your source tree? > RIP: e030: dma_map_single+0x13f/0x18f > :ib_ipoib:ipoib_cm_alloc_rx_skb+0x92/0x282 The code that calls dma_map_single in ipoib_cm_alloc_rx_skb() looks fine to me, at least in the upstream kernel (I don't know what extra patches OFED might add on). 
It is basically: skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12); skb_reserve(skb, 12); mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); and I don't see anything at all wrong with that. - R. From Jeffrey.C.Becker at nasa.gov Fri Nov 30 09:20:26 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Fri, 30 Nov 2007 09:20:26 -0800 Subject: [ofa-general] OFA server Message-ID: <4750465A.5010607@nasa.gov> Hi all. As several of you noticed, the OFA server was badly broken yesterday. This happened while I was attempting to back out my git changes (which had messed up git). Unfortunately, before I realized what happened, the Ubuntu/Debian package manager (aptitude) not only removed git, but took a large number of other important packages with it. This was a result of my inexperience with aptitude, and I am very sorry this happened. Fortunately, aptitude left most of the configuration and web content alone. Thus I was able to bring back the web server and the mail lists. Currently, our SPAM blocker is set up but I need to fix our virus checker. Thanks to Sasha, I also resurrected git to its old working state prior to my patching. I still need to bring back our wiki, and bugzilla. Also I fixed my php downloads page (the one that checks for WEB_README), and handed it off to SplitRock to link to the main web page. Unfortunately, I need to fix apache so php works again, but this shouldn't be difficult. One bright side of this event is that it forces me to fix the issue of the old certificates (from staging.openib...). This can be fixed if I switch the https pages to just http. Is this OK with everyone (otherwise we need to get new certificates)? Another issue is: do we still want to keep the openib gen2 archives available from the Developer Resource page? I currently have the link turned off, but can resurrect it if needed. If we decide to leave it turned off, I'll remove the "Linux Development Archives" section from that page.
Again, I apologize for my blunder. Hopefully the server will be at least as good if not better than before after I fix everything. In the meantime, if you find any problems with the server, please don't hesitate to let me know. Thanks for your understanding. -jeff From chas at cmf.nrl.navy.mil Fri Nov 30 09:42:33 2007 From: chas at cmf.nrl.navy.mil (chas williams - CONTRACTOR) Date: Fri, 30 Nov 2007 12:42:33 -0500 Subject: [ofa-general] IO Size more than 48K In-Reply-To: <3A453CF1-5FFC-44BF-8F72-7E3EF5AA6E41@alcf.anl.gov> Message-ID: <200711301742.lAUHgX7t027338@cmf.nrl.navy.mil> additionally, you might need to echo 'blocks' > /sys/block/<dev>/[...] rdma segments. max_hw_segments doesnt exist on all kernels i think. In message <3A453CF1-5FFC-44BF-8F72-7E3EF5AA6E41@alcf.anl.gov>,Kevin Harms writes: > > you may also have to go to /sys/block/sdX/queue and echo 1024 > >max_sectors_kb > if you use the srp_daemon you can also add: > a max_sect=2048 to /etc/srp_daemon.conf > >kevin > >On Nov 29, 2007, at 11:08 AM, Vu Pham wrote: > >> >>> Hi, >>> We are using OFED-1.2, and using xdd and some other tools, and >>> trying to >>> send 1/2MB IOs, but what we are seeing in analyzer traces, that >>> memory >>> descriptor in SRP command shows max. 48K which means 1MB I/Os has >>> broken >>> into smaller SRP request from initiator. >>> How can I have this I/O directly going to target? What parameter I >>> need >>> to change? >>> >>> >> >> module param srp_sg_tablesize (default is 12 ie. 12 x 4K = 48K) >> and/or >> max_sect=yyy in echo id_ext=xxx,...,max_sect=1024,service_id= > /sys/ >> class/infiniband_srp/... 
>> >> -vu >> >>> Thanks >>> Ashish >>> _______________________________________________ >>> general mailing list >>> general at lists.openfabrics.org >>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>> >>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-gene >ral >>> >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-gener >al >> > >_______________________________________________ >general mailing list >general at lists.openfabrics.org >http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > From Ashish.Batwara at lsi.com Fri Nov 30 09:48:42 2007 From: Ashish.Batwara at lsi.com (Batwara, Ashish) Date: Fri, 30 Nov 2007 10:48:42 -0700 Subject: [ofa-general] IO Size more than 48K In-Reply-To: <200711301742.lAUHgX7t027338@cmf.nrl.navy.mil> Message-ID: <01B9E81EECACE94DBBD0A556E768FB8A01E3C7A0@NAMAIL2.ad.lsil.com> This is what I did as suggested by Vu and it seems to be working. However, when I send 2MB IO, it gets broken into 512K+1MB+512K by SRP as seen on analyzer. I am just wondering what the logic is? On the other side, when we increase the srp_sg_tablesize beyond 256, we are seeing following message in /var/log/messages "Nov 29 21:17:50 p50 kernel: REJ reason 0x3" which indicates "IB_CM_REJ_NO_RESOURCES", so not sure how to get around to this problem to send larger IO than 1MB in one shot. 
modprobe ib_srp srp_sg_tablesize=256 echo id_ext=200600A0B81138C9,max_sect=4096,ioc_guid=00a0b81112da0003,dgid=fe8 000000000000000a0b81112da0001,pkey=ffff,service_id=200600a0b81138c9> /sys/class/infiniband_srp/srp-mthca0-1/add_target -----Original Message----- From: chas williams - CONTRACTOR [mailto:chas at cmf.nrl.navy.mil] Sent: Friday, November 30, 2007 11:43 AM To: Kevin Harms Cc: Vu Pham; openib-general at openib.org; Batwara, Ashish Subject: Re: [ofa-general] IO Size more than 48K addtionally, you might need to echo 'blocks' > /sys/block/,Kevin Harms writ es: > > you may also have to go to /sys/block/sdX/queue and echo 1024 > >max_sectors_kb > if you use the srp_daemon you can also add: > a max_sect=2048 to /etc/srp_daemon.conf > >kevin > >On Nov 29, 2007, at 11:08 AM, Vu Pham wrote: > >> >>> Hi, >>> We are using OFED-1.2, and using xdd and some other tools, and >>> trying to >>> send 1/2MB IOs, but what we are seeing in analyzer traces, that >>> memory >>> descriptor in SRP command shows max. 48K which means 1MB I/Os has >>> broken >>> into smaller SRP request from initiator. >>> How can I have this I/O directly going to target? What parameter I >>> need >>> to change? >>> >>> >> >> module param srp_sg_tablesize (default is 12 ie. 12 x 4K = 48K) >> and/or >> max_sect=yyy in echo id_ext=xxx,...,max_sect=1024,service_id= > /sys/ >> class/infiniband_srp/... 
>> >> -vu >> >>> Thanks >>> Ashish >>> _______________________________________________ >>> general mailing list >>> general at lists.openfabrics.org >>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >>> >>> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-gene >ral >>> >> >> _______________________________________________ >> general mailing list >> general at lists.openfabrics.org >> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general >> >> To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-gener >al >> > >_______________________________________________ >general mailing list >general at lists.openfabrics.org >http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general > -------------- next part -------------- An HTML attachment was scrubbed... URL: From uilc at brandrud.com Fri Nov 30 09:40:57 2007 From: uilc at brandrud.com (Julian Nicholas) Date: Fri, 30 Nov 2007 13:40:57 -0400 Subject: [ofa-general] When on the they an early to the Message-ID: <01c83356$a2b47a80$7c68f3c9@uilc> We carry all popular Rep!ica_watches_online http://brianabelgravemd.googlepages.com From glenn at lists.openfabrics.org Fri Nov 30 09:51:23 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Fri, 30 Nov 2007 09:51:23 -0800 (PST) Subject: [ofa-general] [PATCH 1/5] nes: accelerated loopback support Message-ID: <20071130175123.D962FE6019D@openfabrics.org> This patch allows accelerated loopback connections to be made through the driver. Prior to this patch iWarp acclerated loopback requests were not handled. 
Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 4f7ae5c..a5e0bb5 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -175,6 +175,8 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nes_write_indexed(nesdev, NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0); + nes_manage_arp_cache(netdev, netdev->dev_addr, + ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE); nesvnic->local_ipaddr = 0; return NOTIFY_OK; break; @@ -191,6 +193,8 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nes_write_indexed(nesdev, NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), ntohl(ifa->ifa_address)); + nes_manage_arp_cache(netdev, netdev->dev_addr, + ntohl(nesvnic->local_ipaddr), NES_ARP_ADD); return NOTIFY_OK; break; default: diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 4023a2c..3005cb1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1616,6 +1616,7 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node; struct nes_cm_listener *loopbackremotelistener; struct nes_cm_node *loopbackremotenode; + struct nes_cm_info loopback_cm_info; u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + ntohs(mpa_frame->priv_data_len); @@ -1632,6 +1633,7 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, // set our node side to client (active) side cm_node->tcp_cntxt.client = 1; + cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; if (cm_info->loc_addr == cm_info->rem_addr) { loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr, @@ -1639,13 +1641,14 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { - u16 temp; - temp = cm_info->loc_port; - cm_info->loc_port = cm_info->rem_port; 
- cm_info->rem_port = temp; - loopbackremotenode = make_cm_node(cm_core, nesvnic, cm_info, + loopback_cm_info = *cm_info; + loopback_cm_info.loc_port = cm_info->rem_port; + loopback_cm_info.rem_port = cm_info->loc_port; + loopback_cm_info.cm_id = loopbackremotelistener->cm_id; + loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, loopbackremotelistener); loopbackremotenode->loopbackpartner = cm_node; + loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->loopbackpartner = loopbackremotenode; memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data, mpa_frame_size); @@ -1654,6 +1657,14 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, // we are done handling this state, set node to a TSA state cm_node->state = NES_CM_STATE_TSA; + cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; + loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; + cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale; + loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); } @@ -1665,7 +1676,6 @@ struct nes_cm_node * mini_cm_connect(struct nes_cm_core *cm_core, /* init our MPA frame ptr */ memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size); cm_node->mpa_frame_size = mpa_frame_size; - cm_node->tcp_cntxt.snd_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; /* send a syn and goto syn sent state */ cm_node->state = NES_CM_STATE_SYN_SENT; @@ -2284,7 +2294,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iw_cm_event cm_event; struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; 
- struct iw_cm_id *lb_cm_id; int ret; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); @@ -2338,24 +2347,29 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* setup our first outgoing iWarp send WQE (the IETF frame response) */ wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (u64)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)(u64temp)); - wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(u64temp>>32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)nesqp->ietf_frame_pbase); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = - cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU); + if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) { + u64temp = (u64)nesqp; + u64temp |= NES_SW_CONTEXT_ALIGN>>1; + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX] = cpu_to_le32((u32)(u64temp)); + wqe->wqe_words[NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX] = cpu_to_le32((u32)(u64temp>>32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)nesqp->ietf_frame_pbase); + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = + 
cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = + cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( + NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU); + } else { + nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); + } nesqp->skip_lsmm = 1; @@ -2530,8 +2544,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->ietf_frame->rev = IETF_MPA_VERSION; nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len); - nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); /* set up the connection params for the node */ cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr); @@ -2547,8 +2562,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* create a connect CM node connection */ cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info); if (!cm_node) { - nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) + nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); nes_rem_ref(&nesqp->ibqp); kfree(nesqp->ietf_frame); nesqp->ietf_frame = NULL; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 311127e..5fb241a 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ 
b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2712,6 +2712,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, list_for_each_entry(chunk, &region->chunk_list, list) { for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> PAGE_SHIFT; + /* nespbl->page = chunk->page_list[0].page; */ nespbl->page = sg_page(&chunk->page_list[0]); for (page_index=0; page_index References: <4750465A.5010607@nasa.gov> Message-ID: On Fri, 30 Nov 2007, Jeff Becker wrote: > Another issue is: do we still want to keep the openib gen2 archives > available from the Developer Resource page? I currently have the > link turned off, but can resurrect it if needed. If we decide to > leave it turned off, I'll remove the "Linux Development Archives" > section from that page. I think there should be some way of reaching the old SVN code. From sean.hefty at intel.com Fri Nov 30 09:59:50 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 30 Nov 2007 09:59:50 -0800 Subject: [ofa-general] [PATCH] ib/mad: fix incorrect access to items on local_list In-Reply-To: References: <474BE237.8050602@dev.mellanox.co.il> Message-ID: <000001c8337a$cdc18e60$ff0da8c0@amr.corp.intel.com> In cancel_mads(), MADs are moved from the wait_list and local_list to a cancel_list for processing. However, the structures on these two lists are not the same. The wait_list references struct ib_mad_send_wr_private, but local_list references struct ib_mad_local_private. Cancel_mads() treats all items moved to the cancel_list as struct ib_mad_send_wr_private. This leads to a system crash when requests are moved from the local_list to the cancel_list. Fix this by leaving local_list alone. All requests on the local_list have completed and are just awaiting processing by a queued worker thread. Bug (crash) reported by Dotan Barak . Problem with local_list access reported by Robert Reynolds . Signed-off-by: Sean Hefty --- This patch is untested.
Dotan, can you see if this fixes the crash that you were seeing? drivers/infiniband/core/mad.c | 2 -- 1 files changed, 0 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 91e62c3..7ef2c7c 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2284,8 +2284,6 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); - /* Empty local completion list as well */ - list_splice_init(&mad_agent_priv->local_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ From glenn at lists.openfabrics.org Fri Nov 30 10:09:21 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Fri, 30 Nov 2007 10:09:21 -0800 (PST) Subject: [ofa-general] [PATCH 2/5] nes: provider listener cleanup Message-ID: <20071130180921.C460BE60177@openfabrics.org> If an error occurs during the provider listen call the reference count can be off. This will prevent the listener from being destroyed properly. This is fixed by correcting the reference counts when a problem is detected. 
Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 3005cb1..933f31c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -668,7 +668,7 @@ int send_syn(struct nes_cm_node *cm_node, u32 sendack) options = (union all_known_options *)&optionsbuffer[optionssize]; options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE; options->as_windowscale.length = sizeof(struct option_windowscale); - options->as_windowscale.shiftcount = cm_node->tcp_cntxt.snd_wscale; + options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; optionssize += sizeof(struct option_windowscale); if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) @@ -1387,15 +1387,12 @@ int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_CLOSED: break; case NES_CM_STATE_LISTENING: - if (!(tcph->syn)) { - nes_debug(NES_DBG_CM, "Received an ack without a SYN on a listening port\n"); - send_reset(cm_node); - /* send_reset bumps refcount, this should have been a new node */ - rem_ref_cm_node(cm_core, cm_node); - return -1; - } else { - nes_debug(NES_DBG_CM, "Received an ack on a listening port (syn-ack maybe?)\n"); - } + nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn); + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + send_reset(cm_node); + /* send_reset bumps refcount, this should have been a new node */ + rem_ref_cm_node(cm_core, cm_node); + return -1; break; case NES_CM_STATE_TSA: nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n"); @@ -1832,6 +1829,10 @@ int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener); if (!cm_node) { nes_debug(NES_DBG_CM, "Unable to allocate node\n"); + if (listener) { + nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n"); + 
atomic_dec(&listener->ref_count); + } ret = -1; goto out; } From chas at cmf.nrl.navy.mil Fri Nov 30 10:18:43 2007 From: chas at cmf.nrl.navy.mil (chas williams - CONTRACTOR) Date: Fri, 30 Nov 2007 13:18:43 -0500 Subject: [ofa-general] IO Size more than 48K In-Reply-To: <01B9E81EECACE94DBBD0A556E768FB8A01E3C7A0@NAMAIL2.ad.lsil.com> Message-ID: <200711301818.lAUIIhZM027808@cmf.nrl.navy.mil> increasing the srp_sg_tablesize isn't going to help much. you want to make the rdma segments bigger, and the default sg tablesize is big enough for large i/o if you get the segment size up. increasing the srp_sg_tablesize leads to a bigger iu message size and some devices have some limitations in this area. checking with our analyzer here on sles10, i cannot get dd to issue a read with an rdma segment larger than 512k. however: echo 256 > /sys/block/<device>/queue/max_phys_segments echo 256 > /sys/block/<device>/queue/max_hw_segments echo 1 > /proc/scsi/sg/allow_dio sg_dd if=/dev/<device> of=/tmp/file dio=1 bpt=2048 count=10240 blk_sgio=1 does produce 5 scsi reads with a single rdma segment of 1MB. In message <01B9E81EECACE94DBBD0A556E768FB8A01E3C7A0 at NAMAIL2.ad.lsil.com>,"Batwara, Ashish" writes: >This is what I did as suggested by Vu and it seems to be working. >However, when I send 2MB IO, it gets broken into 512K+1MB+512K by SRP as >seen on analyzer. I am just wondering what the logic is? On the other >side, when we increase the srp_sg_tablesize beyond 256, we are seeing >following message in /var/log/messages "Nov 29 21:17:50 p50 kernel: >REJ reason 0x3" which indicates "IB_CM_REJ_NO_RESOURCES", so not sure >how to get around to this problem to send larger IO than 1MB in one >shot. 
> >=20 > >=20 > >modprobe ib_srp srp_sg_tablesize=3D256 > >echo >id_ext=3D200600A0B81138C9,max_sect=3D4096,ioc_guid=3D00a0b81112da0003,dgi= >d=3Dfe8 >000000000000000a0b81112da0001,pkey=3Dffff,service_id=3D200600a0b81138c9> >/sys/class/infiniband_srp/srp-mthca0-1/add_target > >=20 > >-----Original Message----- >From: chas williams - CONTRACTOR [mailto:chas at cmf.nrl.navy.mil]=20 >Sent: Friday, November 30, 2007 11:43 AM >To: Kevin Harms >Cc: Vu Pham; openib-general at openib.org; Batwara, Ashish >Subject: Re: [ofa-general] IO Size more than 48K=20 > >=20 > >addtionally, you might need to echo 'blocks' > > >/sys/block/ >rdma segments. > >=20 > >max_hw_segments doesnt exist on all kernels i think. > >=20 > >In message <3A453CF1-5FFC-44BF-8F72-7E3EF5AA6E41 at alcf.anl.gov>,Kevin >Harms writ > >es: > >>=20 > >> you may also have to go to /sys/block/sdX/queue and echo 1024 > =20 > >>max_sectors_kb > >> if you use the srp_daemon you can also add: > >> a max_sect=3D2048 to /etc/srp_daemon.conf > >>=20 > >>kevin > >>=20 > >>On Nov 29, 2007, at 11:08 AM, Vu Pham wrote: > >>=20 > >>>=20 > >>>> Hi, > >>>> We are using OFED-1.2, and using xdd and some other tools, and =20 > >>>> trying to > >>>> send 1/2MB IOs, but what we are seeing in analyzer traces, that =20 > >>>> memory > >>>> descriptor in SRP command shows max. 48K which means 1MB I/Os has =20 > >>>> broken > >>>> into smaller SRP request from initiator. > >>>> How can I have this I/O directly going to target? What parameter I =20 > >>>> need > >>>> to change? > >>>>=20 > >>>>=20 > >>>=20 > >>> module param srp_sg_tablesize (default is 12 ie. 12 x 4K =3D 48K) > >>> and/or > >>> max_sect=3Dyyy in echo id_ext=3Dxxx,...,max_sect=3D1024,service_id=3D = >> /sys/ > > >>> class/infiniband_srp/... 
> >>>=20 > >>> -vu > >>>=20 > >>>> Thanks > >>>> Ashish > >>>> _______________________________________________ > >>>> general mailing list > >>>> general at lists.openfabrics.org > >>>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >>>>=20 > >>>> To unsubscribe, please visit >http://openib.org/mailman/listinfo/openib-gene > >>ral > >>>>=20 > >>>=20 > >>> _______________________________________________ > >>> general mailing list > >>> general at lists.openfabrics.org > >>> http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >>>=20 > >>> To unsubscribe, please visit >http://openib.org/mailman/listinfo/openib-gener > >>al > >>>=20 > >>=20 > >>_______________________________________________ > >>general mailing list > >>general at lists.openfabrics.org > >>http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general > >>=20 > >>To unsubscribe, please visit >http://openib.org/mailman/listinfo/openib-general > >>=20 > > >------_=_NextPart_001_01C83379.3F66B657 >Content-Type: text/html; > charset="us-ascii" >Content-Transfer-Encoding: quoted-printable > >xmlns:w=3D"urn:schemas-microsoft-com:office:word" = >xmlns:st1=3D"urn:schemas-microsoft-com:office:smarttags" = >xmlns=3D"http://www.w3.org/TR/REC-html40"> > > >charset=3Dus-ascii"> > >namespaceuri=3D"urn:schemas-microsoft-com:office:smarttags" > name=3D"PersonName"/> > > > > > > > >
> >

style=3D'font-size: >10.0pt'>This is what I did as suggested by Vu and it seems to be = >working. >However, when I send 2MB IO, it gets broken into 512K+1MB+512K by SRP as = >seen >on analyzer. I am just wondering what the logic is? On the other side, = >when we >increase the srp_sg_tablesize beyond 256, we are seeing following = >message in >/var/log/messages “Nov 29 21:17:50 p50 kernel:   REJ = >reason 0x3” >which indicates “IB_CM_REJ_NO_RESOURCES”, so not sure how to = >get >around to this problem to send larger IO than 1MB in one = >shot.

> >

style=3D'font-size: >10.0pt'> 

> >

style=3D'font-size: >10.0pt'> 

> >

style=3D'font-size: >10.0pt'>modprobe ib_srp style=3D'color:red;font-weight: >bold'>srp_sg_tablesize=3D256p> > >

style=3D'font-size: >10.0pt'>echo id_ext=3D200600A0B81138C9,style=3D'color: >red;font-weight:bold'>max_sect=3D4096,ioc_guid=3D00a0b8= >1112da0003,dgid=3Dfe8000000000000000a0b81112da0001,pkey=3Dffff,service_id= >=3D200600a0b81138c9> >/sys/class/infiniband_srp/srp-mthca0-1/add_target>

> >

style=3D'font-size: >10.0pt'> 

> >

style=3D'font-size: >10.0pt'>-----Original Message-----
>From: chas williams - CONTRACTOR [mailto:chas at cmf.nrl.navy.mil]
>Sent: Friday, November 30, 2007 11:43 AM
>To: Kevin Harms
>Cc: Vu Pham; openib-general at openib.org; w:st=3D"on">Batwara, > Ashish
>Subject: Re: [ofa-general] IO Size more than 48K

> >

style=3D'font-size: >10.0pt'> 

> >

style=3D'font-size: >10.0pt'>addtionally, you might need to echo 'blocks' = >>

> >

style=3D'font-size: >10.0pt'>/sys/block/<device>/queue/max_hw_segments to increase the size = >of the

> >

style=3D'font-size: >10.0pt'>rdma segments.

> >

style=3D'font-size: >10.0pt'> 

> >

style=3D'font-size: >10.0pt'>max_hw_segments doesnt exist on all kernels i = >think.

> >

style=3D'font-size: >10.0pt'> 

> >

style=3D'font-size: >10.0pt'>In message ><3A453CF1-5FFC-44BF-8F72-7E3EF5AA6E41 at alcf.anl.gov>,Kevin Harms = >writ

> >

style=3D'font-size: >10.0pt'>es:

> >

style=3D'font-size: >10.0pt'>> 

> >

style=3D'font-size: >10.0pt'>>     you may also have to go to = >/sys/block/sdX/queue and echo 1024 >> 

> >

style=3D'font-size: >10.0pt'>>max_sectors_kb

> >

style=3D'font-size: >10.0pt'>>     if you use the srp_daemon you can = >also add:

> >

style=3D'font-size: >10.0pt'>>     a max_sect=3D2048 to = >/etc/srp_daemon.conf

> >

style=3D'font-size: >10.0pt'>> 

> >

style=3D'font-size: >10.0pt'>>kevin

> >

style=3D'font-size: >10.0pt'>> 

> >

style=3D'font-size: >10.0pt'>>On Nov 29, 2007, at 11:08 AM, Vu Pham = >wrote:

> >

style=3D'font-size: >10.0pt'>> 

> >

style=3D'font-size: >10.0pt'>>> 

> >

style=3D'font-size: >10.0pt'>>>> Hi,

> >

style=3D'font-size: >10.0pt'>>>> We are using OFED-1.2, and using xdd and some other = >tools, >and 

> >

style=3D'font-size: >10.0pt'>>>> trying to

> >

style=3D'font-size: >10.0pt'>>>> send 1/2MB IOs, but what we are seeing in analyzer = >traces, >that 

> >

style=3D'font-size: >10.0pt'>>>> memory

> >

style=3D'font-size: >10.0pt'>>>> descriptor in SRP command shows max. 48K which = >means 1MB >I/Os has 

> >

style=3D'font-size: >10.0pt'>>>> broken

> >

style=3D'font-size: >10.0pt'>>>> into smaller SRP request from = >initiator.

> >

style=3D'font-size: >10.0pt'>>>> How can I have this I/O directly going to target? = >What >parameter I 

> >

style=3D'font-size: >10.0pt'>>>> need

> >

style=3D'font-size: >10.0pt'>>>> to change?

> >

style=3D'font-size: >10.0pt'>>>> 

> >

style=3D'font-size: >10.0pt'>>>> 

> >

style=3D'font-size: >10.0pt'>>> 

> >

style=3D'font-size: >10.0pt'>>> module param srp_sg_tablesize (default is 12 ie. 12 x = >4K =3D >48K)

> >

style=3D'font-size: >10.0pt'>>> and/or

> >

style=3D'font-size: >10.0pt'>>> max_sect=3Dyyy in echo = >id_ext=3Dxxx,...,max_sect=3D1024,service_id=3D >> /sys/

> >

style=3D'font-size: >10.0pt'>>> class/infiniband_srp/...

> >

style=3D'font-size: >10.0pt'>>> 

> >

style=3D'font-size: >10.0pt'>>> -vu

> >

style=3D'font-size: >10.0pt'>>> 

> >

style=3D'font-size: >10.0pt'>>>> Thanks

> >

style=3D'font-size: >10.0pt'>>>> Ashish

> >

style=3D'font-size: >10.0pt'>>>> = >_______________________________________________<= >/p> > >

style=3D'font-size: >10.0pt'>>>> general mailing list

> >

style=3D'font-size: >10.0pt'>>>> = >general at lists.openfabrics.org

> >

style=3D'font-size: >10.0pt'>>>> >http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general<= >/span>

> >

style=3D'font-size: >10.0pt'>>>> 

> >

style=3D'font-size: >10.0pt'>>>> To unsubscribe, please visit >http://openib.org/mailman/listinfo/openib-genep> > >

style=3D'font-size: >10.0pt'>>ral

> >

style=3D'font-size: >10.0pt'>>>> 

> >

style=3D'font-size: >10.0pt'>>> 

> >

style=3D'font-size: >10.0pt'>>> = >_______________________________________________<= >/p> > >

style=3D'font-size: >10.0pt'>>> general mailing list

> >

style=3D'font-size: >10.0pt'>>> = >general at lists.openfabrics.org

> >

style=3D'font-size: >10.0pt'>>> = >http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general<= >/span>

> >

style=3D'font-size: >10.0pt'>>> 

> >

style=3D'font-size: >10.0pt'>>> To unsubscribe, please visit = >http://openib.org/mailman/listinfo/openib-gener<= >/p> > >

style=3D'font-size: >10.0pt'>>al

> >

style=3D'font-size: >10.0pt'>>> 

> >

style=3D'font-size: >10.0pt'>> 

> >

style=3D'font-size: >10.0pt'>>_______________________________________________pan>

> >

style=3D'font-size: >10.0pt'>>general mailing list

> >

style=3D'font-size: >10.0pt'>>general at lists.openfabrics.org

> >

style=3D'font-size: >10.0pt'>>http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general= >

> >

style=3D'font-size: >10.0pt'>> 

> >

style=3D'font-size: >10.0pt'>>To unsubscribe, please visit = >http://openib.org/mailman/listinfo/openib-general>

> >

style=3D'font-size: >10.0pt'>> 

> >
> > > > > >------_=_NextPart_001_01C83379.3F66B657-- > From glenn at lists.openfabrics.org Fri Nov 30 10:21:06 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Fri, 30 Nov 2007 10:21:06 -0800 (PST) Subject: [ofa-general] [PATCH 3/5] nes: fix link reset for certain phy types Message-ID: <20071130182106.69D1FE60177@openfabrics.org> Link status and link reset was not being handled correctly for certain board phy types. The link would always show up. The fix was to detect the phy type and properly reset it. A fallout of this fix was to add rx/tx port discard counters to ethtool. Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 48082ed..31d3cf5 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -249,6 +249,8 @@ struct nes_device { unsigned long mac_rx_jabber_frames; unsigned long mac_rx_oversized_frames; unsigned long mac_rx_short_frames; + unsigned long port_rx_discards; + unsigned long port_tx_discards; unsigned int mac_index; unsigned int nes_stack_start; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 933f31c..623037d 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2102,7 +2102,7 @@ int nes_cm_disconn_true(struct nes_qp *nesqp) struct iw_cm_id *cm_id; struct iw_cm_event cm_event; struct nes_vnic *nesvnic; - struct nes_cm_node *cm_node = NULL; + /* struct nes_cm_node *cm_node = NULL; */ u16 last_ae; u8 original_hw_tcp_state; u8 original_ibqp_state; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index d2ab5a7..8b0193d 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -745,6 +745,10 @@ void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count) u32temp = nes_read_indexed(nesdev, 0x000008e8); u32temp |= 0x80000000; nes_write_indexed(nesdev, 0x000008e8, 
u32temp); + u32temp = nes_read_indexed(nesdev, 0x000021f8); + u32temp &= 0x7fffffff; + u32temp |= 0x7fff0010; + nes_write_indexed(nesdev, 0x000021f8, u32temp); } } @@ -1934,6 +1938,32 @@ void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) aeq->aeq_head = head; } +static void nes_reset_link(struct nes_device *nesdev, u32 mac_index) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 reset_value; + u32 i=0; + + if (nesadapter->hw_rev == NE020_REV) { + return; + } + + + reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); + if ((mac_index == 0)||((mac_index == 1) && (nesadapter->OneG_Mode))) { + reset_value |= 0x0000001d; + } else { + reset_value |= 0x0000002d; + } + nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); + + while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) + & 0x00000040) != 0x00000040) && (i++ < 5000)) { + } + + + +} /** * nes_process_mac_intr @@ -1967,6 +1997,12 @@ void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) { nesdev->link_status_interrupts++; + if ((++nesadapter->link_interrupt_count[mac_index]) > ((u16)NES_MAX_LINK_INTERRUPTS)) { + nesadapter->link_interrupt_count[mac_index] = 0; + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_reset_link(nesdev, mac_index); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + } /* read the PHY interrupt status register */ if (nesadapter->OneG_Mode) { do { diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 67fd2f3..21ec22c 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -80,6 +80,8 @@ enum indexed_regs { NES_IDX_DST_IP_ADDR = 0x0400, NES_IDX_PCIX_DIAG = 0x08e8, NES_IDX_MPP_DEBUG = 0x0a00, + NES_IDX_PORT_RX_DISCARDS = 0x0a30, + NES_IDX_PORT_TX_DISCARDS = 0x0a34, NES_IDX_MPP_LB_DEBUG = 0x0b00, NES_IDX_DENALI_CTL_22 = 0x1058, NES_IDX_MAC_TX_CONTROL = 0x2000, @@ -571,6 +573,9 @@ enum 
nes_nic_cqe_word_idx { #define NES_PKT_TYPE_APBVT_BITS 0xC112 #define NES_PKT_TYPE_APBVT_MASK 0xff3e +#define NES_PKT_TYPE_PVALID_BITS 0x10000000 +#define NES_PKT_TYPE_PVALID_MASK 0x30000000 + #define NES_PKT_TYPE_TCPV4_BITS 0x0110 #define NES_PKT_TYPE_TCPV4_MASK 0x3f30 @@ -959,6 +964,7 @@ struct nes_hw_tune_timer { #define NES_TIMER_INT_LIMIT 2 #define NES_TIMER_INT_LIMIT_DYNAMIC 10 #define NES_TIMER_ENABLE_LIMIT 4 +#define NES_MAX_LINK_INTERRUPTS 512 struct nes_adapter { u64 fw_ver; @@ -1061,6 +1067,8 @@ struct nes_adapter { u16 pd_config_size[4]; u16 pd_config_base[4]; + u16 link_interrupt_count[4]; + /* the phy index for each port */ u8 phy_index[4]; u8 mac_sw_state[4]; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index d75b327..c29ab12 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1117,6 +1117,16 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); nesvnic->nesdev->mac_pause_frames_received += u32temp; + u32temp = nes_read_indexed(nesdev, + NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); + nesvnic->nesdev->port_rx_discards += u32temp; + nesvnic->netstats.rx_dropped += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40)); + nesvnic->nesdev->port_tx_discards += u32temp; + nesvnic->netstats.tx_dropped += u32temp; + for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) { if (nesvnic->qp_nic_index[nic_count] == 0xf) break; @@ -1661,6 +1671,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, (0x200*(nesvnic->logical_port&1)) ); if ((u32temp&0x0f1f0000) == 0x0f0f0000) { if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) { + nes_init_phy(nesdev); nes_read_10G_phy_reg(nesdev, 1, nesdev->nesadapter->phy_index[nesvnic->logical_port]); temp_phy_data = 
(u16)nes_read_indexed(nesdev, @@ -1692,7 +1703,9 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS+(0x200*nesvnic->logical_port), u32temp); - nes_init_phy(nesdev); + if (nesdev->nesadapter->phy_type[nesvnic->logical_port] != NES_PHY_TYPE_IRIS) + nes_init_phy(nesdev); + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesvnic->logical_port), ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); From glenn at lists.openfabrics.org Fri Nov 30 10:24:56 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Fri, 30 Nov 2007 10:24:56 -0800 (PST) Subject: [ofa-general] [PATCH 4/5] nes: add interrupt coalesce timer stat to ethtool Message-ID: <20071130182456.82DA4E60177@openfabrics.org> Adding support to show the interrupt coalesce timer through ethtool. Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index c29ab12..5c9ab37 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1330,6 +1330,9 @@ static int nes_netdev_get_coalesce(struct net_device *netdev, temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high; temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min; temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max; + if (nesadapter->et_use_adaptive_rx_coalesce) { + temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use; + } spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags); memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce)); return 0; From sean.hefty at intel.com Fri Nov 30 10:25:01 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 30 Nov 2007 10:25:01 -0800 Subject: [ofa-general] [PATCH 2/5] nes: provider listener cleanup In-Reply-To: 
<20071130180921.C460BE60177@openfabrics.org> References: <20071130180921.C460BE60177@openfabrics.org> Message-ID: <000101c8337e$51d72810$ff0da8c0@amr.corp.intel.com> >@@ -668,7 +668,7 @@ int send_syn(struct nes_cm_node *cm_node, u32 sendack) > options = (union all_known_options *)&optionsbuffer[optionssize]; > options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE; > options->as_windowscale.length = sizeof(struct option_windowscale); >- options->as_windowscale.shiftcount = cm_node->tcp_cntxt.snd_wscale; >+ options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; This change looks unrelated. - Sean From sweitzen at cisco.com Fri Nov 30 10:36:30 2007 From: sweitzen at cisco.com (Scott Weitzenkamp (sweitzen)) Date: Fri, 30 Nov 2007 10:36:30 -0800 Subject: [ofa-general] Not seeing any SDP performance changes in OFED 1.3 beta, and I get Oops when enabling sdp_zcopy_thresh In-Reply-To: References: <47445630.10000@dev.mellanox.co.il> Message-ID: Jim, Using netperf with TCP_STREAM and TCP_RR, I'm not seeing any changes in SDP throughput or CPU utilization comparing OFED 1.3 beta and OFED 1.2.5. Looks like I need to set a non-zero value in /sys/module/ib_sdp/sdp_zcopy_thresh? Do you plan to enable this by default soon? I tried "echo 4096 > /sys/module/ib_sdp/sdp_zcopy_thresh" on RHEL4 and then tried netperf, and got an Oops. 
Unable to handle kernel NULL pointer deref erence at 0000000000000000 RIP: {put_page+0} PML4 1a3047067 PGD 1a7a6d067 PMD 0 Oops: 0000 [1] SMP CPU 0 Modules linked in: parport_pc lp parport autofs4 i2c_dev i2c_co re nfs lockd nfs_acl sunrpc rdma_ucm(U) rds(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_ addr(U) mlx4_ib(U) mlx4_core(U) ds yenta_socket pcmcia_core dm_mirror dm_multipa th dm_mod joydev button battery ac uhci_hcd ehci_hcd shpchp ib_mthca(U) ib_ipoib (U) ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) md5 ipv6 e1000 floppy ata_piix libata sg ext3 jbd mptscsih mptsas mptspi mptscsi mp tbase sd_mod scsi_mod Pid: 6802, comm: netperf241 Not tainted 2.6.9-55.ELlargesmp RIP: 0010:[] {put_page+0} RSP: 0018:00000101a7bcbbc0 EFLAGS: 00010203 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000002 02 RDX: 00000101b0b43e80 RSI: 0000000000000202 RDI: 00000000000000 00 RBP: 00000101b85761c0 R08: 0000000000000000 R09: 00000000000000 00 R10: 0000000000000246 R11: ffffffffa02e0e36 R12: 00000101a4b330 80 R13: 00000101a7bcbd58 R14: 0000000000000000 R15: 00000000000100 00 FS: 0000002a95696940(0000) GS:ffffffff80500380(0000) knlGS:000 0000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 0000000000101000 CR4: 00000000000006 e0 Process netperf241 (pid: 6802, threadinfo 00000101a7bca000, tas k 00000101a70df030) Stack: ffffffffa02e110a 0000000000000100 0000000000000000 00000 00000529780 0001000000000246 0000000000000246 000000008013feac 00000 800ffffffe0 0000000000000000 00000101a7bcbe88 Call Trace:{:ib_sdp:sdp_sendmsg+724} {queue_delayed_work+101} {:ib_addr:queue_req+122} {sock_sendmsg+271} {do_no_page+916} {au toremove_wake_function+0} {sockfd_lookup+16} { sys_sendto+195} {do_page_fault+577} {dnotify_parent+34} {vfs_read+248} {syst em_call+126} Code: 8b 07 48 89 fa f6 c4 80 74 3b 48 8b 57 10 8b 02 48 89 d1 f6 RIP {put_page+0} RSP <00000101a7bcbbc0> CR2: 0000000000000000 <0>Kernel panic - not syncing: Oops Scott 
Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems From glenn at lists.openfabrics.org Fri Nov 30 10:37:05 2007 From: glenn at lists.openfabrics.org (Glenn Grundstrom NetEffect) Date: Fri, 30 Nov 2007 10:37:05 -0800 (PST) Subject: [ofa-general] [PATCH 5/5] nes: napi interface fix Message-ID: <20071130183705.8A819E601CE@openfabrics.org> Modified the driver to support the 2.6.24 napi interface changes. The napi interface is now used by default. Signed-off-by: Glenn Grundstrom --- diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile index 3514851..15a1a13 100644 --- a/drivers/infiniband/hw/nes/Makefile +++ b/drivers/infiniband/hw/nes/Makefile @@ -1,3 +1,5 @@ +EXTRA_CFLAGS += -DNES_NAPI + obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 623037d..d101117 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2102,7 +2102,6 @@ int nes_cm_disconn_true(struct nes_qp *nesqp) struct iw_cm_id *cm_id; struct iw_cm_event cm_event; struct nes_vnic *nesvnic; - /* struct nes_cm_node *cm_node = NULL; */ u16 last_ae; u8 original_hw_tcp_state; u8 original_ibqp_state; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 8b0193d..3a21a08 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1232,6 +1232,12 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) nesnic = &nesvnic->nic; nesdev = nesvnic->nesdev; spin_lock_irqsave(&nesnic->rq_lock, flags); + if (nesnic->replenishing_rq !=0) { + spin_unlock_irqrestore(&nesnic->rq_lock, flags); + return; + } + nesnic->replenishing_rq = 1; + spin_unlock_irqrestore(&nesnic->rq_lock, flags); do { skb = dev_alloc_skb(nesvnic->max_frame_size); if (skb) { @@ -1275,7 +1281,7 @@ static void 
nes_replenish_nic_rq(struct nes_vnic *nesvnic) if (rx_wqes_posted) { nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id); } - spin_unlock_irqrestore(&nesnic->rq_lock, flags); + nesnic->replenishing_rq = 0; } @@ -2121,10 +2127,11 @@ void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq { struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); - netif_rx_schedule(nesdev->netdev[nesvnic->netdev_index]); + netif_rx_schedule(nesdev->netdev[nesvnic->netdev_index], &nesvnic->napi); } #endif + /* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before * getting out of nic_ce_handler */ @@ -2160,7 +2167,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) head = cq->cq_head; cq_size = cq->cq_size; #ifdef NES_NAPI - nesvnic->cqes_pending = 1; + cq->cqes_pending = 1; #endif do { if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) & @@ -2210,7 +2217,8 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } else { rqes_processed ++; #ifdef NES_NAPI - nesvnic->rx_cqes_completed++; + cq->rx_cqes_completed++; + cq->rx_pkts_indicated++; #endif rx_pkt_size = cqe_misc & 0x0000ffff; nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail]; @@ -2281,18 +2289,9 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) >> 16); nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. 
Tag = 0x%04X\n", nesvnic->netdev->name, vlan_tag); - -#ifdef NES_NAPI - vlan_hwaccel_receive_skb(rx_skb, nesvnic->vlan_grp, vlan_tag); -#else - vlan_hwaccel_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); -#endif + nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); } else { -#ifdef NES_NAPI - netif_receive_skb(rx_skb); -#else - netif_rx(rx_skb); -#endif + nes_netif_rx(rx_skb); } } @@ -2314,11 +2313,11 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) cqe_count = 0; } #ifdef NES_NAPI - if (nesvnic->rx_cqes_completed >= nesvnic->budget) + if (cq->rx_cqes_completed >= nesvnic->budget) break; #endif } else { - nesvnic->cqes_pending = 0; + cq->cqes_pending = 0; break; } #ifndef NES_NAPI @@ -2332,7 +2331,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n", cq->cq_number, cqe_count, cq->cq_head); */ #ifdef NES_NAPI - nesvnic->cqe_allocs_pending = cqe_count; + cq->cqe_allocs_pending = cqe_count; #else /* Arm the CCQ */ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 21ec22c..51bb87f 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -361,6 +361,7 @@ enum nes_cqe_opcode_bits { NES_CQE_VALID = (1<<31), }; + enum nes_cqe_word_idx { NES_CQE_PAYLOAD_LENGTH_IDX = 0, NES_CQE_COMP_COMP_CTX_LOW_IDX = 2, @@ -810,6 +811,7 @@ struct nes_hw_aeqe { __le32 aeqe_words[4]; }; + struct nes_cqp_request { wait_queue_head_t waitq; struct nes_hw_cqp_wqe cqp_wqe; @@ -857,6 +859,8 @@ struct nes_hw_nic { u16 rq_head; u16 rq_tail; u16 rq_size; + u8 replenishing_rq; + u8 reserved; spinlock_t sq_lock; spinlock_t rq_lock; @@ -866,9 +870,13 @@ struct nes_hw_nic_cq { struct nes_hw_nic_cqe volatile *cq_vbase; /* PCI memory for host rings */ void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); dma_addr_t cq_pbase; /* 
PCI memory for host rings */ + int rx_cqes_completed; + int cqe_allocs_pending; + int rx_pkts_indicated; u16 cq_head; u16 cq_size; u16 cq_number; + u8 cqes_pending; }; struct nes_hw_qp { @@ -1131,12 +1139,12 @@ struct nes_vnic { atomic_t rx_skbs_needed; atomic_t rx_skb_timer_running; int budget; - int rx_cqes_completed; - int cqe_allocs_pending; u32 msg_enable; /* u32 tx_avail; */ __be32 local_ipaddr; - +#ifdef NES_NAPI + struct napi_struct napi; +#endif spinlock_t tx_lock; /* could use netdev tx lock? */ struct timer_list rq_wqes_timer; u32 nic_mem_size; @@ -1159,7 +1167,6 @@ struct nes_vnic { u8 next_qp_nic_index; u8 of_device_registered; u8 rdma_enabled; - u8 cqes_pending; u8 rx_checksum_disabled; }; @@ -1178,5 +1185,13 @@ struct nes_ib_device { u32 num_pd; }; +#ifdef NES_NAPI +#define nes_vlan_rx vlan_hwaccel_receive_skb +#define nes_netif_rx netif_receive_skb +#else +#define nes_vlan_rx vlan_hwaccel_rx +#define nes_netif_rx netif_rx +#endif + #endif /* __NES_HW_H */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 5c9ab37..4133a44 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -139,37 +139,36 @@ static int nes_netdev_change_mtu(struct net_device *, int); /** * nes_netdev_poll */ -static int nes_netdev_poll(struct net_device* netdev, int* budget) +static int nes_netdev_poll(struct napi_struct *napi, int budget) { - struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi); + struct net_device *netdev = nesvnic->netdev; struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq; - nesvnic->budget = *budget; - nesvnic->cqes_pending = 0; - nesvnic->rx_cqes_completed = 0; - nesvnic->cqe_allocs_pending = 0; + nesvnic->budget = budget; + nescq->cqes_pending = 0; + nescq->rx_cqes_completed = 0; + nescq->cqe_allocs_pending = 0; + nescq->rx_pkts_indicated = 0; nes_nic_ce_handler(nesdev, 
nescq); - netdev->quota -= nesvnic->rx_cqes_completed; - *budget -= nesvnic->rx_cqes_completed; - - if (nesvnic->cqes_pending == 0) { - netif_rx_complete(netdev); + if (nescq->cqes_pending == 0) { + netif_rx_complete(netdev, napi); /* clear out completed cqes and arm */ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | - nescq->cq_number | (nesvnic->cqe_allocs_pending << 16)); + nescq->cq_number | (nescq->cqe_allocs_pending << 16)); nes_read32(nesdev->regs+NES_CQE_ALLOC); } else { /* clear out completed cqes but don't arm */ nes_write32(nesdev->regs+NES_CQE_ALLOC, - nescq->cq_number | (nesvnic->cqe_allocs_pending << 16)); + nescq->cq_number | (nescq->cqe_allocs_pending << 16)); nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n", nesvnic->netdev->name); } - return (nesvnic->cqes_pending == 0) ? 0 : 1; + return nescq->rx_pkts_indicated; } #endif @@ -277,9 +276,11 @@ static int nes_netdev_open(struct net_device *netdev) /* Enable network packets */ nesvnic->linkup = 1; netif_start_queue(netdev); - } else { - netif_carrier_off(netdev); + netif_carrier_on(netdev); } +#ifdef NES_NAPI + napi_enable(&nesvnic->napi); +#endif nesvnic->netdev_open = 1; return 0; @@ -305,6 +306,9 @@ static int nes_netdev_stop(struct net_device *netdev) printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name); /* Disable network packets */ +#ifdef NES_NAPI + napi_disable(&nesvnic->napi); +#endif netif_stop_queue(netdev); if ((nesdev->netdev[0] == netdev) & (nesvnic->logical_port == nesdev->mac_index)) { nes_write_indexed(nesdev, @@ -1548,6 +1552,9 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev); + nesvnic = netdev_priv(netdev); + memset(nesvnic, 0, sizeof(*nesvnic)); + netdev->open = nes_netdev_open; netdev->stop = nes_netdev_stop; netdev->hard_start_xmit = nes_netdev_start_xmit; @@ -1567,8 +1574,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->features = NETIF_F_HIGHDMA; 
netdev->ethtool_ops = &nes_ethtool_ops; #ifdef NES_NAPI - netdev->poll = nes_netdev_poll; - netdev->weight = 128; + netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128); #endif #ifdef NETIF_F_HW_VLAN_TX nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); @@ -1580,9 +1586,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, #endif /* Fill in the port structure */ - nesvnic = netdev_priv(netdev); - - memset(nesvnic, 0, sizeof(*nesvnic)); nesvnic->netdev = netdev; nesvnic->nesdev = nesdev; nesvnic->msg_enable = netif_msg_init(debug, default_msg); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 5fb241a..36d34f4 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1099,8 +1099,6 @@ static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl, nesqp->hwqp.sq_vbase = kmap(nespbl->page); nesqp->page = nespbl->page; - - nesqp->hwqp.sq_vbase = ioremap(nesqp->hwqp.sq_pbase, PAGE_SIZE); if (!nesqp->hwqp.sq_vbase) { nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n"); kfree(nespbl); @@ -2712,7 +2710,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, list_for_each_entry(chunk, ®ion->chunk_list, list) { for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> PAGE_SHIFT; - /* nespbl->page = chunk->page_list[0].page; */ nespbl->page = sg_page(&chunk->page_list[0]); for (page_index=0; page_index References: <20071130182106.69D1FE60177@openfabrics.org> Message-ID: <000201c83380$2aa00620$ff0da8c0@amr.corp.intel.com> >@@ -2102,7 +2102,7 @@ int nes_cm_disconn_true(struct nes_qp *nesqp) > struct iw_cm_id *cm_id; > struct iw_cm_event cm_event; > struct nes_vnic *nesvnic; >- struct nes_cm_node *cm_node = NULL; >+ /* struct nes_cm_node *cm_node = NULL; */ Just delete this. 
>+static void nes_reset_link(struct nes_device *nesdev, u32 mac_index) >+{ >+ struct nes_adapter *nesadapter = nesdev->nesadapter; >+ u32 reset_value; >+ u32 i=0; >+ >+ if (nesadapter->hw_rev == NE020_REV) { >+ return; >+ } >+ >+ Extra blank line and {} are not needed. >+ reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); >+ if ((mac_index == 0)||((mac_index == 1) && (nesadapter->OneG_Mode))) { >+ reset_value |= 0x0000001d; >+ } else { >+ reset_value |= 0x0000002d; >+ } {}'s are not needed. >+ nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value); Need spacing around '+' - similar spacing issues in other places >+ >+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) >+ & 0x00000040) != 0x00000040) && (i++ < 5000)) { >+ } Is there a better way to wait for the read? >+ >+ >+ >+} Lots of extra blank lines - Sean From rdreier at cisco.com Fri Nov 30 10:50:21 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 30 Nov 2007 10:50:21 -0800 Subject: [ofa-general] Re: [PATCH] ib/mad: fix incorrect access to items on local_list In-Reply-To: <000001c8337a$cdc18e60$ff0da8c0@amr.corp.intel.com> (Sean Hefty's message of "Fri, 30 Nov 2007 09:59:50 -0800") References: <474BE237.8050602@dev.mellanox.co.il> <000001c8337a$cdc18e60$ff0da8c0@amr.corp.intel.com> Message-ID: > Fix this by leaving local_list alone. All requests on the local_list > have completed are just awaiting processing by a queued worker thread. Is this OK? It means that some MADs may be processed after cancel_mads() has returned, which seems risky. I don't know what ends up on the local_list but eg if this leads to completions after an agent is unregistered then it won't be good. 
From sean.hefty at intel.com Fri Nov 30 10:50:23 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 30 Nov 2007 10:50:23 -0800 Subject: [ofa-general] [PATCH 5/5] nes: napi interface fix In-Reply-To: <20071130183705.8A819E601CE@openfabrics.org> References: <20071130183705.8A819E601CE@openfabrics.org> Message-ID: <000301c83381$dd3fc1c0$ff0da8c0@amr.corp.intel.com> >--- a/drivers/infiniband/hw/nes/nes_hw.c >+++ b/drivers/infiniband/hw/nes/nes_hw.c >@@ -1232,6 +1232,12 @@ static void nes_replenish_nic_rq(struct nes_vnic >*nesvnic) > nesnic = &nesvnic->nic; > nesdev = nesvnic->nesdev; > spin_lock_irqsave(&nesnic->rq_lock, flags); >+ if (nesnic->replenishing_rq !=0) { >+ spin_unlock_irqrestore(&nesnic->rq_lock, flags); >+ return; >+ } >+ nesnic->replenishing_rq = 1; >+ spin_unlock_irqrestore(&nesnic->rq_lock, flags); > do { > skb = dev_alloc_skb(nesvnic->max_frame_size); > if (skb) { >@@ -1275,7 +1281,7 @@ static void nes_replenish_nic_rq(struct nes_vnic >*nesvnic) > if (rx_wqes_posted) { > nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | >nesnic->qp_id); > } >- spin_unlock_irqrestore(&nesnic->rq_lock, flags); >+ nesnic->replenishing_rq = 0; It seems racy that this is set under lock, but cleared without the lock held. How do you ensure that the nic_rq will always be replenished? >@@ -2314,11 +2313,11 @@ void nes_nic_ce_handler(struct nes_device *nesdev, >struct nes_hw_nic_cq *cq) > cqe_count = 0; > } > #ifdef NES_NAPI Is #ifdef napi sprinkled throughout the code common for most drivers? Is there a better way to handle this? (Is this OFED only for backports, or for upstream?) 
>@@ -361,6 +361,7 @@ enum nes_cqe_opcode_bits { > NES_CQE_VALID = (1<<31), > }; > >+ > enum nes_cqe_word_idx { > NES_CQE_PAYLOAD_LENGTH_IDX = 0, > NES_CQE_COMP_COMP_CTX_LOW_IDX = 2, >@@ -810,6 +811,7 @@ struct nes_hw_aeqe { > __le32 aeqe_words[4]; > }; > >+ couple extra blank lines were added > struct nes_cqp_request { > wait_queue_head_t waitq; > struct nes_hw_cqp_wqe cqp_wqe; >@@ -857,6 +859,8 @@ struct nes_hw_nic { > u16 rq_head; > u16 rq_tail; > u16 rq_size; >+ u8 replenishing_rq; >+ u8 reserved; Why is reserved added? - Sean From sean.hefty at intel.com Fri Nov 30 10:58:22 2007 From: sean.hefty at intel.com (Sean Hefty) Date: Fri, 30 Nov 2007 10:58:22 -0800 Subject: [ofa-general] RE: [PATCH] ib/mad: fix incorrect access to items on local_list In-Reply-To: References: <474BE237.8050602@dev.mellanox.co.il> <000001c8337a$cdc18e60$ff0da8c0@amr.corp.intel.com> Message-ID: <000401c83382$fa733b40$ff0da8c0@amr.corp.intel.com> >Is this OK? It means that some MADs may be processed after >cancel_mads() has returned, which seems risky. I don't know what ends >up on the local_list but eg if this leads to completions after an >agent is unregistered then it won't be good. The mads on the local_list hold a reference on the agent. The completions may come after cancel_mads() return, but should be processed before the agent is fully unregistered. This shouldn't be any different than mads that are removed from the local_list in local_completions(), which get missed by cancel_mads(), but are still completed. The mads on the wait_list are canceled, because we don't know how long it will take them to complete, since they could have a fairly long timeout. The mads on the local_list are done, and are simply waiting for processing. At least this was how I interpreted the code... 
- Sean From rdreier at cisco.com Fri Nov 30 11:08:45 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 30 Nov 2007 11:08:45 -0800 Subject: [ofa-general] Re: [PATCH] ib/mad: fix incorrect access to items on local_list In-Reply-To: <000401c83382$fa733b40$ff0da8c0@amr.corp.intel.com> (Sean Hefty's message of "Fri, 30 Nov 2007 10:58:22 -0800") References: <474BE237.8050602@dev.mellanox.co.il> <000001c8337a$cdc18e60$ff0da8c0@amr.corp.intel.com> <000401c83382$fa733b40$ff0da8c0@amr.corp.intel.com> Message-ID: > The mads on the local_list hold a reference on the agent. The completions may > come after cancel_mads() return, but should be processed before the agent is > fully unregistered. This shouldn't be any different than mads that are removed > from the local_list in local_completions(), which get missed by cancel_mads(), > but are still completed. Yes, makes sense. I think this patch is a good idea, but let's try to get confirmation that it fixes the issue. Thanks... From rick.jones2 at hp.com Fri Nov 30 11:16:02 2007 From: rick.jones2 at hp.com (Rick Jones) Date: Fri, 30 Nov 2007 11:16:02 -0800 Subject: [ofa-general] Not seeing any SDP performance changes in OFED 1.3 beta, and I get Oops when enabling sdp_zcopy_thresh In-Reply-To: References: <47445630.10000@dev.mellanox.co.il> Message-ID: <47506172.1060807@hp.com> Scott Weitzenkamp (sweitzen) wrote: > Using netperf with TCP_STREAM and TCP_RR, I'm not seeing any changes in > SDP throughput or CPU utilization comparing OFED 1.3 beta and OFED > 1.2.5. Looks like I need to set a non-zero value in > /sys/module/ib_sdp/sdp_zcopy_thresh? Do you plan to enable this by > default soon? I know there wasn't universal agreement as to the need, but there _are_ "native" SDP tests in netperf2 now, which I think would be better to use where possible since they will have "correct" headers lest someone look at a cut and paste ages from now and mistakenly think it was actually TCP rather than SDP.
happy benchmarking, rick jones unless of course one wants to test the LD_PRELOAD mechanism... From ggrundstrom at NetEffect.com Fri Nov 30 11:49:13 2007 From: ggrundstrom at NetEffect.com (Glenn Grundstrom) Date: Fri, 30 Nov 2007 13:49:13 -0600 Subject: [ewg] RE: [ofa-general] [PATCH 3/5] nes: fix link reset for certainphy types In-Reply-To: <000201c83380$2aa00620$ff0da8c0@amr.corp.intel.com> References: <20071130182106.69D1FE60177@openfabrics.org> <000201c83380$2aa00620$ff0da8c0@amr.corp.intel.com> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC07A57A41@venom2> > >+ > >+ while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) > >+ & 0x00000040) != 0x00000040) && (i++ < 5000)) { > >+ } > > Is there a better way to wait for the read? Typically, the reset is pretty quick and is complete within a few loops. The i++ counter is there to prevent a driver hang in case the reset fails for some reason. Thanks, Glenn. > > - Sean > _______________________________________________ > ewg mailing list > ewg at lists.openfabrics.org > http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg > From jimmott at austin.rr.com Fri Nov 30 12:03:50 2007 From: jimmott at austin.rr.com (Jim Mott) Date: Fri, 30 Nov 2007 14:03:50 -0600 Subject: [ofa-general] RE: [ewg] Not seeing any SDP performance changes in OFED 1.3 beta, and I get Oops when enabling sdp_zcopy_thresh In-Reply-To: References: <47445630.10000@dev.mellanox.co.il> Message-ID: <000001c8338c$206e80d0$614b8270$@rr.com> Hi, This kernel Oops is new and I will look at it. Dotan and the Mellanox regression tests have been keeping me busy recently. There was a problem like this, but only in multi-threaded apps using a single socket or when doing cleanup after ^C. I will re-enable default bzcopy behavior once all the important Mellanox regression tests are passing. Until then, setting the sdp_zcopy_thresh variable by hand (8192 and up should give better performance) and running simple tests like netperf should be working fine.
You should not be seeing any problem here. [I have only tested locally with x86_64 rhat4u4, rhat5, 2.6.23.8, and 2.6.24-rc2. Mellanox regression tests everything and they have not submitted this Oops yet.] I have opened bugs in the openfabrics bugzilla for everything I am currently working on. It is down right now or I would add pointers. Here is my work list; additions or priority changes welcome: SDP OPEN ISSUES LIST (Priority order) ===================================== 1) DONE: BUG: Unload of mlx4 and ib_sdp fails while SDP active 11/6 [PATCH 1/1 V2] SDP - Fix reference count bug ... 2) DONE: BUG: Many data corruption failures 11/11 [PATCH 1/1] SDP - Fix bug where zcopy bcopy returns ... 3) DONE: Bug 793 - kernel BUG at net/core/skbuff.c:95! 11/26 [PATCH 1/1] SDP - bug793; skbuff changes ... 4) TODO: BUG: kernel oops in SDP regression Replicated problem by hitting ^C during a transfer. I have created a patch that fixes the problem, but it needs more work to move into production. There are some side effects I do not yet understand. This is the one I am working on now. I hope to drop it soon. There is a bug open tracking it. 5) TODO: BUG: libsdp returns good RC when it should fail 6) TODO: BUG: aio_test fails in SDP regression 7) TODO: Bug 779 - Lock ordering problem during accept on 1.2.5 After building a 2.6.23.8 kernel with lock checking enabled, I can not reproduce this problem. Looks like I'll need more input from the reporter. (Bug updated to say this). I will continue to code review though. 8) DONE: Bug 294 - connect does not allow AF_INET_SDP [fix in bugzilla dropped] 9) DONE: Backport work needed to support 2.6.24 10) TODO: Package user space libsdp for Redhat This is supposed to be easy to do, but it will take me some time to figure out the detail. 
11) DONE: BUG: Memory leak 11/20 [PATCH 1/1 v2] SDP - Fix a memory leak in bzcopy -----Original Message----- From: ewg-bounces at lists.openfabrics.org [mailto:ewg-bounces at lists.openfabrics.org] On Behalf Of Scott Weitzenkamp (sweitzen) Sent: Friday, November 30, 2007 12:37 PM To: Jim Mott; Scott Weitzenkamp (sweitzen); ewg at lists.openfabrics.org Cc: general at lists.openfabrics.org Subject: [ewg] Not seeing any SDP performance changes in OFED 1.3 beta, and I get Oops when enabling sdp_zcopy_thresh Jim, Using netperf with TCP_STREAM and TCP_RR, I'm not seeing any changes in SDP throughput or CPU utilization comparing OFED 1.3 beta and OFED 1.2.5. Looks like I need to set a non-zero value in /sys/module/ib_sdp/sdp_zcopy_thresh? Do you plan to enable this by default soon? I tried "echo 4096 > /sys/module/ib_sdp/sdp_zcopy_thresh" on RHEL4 and then tried netperf, and got an Oops. Unable to handle kernel NULL pointer deref erence at 0000000000000000 RIP: {put_page+0} PML4 1a3047067 PGD 1a7a6d067 PMD 0 Oops: 0000 [1] SMP CPU 0 Modules linked in: parport_pc lp parport autofs4 i2c_dev i2c_co re nfs lockd nfs_acl sunrpc rdma_ucm(U) rds(U) ib_sdp(U) rdma_cm(U) iw_cm(U) ib_ addr(U) mlx4_ib(U) mlx4_core(U) ds yenta_socket pcmcia_core dm_mirror dm_multipa th dm_mod joydev button battery ac uhci_hcd ehci_hcd shpchp ib_mthca(U) ib_ipoib (U) ib_umad(U) ib_ucm(U) ib_uverbs(U) ib_cm(U) ib_sa(U) ib_mad(U) ib_core(U) md5 ipv6 e1000 floppy ata_piix libata sg ext3 jbd mptscsih mptsas mptspi mptscsi mp tbase sd_mod scsi_mod Pid: 6802, comm: netperf241 Not tainted 2.6.9-55.ELlargesmp RIP: 0010:[] {put_page+0} RSP: 0018:00000101a7bcbbc0 EFLAGS: 00010203 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000002 02 RDX: 00000101b0b43e80 RSI: 0000000000000202 RDI: 00000000000000 00 RBP: 00000101b85761c0 R08: 0000000000000000 R09: 00000000000000 00 R10: 0000000000000246 R11: ffffffffa02e0e36 R12: 00000101a4b330 80 R13: 00000101a7bcbd58 R14: 0000000000000000 R15: 00000000000100 00 
FS: 0000002a95696940(0000) GS:ffffffff80500380(0000) knlGS:000 0000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 0000000000101000 CR4: 00000000000006 e0 Process netperf241 (pid: 6802, threadinfo 00000101a7bca000, tas k 00000101a70df030) Stack: ffffffffa02e110a 0000000000000100 0000000000000000 00000 00000529780 0001000000000246 0000000000000246 000000008013feac 00000 800ffffffe0 0000000000000000 00000101a7bcbe88 Call Trace:{:ib_sdp:sdp_sendmsg+724} {queue_delayed_work+101} {:ib_addr:queue_req+122} {sock_sendmsg+271} {do_no_page+916} {au toremove_wake_function+0} {sockfd_lookup+16} { sys_sendto+195} {do_page_fault+577} {dnotify_parent+34} {vfs_read+248} {syst em_call+126} Code: 8b 07 48 89 fa f6 c4 80 74 3b 48 8b 57 10 8b 02 48 89 d1 f6 RIP {put_page+0} RSP <00000101a7bcbbc0> CR2: 0000000000000000 <0>Kernel panic - not syncing: Oops Scott Weitzenkamp SQA and Release Manager Server Virtualization Business Unit Cisco Systems _______________________________________________ ewg mailing list ewg at lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg From ggrundstrom at NetEffect.com Fri Nov 30 12:42:25 2007 From: ggrundstrom at NetEffect.com (Glenn Grundstrom) Date: Fri, 30 Nov 2007 14:42:25 -0600 Subject: [ofa-general] [PATCH 5/5] nes: napi interface fix In-Reply-To: <000301c83381$dd3fc1c0$ff0da8c0@amr.corp.intel.com> References: <20071130183705.8A819E601CE@openfabrics.org> <000301c83381$dd3fc1c0$ff0da8c0@amr.corp.intel.com> Message-ID: <5E701717F2B2ED4EA60F87C8AA57B7CC07A57A58@venom2> > > >--- a/drivers/infiniband/hw/nes/nes_hw.c > >+++ b/drivers/infiniband/hw/nes/nes_hw.c > >@@ -1232,6 +1232,12 @@ static void > nes_replenish_nic_rq(struct nes_vnic > >*nesvnic) > > nesnic = &nesvnic->nic; > > nesdev = nesvnic->nesdev; > > spin_lock_irqsave(&nesnic->rq_lock, flags); > >+ if (nesnic->replenishing_rq !=0) { > >+ spin_unlock_irqrestore(&nesnic->rq_lock, flags); > >+ return; > >+ } > >+ 
nesnic->replenishing_rq = 1; > >+ spin_unlock_irqrestore(&nesnic->rq_lock, flags); > > do { > > skb = dev_alloc_skb(nesvnic->max_frame_size); > > if (skb) { > >@@ -1275,7 +1281,7 @@ static void > nes_replenish_nic_rq(struct nes_vnic > >*nesvnic) > > if (rx_wqes_posted) { > > nes_write32(nesdev->regs+NES_WQE_ALLOC, > (rx_wqes_posted << 24) | > >nesnic->qp_id); > > } > >- spin_unlock_irqrestore(&nesnic->rq_lock, flags); > >+ nesnic->replenishing_rq = 0; > > It seems racy that this is set under lock, but cleared > without the lock held. > How do you ensure that the nic_rq will always be replenished? > nes_replenish_nic_rq() gets called only when an atomic(rx_skbs_needed) flag is set. Skbs are alloc'd until no more are needed, rx_skbs_needed flag is cleared, and then replenishing_rq is cleared. Therefore, the lock is really only needed around the set. It's much more clear by viewing more of the code rather than just this patch. > >@@ -2314,11 +2313,11 @@ void nes_nic_ce_handler(struct > nes_device *nesdev, > >struct nes_hw_nic_cq *cq) > > cqe_count = 0; > > } > > #ifdef NES_NAPI > > Is #ifdef napi sprinkled throughout the code common for most > drivers? Is there > a better way to handle this? (Is this OFED only for backports, or for > upstream?) I'll need to think of a better way to handle this one. > > struct nes_cqp_request { > > wait_queue_head_t waitq; > > struct nes_hw_cqp_wqe cqp_wqe; > >@@ -857,6 +859,8 @@ struct nes_hw_nic { > > u16 rq_head; > > u16 rq_tail; > > u16 rq_size; > >+ u8 replenishing_rq; > >+ u8 reserved; > > Why is reserved added? For structure padding. Glenn. > > - Sean From Jeffrey.C.Becker at nasa.gov Fri Nov 30 13:29:36 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Fri, 30 Nov 2007 13:29:36 -0800 Subject: [ofa-general] Re: Weblink borken on OpenFabrics page In-Reply-To: References: <9FA59C95FFCBB34EA5E42C1A8573784FD48259@mtiexch01.mti.com> Message-ID: <475080C0.7000503@nasa.gov> Jeffrey Scott wrote: > > Any idea how to fix this? 
The wiki is accessible via the “Developer > Resources” tab on the OpenFabrics website. We’ve never touched it. We > simply provide a link to the wiki. Do you know if the wiki simply has > a new URL? > As I mentioned in my note to general at lists.openfabrics.org this morning, the wiki is one of the things that broke as a result of my applying patches to the server, and subsequent mis-adventures with the aptitude package manager. I am working on fixing it now. Also it will become a regular http (not https) URL. I'll send out a note when it's fixed. Thanks. -jeff > ------------------------------------------------------------------------ > > *From:* Brian Sparks [mailto:Brian at mellanox.com] > *Sent:* Friday, November 30, 2007 11:38 AM > *To:* Jeffrey Scott > *Subject:* Weblink borken on OpenFabrics page > > This needs fixing… > > https://wiki.openfabrics.org/tiki-index.php > > Brian Sparks > > Marketing Communications Manager > > **Mellanox Technologies** > > 2900 Stender Way > > Santa Clara, CA 95054 > > 408-916-0008 office > > 408-802-2775 cell > > www.mellanox.com > From Jeffrey.C.Becker at nasa.gov Fri Nov 30 13:43:31 2007 From: Jeffrey.C.Becker at nasa.gov (Jeff Becker) Date: Fri, 30 Nov 2007 13:43:31 -0800 Subject: [ofa-general] New Downloads page Message-ID: <47508403.4090406@nasa.gov> Hi all. I fixed the PHP problem with apache and thanks to the SplitRock team, my new page is linked to the Downloads tab on the OFA website. As you may recall, this page was requested by Arlin a while back. There is a directory on the server for each of the OFED components. If you put a WEB_README in your directory (several of you have done this), the text will appear under the directory name on the downloads page. Also, as before, clicking on the directory link gives you a directory listing from which you can download. Please try it out and send me any questions or comments. Thanks. 
-jeff From ardavis at ichips.intel.com Fri Nov 30 13:44:51 2007 From: ardavis at ichips.intel.com (Arlin Davis) Date: Fri, 30 Nov 2007 13:44:51 -0800 Subject: [ofa-general] how to use Intel MPI with dapl2? In-Reply-To: References: <6C2C79E72C305246B504CBA17B5500C90282E357@mtlexch01.mtl.com> <474F8241.4040802@ichips.intel.com> Message-ID: <47508453.70408@ichips.intel.com> Scott Weitzenkamp (sweitzen) wrote: >> How did you configure your servers to run Intel MPI with v2 >> libraries? > > I only installed the DAPL 2.0 libs. > Did you happen to see the following message (I_MPI_DEBUG=50) before failover to sockets? I_MPI: [0] I_MPI_dat_ia_openv_wrap(): DAPL version compatibility requirement check failed; required DAPL 1.2, provided DAPL 2.0 -arlin From xma at us.ibm.com Fri Nov 30 15:28:27 2007 From: xma at us.ibm.com (Shirley Ma) Date: Fri, 30 Nov 2007 15:28:27 -0800 Subject: [ofa-general] OFED-1.3beta IPoIB testing questions In-Reply-To: Message-ID: I just touch tested ofed-1.3 beta IPoIB, and found that a kernel parameter hw_csum has been added in IPoIB. I have several questions here: 1. Why not use ethtool to set up these HW_CSUM flags? 2. I haven't looked at the detailed code yet; is it possible that with this flag, TCP/IP will not do CSUM for an HCA which has no TCP/IP offload support? If so, then these packets should be limited to the IB network. If routed to an ethernet network, the packets would be dropped. If not, I tested IPoIB-UD; why did I see a 30% improvement with hw_csum set in a non-ConnectX mthca SDR environment? 3. I saw that switching between IPoIB-cm and IPoIB-ud corrupted the interface IP address (unicast address, subnet mask, broadcast address). Has anybody seen the same problem?
Thanks Shirley From wei.fang at hermes-microvision.com Fri Nov 30 15:56:17 2007 From: wei.fang at hermes-microvision.com (Wei Fang) Date: Fri, 30 Nov 2007 15:56:17 -0800 Subject: [ofa-general] Question: Verbs API Error code recover Message-ID: <4750A321.5080406@hermes-microvision.com> Hi, All: I joined this list just a few days ago. Right now I'm facing a problem using OFED 1.2.5's verbs API. In my program, I use the RDMA Write function to transfer data ( ibv_post_send ). Then I use ibv_poll_cq to wait for the completion on the CQ. Sometimes ibv_poll_cq returns the error IBV_WC_RETRY_EXC_ERR (error code is 12). When this error happens, every subsequent transfer fails. In this case, I have to restart the computer. Can anyone tell me how to recover from this error without quitting the program or restarting the PC? -- Best Regards Wei Fang From rdreier at cisco.com Fri Nov 30 16:16:33 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 30 Nov 2007 16:16:33 -0800 Subject: [ofa-general] [ANNOUNCE] libmlx4 1.0-rc1 released Message-ID: libmlx4 is a userspace driver for Mellanox ConnectX InfiniBand HCAs. It is a plug-in module for libibverbs that allows programs to use Mellanox hardware directly from userspace. The first tarball release on the way to a stable release, libmlx4-1.0-rc1, is available from http://www.openfabrics.org/downloads/mlx4/libmlx4-1.0-rc1.tar.gz with sha1sum db40fcfe987dea4dd68735e150922433b69d1215 libmlx4-1.0-rc1.tar.gz I also tagged the 1.0-rc1 release of libmlx4 and pushed it out to my git tree on kernel.org: git://git.kernel.org/pub/scm/libs/infiniband/libmlx4.git (the name of the tag is libmlx4-1.0-rc1). This is the first formal release of libmlx4. Things appear quite usable at the moment. Please test and let me know if you see anything that needs to be fixed. I plan to make another release in a week -- if nothing major comes up, this will be 1.0; otherwise it will be 1.0-rc2. I will also start the process of getting libmlx4 packages into the Debian and Fedora archives.
From rdreier at cisco.com Fri Nov 30 20:03:31 2007 From: rdreier at cisco.com (Roland Dreier) Date: Fri, 30 Nov 2007 20:03:31 -0800 Subject: [ofa-general] [GIT PULL] please pull infiniband.git Message-ID: Linus, please pull from master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This tree is also available from kernel.org mirrors at: git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git for-linus This will get two small fixes for 2.6.24: Jack Morgenstein (1): IPoIB: Fix oops if xmit is called when priv->broadcast is NULL Joachim Fenkes (1): IB/ehca: Fix static rate if path faster than link drivers/infiniband/hw/ehca/ehca_av.c | 8 ++++++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 453eb99..f7782c8 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -76,8 +76,12 @@ int ehca_calc_ipd(struct ehca_shca *shca, int port, link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; - /* IPD = round((link / path) - 1) */ - *ipd = ((link + (path >> 1)) / path) - 1; + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a03a65e..c9f6077 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -460,6 +460,9 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; + if (!priv->broadcast) + return NULL; + path = kzalloc(sizeof *path, GFP_ATOMIC); if (!path) return NULL; From kliteyn at mellanox.co.il Fri Nov 30 21:09:39 2007 From: kliteyn at mellanox.co.il (kliteyn at 
mellanox.co.il) Date: 1 Dec 2007 07:09:39 +0200 Subject: [ofa-general] nightly osm_sim report 2007-12-01:normal completion Message-ID: OSM Simulation Regression Summary [Generated mail - please do NOT reply] OpenSM binary date = 2007-11-30 OpenSM git rev = Thu_Nov_29_19:37:20_2007 [498e13f7145f77d468054688d8cbea61677b624a] ibutils git rev = Tue_Sep_4_17:57:34_2007 [4bf283f6a0d7c0264c3a1d2de92745e457585fdb] Total=480 Pass=479 Fail=1 Pass: 36 Stability IS1-16.topo 36 Pkey IS1-16.topo 36 OsmTest IS1-16.topo 36 OsmStress IS1-16.topo 36 Multicast IS1-16.topo 36 LidMgr IS1-16.topo 12 Stability IS3-loop.topo 12 Stability IS3-128.topo 12 Pkey IS3-128.topo 12 OsmTest IS3-loop.topo 12 OsmTest IS3-128.topo 12 OsmStress IS3-128.topo 12 Multicast IS3-loop.topo 12 Multicast IS3-128.topo 12 FatTree merge-roots-4-ary-2-tree.topo 12 FatTree merge-root-4-ary-3-tree.topo 12 FatTree gnu-stallion-64.topo 12 FatTree blend-4-ary-2-tree.topo 12 FatTree RhinoDDR.topo 12 FatTree FullGnu.topo 12 FatTree 4-ary-2-tree.topo 12 FatTree 2-ary-4-tree.topo 12 FatTree 12-node-spaced.topo 12 FTreeFail 4-ary-2-tree-missing-sw-link.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-2.topo 12 FTreeFail 4-ary-2-tree-links-at-same-rank-1.topo 12 FTreeFail 4-ary-2-tree-diff-num-pgroups.topo 11 LidMgr IS3-128.topo Failures: 1 LidMgr IS3-128.topo From MelodyTravis at olgafilippova.com Fri Nov 30 20:12:21 2007 From: MelodyTravis at olgafilippova.com (Euro VIP Table) Date: Sat, 01 Dec 2007 04:12:21 +0000 Subject: [ofa-general] 400 Euro in Welcome bonuses Message-ID: <76326.ralph@safaa> Euro VIP invites you to its tables, with an offer of 400€ in Welcome bonuses! 400€ FREE! – New players at Euro VIP receive a match bonus on their first 4 deposits, each worth 100€. That’s a total of 400€, giving you even more winning chances, for free! Join the most refined and reliable casino online today, where you can choose from a list of over 100 cash games including progressive jackpots worth millions of Euro. 
Experience the difference, only at Euro VIP. -------------- next part -------------- An HTML attachment was scrubbed... URL: