[openib-general] segmentation fault in ibv_modify_srq

Sayantan Sur surs at cse.ohio-state.edu
Wed Oct 5 11:36:52 PDT 2005


Hello,

This is in regard to the use of the `ibv_modify_srq' call. When I use
this call, I get a segmentation fault. I have included the code snippet,
the output of the `strace -ewrite=all' command, and the dmesg output
below. I'd be glad if someone could help me get around the problem.
Please let me know if additional debug information is required.

TIA,
Sayantan.

Platform: Opteron 2.2GHz, Tyan S2895 motherboard, 2GB memory
OS: Linux 2.6.13.1-smp, SuSe 9.3
Firmware: 5.1.0
OpenIB svn rev: 3665 (the revision number might be off by a little, but
this version was checked out yesterday evening 04/10).

Code Snippet:
=============
// Creates the shared receive queue stored in ibv_dev.srq_hndl and then
// re-applies its limit through ibv_modify_srq().
//
// Takes no arguments and returns nothing; it reads ibv_dev.context,
// ibv_dev.ptag, ibv_dev.me and viadev_rq_size, and writes ibv_dev.srq_hndl.
// Either verbs call failing is reported through error_abort_all().
static void create_srq(void)
{
    struct ibv_srq_init_attr srq_init_attr;
    struct ibv_srq_attr srq_attr;

    // Zero both structures so that fields not assigned below are 0.
    memset(&srq_init_attr, 0, sizeof(srq_init_attr));
    memset(&srq_attr, 0, sizeof(srq_attr));

    srq_init_attr.srq_context = ibv_dev.context;
    srq_init_attr.attr.max_wr = viadev_rq_size; // 300 in the reported run
    srq_init_attr.attr.max_sge = 1;
    srq_init_attr.attr.srq_limit = 10;

    ibv_dev.srq_hndl = ibv_create_srq(ibv_dev.ptag, &srq_init_attr);

    if (!ibv_dev.srq_hndl) {
        error_abort_all(GEN_EXIT_ERR, "Error creating SRQ\n");
    }

    // Same values as at creation time; only IBV_SRQ_LIMIT is requested in
    // the mask passed below.
    srq_attr.max_wr = viadev_rq_size;
    srq_attr.max_sge = 1;
    srq_attr.srq_limit = 10;

    // Reported failure point: the process segfaults in this call.
    if (ibv_modify_srq(ibv_dev.srq_hndl, &srq_attr, IBV_SRQ_LIMIT)) {
        // The literal must stay on one line; a newline inside it (as
        // introduced by mail wrapping) leaves the string unterminated.
        error_abort_all(GEN_EXIT_ERR, "Couldn't modify SRQ limit\n");
    }

    // srq_limit is a uint32_t in libibverbs' struct ibv_srq_attr, so it is
    // printed with %u. NOTE(review): ibv_dev.me is assumed to be an int;
    // its declaration is not visible here.
    fprintf(stderr, "[%d] limit %u\n", ibv_dev.me, srq_attr.srq_limit);
}

===========

Strace output
===========
[surs at ro0:osu_benchmarks] ../bin/mpirun_rsh -np 2 ro0 ro1 strace -ewrite
-ewrite=all ./lat 
write(3, "\0\0\0\0\4\0\4\0PT\317\377\377\177\0\0", 16write(3,
"\0\0\0\0\4\0\4\0\20\370\233\377\377\177\0\0", 16) = 16
 | 00000  00 00 00 00 04 00 04 00  10 f8 9b ff ff 7f 00 00  ........
........ |
write(3, "\3\0\0\0\4\0\3\0\320\367\233\377\377\177\0\0", 16) = 16
 | 00000  03 00 00 00 04 00 03 00  d0 f7 9b ff ff 7f 00 00  ........
........ |
write(3, "\3\0\0\0\4\0\3\0 \370\233\377\377\177\0\0", 16) = 16
 | 00000  03 00 00 00 04 00 03 00  20 f8 9b ff ff 7f 00 00  ........
....... |
write(3, "\2\0\0\0\6\0\n\0\340\367\233\377\377\177\0\0\1\335\324"...,
24) = 24
 | 00000  02 00 00 00 06 00 0a 00  e0 f7 9b ff ff 7f 00 00  ........
........ |
 | 00010  01 dd d4 00 00 00 00 00                           ........
|
) = 16
 | 00000  00 00 00 00 04 00 04 00  50 54 cf ff ff 7f 00 00  ........
PT...... |
write(3, "\3\0\0\0\4\0\3\0\20T\317\377\377\177\0\0", 16) = 16
 | 00000  03 00 00 00 04 00 03 00  10 54 cf ff ff 7f 00 00  ........
.T...... |
write(3, "\3\0\0\0\4\0\3\0`T\317\377\377\177\0\0", 16) = 16
 | 00000  03 00 00 00 04 00 03 00  60 54 cf ff ff 7f 00 00  ........
`T...... |
write(3, "\2\0\0\0\6\0\n\0 T\317\377\377\177\0\0\1\335\324\0\0\0"...,
24) = 24
 | 00000  02 00 00 00 06 00 0a 00  20 54 cf ff ff 7f 00 00  ........
T...... |
 | 00010  01 dd d4 00 00 00 00 00                           ........
|
write(3, "\t\0\0\0\f\0\3\0 S\317\377\377\177\0\0\0\20\325\0\0\0\0"...,
48) = 48
 | 00000  09 00 00 00 0c 00 03 00  20 53 cf ff ff 7f 00 00  ........
S...... |
write(3, "\t\0\0\0\f\0\3\0\340\366\233\377\377\177\0\0\0\20\325\0"...,
48) = 48
 | 00000  09 00 00 00 0c 00 03 00  e0 f6 9b ff ff 7f 00 00  ........
........ |
 | 00010  00 10 d5 00 00 00 00 00  00 00 20 00 00 00 00 00  ........ ..
..... |
 | 00020  00 00 00 00 00 00 00 00  00 00 00 00 01 00 00 00  ........
........ |
write(3, "\22\0\0\0\22\0\4\0\260\367\233\377\377\177\0\0 \331\324"...,
72) = 72
 | 00000  12 00 00 00 12 00 04 00  b0 f7 9b ff ff 7f 00 00  ........
........ |
 | 00010  20 d9 d4 00 00 00 00 00  ff ff 00 00 00 00 00 00   .......
........ |
 | 00020  ff ff ff ff 00 00 00 00  02 26 00 4c 07 00 12 00  ........
.&.L.... |
 | 00030  00 40 f5 00 00 00 00 00  00 20 f5 00 00 00 00 00  . at ...... .
...... |
 | 00040  00 00 00 00 ff 7f 00 00                           ........
|
write(3, "\t\0\0\0\f\0\3\0 \367\233\377\377\177\0\0\0`\365\0\0\0"...,
48) = 48
 | 00000  09 00 00 00 0c 00 03 00  20 f7 9b ff ff 7f 00 00  ........
....... |
 | 00010  00 60 f5 00 00 00 00 00  00 80 00 00 00 00 00 00  .`......
........ |
 | 00020  00 00 00 00 00 00 00 00  01 00 00 00 00 00 00 00  ........
........ |
write(3, " \0\0\0\16\0\3\0\340\367\233\377\377\177\0\0\0\1\325\0"...,
56) = 56
 | 00000  20 00 00 00 0e 00 03 00  e0 f7 9b ff ff 7f 00 00   .......
........ |
 | 00010  00 01 d5 00 00 00 00 00  01 00 00 00 2c 01 00 00  ........
....,... |
 | 00010  00 10 d5 00 00 00 00 00  00 00 20 00 00 00 00 00  ........ ..
..... |
 | 00020  00 00 00 00 00 00 00 00  00 00 00 00 01 00 00 00  ........
........ |
write(3, "\22\0\0\0\22\0\4\0\360S\317\377\377\177\0\0 \331\324\0"...,
72) = 72
 | 00000  12 00 00 00 12 00 04 00  f0 53 cf ff ff 7f 00 00  ........
.S...... |
 | 00010  20 d9 d4 00 00 00 00 00  ff ff 00 00 00 00 00 00   .......
........ |
 | 00020  ff ff ff ff 00 00 00 00  02 26 00 4c 07 00 12 00  ........
.&.L.... |
 | 00030  00 40 f5 00 00 00 00 00  00 20 f5 00 00 00 00 00  . at ...... .
...... |
 | 00040  00 00 00 00 ff 7f 00 00                           ........
|
write(3, "\t\0\0\0\f\0\3\0`S\317\377\377\177\0\0\0`\365\0\0\0\0\0"...,
48) = 48
 | 00000  09 00 00 00 0c 00 03 00  60 53 cf ff ff 7f 00 00  ........
`S...... |
 | 00010  00 60 f5 00 00 00 00 00  00 80 00 00 00 00 00 00  .`......
........ |
 | 00020  00 00 00 00 00 00 00 00  01 00 00 00 00 00 00 00  ........
........ |
write(3, " \0\0\0\16\0\3\0 T\317\377\377\177\0\0\0\1\325\0\0\0\0"...,
56) = 56
 | 00020  01 00 00 00 0a 00 00 00  02 27 00 4c fe 7f 00 00  ........
.'.L.... |
 | 00030  00 20 f5 00 00 00 00 00                           . ......
|
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
 | 00000  20 00 00 00 0e 00 03 00  20 54 cf ff ff 7f 00 00   .......
T...... |
 | 00010  00 01 d5 00 00 00 00 00  01 00 00 00 2c 01 00 00  ........
....,... |
 | 00020  01 00 00 00 0a 00 00 00  02 27 00 4c fe 7f 00 00  ........
.'.L.... |
 | 00030  00 20 f5 00 00 00 00 00                           . ......
|
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
+++ killed by SIGSEGV +++


dmesg output
============

lat[18631]: segfault at 0000000000000000 rip 0000000000000000 rsp
00007fffff9748c8 error 14
lat[18755]: segfault at 0000000000000000 rip 0000000000000000 rsp
00007fffffb3aa58 error 14
lat[18777]: segfault at 0000000000000000 rip 0000000000000000 rsp
00007fffffe7bb88 error 14
lat[19128]: segfault at 0000000000000000 rip 0000000000000000 rsp
00007fffff942018 error 14


============



-- 
http://www.cse.ohio-state.edu/~surs



More information about the general mailing list