[ofa-general] fork() failing in mvapich1 and mvapich2, using OFED 1.4

Mike Heinz michael.heinz at qlogic.com
Wed Nov 12 08:52:29 PST 2008


I'm not sure when this stopped working, but I'm getting a complaint from
our QA people that our fork() test program is failing with mvapich1 and
mvapich2 when tested with OFED 1.4. When I tested with OFED 1.3.1, I got
a similar result:


[root@panic mpi_fork]$ mpirun_rsh -np 2 panic homer mpi_fork 128 1024
Exit code -3 signaled from homer
Abort signaled by rank 0: [panic:0] Got completion with error
IBV_WC_LOC_LEN_ERR, code=1, dest rank=1

Killing remote processes...MPI process terminated unexpectedly
DONE


This is the program that generates the failure:

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

#include <mpi.h>


/*
 * Size in bytes of each statically allocated message buffer below.
 * NOTE(review): the last factor is 1028, not 1024 -- possibly deliberate
 * slack for the page alignment done in main(), possibly a typo; confirm.
 */
#define MYBUFSIZE (4*1024*1028)
/* Upper bound on the iteration count; also sizes the request/status arrays. */
#define MAX_REQ_NUM 100000

/* Backing storage; main() derives page-aligned send/receive pointers from these. */
char s_buf1[MYBUFSIZE];
char r_buf1[MYBUFSIZE];


/* One request/status slot per outstanding MPI_Isend/MPI_Irecv, completed by MPI_Waitall. */
MPI_Request request[MAX_REQ_NUM];
MPI_Status my_stat[MAX_REQ_NUM];

/*
 * Parse a whole-string decimal integer.
 * Returns 0 and stores the value in *out on success, -1 when `text` is empty
 * or has trailing junk. Used instead of atoi(), which cannot report malformed
 * input. Out-of-range input saturates to LONG_MIN/LONG_MAX and is rejected by
 * the caller's range checks.
 */
static int parse_long(const char *text, long *out)
{
    char *end = NULL;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0') {
        return -1;
    }
    *out = value;
    return 0;
}

/* Return the first address at or after `base` that is a multiple of `page_size`. */
static char *page_align(char *base, long page_size)
{
    unsigned long page = (unsigned long)page_size;
    unsigned long addr = (unsigned long)base;
    unsigned long pad = (page - addr % page) % page;

    return base + pad;
}

/*
 * One round of the two-rank exchange: rank 0 posts `loop` non-blocking sends
 * of `size` bytes to rank 1 and then waits for a 4-byte acknowledgement;
 * every other rank posts the matching receives from rank 0 and sends the
 * acknowledgement back.
 */
static void exchange(int myid, int loop, int size, char *s_buf, char *r_buf)
{
    int i;

    if (myid == 0) {
        for (i = 0; i < loop; i++) {
            MPI_Isend(s_buf, size, MPI_CHAR, 1, 100, MPI_COMM_WORLD,
                      request + i);
        }
        MPI_Waitall(loop, request, my_stat);
        MPI_Recv(r_buf, 4, MPI_CHAR, 1, 101, MPI_COMM_WORLD, &my_stat[0]);
    } else {
        for (i = 0; i < loop; i++) {
            MPI_Irecv(r_buf, size, MPI_CHAR, 0, 100, MPI_COMM_WORLD,
                      request + i);
        }
        MPI_Waitall(loop, request, my_stat);
        MPI_Send(s_buf, 4, MPI_CHAR, 0, 101, MPI_COMM_WORLD);
    }
}

/*
 * fork() regression test for an MPI stack.
 *
 * Usage: mpi_fork loop msg_size   (run with exactly two ranks)
 *
 * Runs a warm-up exchange, forks a child that outlives the parent's next
 * round of communication, runs a timed exchange, prints
 * "<msg_size>\t<MB/s>" on rank 0, then reaps the child and reports a
 * non-zero or abnormal child exit on stdout.
 *
 * Returns 0 on normal completion and on usage errors (as the original did);
 * aborts the MPI job if fork() itself fails.
 */
int main(int argc,char *argv[])
{
    int  myid, numprocs, i;
    int size, loop;
    long size_arg = 0, loop_arg = 0;
    long page_size, s_room, r_room, max_size;
    char *s_buf, *r_buf;
    double t_start=0.0, t=0.0;
    pid_t child;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&myid);

    if ( argc < 3
         || parse_long(argv[1], &loop_arg) != 0
         || parse_long(argv[2], &size_arg) != 0
         || loop_arg < 0 || size_arg < 0 ) {
       fprintf(stderr, "Usage: mpi_fork loop msg_size\n");
       MPI_Finalize();
       return 0;
    }

    /* Rank 0 talks to rank 1 only; any other process count aborts or hangs. */
    if (numprocs != 2) {
        fprintf(stderr, "mpi_fork must be run with exactly 2 processes\n");
        MPI_Finalize();
        return 0;
    }

    page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0 || page_size >= MYBUFSIZE) {
        fprintf(stderr, "Unusable page size %ld\n", page_size);
        MPI_Finalize();
        return 0;
    }

    s_buf = page_align(s_buf1, page_size);
    r_buf = page_align(r_buf1, page_size);

    assert( (s_buf != NULL) && (r_buf != NULL) );

    /* Alignment may skip up to page_size-1 bytes, so the usable length is
     * whatever remains behind the aligned pointer in the smaller buffer. */
    s_room = (long)((s_buf1 + MYBUFSIZE) - s_buf);
    r_room = (long)((r_buf1 + MYBUFSIZE) - r_buf);
    max_size = (s_room < r_room) ? s_room : r_room;

    if (size_arg > max_size) {
         fprintf(stderr, "Maximum message size is %d\n", (int)max_size);
         MPI_Finalize();
         return 0;
    }

    if (loop_arg > MAX_REQ_NUM) {
         fprintf(stderr, "Maximum number of iterations is %d\n", MAX_REQ_NUM);
         MPI_Finalize();
         return 0;
    }

    size = (int)size_arg;
    loop = (int)loop_arg;

    for ( i=0; i<size; i++ ){
           s_buf[i]='a';
           r_buf[i]='b';
    }

    /* warmup */
    exchange(myid, loop, size, s_buf, r_buf);

    // fork a child process and make sure it lives beyond parent touching pages
    // if fork is not properly handled in stack, parent would get a copy
    // of its registered/locked pages (such as qp wqes) on 1st access
    // and problems such as Local Length Error would be reported by HCA
    child = fork();
    if (child < 0) {
        /* Without a child the test is meaningless and wait() below would fail. */
        perror("fork");
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }
    if (child == 0) {
        // child exists but doesn't touch anything, parent still owns pages
        sleep(10);
        // exec another program
        execlp("date", "date", (char *)NULL);
        // just in case exec fails; _exit() so the child does not run the
        // parent's atexit handlers or flush its stdio buffers
        _exit(0);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Timed run: only rank 0 measures, around its sends plus the ack. */
    if (myid == 0) {
        t_start = MPI_Wtime();
    }
    exchange(myid, loop, size, s_buf, r_buf);
    if (myid == 0) {
        t = MPI_Wtime() - t_start;
    }

    if ( myid == 0 ) {
       double tmp;
       tmp = ((size*1.0)/1.0e6)*loop;
       fprintf(stdout,"%d\t%f\n", size, tmp/t);
    }
    {
        int status = 0;   /* defined value even when wait() fails */
        int ret;

        ret = wait(&status);
        if (ret == -1 || ! WIFEXITED(status) || WEXITSTATUS(status) != 0)
        {
           fprintf(stdout,"ERROR: child failure: ret=%d, status=0x%x, exit_status=%d\n",
                   ret, (unsigned int)status, WEXITSTATUS(status));
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}

 
--
Michael Heinz
Principal Engineer, Qlogic Corporation
King of Prussia, Pennsylvania



More information about the general mailing list