[openib-general] RE: [PATCH] opensm: fixes in signal handling

Yael Kalka yael at mellanox.co.il
Mon Feb 27 02:07:48 PST 2006


Hi Sasha,
The patch compiles fine under windows.
Thanks,
Yael

> -----Original Message-----
> From: Sasha Khapyorsky [mailto:sashak at voltaire.com]
> Sent: Wednesday, February 22, 2006 1:33 AM
> To: halr at voltaire.com; Yael Kalka
> Cc: openib-general at openib.org
> Subject: [PATCH] opensm: fixes in signal handling
> 
> Hello,
> 
> There are fixes for broken signal handling stuff. Finally this should
> prevent some opensm crashes (actually deadlocks), for instance caused
> by such command:
> 
>   while [ 1 ] ; do kill -HUP <opensm-pid> ; done
> 
> 
> Yael, I hope this patch does not broke windows compilations (this
masks
> signal related functions under __WIN__), but cannot be absolutely sure
-
> please look at this from "under windows" point of view. Thanks.
> 
> Sasha.
> 
> 
> This fixes broken signal handling. In this patch:
> 
>  - signal handling stuff is moved to main.c
>  - cl_sig_* is replaced by more powerfull posix (I hope it should not
>    be bad for win because this is under !__WIN__ anyway)
>  - signal handler does not call resweeper or wakeup directly, but only
>    update new osm_hup_flag (or osm_exit_flag on SIGINT or SIGTERM)
>  - signal delivery are masked in all threads expept first one, so only
>    expected thread will be interrupted (from sleep() or poll())
>  - resweep thread will be wakeuped from main.c thread instead direct
>  - poll was added to osm_console - this provides timeout ability and
>    workarouds getline()'s signal interruption problem.
> 
> 
> diff --git a/osm/include/opensm/osm_console.h
> b/osm/include/opensm/osm_console.h
> index 5a4036f..c5cd22a 100644
> --- a/osm/include/opensm/osm_console.h
> +++ b/osm/include/opensm/osm_console.h
> @@ -50,6 +50,7 @@
>  BEGIN_C_DECLS
> 
>  void osm_console(osm_opensm_t *p_osm);
> +void osm_console_prompt(void);
> 
>  END_C_DECLS
> 
> diff --git a/osm/include/opensm/osm_opensm.h
> b/osm/include/opensm/osm_opensm.h
> index 833c4c3..3235ad4 100644
> --- a/osm/include/opensm/osm_opensm.h
> +++ b/osm/include/opensm/osm_opensm.h
> @@ -388,38 +388,12 @@ osm_opensm_wait_for_subnet_up(
> 
>  /****v* OpenSM/osm_exit_flag
>  */
> -extern volatile int osm_exit_flag;
> +extern volatile unsigned int osm_exit_flag;
>  /*
>  * DESCRIPTION
>  *  Set to one to cause all threads to leave
>  *********/
> 
> -#ifndef __WIN__
> -/****f* OpenSM: OpenSM/osm_reg_sig_handler
> -* NAME
> -*	 osm_reg_sig_handler
> -*
> -* DESCRIPTION
> -*	Registers the common signal handler
> -*
> -* SYNOPSIS
> -*/
> -void osm_reg_sig_handler(
> -IN osm_opensm_t* const p_osm);
> -/*
> -* PARAMETERS
> -*	p_osm
> -*		[in] Pointer to a OpenSM object to handle signals on.
> -*
> -* RETURN VALUES
> -*	None
> -*
> -* NOTES
> -*
> -* SEE ALSO
> -*********/
> -#endif /* __WIN__ */
> -
>  END_C_DECLS
> 
>  #endif	/* _OSM_OPENSM_H_ */
> diff --git a/osm/opensm/main.c b/osm/opensm/main.c
> index c5ba443..797b14c 100644
> --- a/osm/opensm/main.c
> +++ b/osm/opensm/main.c
> @@ -77,14 +77,59 @@
>    instantiating more than one opensm object.
>  */
>  osm_opensm_t osm;
> -volatile int osm_exit_flag = 0;
> +
> +volatile unsigned int osm_exit_flag = 0;
> +
> +static volatile unsigned int osm_hup_flag = 0;
> 
>  #define GUID_ARRAY_SIZE 64
>  #define INVALID_GUID (0xFFFFFFFFFFFFFFFFULL)
> 
> +
> +#ifdef __WIN__
> +#define block_signals()
> +#define setup_signals()
> +#else
> +
> +static void mark_exit_flag(int signum)
> +{
> +	if(!osm_exit_flag)
> +		printf("OpenSM: Got signal %d - exiting...\n", signum);
> +	osm_exit_flag = 1;
> +}
> +
> +static void mark_hup_flag(int signum)
> +{
> +	osm_hup_flag = 1;
> +}
> +
> +static sigset_t saved_sigset;
> +
> +static void block_signals()
> +{
> +	sigset_t set;
> +	sigfillset(&set);
> +	sigprocmask(SIG_SETMASK, &set, &saved_sigset);
> +}
> +
> +static void setup_signals()
> +{
> +	struct sigaction act;
> +	sigfillset(&act.sa_mask);
> +	act.sa_handler = mark_exit_flag;
> +	act.sa_flags = 0;
> +#ifndef OSM_VENDOR_INTF_OPENIB
> +	sigaction(SIGINT, &act, NULL);
> +#endif
> +	sigaction(SIGTERM, &act, NULL);
> +	act.sa_handler = mark_hup_flag;
> +	sigaction(SIGHUP, &act, NULL);
> +	sigprocmask(SIG_SETMASK, &saved_sigset, NULL);
> +}
> +#endif /* __WIN__ */
> +
>
/**********************************************************************
>
**********************************************************************/
> -void show_usage(void);
> 
>  void
>  show_usage(void)
> @@ -247,7 +292,6 @@ show_usage(void)
> 
>
/**********************************************************************
>
**********************************************************************/
> -void show_menu(void);
> 
>  void
>  show_menu(void)
> @@ -764,6 +808,8 @@ main(
>    if ( cache_options == TRUE )
>      osm_subn_write_conf_file( &opt );
> 
> +  block_signals();
> +
>    status = osm_opensm_init( &osm, &opt );
>    if( status != IB_SUCCESS )
>    {
> @@ -794,9 +840,6 @@ main(
>      goto Exit;
>    }
> 
> -  /* this should handle ^C etc */
> -  osm_reg_sig_handler( &osm );
> -
>    status = osm_opensm_bind( &osm, guid );
>    if( status != IB_SUCCESS )
>    {
> @@ -817,6 +860,8 @@ main(
>      }
>    }
> 
> +  setup_signals();
> +
>    osm_opensm_sweep( &osm );
>    /* since osm_opensm_init get opt as RO we'll set the opt value with
UI
> pfn here */
>    /* Now do the registration */
> @@ -839,11 +884,23 @@ main(
>        In the future, some sort of console interactivity could
>        be implemented in this loop.
>      */
> -    while( !osm_exit_flag )
> +    if (opt.console) {
> +      printf("\nOpenSM Console\n\n");
> +      osm_console_prompt();
> +    }
> +    while( !osm_exit_flag ) {
>        if (opt.console)
>          osm_console(&osm);
>        else
>          cl_thread_suspend( 10000 );
> +
> +      if (osm_hup_flag) {
> +        osm_hup_flag = 0;
> +        /* a HUP signal should only start a new heavy sweep */
> +        osm.subn.force_immediate_heavy_sweep = TRUE;
> +        cl_event_signal(&osm.sm.signal);
> +      }
> +    }
>    }
> 
>  #if 0
> diff --git a/osm/opensm/osm_console.c b/osm/opensm/osm_console.c
> index c470b49..43d9f87 100644
> --- a/osm/opensm/osm_console.c
> +++ b/osm/opensm/osm_console.c
> @@ -39,6 +39,7 @@
>  #define _GNU_SOURCE		/* for getline */
>  #include <stdio.h>
>  #include <stdlib.h>
> +#include <sys/poll.h>
>  #include <opensm/osm_console.h>
> 
>  #define OSM_COMMAND_LINE_LEN	120
> @@ -186,15 +187,27 @@ static void parse_cmd_line(char *line, o
>  	}
>  }
> 
> +void osm_console_prompt(void)
> +{
> +	printf("%s", OSM_COMMAND_PROMPT);
> +	fflush(stdout);
> +}
> +
>  void osm_console(osm_opensm_t *p_osm)
>  {
> +	struct pollfd pollfd;
>  	char *p_line;
>  	size_t len;
>  	ssize_t n;
> 
> -	printf("\nOpenSM Console\n\n");
> -	while (1) {
> -		printf("%s", OSM_COMMAND_PROMPT);
> +	pollfd.fd = 0;
> +	pollfd.events = POLLIN;
> +	pollfd.revents = 0;
> +
> +	if (poll(&pollfd, 1, 10000) <= 0)
> +		return;
> +
> +	if (pollfd.revents|POLLIN) {
>  		p_line = NULL;
>  		/* Get input line */
>  		n = getline(&p_line, &len, stdin);
> @@ -206,6 +219,7 @@ void osm_console(osm_opensm_t *p_osm)
>  			printf("Input error\n");
>  			fflush(stdin);
>  		}
> +		osm_console_prompt();
>  	}
>  }
> 
> diff --git a/osm/opensm/osm_opensm.c b/osm/opensm/osm_opensm.c
> index 6ca6796..54d0ae3 100644
> --- a/osm/opensm/osm_opensm.c
> +++ b/osm/opensm/osm_opensm.c
> @@ -54,7 +54,6 @@
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <complib/cl_memory.h>
> -#include <complib/cl_signal_osd.h>
>  #include <complib/cl_dispatcher.h>
>  #include <complib/cl_passivelock.h>
>  #include <vendor/osm_vendor_api.h>
> @@ -149,52 +148,6 @@ osm_opensm_create_mcgroups(
>  }
> 
>
/**********************************************************************
> - * SHUT DOWN IS CONTROLLED BY A GLOBAL EXIT FLAG
> -
**********************************************************************/
> -#ifndef __WIN__
> -static osm_opensm_t *__p_osm_to_signal;
> -
> -void
> -__sig_handler(
> -   int signum )
> -{
> -   static int got_signal = 0;
> -
> -   if( signum != SIGHUP )
> -   {
> -      if( !got_signal )
> -      {
> -         got_signal++;
> -         printf( "OpenSM: Got signal %d - exiting...\n", signum );
> -         osm_exit_flag = 1;
> -      }
> -   }
> -   else
> -   {
> -      /* a HUP signal should only start a new heavy sweep */
> -      __p_osm_to_signal->subn.force_immediate_heavy_sweep = TRUE;
> -      osm_state_mgr_process( &__p_osm_to_signal->sm.state_mgr,
> -                             OSM_SIGNAL_SWEEP );
> -   }
> -}
> -
> -void
> -osm_reg_sig_handler(
> -   IN osm_opensm_t * const p_osm )
> -{
> -   __p_osm_to_signal = p_osm;
> -#ifndef OSM_VENDOR_INTF_OPENIB
> -   cl_reg_sig_hdl( SIGINT, __sig_handler );
> -#endif
> -   cl_reg_sig_hdl( SIGTERM, __sig_handler );
> -   cl_reg_sig_hdl( SIGHUP, __sig_handler );
> -   osm_exit_flag = 0;
> -
> -   return;
> -}
> -#endif /* __WIN__ */
> -
>
-/**********************************************************************
>
**********************************************************************/
>  ib_api_status_t
>  osm_opensm_init(
> diff --git a/osm/opensm/osm_sm.c b/osm/opensm/osm_sm.c
> index 8ace290..e252861 100644
> --- a/osm/opensm/osm_sm.c
> +++ b/osm/opensm/osm_sm.c
> @@ -87,10 +87,6 @@ __osm_sm_sweeper(
> 
>     if( p_sm->thread_state == OSM_THREAD_STATE_INIT )
>     {
> -      osm_log( p_sm->p_log, OSM_LOG_DEBUG,
> -               "__osm_sm_sweeper: " "Masking ^C Signals\n" );
> -      cl_sig_mask_sigint(  );
> -
>        p_sm->thread_state = OSM_THREAD_STATE_RUN;
>     }
> 
> diff --git a/osm/opensm/osm_vl15intf.c b/osm/opensm/osm_vl15intf.c
> index ef18e54..4796a17 100644
> --- a/osm/opensm/osm_vl15intf.c
> +++ b/osm/opensm/osm_vl15intf.c
> @@ -85,7 +85,6 @@ __osm_vl15_poller(
> 
>    if ( p_vl->thread_state == OSM_THREAD_STATE_NONE)
>    {
> -    cl_sig_mask_sigint( );
>      p_vl->thread_state = OSM_THREAD_STATE_RUN;
>    }
> 



More information about the general mailing list