[openib-general] [PATCH] opensm: truncate log file when fs is overflowed

Hal Rosenstock halr at voltaire.com
Sun Aug 20 10:01:55 PDT 2006


Hi Sasha,

On Sun, 2006-08-20 at 12:05, Sasha Khapyorsky wrote:
> When the OpenSM log file fills the filesystem and write() fails with
> 'No space left on device', try to truncate the log file and wrap around
> the logging.

Should it be an (admin) option whether or not to truncate the file?
Or is there no way to continue without logging (other than this) once
the log file fills the disk?

See comment below as well.

-- Hal

> Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
> ---
> 
>  osm/opensm/osm_log.c |   23 +++++++++++++++--------
>  1 files changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/osm/opensm/osm_log.c b/osm/opensm/osm_log.c
> index 668e9a6..b4700c8 100644
> --- a/osm/opensm/osm_log.c
> +++ b/osm/opensm/osm_log.c
> @@ -58,6 +58,7 @@ #include <stdarg.h>
>  #include <fcntl.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
> +#include <errno.h>
>  
>  #ifndef WIN32
>  #include <sys/time.h>
> @@ -152,6 +153,7 @@ #endif    
>      cl_spinlock_acquire( &p_log->lock );
>  #ifdef WIN32
>      GetLocalTime(&st);
> + _retry:
>      ret = fprintf(   p_log->out_port, "[%02d:%02d:%02d:%03d][%04X] -> %s",
>                       st.wHour, st.wMinute, st.wSecond, st.wMilliseconds,
>                       pid, buffer);
> @@ -159,6 +161,7 @@ #ifdef WIN32
>  #else
>      pid = pthread_self();
>      tim = time(NULL);
> + _retry:
>      ret = fprintf( p_log->out_port, "%s %02d %02d:%02d:%02d %06d [%04X] -> %s",
>                     ((result.tm_mon < 12) && (result.tm_mon >= 0) ? 
>                      month_str[result.tm_mon] : "???"),
> @@ -166,6 +169,18 @@ #else
>                     result.tm_min, result.tm_sec,
>                     usecs, pid, buffer);
>  #endif /*  WIN32 */
> +
> +    if (ret >= 0)
> +      log_exit_count = 0;
> +    else if (errno == ENOSPC && log_exit_count < 3) {
> +      int fd = fileno(p_log->out_port);
> +      fprintf(stderr, "log write failed: %s. Will truncate the log file.\n",
> +              strerror(errno));
> +      ftruncate(fd, 0);

Should the return value of ftruncate() be checked here?

> +      lseek(fd, 0, SEEK_SET);
> +      log_exit_count++;
> +      goto _retry;
> +    }
>      
>      /*
>        Flush log on errors too.
> @@ -174,14 +189,6 @@ #endif /*  WIN32 */
>        fflush( p_log->out_port );
>      
>      cl_spinlock_release( &p_log->lock );
> -    
> -    if (ret < 0)
> -    {
> -      if (log_exit_count++ < 10)
> -      {
> -        fprintf(stderr, "OSM LOG FAILURE! Quota probably exceeded\n");
> -      }
> -    }
>    }
>  }
>  





More information about the general mailing list