[ofa-general] Re: [PATCH] osmtest: Add SA get PathRecord stress test

Sasha Khapyorsky sashak at voltaire.com
Sun Sep 20 03:20:45 PDT 2009


Hi Hal,

On 15:21 Mon 31 Aug     , Hal Rosenstock wrote:
> 
> Signed-off-by: Hal Rosenstock <hal.rosenstock at gmail.com>
> ---
> diff --git a/opensm/man/osmtest.8 b/opensm/man/osmtest.8
> index fa0cd52..f0d6323 100644
> --- a/opensm/man/osmtest.8
> +++ b/opensm/man/osmtest.8
> @@ -1,4 +1,4 @@
> -.TH OSMTEST 8 "August 11, 2008" "OpenIB" "OpenIB Management"
> +.TH OSMTEST 8 "August 31, 2009" "OpenIB" "OpenIB Management"
>  
>  .SH NAME
>  osmtest \- InfiniBand subnet manager and administration (SM/SA) test program
> @@ -108,9 +108,10 @@ Stress test options are as follows:
>  
>   OPT    Description
>   ---    -----------------
> - -s1  - Single-MAD response SA queries
> + -s1  - Single-MAD (RMPP) response SA queries
>   -s2  - Multi-MAD (RMPP) response SA queries
>   -s3  - Multi-MAD (RMPP) Path Record SA queries
> + -s4  - Single-MAD (non RMPP) get Path Record SA queries 
>  
>  Without -s, stress testing is not performed
>  .TP
> diff --git a/opensm/osmtest/include/osmtest_base.h b/opensm/osmtest/include/osmtest_base.h
> index 7c33da3..cda3a31 100644
> --- a/opensm/osmtest/include/osmtest_base.h
> +++ b/opensm/osmtest/include/osmtest_base.h
> @@ -56,11 +56,12 @@
>  
>  #define STRESS_SMALL_RMPP_THR 100000
>  /*
> -    Take long times when quering big clusters (over 40 nodes) , an average of : 0.25 sec for query
> +    Take long times when querying big clusters (over 40 nodes), an average of : 0.25 sec for query
>      each query receives 1000 records
>  */
>  #define STRESS_LARGE_RMPP_THR 4000
>  #define STRESS_LARGE_PR_RMPP_THR 20000
> +#define STRESS_GET_PR 100000
>  
>  extern const char *const p_file;
>  
> diff --git a/opensm/osmtest/main.c b/opensm/osmtest/main.c
> index bb2d6bc..4bb9f82 100644
> --- a/opensm/osmtest/main.c
> +++ b/opensm/osmtest/main.c
> @@ -143,9 +143,10 @@ void show_usage()
>  	       "          Stress test options are as follows:\n"
>  	       "          OPT    Description\n"
>  	       "          ---    -----------------\n"
> -	       "          -s1  - Single-MAD response SA queries\n"
> +	       "          -s1  - Single-MAD (RMPP) response SA queries\n"
>  	       "          -s2  - Multi-MAD (RMPP) response SA queries\n"
>  	       "          -s3  - Multi-MAD (RMPP) Path Record SA queries\n"
> +	       "          -s4  - Single-MAD (non RMPP) get Path Record SA queries\n"
>  	       "          Without -s, stress testing is not performed\n\n");
>  	printf("-M\n"
>  	       "--Multicast_Mode\n"
> @@ -499,6 +500,9 @@ int main(int argc, char *argv[])
>  			case 3:
>  				printf("Large Path Record SA queries\n");
>  				break;
> +			case 4:
> +				printf("SA Get Path Record queries\n");
> +				break;
>  			default:
>  				printf("Unknown value %u (ignored)\n",
>  				       opt.stress);
> diff --git a/opensm/osmtest/osmtest.c b/opensm/osmtest/osmtest.c
> index 986a8d2..8357d90 100644
> --- a/opensm/osmtest/osmtest.c
> +++ b/opensm/osmtest/osmtest.c
> @@ -2882,6 +2882,151 @@ Exit:
>  
>  /**********************************************************************
>   **********************************************************************/
> +ib_api_status_t
> +osmtest_stress_path_recs_by_lid(IN osmtest_t * const p_osmt,
> +				IN int mode,
> +				OUT uint32_t * const p_num_recs,
> +				OUT uint32_t * const p_num_queries)
> +{
> +	osmtest_req_context_t context;
> +	ib_path_rec_t *p_rec;
> +	cl_status_t status;
> +	ib_net16_t dlid, slid;
> +	int num_recs, i;
> +
> +	OSM_LOG_ENTER(&p_osmt->log);
> +
> +	memset(&context, 0, sizeof(context));
> +
> +	slid = cl_ntoh16(p_osmt->local_port.lid);
> +	if (!mode)
> +		dlid = cl_ntoh16(p_osmt->local_port.sm_lid);
> +	else
> +		dlid = cl_ntoh16(p_osmt->local_port.lid);

What is the purpose of this "mode" variable? I see (below) that it is
not used.

> +
> +	/*
> +	 * Do a blocking query for the PathRecord.
> +	 */
> +	status = osmtest_get_path_rec_by_lid_pair(p_osmt, slid, dlid, &context);
> +	if (status != IB_SUCCESS) {
> +		OSM_LOG(&p_osmt->log, OSM_LOG_ERROR, "ERR 000A: "
> +			"osmtest_get_path_rec_by_lid_pair failed (%s)\n",
> +			ib_get_err_str(status));
> +		goto Exit;
> +	}

This is not really "stress" testing, just pinging. Shouldn't that be
clarified in the test description?

> +
> +	/*
> +	 * Populate the database with the received records.
> +	 */
> +	num_recs = context.result.result_cnt;
> +	*p_num_recs += num_recs;
> +	++*p_num_queries;
> +
> +	if (osm_log_is_active(&p_osmt->log, OSM_LOG_VERBOSE)) {
> +		OSM_LOG(&p_osmt->log, OSM_LOG_VERBOSE,
> +			"Received %u records\n", num_recs);
> +
> +		for (i = 0; i < num_recs; i++) {
> +			p_rec = osmv_get_query_path_rec(context.result.p_result_madw, 0);
> +			osm_dump_path_record(&p_osmt->log, p_rec, OSM_LOG_VERBOSE);
> +		}
> +	}
> +
> +Exit:
> +	/*
> +	 * Return the IB query MAD to the pool as necessary.
> +	 */
> +	if (context.result.p_result_madw != NULL) {
> +		osm_mad_pool_put(&p_osmt->mad_pool,
> +				 context.result.p_result_madw);
> +		context.result.p_result_madw = NULL;
> +	}
> +
> +	OSM_LOG_EXIT(&p_osmt->log);
> +	return (status);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +static ib_api_status_t osmtest_stress_get_pr(IN osmtest_t * const p_osmt,
> +					     IN int mode)
> +{
> +	ib_api_status_t status = IB_SUCCESS;
> +	uint64_t num_recs = 0;
> +	uint64_t num_queries = 0;
> +	uint32_t delta_recs;
> +	uint32_t delta_queries;
> +	uint32_t print_freq = 0;
> +	int num_timeouts = 0;
> +	struct timeval start_tv, end_tv;
> +	long sec_diff, usec_diff;
> +
> +	OSM_LOG_ENTER(&p_osmt->log);
> +	gettimeofday(&start_tv, NULL);
> +	printf("-I- Start time is : %09ld:%06ld [sec:usec]\n",
> +	       start_tv.tv_sec, (long)start_tv.tv_usec);
> +
> +	while ((num_queries < STRESS_GET_PR) && (num_timeouts < 100)) {
> +		delta_recs = 0;
> +		delta_queries = 0;
> +
> +		status = osmtest_stress_path_recs_by_lid(p_osmt, mode,
> +							 &delta_recs,
> +							 &delta_queries);
> +		if (status != IB_SUCCESS)
> +			goto Exit;
> +
> +		num_recs += delta_recs;
> +		num_queries += delta_queries;
> +
> +		print_freq += delta_recs;
> +		if (print_freq > 5000) {
> +			gettimeofday(&end_tv, NULL);
> +			printf("%" PRIu64 " records, %" PRIu64 " queries\n",
> +			       num_recs, num_queries);
> +			if (end_tv.tv_usec > start_tv.tv_usec) {
> +				sec_diff = end_tv.tv_sec - start_tv.tv_sec;
> +				usec_diff = end_tv.tv_usec - start_tv.tv_usec;
> +			} else {
> +				sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1;
> +				usec_diff =
> +				    1000000 - (start_tv.tv_usec -
> +					       end_tv.tv_usec);
> +			}
> +			printf("-I- End time is : %09ld:%06ld [sec:usec]\n",
> +			       end_tv.tv_sec, (long)end_tv.tv_usec);
> +			printf("-I- Querying %" PRId64
> +			       " path_rec queries took %04ld:%06ld [sec:usec]\n",
> +			       num_queries, sec_diff, usec_diff);
> +			print_freq = 0;
> +		}
> +	}
> +
> +Exit:
> +	gettimeofday(&end_tv, NULL);
> +	printf("-I- End time is : %09ld:%06ld [sec:usec]\n",
> +	       end_tv.tv_sec, (long)end_tv.tv_usec);
> +	if (end_tv.tv_usec > start_tv.tv_usec) {
> +		sec_diff = end_tv.tv_sec - start_tv.tv_sec;
> +		usec_diff = end_tv.tv_usec - start_tv.tv_usec;
> +	} else {
> +		sec_diff = end_tv.tv_sec - start_tv.tv_sec - 1;
> +		usec_diff = 1000000 - (start_tv.tv_usec - end_tv.tv_usec);
> +	}

Not for this specific patch, but for osmtest in general - it would be
really nice to consolidate all these duplications across the osmtest code.

Sasha

> +
> +	printf("-I- Querying %" PRId64
> +	       " path_rec queries took %04ld:%06ld [sec:usec]\n",
> +	       num_queries, sec_diff, usec_diff);
> +	if (num_timeouts > 50) {
> +		status = IB_TIMEOUT;
> +	}
> +	/* Exit: */
> +	OSM_LOG_EXIT(&p_osmt->log);
> +	return (status);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
>  static void
>  osmtest_prepare_db_generic(IN osmtest_t * const p_osmt,
>  			   IN cl_qmap_t * const p_tbl)
> @@ -7247,6 +7392,16 @@ ib_api_status_t osmtest_run(IN osmtest_t * const p_osmt)
>  					goto Exit;
>  				}
>  				break;
> +			case 4: /* SA Get PR to SA LID */
> +				status = osmtest_stress_get_pr(p_osmt, 0);
> +				if (status != IB_SUCCESS) {
> +					OSM_LOG(&p_osmt->log, OSM_LOG_ERROR,
> +						"ERR 014B: "
> +						"SA Get PR stress test failed (%s)\n",
> +						ib_get_err_str(status));
> +					goto Exit;
> +				}
> +				break;
>  			default:
>  				OSM_LOG(&p_osmt->log, OSM_LOG_ERROR,
>  					"ERR 0144: "



More information about the general mailing list