[ewg] Interop test failure using OFED-3.5 RC4
Marciniszyn, Mike
mike.marciniszyn at intel.com
Fri Jan 11 07:50:32 PST 2013
This is definitely a perftest bug.
The 2.0 release is a significant rewrite of the perftest utilities, and this bug is a regression in the routine ctx_set_out_reads().
In 1.4 the code is this:
/******************************************************************************
* ctx_set_out_reads (as quoted from perftest 1.4)
*
* Clamps the user-requested number of outstanding reads to a per-device limit.
*
* context        - device context, passed to is_dev_hermon() to pick the limit.
* num_user_reads - value requested by the user.
*
* The limit is MAX_OUT_READ_HERMON when is_dev_hermon() reports HERMON and
* MAX_OUT_READ for every other device.
*
* Returns num_user_reads unchanged when it lies in 1..limit; otherwise returns
* the limit (a warning is printed to stderr only when the request was too big).
******************************************************************************/
static int ctx_set_out_reads(struct ibv_context *context,int num_user_reads) {
int max_reads;
/* Every device that is not HERMON falls back to MAX_OUT_READ. */
max_reads = (is_dev_hermon(context) == HERMON) ? MAX_OUT_READ_HERMON : MAX_OUT_READ;<---------------
/* Request above the limit: warn on stderr and clamp to the limit. */
if (num_user_reads > max_reads) {
fprintf(stderr," Number of outstanding reads is above max = %d\n",max_reads);
fprintf(stderr," Changing to that max value\n");
num_user_reads = max_reads;
}
/* Zero or negative request: silently use the limit. */
else if (num_user_reads <= 0) {
num_user_reads = max_reads;
}
return num_user_reads;
}
The new 2.0 code is:
/******************************************************************************
* ctx_set_out_reads (as quoted from perftest 2.0)
*
* Clamps the user-requested number of outstanding reads to a per-device limit.
*
* context        - device context, passed to ib_dev_name() to identify the device.
* num_user_reads - value requested by the user.
*
* The limit is MAX_OUT_READ_HERMON for CONNECTIB, CONNECTX3, CONNECTX2 and
* CONNECTX, MAX_OUT_READ for LEGACY, and 0 for any other device value.
*
* Returns num_user_reads unchanged when it lies in 1..limit; otherwise returns
* the limit. With a limit of 0 (the default case) both branches below assign 0,
* so the function returns 0 for every device not listed in the switch.
******************************************************************************/
static int ctx_set_out_reads(struct ibv_context *context,int num_user_reads) {
int max_reads;
Device ib_fdev = ib_dev_name(context);
/* The first three case labels are empty and fall through to CONNECTX. */
switch (ib_fdev) {
case CONNECTIB : ;
case CONNECTX3 : ;
case CONNECTX2 : ;
case CONNECTX : max_reads = MAX_OUT_READ_HERMON; break;
case LEGACY : max_reads = MAX_OUT_READ; break;
default : max_reads = 0; <--------------------
}
/* Request above the limit: print RESULT_LINE, warn on stderr, and clamp. */
if (num_user_reads > max_reads) {
printf(RESULT_LINE);
fprintf(stderr," Number of outstanding reads is above max = %d\n",max_reads);
fprintf(stderr," Changing to that max value\n");
num_user_reads = max_reads;
}
/* Zero or negative request: silently use the limit. */
else if (num_user_reads <= 0) {
num_user_reads = max_reads;
}
return num_user_reads;
}
The old code will return MAX_OUT_READ, while the new code will return 0 for any other HCA (qib and probably others).
I have a patch that works, while preserving the desired hardcoded values for "known/legacy" devices:
+
+/******************************************************************************
+ *
+ ******************************************************************************/
+static int device_max_reads(struct ibv_context *context) {
+ struct ibv_device_attr attr;
+ int ret = 0;
+
+ if (!ibv_query_device(context,&attr)) {
+ ret = attr.max_qp_rd_atom;
+ }
+ return ret;
+}
+
/******************************************************************************
*
******************************************************************************/
@@ -496,7 +510,7 @@ static int ctx_set_out_reads(struct ibv_
case CONNECTX2 : ;
case CONNECTX : max_reads = MAX_OUT_READ_HERMON; break;
case LEGACY : max_reads = MAX_OUT_READ; break;
- default : max_reads = 0;
+ default : max_reads = device_max_reads(context);
}
if (num_user_reads > max_reads) {
I'm curious: why did both the old and the new code use hardcoded values?
Mike
More information about the ewg
mailing list