[openib-general] Unknown SMP Recv

Hal Rosenstock halr at voltaire.com
Sat Feb 10 07:49:15 PST 2007


On Fri, 2007-02-09 at 15:19, Michael Arndt wrote:
> Hi,
> 
> > It is strange, I did similar thing (you can see in
> > management/diags/src/mcm_rereg_test.c) and it worked fine for me.
> 
> What location is that?
> 
> >Which libibumad version you are using? Also I understand you did some
> >changes in the stack, is it related to user_mad? Could you publish this?
> 
> I use OFED-1.1 and the attached libibumad version. The stack on which I
> tested this was not changed, in order to rule that out. It is a diploma
> thesis and I will publish it as soon as possible ;)...in German ...sorry.
> 
> The whole example code Hal was asking for is below.

Some comments interspersed below with my modified version which sends
the 10 SMPs.

-- Hal

>  I have marked the 
> position with /* here */. Currently the retry parameter is zero, but I also 
> tested 3.
> 
> Thanks Michael
> 
> // ---- Includes --------------------------------
> #include <infiniband/umad.h>
> #include <string.h>
> #include <errno.h>
> 
> #include "sender.h"
> 
> // ---- Defines und Deklarationen ---------------
> 
>  static const uint8_t  CLASS_SUBN_DIRECTED_ROUTE = 0x81;
>  static const uint8_t  CLASS_SUBN_LID_ROUTE = 0x1;
> 
>  static int long drmad_tid = 0x123;
> 
>  // Prototypes
> 
>  void drsmp_get_init(void *umad, DRPath *path, uint16_t attr, int mod);
>  void drsmp_set_init(void *umad, DRPath *path, uint16_t attr, int mod, void 
> *data);
>  char * drmad_status_str(struct drsmp *drsmp);
>  int str2DRPath(char *str, DRPath *path);
>  int set_bit(int nr, void *method_mask);
> 
> 
> 
> // ---- Main ------------------------------------
> 
> int main (void){
> 
>  int Port_ID = 0;
>  int Agent_ID = 0;
>  int ret;
>  int i;
>  int length, timeout_ms = 10000;
> 
> 
>  void *umad;
>  struct drsmp *smp;
> 
> 
> // ---- Einstellungen ---------------------------
>  int Portnummer = 1;
>  char Devicename [2][UMAD_CA_NAME_LEN];
>  DRPath Path;
>  char Path_Str[64];
> 
>  uint16_t attribute = MAD_ATTR_PORT_INFO; // PortInfo
>  int modifier = 1;
> 
>  struct _register_info{
>   int Management_Class;
>   int Management_Version;
>   uint8_t RMPP_Version;
>   uint32_t Method_Mask[4];
>  } Register_Info;
> 
>  // ++ Wertzuweisung ++
> 
>  Register_Info.Management_Class = CLASS_SUBN_DIRECTED_ROUTE;
>  Register_Info.Management_Version = 1;
>  Register_Info.RMPP_Version = 0;
> 
>  set_bit(0x01,&(Register_Info.Method_Mask));
>  set_bit(0x02,&(Register_Info.Method_Mask));
>  set_bit(0x81,&(Register_Info.Method_Mask));

This overwrites something past the method mask: bit 0x81 (129) lies beyond
the 128 bits provided by Method_Mask[4].

>  set_bit(0x03,&(Register_Info.Method_Mask));
>  set_bit(0x05,&(Register_Info.Method_Mask));
>  set_bit(0x06,&(Register_Info.Method_Mask));

Several of these methods don't apply to SM class.

Also, your umad_register call doesn't use this mask (it passes 0), so none
of it is needed as the code stands. Are you trying to use solicited or
unsolicited sending? It is unclear to me what you really want.

>  sprintf(Path_Str,"0,1,1,1");
> 
> 
> // ---- Init Phase ------------------------------
>  printf("... Init Lib ...");
>  umad_init();
>  printf("done\n\n");
> 
>  // ++ Debug ++
>  umad_debug(0);
> 
>  printf("... Get CAs Names ...");
>  ret = umad_get_cas_names(Devicename,2);
>  if (!ret) {
>   printf("Fehler: umad_get_cas_names: %i\n",ret);
>   return -1;
>  }
>  else {
>   printf("done\n\n");
>   for (i = 0;i < ret;i++){
>    printf("Devicename: %s\n",Devicename[i]);
>   }
> 
>  }
>  // ++ Open ++
>  printf("... Open Port ...");
>  if ((Port_ID = umad_open_port(Devicename[0],Portnummer)) < 0)
>  {
>   printf("Fehler: umad_open_port: %i\n",Port_ID);
>   return -1;
>  }
>  else printf("done\n\n");
>  // ++ Register ++
>  printf("... Register User Mad ...");
>  if ((Agent_ID = umad_register(Port_ID,Register_Info.Management_Class,
>             Register_Info.Management_Version,
>             Register_Info.RMPP_Version,
>             0)) < 0){

See previous comment on this.

>   printf("Fehler: umad_register : %i\n",Agent_ID);
>   goto Exit;
>  }
>  else printf("done\n\n");
> // ---- Paket bauen -----------------------------
> 
>  printf("... Paket allokieren ...");
>  if (!(umad = umad_alloc(1, umad_size() + IB_MAD_SIZE))){
>   printf("Fehler: umad_alloc\n");
>   goto Exit;
>  }
>  printf("done\n\n");
> 
>  smp = umad_get_mad(umad);
>  printf("... Smp Pointer ... done\n");
> 
>  if ((str2DRPath(Path_Str, &Path)) < 0) printf("Fehler: str2DRPath\n");

I moved this up to where Path_Str was initially set. It wouldn't
actually send the packets multiple times without doing this. I didn't
investigate this further.

>  printf("... SMP bauen ...");
>  drsmp_get_init(umad,&Path,attribute,modifier);
>  printf("... done ...\n\n");
> 
> 
>  //xdump(stderr, "before send:\n", smp, 256);
>  dump_dr_smp(smp);

I got a seg fault on this, so I commented it out.

>  length = IB_MAD_SIZE;
> 
> /* here */
>  for (i = 0; i < 10; i++){
>      printf("... Send Mad ...");
>        if ((ret = umad_send(Port_ID, Agent_ID, umad, length, 200, 0)) < 0)

The main problem is this:
You cannot reuse the same umad allocation for multiple umad_sends.
That's why you get the error. So I changed this.

Also, since you are not using solicited sends there is no need for the
timeout to be specified but that doesn't really matter.

>           printf("Fehler: umad_send : %i\n",ret);
>        else printf("done\n\n");
>  }
> 
> /*
>  for (i = 0; i < 10; i++){
>    printf("... Recv Mad ...");
>    if (umad_recv(Port_ID, umad, &length, timeout_ms) != Agent_ID)
>         printf("Fehler umad_recv: %s\n", drmad_status_str(smp));
>    else printf("done\n\n");
>  }
> */
> 
>  dump_dr_smp(smp);

I also got a seg fault on this, so I commented it out as well.

>  switch (attribute){
>   case MAD_ATTR_NODE_INFO : dump_node_info((const struct 
> node_info*)&(smp->data[0])); break;
>   case MAD_ATTR_PORT_INFO : dump_port_info(0,0,0,(const struct 
> port_info*)&(smp->data[0])); break;
>  }

I also got a seg fault on this, so I commented it out as well.

> 
> // ---- Down Phase ------------------------------ 
> Exit:
>  printf("... Unregister User Mad ...");
>  if (umad_unregister(Port_ID,Agent_ID) < 0)
>   printf("Fehler: umad_unregister\n");
>  else printf("done\n\n");
> 
>  printf("... Close Port ...");
>  if (Port_ID != -1)
>   if ((umad_close_port(Port_ID)) != 0){
>    printf("Fehler: umad_close_port\n");
>   }
>   else printf("done\n\n");
>  else printf("nix zu tun\n\n");
> 
> }
> 
> // ---- SMP Paket -------------------------------
> 
> 
> void drsmp_get_init(void *umad, DRPath *path, uint16_t attr, int mod)
> {
>    struct drsmp *smp = (struct drsmp *)(umad_get_mad(umad));
> 
>    memset(smp, 0, sizeof (*smp));
> 
>    smp->base_version  = 1;
>    smp->mgmt_class    = CLASS_SUBN_DIRECTED_ROUTE;
>    smp->class_version = 1;
> 
>    smp->method        = 0x01;
>    smp->attr_id      = (uint16_t)htons((uint16_t)attr);
>    smp->attr_mod     = htonl(mod);
>    smp->tid           = htonll(drmad_tid++);
>    smp->dr_slid       = 0xffff;
>    smp->dr_dlid       = 0xffff;
> 
>    umad_set_addr(umad, 0xffff, 0, 0, 0);
> 
>    if (path)
>       memcpy(smp->initial_path, path->path, path->hop_cnt+1);
> 
>    smp->hop_cnt = path->hop_cnt;
> }
> 
> void drsmp_set_init(void *umad, DRPath *path, uint16_t attr, int mod, void 
> *data)
> {
>    struct drsmp *smp = (struct drsmp *)(umad_get_mad(umad));
> 
>    memset(smp, 0, sizeof (*smp));
> 
>    smp->method        = 2;    /* SET */
>    smp->attr_id      = (uint16_t)htons((uint16_t)attr);
>    smp->attr_mod     = htonl(mod);
>    smp->tid           = htonll(drmad_tid++);
>    smp->dr_slid       = 0xffff;
>    smp->dr_dlid       = 0xffff;
> 
>    umad_set_addr(umad, 0xffff, 0, 0, 0);
> 
>    if (path)
>       memcpy(smp->initial_path, path->path, path->hop_cnt+1);
> 
>    if (data)
>       memcpy(smp->data, data, sizeof smp->data);
> 
>    smp->hop_cnt = path->hop_cnt;
> }
> 
> int str2DRPath(char *str, DRPath *path)
> {
>    char *s;
> 
>    path->hop_cnt = -1;
> 
>    //DEBUG("DR str: %s", str);
>    while (str && *str) {
>       if ((s = strchr(str, ',')))
>          *s = 0;
>       path->path[++path->hop_cnt] = atoi(str);
>       if (!s)
>          break;
>       str = s+1;
>    }
> 
> #if 0
>    if (path->path[0] != 0 ||
>       (path->hop_cnt > 0 && dev_port && path->path[1] != dev_port)) {
>       DEBUG("hop 0 != 0 or hop 1 != dev_port");
>       return -1;
>    }
> #endif
> 
>    return path->hop_cnt;
> }
> 

Here's my modified version.

---
// ---- Includes --------------------------------
#include <infiniband/umad.h>
#include <string.h>
#include <errno.h>

#include "sender.h"

// ---- Definitions and declarations ------------

/* Management class codes placed in the mgmt_class field of an SMP. */
static const uint8_t CLASS_SUBN_LID_ROUTE      = 0x1;
static const uint8_t CLASS_SUBN_DIRECTED_ROUTE = 0x81;

/* Next transaction id; post-incremented by every drsmp_*_init() call. */
static long drmad_tid = 0x123;

/* Prototypes */

/* SMP builders: fill the MAD area of an already allocated umad buffer. */
void drsmp_get_init(void *umad, DRPath *path, uint16_t attr, int mod);
void drsmp_set_init(void *umad, DRPath *path, uint16_t attr, int mod, void *data);

/* Helpers; drmad_status_str() and set_bit() are defined outside this listing. */
char *drmad_status_str(struct drsmp *drsmp);
int str2DRPath(char *str, DRPath *path);
int set_bit(int nr, void *method_mask);



// ---- Main ------------------------------------

/*
 * Test sender: opens port 1 of the first CA reported by libibumad, registers
 * an agent for the directed-route subnet management class and sends ten
 * PortInfo Get SMPs along the fixed directed route "0,1,1,1".
 *
 * Every send uses a freshly allocated umad buffer; the previous buffer is
 * released before the next one is allocated and the last one at exit.
 *
 * Returns 0 when set-up succeeded and all ten sends were accepted,
 * -1 otherwise.
 */
int main (void){

   int Port_ID = 0;
   int Agent_ID = 0;
   int ret;
   int i;
   int status = -1;
   int send_errors = 0;
   int length, timeout_ms = 10000;   /* timeout_ms: only needed by the disabled receive loop */

   void *umad = NULL;
   struct drsmp *smp = NULL;

// ---- Settings --------------------------------
   int Portnummer = 1;
   char Devicename [2][UMAD_CA_NAME_LEN];
   DRPath Path;
   char Path_Str[64] = "0,1,1,1";

   uint16_t attribute = MAD_ATTR_PORT_INFO; // PortInfo
   int modifier = 1;

   /* Zero-initialised so that set_bit() starts from an empty method mask. */
   struct _register_info{
      int Management_Class;
      int Management_Version;
      uint8_t RMPP_Version;
      uint32_t Method_Mask[4];
   } Register_Info = {0};

   // ++ Value assignment ++

   Register_Info.Management_Class = CLASS_SUBN_DIRECTED_ROUTE;
   Register_Info.Management_Version = 1;
   Register_Info.RMPP_Version = 0;

   /* The mask is only handed to umad_register() in the disabled variant below. */
   set_bit(0x01, Register_Info.Method_Mask);
   set_bit(0x02, Register_Info.Method_Mask);
#if 0
   /* Disabled: bit 0x81 (129) does not fit into the 4 * 32 bit mask. */
   set_bit(0x81, Register_Info.Method_Mask);
#endif
   set_bit(0x03, Register_Info.Method_Mask);
   set_bit(0x05, Register_Info.Method_Mask);
   set_bit(0x06, Register_Info.Method_Mask);

   /*
    * Parse the route exactly once: str2DRPath() overwrites the commas in
    * Path_Str, so a second parse of the same buffer would only see hop 0.
    */
   if (str2DRPath(Path_Str, &Path) < 0) {
      printf("Fehler: str2DRPath\n");
      return -1;
   }

// ---- Init phase ------------------------------
   printf("... Init Lib ...");
   if (umad_init() < 0) {
      printf("Fehler: umad_init\n");
      return -1;
   }
   printf("done\n\n");

   // ++ Debug ++
   umad_debug(0);

   printf("... Get CAs Names ...");
   ret = umad_get_cas_names(Devicename,2);
   /* Zero names and a negative error code both leave Devicename[0] unset. */
   if (ret <= 0) {
      printf("Fehler: umad_get_cas_names: %i\n",ret);
      return -1;
   }
   printf("done\n\n");
   for (i = 0; i < ret; i++)
      printf("Devicename: %s\n",Devicename[i]);

   // ++ Open ++
   printf("... Open Port ...");
   if ((Port_ID = umad_open_port(Devicename[0],Portnummer)) < 0) {
      printf("Fehler: umad_open_port: %i\n",Port_ID);
      return -1;
   }
   printf("done\n\n");

   // ++ Register ++
   printf("... Register User Mad ...");
#if 1
   /* Registration without a method mask. */
   Agent_ID = umad_register(Port_ID,Register_Info.Management_Class,
                            Register_Info.Management_Version,
                            Register_Info.RMPP_Version,
                            0);
#else
   /* Alternative: registration with the method mask built above. */
   Agent_ID = umad_register(Port_ID,Register_Info.Management_Class,
                            Register_Info.Management_Version,
                            Register_Info.RMPP_Version,
                            &(Register_Info.Method_Mask[0]));
#endif
   if (Agent_ID < 0) {
      printf("Fehler: umad_register : %i\n",Agent_ID);
      goto Exit;
   }
   printf("done\n\n");

// ---- Build and send the packets --------------
   length = IB_MAD_SIZE;

/* here */
   for (i = 0; i < 10; i++){
      /* Drop the buffer of the previous iteration; each send gets its own. */
      if (umad) {
         umad_free(umad);
         umad = NULL;
         smp = NULL;
      }

      printf("... Paket allokieren ...");
      if (!(umad = umad_alloc(1, umad_size() + IB_MAD_SIZE))){
         printf("Fehler: umad_alloc %p\n", umad);
         goto Exit;
      }
      printf("done\n\n");

      smp = umad_get_mad(umad);
      printf("... Smp Pointer ... done\n");

      printf("... SMP bauen ...");
      drsmp_get_init(umad,&Path,attribute,modifier);
      printf("... done ...\n\n");

      printf("... Send Mad ...");
      /* Timeout 0 (previously 200): no response is awaited for these sends. */
      if ((ret = umad_send(Port_ID, Agent_ID, umad, length, 0, 0)) < 0) {
         printf("Fehler: umad_send : %i\n",ret);
         send_errors++;
      }
      else printf("done\n\n");
   }

/*
 Disabled receive loop; it would reuse the buffer of the last send:

 for (i = 0; i < 10; i++){
   printf("... Recv Mad ...");
   if (umad_recv(Port_ID, umad, &length, timeout_ms) != Agent_ID)
        printf("Fehler umad_recv: %s\n", drmad_status_str(smp));
   else printf("done\n\n");
 }
*/

#if 0
   /* Disabled: these dump calls crashed with a seg fault during testing. */
   dump_dr_smp(smp);

   switch (attribute){
   case MAD_ATTR_NODE_INFO :
      dump_node_info((const struct node_info*)&(smp->data[0]));
      break;
   case MAD_ATTR_PORT_INFO :
      dump_port_info(0,0,0,(const struct port_info*)&(smp->data[0]));
      break;
   }
#endif

   status = send_errors ? -1 : 0;

// ---- Shutdown phase --------------------------
Exit:
   /* Release the buffer of the last (or the failed) iteration. */
   if (umad)
      umad_free(umad);

   printf("... Unregister User Mad ...");
   if (Agent_ID < 0)
      printf("nix zu tun\n\n");      /* registration failed, nothing to undo */
   else if (umad_unregister(Port_ID,Agent_ID) < 0)
      printf("Fehler: umad_unregister\n");
   else
      printf("done\n\n");

   printf("... Close Port ...");
   if (Port_ID != -1) {
      if (umad_close_port(Port_ID) != 0)
         printf("Fehler: umad_close_port\n");
      else
         printf("done\n\n");
   }
   else {
      printf("nix zu tun\n\n");
   }

   return status;
}

// ---- SMP Paket -------------------------------


/*
 * Build a directed-route Get SMP (method 0x01) in the MAD area of `umad`.
 *
 * umad  buffer obtained from umad_alloc(); its MAD area is zeroed first
 * path  directed route to copy into the packet; may be NULL, in which case
 *       the initial path and the hop count stay zero
 * attr  attribute id in host byte order
 * mod   attribute modifier in host byte order
 *
 * Consumes one transaction id from drmad_tid.
 */
void drsmp_get_init(void *umad, DRPath *path, uint16_t attr, int mod)
{
   struct drsmp *smp = (struct drsmp *)(umad_get_mad(umad));

   memset(smp, 0, sizeof (*smp));

   smp->base_version  = 1;
   smp->mgmt_class    = CLASS_SUBN_DIRECTED_ROUTE;
   smp->class_version = 1;

   smp->method        = 0x01;    /* GET */
   smp->attr_id       = (uint16_t)htons((uint16_t)attr);
   smp->attr_mod      = htonl(mod);
   smp->tid           = htonll(drmad_tid++);
   /* 0xffff reads the same in either byte order, so no conversion is needed */
   smp->dr_slid       = 0xffff;
   smp->dr_dlid       = 0xffff;

   umad_set_addr(umad, 0xffff, 0, 0, 0);

   /* Only touch *path when a route was supplied. */
   if (path) {
      memcpy(smp->initial_path, path->path, path->hop_cnt+1);
      smp->hop_cnt = path->hop_cnt;
   }
}

/*
 * Build a directed-route Set SMP (method 2) in the MAD area of `umad`.
 *
 * umad  buffer obtained from umad_alloc(); its MAD area is zeroed first
 * path  directed route to copy into the packet; may be NULL, in which case
 *       the initial path and the hop count stay zero
 * attr  attribute id in host byte order
 * mod   attribute modifier in host byte order
 * data  attribute payload; may be NULL, otherwise it must provide at least
 *       sizeof smp->data readable bytes
 *
 * Consumes one transaction id from drmad_tid.
 */
void drsmp_set_init(void *umad, DRPath *path, uint16_t attr, int mod, void *data)
{
   struct drsmp *smp = (struct drsmp *)(umad_get_mad(umad));

   memset(smp, 0, sizeof (*smp));

   /* Same header values as drsmp_get_init(); memset alone leaves them 0. */
   smp->base_version  = 1;
   smp->mgmt_class    = CLASS_SUBN_DIRECTED_ROUTE;
   smp->class_version = 1;

   smp->method        = 2;    /* SET */
   smp->attr_id       = (uint16_t)htons((uint16_t)attr);
   smp->attr_mod      = htonl(mod);
   smp->tid           = htonll(drmad_tid++);
   /* 0xffff reads the same in either byte order, so no conversion is needed */
   smp->dr_slid       = 0xffff;
   smp->dr_dlid       = 0xffff;

   umad_set_addr(umad, 0xffff, 0, 0, 0);

   /* Only touch *path when a route was supplied. */
   if (path) {
      memcpy(smp->initial_path, path->path, path->hop_cnt+1);
      smp->hop_cnt = path->hop_cnt;
   }

   if (data)
      memcpy(smp->data, data, sizeof smp->data);
}

/*
 * Parse a comma separated list of port numbers ("0,1,1,1") into `path`.
 *
 * The input string is modified: every comma found is replaced by a NUL
 * byte, so the same buffer cannot be parsed a second time.
 * Each field is converted with atoi(); no range or overflow checks are made
 * on the values or on the number of hops.
 *
 * Returns path->hop_cnt, i.e. the index of the last hop stored; it keeps its
 * initial value of -1 when str is NULL or empty.
 */
int str2DRPath(char *str, DRPath *path)
{
   char *token = str;
   char *comma;

   path->hop_cnt = -1;

   if (token == NULL)
      return path->hop_cnt;

   //DEBUG("DR str: %s", str);
   while (*token != '\0') {
      /* Cut the current field off at the next comma, if there is one. */
      comma = strchr(token, ',');
      if (comma != NULL)
         *comma = '\0';

      path->hop_cnt++;
      path->path[path->hop_cnt] = atoi(token);

      /* No comma means this was the last field. */
      if (comma == NULL)
         break;

      token = comma + 1;
   }

#if 0
   if (path->path[0] != 0 ||
      (path->hop_cnt > 0 && dev_port && path->path[1] != dev_port)) {
      DEBUG("hop 0 != 0 or hop 1 != dev_port");
      return -1;
   }
#endif

   return path->hop_cnt;
}










More information about the general mailing list